diff --git a/litecord/embed/messages.py b/litecord/embed/messages.py
index 30e5386..4fd39ef 100644
--- a/litecord/embed/messages.py
+++ b/litecord/embed/messages.py
@@ -19,66 +19,53 @@ along with this program. If not, see .
 
 import re
 import asyncio
+import urllib.parse
+from pathlib import Path
 
 from logbook import Logger
 
-from litecord.embed.sanitizer import proxify, fetch_metadata
+from litecord.embed.sanitizer import proxify, fetch_metadata, fetch_embed
 
 log = Logger(__name__)
 
 
-async def process_url_embed(config, storage, dispatcher,
-                            session, payload: dict, *, delay=0):
-    """Process URLs in a message and generate embeds based on that."""
-    await asyncio.sleep(delay)
+MEDIA_EXTENSIONS = (
+    'png',
+    'jpg', 'jpeg',
+    'gif', 'webm'
+)
+
+
+async def insert_media_meta(url, config, session):
+    """Insert media metadata as an embed."""
+    img_proxy_url = proxify(url, config=config)
+    meta = await fetch_metadata(url, config=config, session=session)
+
+    if meta is None:
+        return
+
+    if not meta['image']:
+        return
+
+    return {
+        'type': 'image',
+        'url': url,
+        'thumbnail': {
+            'width': meta['width'],
+            'height': meta['height'],
+            'url': url,
+            'proxy_url': img_proxy_url
+        }
+    }
+
+
+async def _update_and_dispatch(payload, new_embeds, storage, dispatcher):
+    """Update the message with the given embeds and dispatch a MESSAGE_UPDATE
+    to users."""
 
     message_id = int(payload['id'])
     channel_id = int(payload['channel_id'])
 
-    # if we already have embeds
-    # we shouldn't add our own.
-    embeds = payload['embeds']
-
-    if embeds:
-        log.debug('url processor: ignoring existing embeds @ mid {}',
-                  message_id)
-        return
-
-    # use regex to get URLs
-    urls = re.findall(r'(https?://\S+)', payload['content'])
-    urls = urls[:5]
-
-    new_embeds = []
-
-    # fetch metadata for each url
-    for url in urls:
-        img_proxy_url = proxify(url, config=config)
-        meta = await fetch_metadata(url, config=config, session=session)
-
-        if meta is None:
-            continue
-
-        if not meta['image']:
-            continue
-
-        new_embeds.append({
-            'type': 'image',
-            'url': url,
-            'thumbnail': {
-                'width': meta['width'],
-                'height': meta['height'],
-                'url': url,
-                'proxy_url': img_proxy_url
-            }
-        })
-
-    # update if we got embeds
-    if not new_embeds:
-        return
-
-    log.debug('made {} thumbnail embeds for mid {}',
-              len(new_embeds), message_id)
-
     await storage.execute_with_json("""
     UPDATE messages
     SET embeds = $1
@@ -96,3 +83,68 @@ async def process_url_embed(config, storage, dispatcher,
 
     await dispatcher.dispatch(
         'channel', channel_id, 'MESSAGE_UPDATE', update_payload)
+
+
+async def insert_mp_embed(parsed, config, session):
+    """Insert mediaproxy embed."""
+    embed = await fetch_embed(parsed, config=config, session=session)
+    return embed
+
+
+async def process_url_embed(config, storage, dispatcher,
+                            session, payload: dict, *, delay=0):
+    """Process URLs in a message and generate embeds based on that."""
+    await asyncio.sleep(delay)
+
+    message_id = int(payload['id'])
+
+    # if we already have embeds
+    # we shouldn't add our own.
+    embeds = payload['embeds']
+
+    if embeds:
+        log.debug('url processor: ignoring existing embeds @ mid {}',
+                  message_id)
+        return
+
+    # now, we have two types of embeds:
+    # - image embeds
+    # - url embeds
+
+    # use regex to get URLs
+    urls = re.findall(r'(https?://\S+)', payload['content'])
+    urls = urls[:5]
+
+    # from there, we need to parse each found url and check its path.
+    # if it ends with png/jpg/gif/some other extension, we treat it as
+    # media metadata to fetch.
+
+    # if it isn't, we forward an /embed/ scope call to mediaproxy
+    # to generate an embed for us out of the url.
+
+    new_embeds = []
+
+    for url in urls:
+        parsed = urllib.parse.urlparse(url)
+        path = Path(parsed.path)
+        extension = path.name.split('.')[-1]
+
+        if extension in MEDIA_EXTENSIONS:
+            embed = await insert_media_meta(url, config, session)
+        else:
+            embed = await insert_mp_embed(parsed, config, session)
+
+        if not embed:
+            continue
+
+        # keep the embed, else new_embeds stays empty and nothing is sent
+        new_embeds.append(embed)
+
+    # update if we got embeds
+    if not new_embeds:
+        return
+
+    log.debug('made {} thumbnail embeds for mid {}',
+              len(new_embeds), message_id)
+
+    await _update_and_dispatch(payload, new_embeds, storage, dispatcher)
diff --git a/litecord/embed/sanitizer.py b/litecord/embed/sanitizer.py
index 6126eda..4b0b0b9 100644
--- a/litecord/embed/sanitizer.py
+++ b/litecord/embed/sanitizer.py
@@ -128,8 +128,35 @@ async def fetch_metadata(url, *, config=None, session=None) -> dict:
         return await resp.json()
 
 
+async def fetch_embed(parsed, *, config=None, session=None) -> dict:
+    """Fetch an embed"""
+
+    if session is None:
+        session = app.session
+
+    if config is None:
+        config = app.config
+
+    # TODO: handle query string
+    md_path = f'{parsed.scheme}/{parsed.netloc}{parsed.path}'
+
+    md_base_url = config['MEDIA_PROXY']
+    secure = 's' if config['IS_SSL'] else ''
+
+    request_url = f'http{secure}://{md_base_url}/embed/{md_path}'
+
+    async with session.get(request_url) as resp:
+        if resp.status != 200:
+            body = await resp.text()
+            log.warning('failed to embed {!r}, {} {!r}',
+                        parsed, resp.status, body)
+            return
+
+        return await resp.json()
+
+
 async def fill_embed(embed: Embed) -> Embed:
-    """Fill an embed with more information."""
+    """Fill an embed with more information, such as proxy URLs."""
     embed = sanitize_embed(embed)
 
     if path_exists(embed, 'footer.icon_url'):