diff --git a/litecord/blueprints/channel/messages.py b/litecord/blueprints/channel/messages.py
index 72aba0e..fd8a060 100644
--- a/litecord/blueprints/channel/messages.py
+++ b/litecord/blueprints/channel/messages.py
@@ -17,9 +17,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
 
 """
 
-import re
 import json
-import asyncio
 
 from PIL import Image
 from quart import Blueprint, request, current_app as app, jsonify
@@ -34,7 +32,8 @@ from litecord.snowflake import get_snowflake
 from litecord.schemas import validate, MESSAGE_CREATE
 from litecord.utils import pg_set_json
 
-from litecord.embed.sanitizer import fill_embed, proxify, fetch_metadata
+from litecord.embed.sanitizer import fill_embed
+from litecord.embed.messages import process_url_embed
 from litecord.blueprints.channel.dm_checks import dm_pre_check
 
 
@@ -249,77 +248,6 @@ async def _guild_text_mentions(payload: dict, guild_id: int,
     """, user_id, channel_id)
 
 
-async def process_url_embed(config, storage, dispatcher,
-                            session, payload: dict, *, delay=0):
-    """Process URLs in a message and generate embeds based on that."""
-    await asyncio.sleep(delay)
-
-    message_id = int(payload['id'])
-    channel_id = int(payload['channel_id'])
-
-    # if we already have embeds
-    # we shouldn't add our own.
-    embeds = payload['embeds']
-
-    if embeds:
-        log.debug('url processor: ignoring existing embeds @ mid {}',
-                  message_id)
-        return
-
-    # use regex to get URLs
-    urls = re.findall(r'(https?://\S+)', payload['content'])
-    urls = urls[:5]
-
-    new_embeds = []
-
-    # fetch metadata for each url
-    for url in urls:
-        img_proxy_url = proxify(url, config=config)
-        meta = await fetch_metadata(url, config=config, session=session)
-
-        if meta is None:
-            continue
-
-        if not meta['image']:
-            continue
-
-        new_embeds.append({
-            'type': 'image',
-            'url': url,
-            'thumbnail': {
-                'width': meta['width'],
-                'height': meta['height'],
-                'url': url,
-                'proxy_url': img_proxy_url
-            }
-        })
-
-    # update if we got embeds
-    if not new_embeds:
-        return
-
-    log.debug('made {} thumbnail embeds for mid {}',
-              len(new_embeds), message_id)
-
-    await storage.execute_with_json("""
-        UPDATE messages
-        SET embeds = $1
-        WHERE messages.id = $2
-    """, new_embeds, message_id)
-
-    update_payload = {
-        'id': str(message_id),
-        'channel_id': str(channel_id),
-        'embeds': new_embeds,
-    }
-
-    if 'guild_id' in payload:
-        update_payload['guild_id'] = payload['guild_id']
-
-    await dispatcher.dispatch(
-        'channel', channel_id, 'MESSAGE_UPDATE', update_payload)
-
-
 async def _msg_input() -> tuple:
     """Extract the json input and any file information
     the client gave to us in the request.
@@ -542,7 +470,7 @@ async def edit_message(channel_id, message_id):
     # if there weren't any embed changes BUT
     # we had a content change, we dispatch process_url_embed but with
     # an artificial delay.
-    
+
     # the artificial delay keeps consistency between the events, since
     # it makes more sense for the MESSAGE_UPDATE with new content to come
     # BEFORE the MESSAGE_UPDATE with the new embeds (based on content)
diff --git a/litecord/embed/messages.py b/litecord/embed/messages.py
new file mode 100644
index 0000000..b3e4c41
--- /dev/null
+++ b/litecord/embed/messages.py
@@ -0,0 +1,166 @@
+"""
+
+Litecord
+Copyright (C) 2018-2019 Luna Mendes
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, version 3 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+
+import re
+import asyncio
+import urllib.parse
+from pathlib import Path
+
+from logbook import Logger
+
+from litecord.embed.sanitizer import proxify, fetch_metadata, fetch_embed
+
+log = Logger(__name__)
+
+
+# lowercase file extensions, without the dot. urls ending in one of them get
+# an embed made out of their media metadata instead of an /embed/ scope call.
+MEDIA_EXTENSIONS = (
+    'png',
+    'jpg', 'jpeg',
+    'gif', 'webm'
+)
+
+
+async def insert_media_meta(url, config, session):
+    """Make an image embed out of the metadata of a media url.
+
+    Returns None when fetch_metadata gives None, or when the
+    metadata's image field is falsy.
+    """
+    img_proxy_url = proxify(url, config=config)
+    meta = await fetch_metadata(url, config=config, session=session)
+
+    if meta is None or not meta['image']:
+        return None
+
+    return {
+        'type': 'image',
+        'url': url,
+        'thumbnail': {
+            'width': meta['width'],
+            'height': meta['height'],
+            'url': url,
+            'proxy_url': img_proxy_url
+        }
+    }
+
+
+async def _update_and_dispatch(payload, new_embeds, storage, dispatcher):
+    """Update the message with the given embeds and dispatch a MESSAGE_UPDATE
+    to users.
+
+    The id, channel_id and (when present) guild_id are read from
+    payload.
+    """
+
+    message_id = int(payload['id'])
+    channel_id = int(payload['channel_id'])
+
+    await storage.execute_with_json("""
+        UPDATE messages
+        SET embeds = $1
+        WHERE messages.id = $2
+    """, new_embeds, message_id)
+
+    update_payload = {
+        'id': str(message_id),
+        'channel_id': str(channel_id),
+        'embeds': new_embeds,
+    }
+
+    if 'guild_id' in payload:
+        update_payload['guild_id'] = payload['guild_id']
+
+    await dispatcher.dispatch(
+        'channel', channel_id, 'MESSAGE_UPDATE', update_payload)
+
+
+async def insert_mp_embed(parsed, config, session):
+    """Make an embed out of a parsed url via an /embed/ scope call.
+
+    Returns whatever fetch_embed returns for the url.
+    """
+    return await fetch_embed(parsed, config=config, session=session)
+
+
+async def process_url_embed(config, storage, dispatcher,
+                            session, payload: dict, *, delay=0):
+    """Process URLs in a message and generate embeds based on that.
+
+    Waits for delay seconds before doing any work. Only the first 5 URLs
+    in the message content are considered, and nothing is done when the
+    message already has embeds.
+    """
+    await asyncio.sleep(delay)
+
+    message_id = int(payload['id'])
+
+    # if we already have embeds
+    # we shouldn't add our own.
+    embeds = payload['embeds']
+
+    if embeds:
+        log.debug('url processor: ignoring existing embeds @ mid {}',
+                  message_id)
+        return
+
+    # now, we have two types of embeds:
+    # - image embeds
+    # - url embeds
+
+    # use regex to get URLs
+    urls = re.findall(r'(https?://\S+)', payload['content'])
+    urls = urls[:5]
+
+    # from there, we need to parse each found url and check its path.
+    # if it ends with png/jpg/gif/some other extension, we treat it as
+    # media metadata to fetch.
+
+    # if it isn't, we forward an /embed/ scope call to mediaproxy
+    # to generate an embed for us out of the url.
+
+    new_embeds = []
+
+    for url in urls:
+        parsed = urllib.parse.urlparse(url)
+
+        # Path.suffix is empty when the file name has no extension, so a
+        # path like /png isn't taken as media. lowercase it so that
+        # uppercase extensions (PHOTO.JPG) are still found.
+        extension = Path(parsed.path).suffix.lstrip('.').lower()
+
+        if extension in MEDIA_EXTENSIONS:
+            embed = await insert_media_meta(url, config, session)
+        else:
+            embed = await insert_mp_embed(parsed, config, session)
+
+        if not embed:
+            continue
+
+        new_embeds.append(embed)
+
+    # update if we got embeds
+    if not new_embeds:
+        return
+
+    log.debug('made {} embeds for mid {}',
+              len(new_embeds), message_id)
+
+    await _update_and_dispatch(payload, new_embeds, storage, dispatcher)
diff --git a/litecord/embed/sanitizer.py b/litecord/embed/sanitizer.py
index 6126eda..4b0b0b9 100644
--- a/litecord/embed/sanitizer.py
+++ b/litecord/embed/sanitizer.py
@@ -128,8 +128,47 @@ async def fetch_metadata(url, *, config=None, session=None) -> dict:
         return await resp.json()
 
 
+async def fetch_embed(parsed, *, config=None, session=None) -> dict:
+    """Fetch an embed for a URL through the /embed/ scope of MEDIA_PROXY.
+
+    ``parsed`` is the urllib.parse.urlparse() result for the URL.
+    ``config`` and ``session`` default to the ones in the current app.
+
+    Returns the decoded JSON body of the response, or None when the
+    response status is anything other than 200.
+    """
+
+    if session is None:
+        session = app.session
+
+    if config is None:
+        config = app.config
+
+    md_path = f'{parsed.scheme}/{parsed.netloc}{parsed.path}'
+
+    # forward the query string as well. many pages are only identified
+    # by it (video ids, search terms), so dropping it would ask for
+    # the embed of a different page.
+    if parsed.query:
+        md_path = f'{md_path}?{parsed.query}'
+
+    md_base_url = config['MEDIA_PROXY']
+    secure = 's' if config['IS_SSL'] else ''
+
+    request_url = f'http{secure}://{md_base_url}/embed/{md_path}'
+
+    async with session.get(request_url) as resp:
+        if resp.status != 200:
+            body = await resp.text()
+            log.warning('failed to embed {!r}, {} {!r}',
+                        parsed, resp.status, body)
+            return None
+
+        return await resp.json()
+
+
 async def fill_embed(embed: Embed) -> Embed:
-    """Fill an embed with more information."""
+    """Fill an embed with more information, such as proxy URLs."""
     embed = sanitize_embed(embed)
 
     if path_exists(embed, 'footer.icon_url'):