From f4f4a64e9f6766a34f61ebf905cf726169f7e0b3 Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 22 Feb 2019 18:57:10 -0300 Subject: [PATCH 1/3] split process_url_embed into litecord.embed.messages --- litecord/blueprints/channel/messages.py | 78 +------------------- litecord/embed/messages.py | 98 +++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 75 deletions(-) create mode 100644 litecord/embed/messages.py diff --git a/litecord/blueprints/channel/messages.py b/litecord/blueprints/channel/messages.py index 72aba0e..fd8a060 100644 --- a/litecord/blueprints/channel/messages.py +++ b/litecord/blueprints/channel/messages.py @@ -17,9 +17,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. """ -import re import json -import asyncio from PIL import Image from quart import Blueprint, request, current_app as app, jsonify @@ -34,7 +32,8 @@ from litecord.snowflake import get_snowflake from litecord.schemas import validate, MESSAGE_CREATE from litecord.utils import pg_set_json -from litecord.embed.sanitizer import fill_embed, proxify, fetch_metadata +from litecord.embed.sanitizer import fill_embed +from litecord.embed.messages import process_url_embed from litecord.blueprints.channel.dm_checks import dm_pre_check @@ -249,77 +248,6 @@ async def _guild_text_mentions(payload: dict, guild_id: int, """, user_id, channel_id) -async def process_url_embed(config, storage, dispatcher, - session, payload: dict, *, delay=0): - """Process URLs in a message and generate embeds based on that.""" - await asyncio.sleep(delay) - - message_id = int(payload['id']) - channel_id = int(payload['channel_id']) - - # if we already have embeds - # we shouldn't add our own. 
- embeds = payload['embeds'] - - if embeds: - log.debug('url processor: ignoring existing embeds @ mid {}', - message_id) - return - - # use regex to get URLs - urls = re.findall(r'(https?://\S+)', payload['content']) - urls = urls[:5] - - new_embeds = [] - - # fetch metadata for each url - for url in urls: - img_proxy_url = proxify(url, config=config) - meta = await fetch_metadata(url, config=config, session=session) - - if meta is None: - continue - - if not meta['image']: - continue - - new_embeds.append({ - 'type': 'image', - 'url': url, - 'thumbnail': { - 'width': meta['width'], - 'height': meta['height'], - 'url': url, - 'proxy_url': img_proxy_url - } - }) - - # update if we got embeds - if not new_embeds: - return - - log.debug('made {} thumbnail embeds for mid {}', - len(new_embeds), message_id) - - await storage.execute_with_json(""" - UPDATE messages - SET embeds = $1 - WHERE messages.id = $2 - """, new_embeds, message_id) - - update_payload = { - 'id': str(message_id), - 'channel_id': str(channel_id), - 'embeds': new_embeds, - } - - if 'guild_id' in payload: - update_payload['guild_id'] = payload['guild_id'] - - await dispatcher.dispatch( - 'channel', channel_id, 'MESSAGE_UPDATE', update_payload) - - async def _msg_input() -> tuple: """Extract the json input and any file information the client gave to us in the request. @@ -542,7 +470,7 @@ async def edit_message(channel_id, message_id): # if there weren't any embed changes BUT # we had a content change, we dispatch process_url_embed but with # an artificial delay. 
- + # the artificial delay keeps consistency between the events, since # it makes more sense for the MESSAGE_UPDATE with new content to come # BEFORE the MESSAGE_UPDATE with the new embeds (based on content) diff --git a/litecord/embed/messages.py b/litecord/embed/messages.py new file mode 100644 index 0000000..30e5386 --- /dev/null +++ b/litecord/embed/messages.py @@ -0,0 +1,98 @@ +""" + +Litecord +Copyright (C) 2018-2019 Luna Mendes + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. + +""" + +import re +import asyncio + +from logbook import Logger + +from litecord.embed.sanitizer import proxify, fetch_metadata + +log = Logger(__name__) + + +async def process_url_embed(config, storage, dispatcher, + session, payload: dict, *, delay=0): + """Process URLs in a message and generate embeds based on that.""" + await asyncio.sleep(delay) + + message_id = int(payload['id']) + channel_id = int(payload['channel_id']) + + # if we already have embeds + # we shouldn't add our own. 
+ embeds = payload['embeds'] + + if embeds: + log.debug('url processor: ignoring existing embeds @ mid {}', + message_id) + return + + # use regex to get URLs + urls = re.findall(r'(https?://\S+)', payload['content']) + urls = urls[:5] + + new_embeds = [] + + # fetch metadata for each url + for url in urls: + img_proxy_url = proxify(url, config=config) + meta = await fetch_metadata(url, config=config, session=session) + + if meta is None: + continue + + if not meta['image']: + continue + + new_embeds.append({ + 'type': 'image', + 'url': url, + 'thumbnail': { + 'width': meta['width'], + 'height': meta['height'], + 'url': url, + 'proxy_url': img_proxy_url + } + }) + + # update if we got embeds + if not new_embeds: + return + + log.debug('made {} thumbnail embeds for mid {}', + len(new_embeds), message_id) + + await storage.execute_with_json(""" + UPDATE messages + SET embeds = $1 + WHERE messages.id = $2 + """, new_embeds, message_id) + + update_payload = { + 'id': str(message_id), + 'channel_id': str(channel_id), + 'embeds': new_embeds, + } + + if 'guild_id' in payload: + update_payload['guild_id'] = payload['guild_id'] + + await dispatcher.dispatch( + 'channel', channel_id, 'MESSAGE_UPDATE', update_payload) From 428502d373d655fcc1a75a1bb5b3e22d0f58471d Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 22 Feb 2019 19:59:35 -0300 Subject: [PATCH 2/3] add (untested) impl for url embeds this happens on process_url_embed, so there isn't a need for other code to worry about it. - litecord.embed.sanitizer: add fetch_embed --- litecord/embed/messages.py | 147 ++++++++++++++++++++++++------------ litecord/embed/sanitizer.py | 29 ++++++- 2 files changed, 126 insertions(+), 50 deletions(-) diff --git a/litecord/embed/messages.py b/litecord/embed/messages.py index 30e5386..4fd39ef 100644 --- a/litecord/embed/messages.py +++ b/litecord/embed/messages.py @@ -19,66 +19,53 @@ along with this program. If not, see . 
import re import asyncio +import urllib.parse +from pathlib import Path from logbook import Logger -from litecord.embed.sanitizer import proxify, fetch_metadata +from litecord.embed.sanitizer import proxify, fetch_metadata, fetch_embed log = Logger(__name__) -async def process_url_embed(config, storage, dispatcher, - session, payload: dict, *, delay=0): - """Process URLs in a message and generate embeds based on that.""" - await asyncio.sleep(delay) +MEDIA_EXTENSIONS = ( + 'png', + 'jpg', 'jpeg', + 'gif', 'webm' +) + + +async def insert_media_meta(url, config, session): + """Insert media metadata as an embed.""" + img_proxy_url = proxify(url, config=config) + meta = await fetch_metadata(url, config=config, session=session) + + if meta is None: + return + + if not meta['image']: + return + + return { + 'type': 'image', + 'url': url, + 'thumbnail': { + 'width': meta['width'], + 'height': meta['height'], + 'url': url, + 'proxy_url': img_proxy_url + } + } + + +async def _update_and_dispatch(payload, new_embeds, storage, dispatcher): + """Update the message with the given embeds and dispatch a MESSAGE_UPDATE + to users.""" message_id = int(payload['id']) channel_id = int(payload['channel_id']) - # if we already have embeds - # we shouldn't add our own. 
- embeds = payload['embeds'] - - if embeds: - log.debug('url processor: ignoring existing embeds @ mid {}', - message_id) - return - - # use regex to get URLs - urls = re.findall(r'(https?://\S+)', payload['content']) - urls = urls[:5] - - new_embeds = [] - - # fetch metadata for each url - for url in urls: - img_proxy_url = proxify(url, config=config) - meta = await fetch_metadata(url, config=config, session=session) - - if meta is None: - continue - - if not meta['image']: - continue - - new_embeds.append({ - 'type': 'image', - 'url': url, - 'thumbnail': { - 'width': meta['width'], - 'height': meta['height'], - 'url': url, - 'proxy_url': img_proxy_url - } - }) - - # update if we got embeds - if not new_embeds: - return - - log.debug('made {} thumbnail embeds for mid {}', - len(new_embeds), message_id) - await storage.execute_with_json(""" UPDATE messages SET embeds = $1 @@ -96,3 +83,65 @@ async def process_url_embed(config, storage, dispatcher, await dispatcher.dispatch( 'channel', channel_id, 'MESSAGE_UPDATE', update_payload) + + +async def insert_mp_embed(parsed, config, session): + """Insert mediaproxy embed.""" + embed = await fetch_embed(parsed, config=config, session=session) + return embed + + +async def process_url_embed(config, storage, dispatcher, + session, payload: dict, *, delay=0): + """Process URLs in a message and generate embeds based on that.""" + await asyncio.sleep(delay) + + message_id = int(payload['id']) + + # if we already have embeds + # we shouldn't add our own. + embeds = payload['embeds'] + + if embeds: + log.debug('url processor: ignoring existing embeds @ mid {}', + message_id) + return + + # now, we have two types of embeds: + # - image embeds + # - url embeds + + # use regex to get URLs + urls = re.findall(r'(https?://\S+)', payload['content']) + urls = urls[:5] + + # from there, we need to parse each found url and check its path. + # if it ends with png/jpg/gif/some other extension, we treat it as + # media metadata to fetch. 
+ + # if it isn't, we forward an /embed/ scope call to mediaproxy + # to generate an embed for us out of the url. + + new_embeds = [] + + for url in urls: + parsed = urllib.parse.urlparse(url) + path = Path(parsed.path) + extension = path.name.split('.')[-1] + + if extension in MEDIA_EXTENSIONS: + embed = await insert_media_meta(url, config, session) + else: + embed = await insert_mp_embed(parsed, config, session) + + if not embed: + continue + + # update if we got embeds + if not new_embeds: + return + + log.debug('made {} thumbnail embeds for mid {}', + len(new_embeds), message_id) + + await _update_and_dispatch(payload, new_embeds, storage, dispatcher) diff --git a/litecord/embed/sanitizer.py b/litecord/embed/sanitizer.py index 6126eda..4b0b0b9 100644 --- a/litecord/embed/sanitizer.py +++ b/litecord/embed/sanitizer.py @@ -128,8 +128,35 @@ async def fetch_metadata(url, *, config=None, session=None) -> dict: return await resp.json() +async def fetch_embed(parsed, *, config=None, session=None) -> dict: + """Fetch an embed""" + + if session is None: + session = app.session + + if config is None: + config = app.config + + # TODO: handle query string + md_path = f'{parsed.scheme}/{parsed.netloc}{parsed.path}' + + md_base_url = config['MEDIA_PROXY'] + secure = 's' if config['IS_SSL'] else '' + + request_url = f'http{secure}://{md_base_url}/embed/{md_path}' + + async with session.get(request_url) as resp: + if resp.status != 200: + body = await resp.text() + log.warning('failed to embed {!r}, {} {!r}', + parsed, resp.status, body) + return + + return await resp.json() + + async def fill_embed(embed: Embed) -> Embed: - """Fill an embed with more information.""" + """Fill an embed with more information, such as proxy URLs.""" embed = sanitize_embed(embed) if path_exists(embed, 'footer.icon_url'): From 573da4fe6bd416efb6fe0f1555c696fed12ff2c0 Mon Sep 17 00:00:00 2001 From: Luna Date: Fri, 22 Feb 2019 20:09:50 -0300 Subject: [PATCH 3/3] litecord.embed.messages: insert 
embeds to new_embeds --- litecord/embed/messages.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litecord/embed/messages.py b/litecord/embed/messages.py index 4fd39ef..b3e4c41 100644 --- a/litecord/embed/messages.py +++ b/litecord/embed/messages.py @@ -137,6 +137,8 @@ async def process_url_embed(config, storage, dispatcher, if not embed: continue + new_embeds.append(embed) + # update if we got embeds if not new_embeds: return