Merge branch 'message-mediaproxy-embeds' into 'master'

mediaproxy embed support. Closes #27. See merge request litecord/litecord!19
2019-02-22 23:18:03 +00:00 · 2019-02-22 23:18:03 +00:00 · c225c17400
parent 73b1a96e40 573da4fe6b
commit c225c17400
3 changed files with 180 additions and 76 deletions
--- a/litecord/blueprints/channel/messages.py
+++ b/litecord/blueprints/channel/messages.py
@ -17,9 +17,7 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.

 """

-import re
 import json
-import asyncio

 from PIL import Image
 from quart import Blueprint, request, current_app as app, jsonify
@ -34,7 +32,8 @@ from litecord.snowflake import get_snowflake
 from litecord.schemas import validate, MESSAGE_CREATE
 from litecord.utils import pg_set_json

-from litecord.embed.sanitizer import fill_embed, proxify, fetch_metadata
+from litecord.embed.sanitizer import fill_embed
+from litecord.embed.messages import process_url_embed
 from litecord.blueprints.channel.dm_checks import dm_pre_check


@ -249,77 +248,6 @@ async def _guild_text_mentions(payload: dict, guild_id: int,
        """, user_id, channel_id)


-async def process_url_embed(config, storage, dispatcher,
-                            session, payload: dict, *, delay=0):
-    """Process URLs in a message and generate embeds based on that."""
-    await asyncio.sleep(delay)
-
-    message_id = int(payload['id'])
-    channel_id = int(payload['channel_id'])
-
-    # if we already have embeds
-    # we shouldn't add our own.
-    embeds = payload['embeds']
-
-    if embeds:
-        log.debug('url processor: ignoring existing embeds @ mid {}',
-                  message_id)
-        return
-
-    # use regex to get URLs
-    urls = re.findall(r'(https?://\S+)', payload['content'])
-    urls = urls[:5]
-
-    new_embeds = []
-
-    # fetch metadata for each url
-    for url in urls:
-        img_proxy_url = proxify(url, config=config)
-        meta = await fetch_metadata(url, config=config, session=session)
-
-        if meta is None:
-            continue
-
-        if not meta['image']:
-            continue
-
-        new_embeds.append({
-            'type': 'image',
-            'url': url,
-            'thumbnail': {
-                'width': meta['width'],
-                'height': meta['height'],
-                'url': url,
-                'proxy_url': img_proxy_url
-            }
-        })
-
-    # update if we got embeds
-    if not new_embeds:
-        return
-
-    log.debug('made {} thumbnail embeds for mid {}',
-              len(new_embeds), message_id)
-
-    await storage.execute_with_json("""
-    UPDATE messages
-    SET embeds = $1
-    WHERE messages.id = $2
-    """, new_embeds, message_id)
-
-    update_payload = {
-        'id': str(message_id),
-        'channel_id': str(channel_id),
-        'embeds': new_embeds,
-    }
-
-    if 'guild_id' in payload:
-        update_payload['guild_id'] = payload['guild_id']
-
-    await dispatcher.dispatch(
-        'channel', channel_id, 'MESSAGE_UPDATE', update_payload)
-
-
 async def _msg_input() -> tuple:
    """Extract the json input and any file information
    the client gave to us in the request.
@ -542,7 +470,7 @@ async def edit_message(channel_id, message_id):
        # if there weren't any embed changes BUT
        # we had a content change, we dispatch process_url_embed but with
        # an artificial delay.
-        
+
        # the artificial delay keeps consistency between the events, since
        # it makes more sense for the MESSAGE_UPDATE with new content to come
        # BEFORE the MESSAGE_UPDATE with the new embeds (based on content)
--- a/litecord/embed/messages.py
+++ b/litecord/embed/messages.py
@ -0,0 +1,149 @@
+"""
+
+Litecord
+Copyright (C) 2018-2019  Luna Mendes
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, version 3 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""
+
+import re
+import asyncio
+import urllib.parse
+from pathlib import Path
+
+from logbook import Logger
+
+from litecord.embed.sanitizer import proxify, fetch_metadata, fetch_embed
+
+log = Logger(__name__)
+
+
+# Extensions (lowercase, no leading dot) of URL paths that
+# process_url_embed hands to insert_media_meta; URLs whose path ends in
+# anything else go through insert_mp_embed instead.
+MEDIA_EXTENSIONS = (
+    'png',
+    'jpg', 'jpeg',
+    'gif', 'webm'
+)
+
+
+async def insert_media_meta(url, config, session):
+    """Build an image embed for a media URL.
+
+    Asks fetch_metadata about the URL and, when the answer describes an
+    image, returns an embed dict of type 'image' whose thumbnail carries
+    the reported width and height, the original URL and its proxified
+    counterpart.
+
+    Returns None when no metadata came back or when the metadata's
+    'image' field is falsy.
+    """
+    # proxify() is called before the metadata fetch.
+    proxied = proxify(url, config=config)
+    metadata = await fetch_metadata(url, config=config, session=session)
+
+    if metadata is None or not metadata['image']:
+        return None
+
+    thumbnail = {
+        'width': metadata['width'],
+        'height': metadata['height'],
+        'url': url,
+        'proxy_url': proxied,
+    }
+
+    return {'type': 'image', 'url': url, 'thumbnail': thumbnail}
+
+
+async def _update_and_dispatch(payload, new_embeds, storage, dispatcher):
+    """Persist new embeds for a message and announce the change.
+
+    Stores new_embeds on the message row through
+    storage.execute_with_json, then dispatches a MESSAGE_UPDATE on the
+    message's channel carrying the message id, the channel id and the
+    embeds (plus guild_id when the payload has one).
+    """
+    mid = int(payload['id'])
+    cid = int(payload['channel_id'])
+
+    update_query = """
+    UPDATE messages
+    SET embeds = $1
+    WHERE messages.id = $2
+    """
+    await storage.execute_with_json(update_query, new_embeds, mid)
+
+    # ids go out as strings in the event body
+    event = {
+        'id': str(mid),
+        'channel_id': str(cid),
+        'embeds': new_embeds,
+    }
+
+    # guild_id is only forwarded when the source payload carries it
+    if 'guild_id' in payload:
+        event['guild_id'] = payload['guild_id']
+
+    await dispatcher.dispatch('channel', cid, 'MESSAGE_UPDATE', event)
+
+
+async def insert_mp_embed(parsed, config, session):
+    """Return whatever fetch_embed produces for the parsed URL.
+
+    The result is passed through untouched, so it is None whenever
+    fetch_embed returns None.
+    """
+    return await fetch_embed(parsed, config=config, session=session)
+
+
+async def process_url_embed(config, storage, dispatcher,
+                            session, payload: dict, *, delay=0):
+    """Process URLs in a message and generate embeds based on that.
+
+    Sleeps for ``delay`` (handed straight to asyncio.sleep), then scans
+    payload['content'] for http(s) URLs and builds one embed per URL,
+    looking at the first five URLs only. Messages that already carry
+    embeds are left alone.
+
+    A URL whose path ends in one of MEDIA_EXTENSIONS (compared
+    case-insensitively) goes through insert_media_meta; every other URL
+    goes through insert_mp_embed. When at least one embed was produced,
+    the message is updated and a MESSAGE_UPDATE is dispatched via
+    _update_and_dispatch.
+
+    Returns None in every case.
+    """
+    await asyncio.sleep(delay)
+
+    message_id = int(payload['id'])
+
+    # if we already have embeds
+    # we shouldn't add our own.
+    if payload['embeds']:
+        log.debug('url processor: ignoring existing embeds @ mid {}',
+                  message_id)
+        return
+
+    # now, we have two types of embeds:
+    # - image embeds
+    # - url embeds
+
+    # use regex to get URLs
+    urls = re.findall(r'(https?://\S+)', payload['content'])[:5]
+
+    new_embeds = []
+
+    for url in urls:
+        parsed = urllib.parse.urlparse(url)
+
+        # Only text after a real dot counts as an extension, so a path
+        # like /png (no dot at all) is not mistaken for an image.
+        # Lowercasing lets .PNG / .JPG match MEDIA_EXTENSIONS as well.
+        _, dot, tail = Path(parsed.path).name.rpartition('.')
+        extension = tail.lower() if dot else ''
+
+        if extension in MEDIA_EXTENSIONS:
+            # media file: ask for its metadata and make an image embed
+            embed = await insert_media_meta(url, config, session)
+        else:
+            # anything else: forward an /embed/ scope call to mediaproxy
+            # to generate an embed for us out of the url.
+            embed = await insert_mp_embed(parsed, config, session)
+
+        if embed:
+            new_embeds.append(embed)
+
+    # update if we got embeds
+    if not new_embeds:
+        return
+
+    log.debug('made {} thumbnail embeds for mid {}',
+              len(new_embeds), message_id)
+
+    await _update_and_dispatch(payload, new_embeds, storage, dispatcher)
--- a/litecord/embed/sanitizer.py
+++ b/litecord/embed/sanitizer.py
@ -128,8 +128,35 @@ async def fetch_metadata(url, *, config=None, session=None) -> dict:
        return await resp.json()


+async def fetch_embed(parsed, *, config=None, session=None) -> dict:
+    """Request an embed for a parsed URL from mediaproxy's /embed/ scope.
+
+    ``parsed`` must expose ``scheme``, ``netloc`` and ``path``; the query
+    string is not forwarded. ``config`` and ``session`` fall back to
+    ``app.config`` and ``app.session`` when left as None.
+
+    Returns the decoded JSON body on a 200 response. Any other status is
+    logged as a warning and None is returned.
+    """
+    session = app.session if session is None else session
+    config = app.config if config is None else config
+
+    # TODO: handle query string
+    md_path = f'{parsed.scheme}/{parsed.netloc}{parsed.path}'
+
+    md_base_url = config['MEDIA_PROXY']
+    secure = 's' if config['IS_SSL'] else ''
+    request_url = f'http{secure}://{md_base_url}/embed/{md_path}'
+
+    async with session.get(request_url) as resp:
+        if resp.status == 200:
+            return await resp.json()
+
+        # anything but 200: keep the response body for the log line
+        body = await resp.text()
+        log.warning('failed to embed {!r}, {} {!r}',
+                    parsed, resp.status, body)
+        return None
+
+
 async def fill_embed(embed: Embed) -> Embed:
-    """Fill an embed with more information."""
+    """Fill an embed with more information, such as proxy URLs."""
    embed = sanitize_embed(embed)

    if path_exists(embed, 'footer.icon_url'):