From f4f4a64e9f6766a34f61ebf905cf726169f7e0b3 Mon Sep 17 00:00:00 2001
From: Luna <git@l4.pm>
Date: Fri, 22 Feb 2019 18:57:10 -0300
Subject: [PATCH 1/3] split process_url_embed into litecord.embed.messages

---
 litecord/blueprints/channel/messages.py | 78 +-------------------
 litecord/embed/messages.py              | 98 +++++++++++++++++++++++++
 2 files changed, 101 insertions(+), 75 deletions(-)
 create mode 100644 litecord/embed/messages.py

diff --git a/litecord/blueprints/channel/messages.py b/litecord/blueprints/channel/messages.py
index 72aba0e..fd8a060 100644
--- a/litecord/blueprints/channel/messages.py
+++ b/litecord/blueprints/channel/messages.py
@@ -17,9 +17,7 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 """
 
-import re
 import json
-import asyncio
 
 from PIL import Image
 from quart import Blueprint, request, current_app as app, jsonify
@@ -34,7 +32,8 @@ from litecord.snowflake import get_snowflake
 from litecord.schemas import validate, MESSAGE_CREATE
 from litecord.utils import pg_set_json
 
-from litecord.embed.sanitizer import fill_embed, proxify, fetch_metadata
+from litecord.embed.sanitizer import fill_embed
+from litecord.embed.messages import process_url_embed
 from litecord.blueprints.channel.dm_checks import dm_pre_check
 
 
@@ -249,77 +248,6 @@ async def _guild_text_mentions(payload: dict, guild_id: int,
         """, user_id, channel_id)
 
 
-async def process_url_embed(config, storage, dispatcher,
-                            session, payload: dict, *, delay=0):
-    """Process URLs in a message and generate embeds based on that."""
-    await asyncio.sleep(delay)
-
-    message_id = int(payload['id'])
-    channel_id = int(payload['channel_id'])
-
-    # if we already have embeds
-    # we shouldn't add our own.
-    embeds = payload['embeds']
-
-    if embeds:
-        log.debug('url processor: ignoring existing embeds @ mid {}',
-                  message_id)
-        return
-
-    # use regex to get URLs
-    urls = re.findall(r'(https?://\S+)', payload['content'])
-    urls = urls[:5]
-
-    new_embeds = []
-
-    # fetch metadata for each url
-    for url in urls:
-        img_proxy_url = proxify(url, config=config)
-        meta = await fetch_metadata(url, config=config, session=session)
-
-        if meta is None:
-            continue
-
-        if not meta['image']:
-            continue
-
-        new_embeds.append({
-            'type': 'image',
-            'url': url,
-            'thumbnail': {
-                'width': meta['width'],
-                'height': meta['height'],
-                'url': url,
-                'proxy_url': img_proxy_url
-            }
-        })
-
-    # update if we got embeds
-    if not new_embeds:
-        return
-
-    log.debug('made {} thumbnail embeds for mid {}',
-              len(new_embeds), message_id)
-
-    await storage.execute_with_json("""
-    UPDATE messages
-    SET embeds = $1
-    WHERE messages.id = $2
-    """, new_embeds, message_id)
-
-    update_payload = {
-        'id': str(message_id),
-        'channel_id': str(channel_id),
-        'embeds': new_embeds,
-    }
-
-    if 'guild_id' in payload:
-        update_payload['guild_id'] = payload['guild_id']
-
-    await dispatcher.dispatch(
-        'channel', channel_id, 'MESSAGE_UPDATE', update_payload)
-
-
 async def _msg_input() -> tuple:
     """Extract the json input and any file information
     the client gave to us in the request.
@@ -542,7 +470,7 @@ async def edit_message(channel_id, message_id):
         # if there weren't any embed changes BUT
         # we had a content change, we dispatch process_url_embed but with
         # an artificial delay.
-        
+
         # the artificial delay keeps consistency between the events, since
         # it makes more sense for the MESSAGE_UPDATE with new content to come
         # BEFORE the MESSAGE_UPDATE with the new embeds (based on content)
diff --git a/litecord/embed/messages.py b/litecord/embed/messages.py
new file mode 100644
index 0000000..30e5386
--- /dev/null
+++ b/litecord/embed/messages.py
@@ -0,0 +1,98 @@
+"""
+
+Litecord
+Copyright (C) 2018-2019  Luna Mendes
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, version 3 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""
+
+import re
+import asyncio
+
+from logbook import Logger
+
+from litecord.embed.sanitizer import proxify, fetch_metadata
+
+log = Logger(__name__)
+
+
+async def process_url_embed(config, storage, dispatcher,
+                            session, payload: dict, *, delay=0):
+    """Process URLs in a message and generate embeds based on that."""
+    await asyncio.sleep(delay)
+
+    message_id = int(payload['id'])
+    channel_id = int(payload['channel_id'])
+
+    # if we already have embeds
+    # we shouldn't add our own.
+    embeds = payload['embeds']
+
+    if embeds:
+        log.debug('url processor: ignoring existing embeds @ mid {}',
+                  message_id)
+        return
+
+    # use regex to get URLs
+    urls = re.findall(r'(https?://\S+)', payload['content'])
+    urls = urls[:5]
+
+    new_embeds = []
+
+    # fetch metadata for each url
+    for url in urls:
+        img_proxy_url = proxify(url, config=config)
+        meta = await fetch_metadata(url, config=config, session=session)
+
+        if meta is None:
+            continue
+
+        if not meta['image']:
+            continue
+
+        new_embeds.append({
+            'type': 'image',
+            'url': url,
+            'thumbnail': {
+                'width': meta['width'],
+                'height': meta['height'],
+                'url': url,
+                'proxy_url': img_proxy_url
+            }
+        })
+
+    # update if we got embeds
+    if not new_embeds:
+        return
+
+    log.debug('made {} thumbnail embeds for mid {}',
+              len(new_embeds), message_id)
+
+    await storage.execute_with_json("""
+    UPDATE messages
+    SET embeds = $1
+    WHERE messages.id = $2
+    """, new_embeds, message_id)
+
+    update_payload = {
+        'id': str(message_id),
+        'channel_id': str(channel_id),
+        'embeds': new_embeds,
+    }
+
+    if 'guild_id' in payload:
+        update_payload['guild_id'] = payload['guild_id']
+
+    await dispatcher.dispatch(
+        'channel', channel_id, 'MESSAGE_UPDATE', update_payload)

From 428502d373d655fcc1a75a1bb5b3e22d0f58471d Mon Sep 17 00:00:00 2001
From: Luna <git@l4.pm>
Date: Fri, 22 Feb 2019 19:59:35 -0300
Subject: [PATCH 2/3] add (untested) impl for url embeds

URL embed generation happens inside process_url_embed, so no other code
needs to change to support it.

 - litecord.embed.sanitizer: add fetch_embed
---
 litecord/embed/messages.py  | 147 ++++++++++++++++++++++++------------
 litecord/embed/sanitizer.py |  29 ++++++-
 2 files changed, 126 insertions(+), 50 deletions(-)

diff --git a/litecord/embed/messages.py b/litecord/embed/messages.py
index 30e5386..4fd39ef 100644
--- a/litecord/embed/messages.py
+++ b/litecord/embed/messages.py
@@ -19,66 +19,53 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import re
 import asyncio
+import urllib.parse
+from pathlib import Path
 
 from logbook import Logger
 
-from litecord.embed.sanitizer import proxify, fetch_metadata
+from litecord.embed.sanitizer import proxify, fetch_metadata, fetch_embed
 
 log = Logger(__name__)
 
 
-async def process_url_embed(config, storage, dispatcher,
-                            session, payload: dict, *, delay=0):
-    """Process URLs in a message and generate embeds based on that."""
-    await asyncio.sleep(delay)
+MEDIA_EXTENSIONS = (
+    'png',
+    'jpg', 'jpeg',
+    'gif', 'webm'
+)
+
+
+async def insert_media_meta(url, config, session):
+    """Insert media metadata as an embed."""
+    img_proxy_url = proxify(url, config=config)
+    meta = await fetch_metadata(url, config=config, session=session)
+
+    if meta is None:
+        return
+
+    if not meta['image']:
+        return
+
+    return {
+        'type': 'image',
+        'url': url,
+        'thumbnail': {
+            'width': meta['width'],
+            'height': meta['height'],
+            'url': url,
+            'proxy_url': img_proxy_url
+        }
+    }
+
+
+async def _update_and_dispatch(payload, new_embeds, storage, dispatcher):
+    """Update the message with the given embeds and dispatch a MESSAGE_UPDATE
+    to users."""
 
     message_id = int(payload['id'])
     channel_id = int(payload['channel_id'])
 
-    # if we already have embeds
-    # we shouldn't add our own.
-    embeds = payload['embeds']
-
-    if embeds:
-        log.debug('url processor: ignoring existing embeds @ mid {}',
-                  message_id)
-        return
-
-    # use regex to get URLs
-    urls = re.findall(r'(https?://\S+)', payload['content'])
-    urls = urls[:5]
-
-    new_embeds = []
-
-    # fetch metadata for each url
-    for url in urls:
-        img_proxy_url = proxify(url, config=config)
-        meta = await fetch_metadata(url, config=config, session=session)
-
-        if meta is None:
-            continue
-
-        if not meta['image']:
-            continue
-
-        new_embeds.append({
-            'type': 'image',
-            'url': url,
-            'thumbnail': {
-                'width': meta['width'],
-                'height': meta['height'],
-                'url': url,
-                'proxy_url': img_proxy_url
-            }
-        })
-
-    # update if we got embeds
-    if not new_embeds:
-        return
-
-    log.debug('made {} thumbnail embeds for mid {}',
-              len(new_embeds), message_id)
-
     await storage.execute_with_json("""
     UPDATE messages
     SET embeds = $1
@@ -96,3 +83,65 @@ async def process_url_embed(config, storage, dispatcher,
 
     await dispatcher.dispatch(
         'channel', channel_id, 'MESSAGE_UPDATE', update_payload)
+
+
+async def insert_mp_embed(parsed, config, session):
+    """Insert mediaproxy embed."""
+    embed = await fetch_embed(parsed, config=config, session=session)
+    return embed
+
+
+async def process_url_embed(config, storage, dispatcher,
+                            session, payload: dict, *, delay=0):
+    """Process URLs in a message and generate embeds based on that."""
+    await asyncio.sleep(delay)
+
+    message_id = int(payload['id'])
+
+    # if we already have embeds
+    # we shouldn't add our own.
+    embeds = payload['embeds']
+
+    if embeds:
+        log.debug('url processor: ignoring existing embeds @ mid {}',
+                  message_id)
+        return
+
+    # now, we have two types of embeds:
+    # - image embeds
+    # - url embeds
+
+    # use regex to get URLs
+    urls = re.findall(r'(https?://\S+)', payload['content'])
+    urls = urls[:5]
+
+    # from there, we need to parse each found url and check its path.
+    # if it ends with png/jpg/gif/some other extension, we treat it as
+    # media metadata to fetch.
+
+    # if it isn't, we forward an /embed/ scope call to mediaproxy
+    # to generate an embed for us out of the url.
+
+    new_embeds = []
+
+    for url in urls:
+        parsed = urllib.parse.urlparse(url)
+        path = Path(parsed.path)
+        extension = path.name.split('.')[-1]
+
+        if extension in MEDIA_EXTENSIONS:
+            embed = await insert_media_meta(url, config, session)
+        else:
+            embed = await insert_mp_embed(parsed, config, session)
+
+        if not embed:
+            continue
+
+    # update if we got embeds
+    if not new_embeds:
+        return
+
+    log.debug('made {} thumbnail embeds for mid {}',
+              len(new_embeds), message_id)
+
+    await _update_and_dispatch(payload, new_embeds, storage, dispatcher)
diff --git a/litecord/embed/sanitizer.py b/litecord/embed/sanitizer.py
index 6126eda..4b0b0b9 100644
--- a/litecord/embed/sanitizer.py
+++ b/litecord/embed/sanitizer.py
@@ -128,8 +128,35 @@ async def fetch_metadata(url, *, config=None, session=None) -> dict:
         return await resp.json()
 
 
+async def fetch_embed(parsed, *, config=None, session=None) -> dict:
+    """Fetch an embed"""
+
+    if session is None:
+        session = app.session
+
+    if config is None:
+        config = app.config
+
+    # TODO: handle query string
+    md_path = f'{parsed.scheme}/{parsed.netloc}{parsed.path}'
+
+    md_base_url = config['MEDIA_PROXY']
+    secure = 's' if config['IS_SSL'] else ''
+
+    request_url = f'http{secure}://{md_base_url}/embed/{md_path}'
+
+    async with session.get(request_url) as resp:
+        if resp.status != 200:
+            body = await resp.text()
+            log.warning('failed to embed {!r}, {} {!r}',
+                        parsed, resp.status, body)
+            return
+
+        return await resp.json()
+
+
 async def fill_embed(embed: Embed) -> Embed:
-    """Fill an embed with more information."""
+    """Fill an embed with more information, such as proxy URLs."""
     embed = sanitize_embed(embed)
 
     if path_exists(embed, 'footer.icon_url'):

From 573da4fe6bd416efb6fe0f1555c696fed12ff2c0 Mon Sep 17 00:00:00 2001
From: Luna <git@l4.pm>
Date: Fri, 22 Feb 2019 20:09:50 -0300
Subject: [PATCH 3/3] litecord.embed.messages: append embeds to new_embeds

---
 litecord/embed/messages.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/litecord/embed/messages.py b/litecord/embed/messages.py
index 4fd39ef..b3e4c41 100644
--- a/litecord/embed/messages.py
+++ b/litecord/embed/messages.py
@@ -137,6 +137,8 @@ async def process_url_embed(config, storage, dispatcher,
         if not embed:
             continue
 
+        new_embeds.append(embed)
+
     # update if we got embeds
     if not new_embeds:
         return