Merge branch 'message-mediaproxy-embeds' into 'master'

mediaproxy embed support

Closes #27

See merge request litecord/litecord!19
This commit is contained in:
Luna 2019-02-22 23:18:03 +00:00
commit c225c17400
3 changed files with 180 additions and 76 deletions

View File

@ -17,9 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
""" """
import re
import json import json
import asyncio
from PIL import Image from PIL import Image
from quart import Blueprint, request, current_app as app, jsonify from quart import Blueprint, request, current_app as app, jsonify
@ -34,7 +32,8 @@ from litecord.snowflake import get_snowflake
from litecord.schemas import validate, MESSAGE_CREATE from litecord.schemas import validate, MESSAGE_CREATE
from litecord.utils import pg_set_json from litecord.utils import pg_set_json
from litecord.embed.sanitizer import fill_embed, proxify, fetch_metadata from litecord.embed.sanitizer import fill_embed
from litecord.embed.messages import process_url_embed
from litecord.blueprints.channel.dm_checks import dm_pre_check from litecord.blueprints.channel.dm_checks import dm_pre_check
@ -249,77 +248,6 @@ async def _guild_text_mentions(payload: dict, guild_id: int,
""", user_id, channel_id) """, user_id, channel_id)
async def process_url_embed(config, storage, dispatcher,
                            session, payload: dict, *, delay=0):
    """Generate image embeds from the URLs found in a message's content.

    Sleeps for ``delay`` seconds first. A payload that already carries
    embeds is left untouched. Otherwise at most five http(s) URLs are
    pulled out of the content, metadata is fetched for each of them, and
    every URL whose metadata has a truthy ``image`` entry becomes an
    ``image`` embed. When at least one embed was made, the embeds are
    written to the message row and a MESSAGE_UPDATE is dispatched to the
    channel.
    """
    await asyncio.sleep(delay)

    message_id = int(payload['id'])
    channel_id = int(payload['channel_id'])

    # embeds that are already on the message win over generated ones.
    if payload['embeds']:
        log.debug('url processor: ignoring existing embeds @ mid {}',
                  message_id)
        return

    # only the first five URLs of the content are considered.
    found_urls = re.findall(r'(https?://\S+)', payload['content'])[:5]

    new_embeds = []

    for url in found_urls:
        proxy_url = proxify(url, config=config)
        meta = await fetch_metadata(url, config=config, session=session)

        # no metadata at all, or metadata that isn't for an image.
        if meta is None or not meta['image']:
            continue

        thumbnail = {
            'width': meta['width'],
            'height': meta['height'],
            'url': url,
            'proxy_url': proxy_url,
        }

        new_embeds.append({
            'type': 'image',
            'url': url,
            'thumbnail': thumbnail,
        })

    # nothing to store or announce.
    if not new_embeds:
        return

    log.debug('made {} thumbnail embeds for mid {}',
              len(new_embeds), message_id)

    await storage.execute_with_json("""
    UPDATE messages
    SET embeds = $1
    WHERE messages.id = $2
    """, new_embeds, message_id)

    update_payload = {
        'id': str(message_id),
        'channel_id': str(channel_id),
        'embeds': new_embeds,
    }

    # guild_id is only forwarded when the source payload has one.
    if 'guild_id' in payload:
        update_payload['guild_id'] = payload['guild_id']

    await dispatcher.dispatch(
        'channel', channel_id, 'MESSAGE_UPDATE', update_payload)
async def _msg_input() -> tuple: async def _msg_input() -> tuple:
"""Extract the json input and any file information """Extract the json input and any file information
the client gave to us in the request. the client gave to us in the request.
@ -542,7 +470,7 @@ async def edit_message(channel_id, message_id):
# if there weren't any embed changes BUT # if there weren't any embed changes BUT
# we had a content change, we dispatch process_url_embed but with # we had a content change, we dispatch process_url_embed but with
# an artificial delay. # an artificial delay.
# the artificial delay keeps consistency between the events, since # the artificial delay keeps consistency between the events, since
# it makes more sense for the MESSAGE_UPDATE with new content to come # it makes more sense for the MESSAGE_UPDATE with new content to come
# BEFORE the MESSAGE_UPDATE with the new embeds (based on content) # BEFORE the MESSAGE_UPDATE with the new embeds (based on content)

149
litecord/embed/messages.py Normal file
View File

@ -0,0 +1,149 @@
"""
Litecord
Copyright (C) 2018-2019 Luna Mendes
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import re
import asyncio
import urllib.parse
from pathlib import Path
from logbook import Logger
from litecord.embed.sanitizer import proxify, fetch_metadata, fetch_embed
# module-level logger, named after this module.
log = Logger(__name__)
# extensions (without the leading dot) that make process_url_embed treat
# a url as media to fetch metadata for, instead of asking for an embed.
MEDIA_EXTENSIONS = ('png', 'jpg', 'jpeg', 'gif', 'webm')
async def insert_media_meta(url, config, session):
    """Build an image embed for ``url`` out of its fetched metadata.

    Returns the embed dict, or ``None`` when ``fetch_metadata`` gives
    back ``None`` or the metadata's ``image`` entry is falsy.
    """
    proxy_url = proxify(url, config=config)
    meta = await fetch_metadata(url, config=config, session=session)

    if meta is None or not meta['image']:
        return None

    thumbnail = {
        'width': meta['width'],
        'height': meta['height'],
        'url': url,
        'proxy_url': proxy_url,
    }

    return {
        'type': 'image',
        'url': url,
        'thumbnail': thumbnail,
    }
async def _update_and_dispatch(payload, new_embeds, storage, dispatcher):
    """Store ``new_embeds`` on the message and announce the change.

    The message and channel ids are read from ``payload``. The embeds
    are written to the message's row, then a MESSAGE_UPDATE carrying
    them is dispatched to the channel.
    """
    message_id = int(payload['id'])
    channel_id = int(payload['channel_id'])

    await storage.execute_with_json("""
    UPDATE messages
    SET embeds = $1
    WHERE messages.id = $2
    """, new_embeds, message_id)

    event_data = {
        'id': str(message_id),
        'channel_id': str(channel_id),
        'embeds': new_embeds,
    }

    # guild_id is only forwarded when the source payload has one.
    if 'guild_id' in payload:
        event_data['guild_id'] = payload['guild_id']

    await dispatcher.dispatch(
        'channel', channel_id, 'MESSAGE_UPDATE', event_data)
async def insert_mp_embed(parsed, config, session):
    """Return whatever ``fetch_embed`` yields for the parsed url."""
    return await fetch_embed(parsed, config=config, session=session)
async def process_url_embed(config, storage, dispatcher,
                            session, payload: dict, *, delay=0):
    """Process URLs in a message and generate embeds based on that.

    Sleeps for ``delay`` seconds first. A payload that already carries
    embeds is left untouched. Otherwise at most five http(s) URLs are
    pulled out of ``payload['content']`` and each one is turned into an
    embed in one of two ways:

    - a url whose path ends in one of ``MEDIA_EXTENSIONS`` goes through
      ``insert_media_meta``;
    - any other url goes through ``insert_mp_embed``.

    URLs that cannot be parsed, or that yield no embed, are skipped.
    When at least one embed was made, the message is updated and the
    update is dispatched via ``_update_and_dispatch``. Returns ``None``.
    """
    await asyncio.sleep(delay)

    message_id = int(payload['id'])

    # if we already have embeds
    # we shouldn't add our own.
    if payload['embeds']:
        log.debug('url processor: ignoring existing embeds @ mid {}',
                  message_id)
        return

    # use regex to get URLs, keeping only the first five.
    urls = re.findall(r'(https?://\S+)', payload['content'])[:5]

    new_embeds = []

    for url in urls:
        try:
            parsed = urllib.parse.urlparse(url)
        except ValueError:
            # the regex accepts text urlparse rejects (for example an
            # unbalanced '[' in the host). message content is user
            # input, so one bad url must not abort the other embeds.
            continue

        # Path.suffix is empty when the last segment has no dot, so a
        # path such as /gif is not mistaken for a media file. lowering
        # the case makes .PNG match the same way .png does.
        extension = Path(parsed.path).suffix[1:].lower()

        if extension in MEDIA_EXTENSIONS:
            embed = await insert_media_meta(url, config, session)
        else:
            embed = await insert_mp_embed(parsed, config, session)

        if embed:
            new_embeds.append(embed)

    # update if we got embeds
    if not new_embeds:
        return

    log.debug('made {} thumbnail embeds for mid {}',
              len(new_embeds), message_id)

    await _update_and_dispatch(payload, new_embeds, storage, dispatcher)

View File

@ -128,8 +128,35 @@ async def fetch_metadata(url, *, config=None, session=None) -> dict:
return await resp.json() return await resp.json()
async def fetch_embed(parsed, *, config=None, session=None) -> dict:
    """Request an embed for an already-parsed url from mediaproxy.

    ``parsed`` must expose ``scheme``, ``netloc`` and ``path``.
    ``session`` and ``config`` fall back to ``app.session`` and
    ``app.config`` when they are not given.

    Returns the decoded JSON body of a 200 response. For any other
    status a warning is logged and ``None`` is returned.
    """
    session = app.session if session is None else session
    config = app.config if config is None else config

    # TODO: handle query string
    target = f'{parsed.scheme}/{parsed.netloc}{parsed.path}'

    proxy_host = config['MEDIA_PROXY']
    scheme = 'https' if config['IS_SSL'] else 'http'
    request_url = f'{scheme}://{proxy_host}/embed/{target}'

    async with session.get(request_url) as resp:
        if resp.status == 200:
            return await resp.json()

        body = await resp.text()
        log.warning('failed to embed {!r}, {} {!r}',
                    parsed, resp.status, body)
        return None
async def fill_embed(embed: Embed) -> Embed: async def fill_embed(embed: Embed) -> Embed:
"""Fill an embed with more information.""" """Fill an embed with more information, such as proxy URLs."""
embed = sanitize_embed(embed) embed = sanitize_embed(embed)
if path_exists(embed, 'footer.icon_url'): if path_exists(embed, 'footer.icon_url'):