Merge branch 'message-mediaproxy-embeds' into 'master'

mediaproxy embed support

Closes #27

See merge request litecord/litecord!19
This commit is contained in:
Luna 2019-02-22 23:18:03 +00:00
commit c225c17400
3 changed files with 180 additions and 76 deletions

View File

@ -17,9 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import re
import json
import asyncio
from PIL import Image
from quart import Blueprint, request, current_app as app, jsonify
@ -34,7 +32,8 @@ from litecord.snowflake import get_snowflake
from litecord.schemas import validate, MESSAGE_CREATE
from litecord.utils import pg_set_json
from litecord.embed.sanitizer import fill_embed, proxify, fetch_metadata
from litecord.embed.sanitizer import fill_embed
from litecord.embed.messages import process_url_embed
from litecord.blueprints.channel.dm_checks import dm_pre_check
@ -249,77 +248,6 @@ async def _guild_text_mentions(payload: dict, guild_id: int,
""", user_id, channel_id)
async def process_url_embed(config, storage, dispatcher,
                            session, payload: dict, *, delay=0):
    """Generate image embeds out of the URLs found in a message.

    After sleeping for ``delay`` seconds, the first five http(s) URLs in
    the message content are looked up through fetch_metadata. Each one
    reported as an image becomes an embed of type 'image'. When at least
    one embed was made, the message row is updated and a MESSAGE_UPDATE
    is dispatched to the channel.

    A message that already has embeds is left as it is.
    """
    await asyncio.sleep(delay)

    mid = int(payload['id'])
    cid = int(payload['channel_id'])

    # embeds already on the message take priority over generated ones
    if payload['embeds']:
        log.debug('url processor: ignoring existing embeds @ mid {}',
                  mid)
        return

    # regex out the URLs, keeping only the first five
    found_urls = re.findall(r'(https?://\S+)', payload['content'])[:5]

    new_embeds = []

    for url in found_urls:
        proxied = proxify(url, config=config)
        meta = await fetch_metadata(url, config=config, session=session)

        # no metadata, or not an image: nothing to embed for this url
        if meta is None or not meta['image']:
            continue

        thumbnail = {
            'width': meta['width'],
            'height': meta['height'],
            'url': url,
            'proxy_url': proxied
        }
        new_embeds.append({'type': 'image', 'url': url,
                           'thumbnail': thumbnail})

    # nothing generated, nothing to store or dispatch
    if not new_embeds:
        return

    log.debug('made {} thumbnail embeds for mid {}',
              len(new_embeds), mid)

    await storage.execute_with_json("""
UPDATE messages
SET embeds = $1
WHERE messages.id = $2
""", new_embeds, mid)

    event = {
        'id': str(mid),
        'channel_id': str(cid),
        'embeds': new_embeds,
    }

    # guild_id is only forwarded when the source payload carried one
    if 'guild_id' in payload:
        event['guild_id'] = payload['guild_id']

    await dispatcher.dispatch(
        'channel', cid, 'MESSAGE_UPDATE', event)
async def _msg_input() -> tuple:
"""Extract the json input and any file information
the client gave to us in the request.
@ -542,7 +470,7 @@ async def edit_message(channel_id, message_id):
# if there weren't any embed changes BUT
# we had a content change, we dispatch process_url_embed but with
# an artificial delay.
# the artificial delay keeps consistency between the events, since
# it makes more sense for the MESSAGE_UPDATE with new content to come
# BEFORE the MESSAGE_UPDATE with the new embeds (based on content)

149
litecord/embed/messages.py Normal file
View File

@ -0,0 +1,149 @@
"""
Litecord
Copyright (C) 2018-2019 Luna Mendes
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import re
import asyncio
import urllib.parse
from pathlib import Path
from logbook import Logger
from litecord.embed.sanitizer import proxify, fetch_metadata, fetch_embed
# module-level logger, named after this module
log = Logger(__name__)

# File extensions (without the leading dot) that process_url_embed treats as
# direct media links: a URL whose path ends in one of these is handed to
# insert_media_meta, any other URL is handed to insert_mp_embed.
MEDIA_EXTENSIONS = (
'png',
'jpg', 'jpeg',
'gif', 'webm'
)
async def insert_media_meta(url, config, session):
    """Build an image embed for a direct media URL.

    The URL is proxified and its metadata is requested through
    fetch_metadata. When the metadata marks the target as an image, the
    result is an embed dict of type 'image' whose thumbnail carries the
    reported width/height, the original URL and the proxified URL.

    Returns None when no metadata came back or the target is not an image.
    """
    proxy_url = proxify(url, config=config)
    metadata = await fetch_metadata(url, config=config, session=session)

    # both "no metadata" and "not an image" mean there is nothing to embed
    if metadata is None or not metadata['image']:
        return None

    thumbnail = {
        'width': metadata['width'],
        'height': metadata['height'],
        'url': url,
        'proxy_url': proxy_url
    }

    return {'type': 'image', 'url': url, 'thumbnail': thumbnail}
async def _update_and_dispatch(payload, new_embeds, storage, dispatcher):
    """Store new embeds on a message and tell the channel about it.

    The message identified by payload['id'] gets its embeds column set to
    new_embeds, then a MESSAGE_UPDATE carrying the message id, channel id
    and the new embeds is dispatched to the channel. The payload's
    guild_id is copied into the event when it is present.
    """
    mid = int(payload['id'])
    cid = int(payload['channel_id'])

    await storage.execute_with_json("""
UPDATE messages
SET embeds = $1
WHERE messages.id = $2
""", new_embeds, mid)

    event = {
        'id': str(mid),
        'channel_id': str(cid),
        'embeds': new_embeds,
    }

    # guild_id is only forwarded when the source payload carried one
    if 'guild_id' in payload:
        event['guild_id'] = payload['guild_id']

    await dispatcher.dispatch(
        'channel', cid, 'MESSAGE_UPDATE', event)
async def insert_mp_embed(parsed, config, session):
    """Get a mediaproxy-generated embed for an already parsed URL.

    Thin wrapper around fetch_embed: the result of that call is returned
    unchanged.
    """
    return await fetch_embed(parsed, config=config, session=session)
async def process_url_embed(config, storage, dispatcher,
                            session, payload: dict, *, delay=0):
    """Process URLs in a message and generate embeds based on that.

    Up to five http(s) URLs are taken from the message content. There are
    two kinds of embeds:

    - image embeds: the URL path ends in one of MEDIA_EXTENSIONS, so the
      embed is built from the media metadata (insert_media_meta).
    - url embeds: anything else is forwarded to mediaproxy, which
      generates the embed for us (insert_mp_embed).

    If at least one embed was produced, the message is updated and a
    MESSAGE_UPDATE is dispatched. Messages that already have embeds are
    left untouched.

    ``delay`` is the number of seconds to sleep before doing any work.
    """
    await asyncio.sleep(delay)

    message_id = int(payload['id'])

    # if we already have embeds
    # we shouldn't add our own.
    if payload['embeds']:
        log.debug('url processor: ignoring existing embeds @ mid {}',
                  message_id)
        return

    # use regex to get URLs, capped at five per message
    urls = re.findall(r'(https?://\S+)', payload['content'])[:5]

    new_embeds = []

    for url in urls:
        parsed = urllib.parse.urlparse(url)

        # Path.suffix is empty when the last path segment has no dot, so
        # "https://host/gif" is not mistaken for a gif file (splitting the
        # name on '.' would hand back the whole name). Lowercasing lets
        # "photo.PNG" match the lowercase MEDIA_EXTENSIONS entries.
        extension = Path(parsed.path).suffix.lstrip('.').lower()

        if extension in MEDIA_EXTENSIONS:
            embed = await insert_media_meta(url, config, session)
        else:
            embed = await insert_mp_embed(parsed, config, session)

        # both helpers give a falsy result when no embed could be made
        if embed:
            new_embeds.append(embed)

    # update if we got embeds
    if not new_embeds:
        return

    log.debug('made {} thumbnail embeds for mid {}',
              len(new_embeds), message_id)

    await _update_and_dispatch(payload, new_embeds, storage, dispatcher)

View File

@ -128,8 +128,35 @@ async def fetch_metadata(url, *, config=None, session=None) -> dict:
return await resp.json()
async def fetch_embed(parsed, *, config=None, session=None) -> dict:
    """Request an embed for a parsed URL from mediaproxy.

    ``parsed`` is read for its scheme, netloc and path attributes. When
    ``session`` or ``config`` are not given, the ones on the current app
    are used.

    Returns the decoded JSON body on a 200 response. Any other status is
    logged as a warning and None is returned.
    """
    session = app.session if session is None else session
    config = app.config if config is None else config

    # TODO: handle query string
    md_path = f'{parsed.scheme}/{parsed.netloc}{parsed.path}'

    md_base_url = config['MEDIA_PROXY']
    secure = 's' if config['IS_SSL'] else ''

    request_url = f'http{secure}://{md_base_url}/embed/{md_path}'

    async with session.get(request_url) as resp:
        if resp.status == 200:
            return await resp.json()

        # non-200: log what mediaproxy answered and give back nothing
        body = await resp.text()
        log.warning('failed to embed {!r}, {} {!r}',
                    parsed, resp.status, body)
        return None
async def fill_embed(embed: Embed) -> Embed:
"""Fill an embed with more information."""
"""Fill an embed with more information, such as proxy URLs."""
embed = sanitize_embed(embed)
if path_exists(embed, 'footer.icon_url'):