mirror of https://gitlab.com/litecord/litecord.git
add (untested) implementation for URL embeds
URL processing happens in process_url_embed, so no other code needs to worry about it. - litecord.embed.sanitizer: add fetch_embed
This commit is contained in:
parent
f4f4a64e9f
commit
428502d373
|
|
@ -19,66 +19,53 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import urllib.parse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from logbook import Logger
|
from logbook import Logger
|
||||||
|
|
||||||
from litecord.embed.sanitizer import proxify, fetch_metadata
|
from litecord.embed.sanitizer import proxify, fetch_metadata, fetch_embed
|
||||||
|
|
||||||
# Module-level logger for the URL-embed processor (logbook).
log = Logger(__name__)
||||||
|
|
||||||
|
|
||||||
async def process_url_embed(config, storage, dispatcher,
|
# File extensions treated as direct media links: URLs ending in one of
# these get an image/thumbnail embed built from fetched metadata instead
# of a mediaproxy /embed/ call.
MEDIA_EXTENSIONS = ('png', 'jpg', 'jpeg', 'gif', 'webm')
|
async def insert_media_meta(url, config, session):
    """Build an image embed for *url* from its fetched media metadata.

    Returns an image-type embed dict with a thumbnail pointing at the
    image proxy, or None when metadata is unavailable or the URL does
    not resolve to an image.
    """
    proxied_url = proxify(url, config=config)
    metadata = await fetch_metadata(url, config=config, session=session)

    # bail out when there is no metadata or the target is not an image
    if metadata is None or not metadata['image']:
        return None

    thumbnail = {
        'width': metadata['width'],
        'height': metadata['height'],
        'url': url,
        'proxy_url': proxied_url,
    }

    return {
        'type': 'image',
        'url': url,
        'thumbnail': thumbnail,
    }
|
async def _update_and_dispatch(payload, new_embeds, storage, dispatcher):
|
||||||
|
"""Update the message with the given embeds and dispatch a MESSAGE_UPDATE
|
||||||
|
to users."""
|
||||||
|
|
||||||
message_id = int(payload['id'])
|
message_id = int(payload['id'])
|
||||||
channel_id = int(payload['channel_id'])
|
channel_id = int(payload['channel_id'])
|
||||||
|
|
||||||
# if we already have embeds
|
|
||||||
# we shouldn't add our own.
|
|
||||||
embeds = payload['embeds']
|
|
||||||
|
|
||||||
if embeds:
|
|
||||||
log.debug('url processor: ignoring existing embeds @ mid {}',
|
|
||||||
message_id)
|
|
||||||
return
|
|
||||||
|
|
||||||
# use regex to get URLs
|
|
||||||
urls = re.findall(r'(https?://\S+)', payload['content'])
|
|
||||||
urls = urls[:5]
|
|
||||||
|
|
||||||
new_embeds = []
|
|
||||||
|
|
||||||
# fetch metadata for each url
|
|
||||||
for url in urls:
|
|
||||||
img_proxy_url = proxify(url, config=config)
|
|
||||||
meta = await fetch_metadata(url, config=config, session=session)
|
|
||||||
|
|
||||||
if meta is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not meta['image']:
|
|
||||||
continue
|
|
||||||
|
|
||||||
new_embeds.append({
|
|
||||||
'type': 'image',
|
|
||||||
'url': url,
|
|
||||||
'thumbnail': {
|
|
||||||
'width': meta['width'],
|
|
||||||
'height': meta['height'],
|
|
||||||
'url': url,
|
|
||||||
'proxy_url': img_proxy_url
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
# update if we got embeds
|
|
||||||
if not new_embeds:
|
|
||||||
return
|
|
||||||
|
|
||||||
log.debug('made {} thumbnail embeds for mid {}',
|
|
||||||
len(new_embeds), message_id)
|
|
||||||
|
|
||||||
await storage.execute_with_json("""
|
await storage.execute_with_json("""
|
||||||
UPDATE messages
|
UPDATE messages
|
||||||
SET embeds = $1
|
SET embeds = $1
|
||||||
|
|
@ -96,3 +83,65 @@ async def process_url_embed(config, storage, dispatcher,
|
||||||
|
|
||||||
await dispatcher.dispatch(
|
await dispatcher.dispatch(
|
||||||
'channel', channel_id, 'MESSAGE_UPDATE', update_payload)
|
'channel', channel_id, 'MESSAGE_UPDATE', update_payload)
|
||||||
|
|
||||||
|
|
||||||
|
async def insert_mp_embed(parsed, config, session):
    """Ask the mediaproxy to generate an embed for the parsed URL."""
    return await fetch_embed(parsed, config=config, session=session)
|
|
||||||
|
|
||||||
|
async def process_url_embed(config, storage, dispatcher,
                            session, payload: dict, *, delay=0):
    """Process URLs in a message and generate embeds based on that.

    Scans the message content for up to 5 http(s) URLs.  Media links
    (extension in MEDIA_EXTENSIONS) become image embeds via metadata
    fetching; everything else is forwarded to the mediaproxy's /embed/
    scope.  Any resulting embeds are stored and dispatched as a
    MESSAGE_UPDATE.

    Parameters
    ----------
    config, storage, dispatcher, session
        Application services used for fetching, persistence and dispatch.
    payload: dict
        The MESSAGE_CREATE payload (must contain 'id', 'embeds',
        'content').
    delay
        Seconds to sleep before processing (keyword-only).
    """
    await asyncio.sleep(delay)

    message_id = int(payload['id'])

    # if we already have embeds
    # we shouldn't add our own.
    embeds = payload['embeds']

    if embeds:
        log.debug('url processor: ignoring existing embeds @ mid {}',
                  message_id)
        return

    # now, we have two types of embeds:
    #  - image embeds
    #  - url embeds

    # use regex to get URLs
    urls = re.findall(r'(https?://\S+)', payload['content'])
    urls = urls[:5]

    # from there, we need to parse each found url and check its path.
    # if it ends with png/jpg/gif/some other extension, we treat it as
    # media metadata to fetch.

    # if it isn't, we forward an /embed/ scope call to mediaproxy
    # to generate an embed for us out of the url.

    new_embeds = []

    for url in urls:
        parsed = urllib.parse.urlparse(url)
        path = Path(parsed.path)
        extension = path.name.split('.')[-1]

        if extension in MEDIA_EXTENSIONS:
            embed = await insert_media_meta(url, config, session)
        else:
            embed = await insert_mp_embed(parsed, config, session)

        if not embed:
            continue

        # BUG FIX: the generated embed was never collected before, so
        # new_embeds stayed empty and the function always returned
        # without updating the message.
        new_embeds.append(embed)

    # update if we got embeds
    if not new_embeds:
        return

    log.debug('made {} thumbnail embeds for mid {}',
              len(new_embeds), message_id)

    await _update_and_dispatch(payload, new_embeds, storage, dispatcher)
|
||||||
|
|
@ -128,8 +128,35 @@ async def fetch_metadata(url, *, config=None, session=None) -> dict:
|
||||||
return await resp.json()
|
return await resp.json()
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_embed(parsed, *, config=None, session=None) -> dict:
    """Ask the mediaproxy's /embed/ scope for an embed of the given URL.

    *parsed* is the urllib.parse result of the original URL.  Returns
    the decoded JSON embed, or None when the mediaproxy replies with a
    non-200 status.
    """
    session = app.session if session is None else session
    config = app.config if config is None else config

    # TODO: handle query string
    md_path = f'{parsed.scheme}/{parsed.netloc}{parsed.path}'

    scheme = 'https' if config['IS_SSL'] else 'http'
    request_url = f'{scheme}://{config["MEDIA_PROXY"]}/embed/{md_path}'

    async with session.get(request_url) as resp:
        if resp.status == 200:
            return await resp.json()

        body = await resp.text()
        log.warning('failed to embed {!r}, {} {!r}',
                    parsed, resp.status, body)
        return None
||||||
|
|
||||||
async def fill_embed(embed: Embed) -> Embed:
|
async def fill_embed(embed: Embed) -> Embed:
|
||||||
"""Fill an embed with more information."""
|
"""Fill an embed with more information, such as proxy URLs."""
|
||||||
embed = sanitize_embed(embed)
|
embed = sanitize_embed(embed)
|
||||||
|
|
||||||
if path_exists(embed, 'footer.icon_url'):
|
if path_exists(embed, 'footer.icon_url'):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue