mirror of https://gitlab.com/litecord/litecord.git
embed.sanitizer: full refactor
- refactor the three main mediaproxy api client functions into a
template function
- simplify common code between everyone
- add typings to fill_embed()
- embed.schemas: add querystring to EmbedURL.to_md_path
This commit is contained in:
parent
c9a7b3dfb5
commit
ff9d421a3c
|
|
@ -22,7 +22,8 @@ litecord.embed.sanitizer
|
||||||
sanitize embeds by giving common values
|
sanitize embeds by giving common values
|
||||||
such as type: rich
|
such as type: rich
|
||||||
"""
|
"""
|
||||||
from typing import Dict, Any, Optional, Union, List
|
import urllib.parse
|
||||||
|
from typing import Dict, Any, Optional, Union, List, Tuple
|
||||||
|
|
||||||
from logbook import Logger
|
from logbook import Logger
|
||||||
from quart import current_app as app
|
from quart import current_app as app
|
||||||
|
|
@ -76,27 +77,9 @@ def path_exists(embed: Embed, components_in: Union[List[str], str]):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def proxify(url, *, config=None) -> str:
|
|
||||||
"""Return a mediaproxy url for the given EmbedURL."""
|
|
||||||
|
|
||||||
if not config:
|
|
||||||
config = app.config
|
|
||||||
|
|
||||||
if isinstance(url, str):
|
|
||||||
url = EmbedURL(url)
|
|
||||||
|
|
||||||
md_base_url = config['MEDIA_PROXY']
|
|
||||||
parsed = url.parsed
|
|
||||||
proto = 'https' if config['IS_SSL'] else 'http'
|
|
||||||
|
|
||||||
return (
|
|
||||||
# base mediaproxy url
|
|
||||||
f'{proto}://{md_base_url}/img/'
|
|
||||||
f'{parsed.scheme}/{parsed.netloc}{parsed.path}'
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _mk_cfg_sess(config, session) -> tuple:
|
def _mk_cfg_sess(config, session) -> tuple:
|
||||||
|
"""Return a tuple of (config, session)."""
|
||||||
if config is None:
|
if config is None:
|
||||||
config = app.config
|
config = app.config
|
||||||
|
|
||||||
|
|
@ -107,86 +90,132 @@ def _mk_cfg_sess(config, session) -> tuple:
|
||||||
|
|
||||||
|
|
||||||
def _md_base(config) -> tuple:
|
def _md_base(config) -> tuple:
|
||||||
|
"""Return the protocol and base url for the mediaproxy."""
|
||||||
md_base_url = config['MEDIA_PROXY']
|
md_base_url = config['MEDIA_PROXY']
|
||||||
proto = 'https' if config['IS_SSL'] else 'http'
|
proto = 'https' if config['IS_SSL'] else 'http'
|
||||||
|
|
||||||
return proto, md_base_url
|
return proto, md_base_url
|
||||||
|
|
||||||
|
|
||||||
async def fetch_metadata(url, *, config=None, session=None) -> Optional[Dict]:
|
def _make_md_req_url(config, scope: str, url):
|
||||||
"""Fetch metadata for a url."""
|
"""Make a mediaproxy request URL given the config, scope, and the url
|
||||||
|
to be proxied."""
|
||||||
|
proto, base_url = _md_base(config)
|
||||||
|
return f'{proto}://{base_url}/{scope}/{url.to_md_path}'
|
||||||
|
|
||||||
|
|
||||||
|
def proxify(url, *, config=None) -> str:
|
||||||
|
"""Return a mediaproxy url for the given EmbedURL. Returns an
|
||||||
|
/img/ scope."""
|
||||||
|
config, _sess = _mk_cfg_sess(config, None)
|
||||||
|
|
||||||
|
if isinstance(url, str):
|
||||||
|
url = EmbedURL(url)
|
||||||
|
|
||||||
|
return _make_md_req_url(config, 'img', url)
|
||||||
|
|
||||||
|
|
||||||
|
async def _md_client_req(config, session, scope: str,
|
||||||
|
url, *, ret_resp=False) -> Optional[Union[Tuple, Dict]]:
|
||||||
|
"""Makes a request to the mediaproxy.
|
||||||
|
|
||||||
|
This has common code between all the main mediaproxy request functions
|
||||||
|
to decrease code repetition.
|
||||||
|
|
||||||
|
Note that config and session exist because there are cases where the app
|
||||||
|
isn't retrievable (as those functions usually run in background tasks,
|
||||||
|
not in the app itself).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
config: dict-like
|
||||||
|
the app configuration, if None, this will get the global one from the
|
||||||
|
app instance.
|
||||||
|
session: aiohttp client session
|
||||||
|
the aiohttp ClientSession instance to use, if None, this will get
|
||||||
|
the global one from the app.
|
||||||
|
|
||||||
|
scope: str
|
||||||
|
the scope of your request. one of 'meta', 'img', or 'embed' are
|
||||||
|
available for the mediaproxy's API.
|
||||||
|
url: string or EmbedURL
|
||||||
|
the url in question to give to the mediaproxy.
|
||||||
|
|
||||||
|
ret_resp: bool, default false
|
||||||
|
if this function returns the response and its bytes as a tuple, instead
|
||||||
|
of the raw json object. used by 'img' scope to proxy images, as we want
|
||||||
|
the raw bytes of the response, but by the time this function is
|
||||||
|
returned, the response object is invalid and the socket is closed
|
||||||
|
"""
|
||||||
config, session = _mk_cfg_sess(config, session)
|
config, session = _mk_cfg_sess(config, session)
|
||||||
|
|
||||||
if not isinstance(url, EmbedURL):
|
if not isinstance(url, EmbedURL):
|
||||||
url = EmbedURL(url)
|
url = EmbedURL(url)
|
||||||
|
|
||||||
proto, md_base_url = _md_base(config)
|
request_url = _make_md_req_url(config, scope, url)
|
||||||
request_url = f'{proto}://{md_base_url}/meta/{url.to_md_path}'
|
|
||||||
|
|
||||||
async with session.get(request_url) as resp:
|
async with session.get(request_url) as resp:
|
||||||
if resp.status != 200:
|
if resp.status == 200:
|
||||||
body = await resp.text()
|
if ret_resp:
|
||||||
|
return resp, await resp.read()
|
||||||
|
|
||||||
log.warning('failed to generate meta for {!r}: {} {!r}',
|
return await resp.json()
|
||||||
url, resp.status, body)
|
|
||||||
return None
|
|
||||||
|
|
||||||
return await resp.json()
|
body = await resp.text()
|
||||||
|
log.warning('failed to call {!r}, {} {!r}',
|
||||||
|
request_url, resp.status, body)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_metadata(url, *, config=None,
|
||||||
|
session=None) -> Optional[Dict]:
|
||||||
|
"""Fetch metadata for a url (image width, mime, etc)."""
|
||||||
|
return await _md_client_req(
|
||||||
|
config, session, 'meta', url
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def fetch_raw_img(url, *, config=None, session=None) -> Optional[tuple]:
|
async def fetch_raw_img(url, *, config=None, session=None) -> Optional[tuple]:
|
||||||
"""Fetch metadata for a url."""
|
"""Fetch raw data for a url (the bytes given off, used to proxy images).
|
||||||
config, session = _mk_cfg_sess(config, session)
|
|
||||||
|
|
||||||
if not isinstance(url, EmbedURL):
|
Returns a tuple containing the response object and the raw bytes given by
|
||||||
url = EmbedURL(url)
|
the website.
|
||||||
|
"""
|
||||||
|
tup = await _md_client_req(
|
||||||
|
config, session, 'img', url, ret_resp=True
|
||||||
|
)
|
||||||
|
|
||||||
proto, md_base_url = _md_base(config)
|
if not tup:
|
||||||
# NOTE: the img, instead of /meta/.
|
return None
|
||||||
request_url = f'{proto}://{md_base_url}/img/{url.to_md_path}'
|
|
||||||
|
|
||||||
async with session.get(request_url) as resp:
|
return tup
|
||||||
if resp.status != 200:
|
|
||||||
body = await resp.text()
|
|
||||||
|
|
||||||
log.warning('failed to get img for {!r}: {} {!r}',
|
|
||||||
url, resp.status, body)
|
|
||||||
return None
|
|
||||||
|
|
||||||
return resp, await resp.read()
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_embed(url, *, config=None, session=None) -> dict:
|
async def fetch_embed(url, *, config=None, session=None) -> Dict[str, Any]:
|
||||||
"""Fetch an embed"""
|
"""Fetch an embed for a given webpage (an automatically generated embed
|
||||||
config, session = _mk_cfg_sess(config, session)
|
by the mediaproxy, look over the project on how it generates embeds).
|
||||||
|
|
||||||
if not isinstance(url, EmbedURL):
|
Returns a discord embed object.
|
||||||
url = EmbedURL(url)
|
"""
|
||||||
|
return await _md_client_req(
|
||||||
parsed = url.parsed
|
config, session, 'embed', url
|
||||||
|
)
|
||||||
# TODO: handle query string
|
|
||||||
md_path = f'{parsed.scheme}/{parsed.netloc}{parsed.path}'
|
|
||||||
|
|
||||||
md_base_url = config['MEDIA_PROXY']
|
|
||||||
secure = 's' if config['IS_SSL'] else ''
|
|
||||||
|
|
||||||
request_url = f'http{secure}://{md_base_url}/embed/{md_path}'
|
|
||||||
|
|
||||||
async with session.get(request_url) as resp:
|
|
||||||
if resp.status != 200:
|
|
||||||
body = await resp.text()
|
|
||||||
log.warning('failed to embed {!r}, {} {!r}',
|
|
||||||
parsed, resp.status, body)
|
|
||||||
return
|
|
||||||
|
|
||||||
return await resp.json()
|
|
||||||
|
|
||||||
|
|
||||||
async def fill_embed(embed: Embed) -> Embed:
|
async def fill_embed(embed: Optional[Embed]) -> Optional[Embed]:
|
||||||
"""Fill an embed with more information, such as proxy URLs."""
|
"""Fill an embed with more information, such as proxy URLs.
|
||||||
|
|
||||||
|
Uses path_exists() to check if a given element exists in an embed by
|
||||||
|
checking if its parent fields also exist, which is why we do
|
||||||
|
`path_exists(embed, 'footer.icon_url')`
|
||||||
|
instead of
|
||||||
|
`embed.get('icon_url', embed.get('footer', {}))`.
|
||||||
|
|
||||||
|
Uses the proxify function so that clients don't directly contact websites
|
||||||
|
in embeds and instead use the mediaproxy.
|
||||||
|
"""
|
||||||
if embed is None:
|
if embed is None:
|
||||||
return
|
return None
|
||||||
|
|
||||||
embed = sanitize_embed(embed)
|
embed = sanitize_embed(embed)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,9 +47,12 @@ class EmbedURL:
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def to_md_path(self) -> str:
|
def to_md_path(self) -> str:
|
||||||
"""Convert the EmbedURL to a mediaproxy path."""
|
"""Convert the EmbedURL to a mediaproxy path (post img/meta)."""
|
||||||
parsed = self.parsed
|
parsed = self.parsed
|
||||||
return f'{parsed.scheme}/{parsed.netloc}{parsed.path}'
|
return (
|
||||||
|
f'{parsed.scheme}/{parsed.netloc}'
|
||||||
|
f'{parsed.path}?{parsed.query}'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
EMBED_FOOTER = {
|
EMBED_FOOTER = {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue