summary | refs | log | tree | commit | diff
path: root/synapse/rest/media
diff options
context:
space:
mode:
author: Andrej Shadura <andrewsh@debian.org> 2019-11-11 15:27:12 +0100
committer: Andrej Shadura <andrewsh@debian.org> 2019-11-11 15:27:12 +0100
commit: c3e687e74cf69e8b60c663af1cd7a8817cbd1c82 (patch)
tree: e5587cacb2ad3e4364829d102b4a0b0f25b8bb5b /synapse/rest/media
parent: 790aead4376316a1414bc48787cf84c2ee80e6f5 (diff)
New upstream version 1.5.1
Diffstat (limited to 'synapse/rest/media')
-rw-r--r-- synapse/rest/media/v1/preview_url_resource.py 21
1 file changed, 20 insertions, 1 deletion
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 0c68c3aa..ec9c4619 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -56,6 +56,9 @@ logger = logging.getLogger(__name__)
_charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I)
_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I)
+OG_TAG_NAME_MAXLEN = 50
+OG_TAG_VALUE_MAXLEN = 1000
+
class PreviewUrlResource(DirectServeResource):
isLeaf = True
@@ -167,7 +170,7 @@ class PreviewUrlResource(DirectServeResource):
ts (int):
Returns:
- Deferred[str]: json-encoded og data
+ Deferred[bytes]: json-encoded og data
"""
# check the URL cache in the DB (which will also provide us with
# historical previews, if we have any)
@@ -268,6 +271,18 @@ class PreviewUrlResource(DirectServeResource):
logger.warn("Failed to find any OG data in %s", url)
og = {}
+ # filter out any stupidly long values
+ keys_to_remove = []
+ for k, v in og.items():
+ # values can be numeric as well as strings, hence the cast to str
+ if len(k) > OG_TAG_NAME_MAXLEN or len(str(v)) > OG_TAG_VALUE_MAXLEN:
+ logger.warning(
+ "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN]
+ )
+ keys_to_remove.append(k)
+ for k in keys_to_remove:
+ del og[k]
+
logger.debug("Calculated OG for %s as %s" % (url, og))
jsonog = json.dumps(og)
@@ -502,6 +517,10 @@ def _calc_og(tree, media_uri):
og = {}
for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"):
if "content" in tag.attrib:
+ # if we've got more than 50 tags, someone is taking the piss
+ if len(og) >= 50:
+ logger.warning("Skipping OG for page with too many 'og:' tags")
+ return {}
og[tag.attrib["property"]] = tag.attrib["content"]
# TODO: grab article: meta tags too, e.g.: