author    Andrej Shadura <andrewsh@debian.org>    2022-06-19 15:20:00 +0200
committer Andrej Shadura <andrewsh@debian.org>    2022-06-19 15:20:00 +0200
commit    6dc64c92c6991f09910f3e6db368e6eeb4b1981e (patch)
tree      d8bab73ee460e0a96bbda9c5988d8025dbbe2eb3 /synapse/rest/media
parent    c2d3cd76c24f663449bfa209ac920305f0501d3a (diff)
New upstream version 1.61.0
Diffstat (limited to 'synapse/rest/media')
-rw-r--r--  synapse/rest/media/v1/media_repository.py     | 358
-rw-r--r--  synapse/rest/media/v1/preview_html.py          |  64
-rw-r--r--  synapse/rest/media/v1/preview_url_resource.py  |  53
-rw-r--r--  synapse/rest/media/v1/thumbnailer.py           |  71
4 files changed, 370 insertions(+), 176 deletions(-)
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index 3e5d6c62..7435fd91 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -65,7 +65,12 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
-UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000
+# How often to run the background job to update the "recently accessed"
+# attribute of local and remote media.
+UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 # 1 minute
+# How often to run the background job to check for local and remote media
+# that should be purged according to the configured media retention settings.
+MEDIA_RETENTION_CHECK_PERIOD_MS = 60 * 60 * 1000 # 1 hour
class MediaRepository:
@@ -122,11 +127,36 @@ class MediaRepository:
self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS
)
+ # Media retention configuration options
+ self._media_retention_local_media_lifetime_ms = (
+ hs.config.media.media_retention_local_media_lifetime_ms
+ )
+ self._media_retention_remote_media_lifetime_ms = (
+ hs.config.media.media_retention_remote_media_lifetime_ms
+ )
+
+ # Check whether local or remote media retention is configured
+ if (
+ hs.config.media.media_retention_local_media_lifetime_ms is not None
+ or hs.config.media.media_retention_remote_media_lifetime_ms is not None
+ ):
+ # Run the background job to apply media retention rules routinely,
+ # with the duration between runs dictated by the homeserver config.
+ self.clock.looping_call(
+ self._start_apply_media_retention_rules,
+ MEDIA_RETENTION_CHECK_PERIOD_MS,
+ )
+
def _start_update_recently_accessed(self) -> Deferred:
return run_as_background_process(
"update_recently_accessed_media", self._update_recently_accessed
)
+ def _start_apply_media_retention_rules(self) -> Deferred:
+ return run_as_background_process(
+ "apply_media_retention_rules", self._apply_media_retention_rules
+ )
+
async def _update_recently_accessed(self) -> None:
remote_media = self.recently_accessed_remotes
self.recently_accessed_remotes = set()
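The two lifetime options read from hs.config.media above arrive already converted to milliseconds. As a rough, hypothetical sketch (this is not Synapse's actual config parser), a duration string such as "14d" from the homeserver's media retention settings would map to these values like so:

    # Hypothetical helper, for illustration only (Synapse's real config
    # parser is more general): convert a duration string such as "14d"
    # into the millisecond lifetimes consumed by MediaRepository.
    UNIT_MS = {"s": 1_000, "m": 60_000, "h": 3_600_000, "d": 86_400_000}

    def duration_to_ms(duration: str) -> int:
        return int(duration[:-1]) * UNIT_MS[duration[-1]]

    assert duration_to_ms("14d") == 14 * 24 * 60 * 60 * 1000
    assert duration_to_ms("90d") == 90 * 24 * 60 * 60 * 1000

With either lifetime configured, the looping call registered above fires every MEDIA_RETENTION_CHECK_PERIOD_MS (hourly) and purges anything last accessed before now minus the configured lifetime.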
@@ -557,15 +587,16 @@ class MediaRepository:
)
return None
- t_byte_source = await defer_to_thread(
- self.hs.get_reactor(),
- self._generate_thumbnail,
- thumbnailer,
- t_width,
- t_height,
- t_method,
- t_type,
- )
+ with thumbnailer:
+ t_byte_source = await defer_to_thread(
+ self.hs.get_reactor(),
+ self._generate_thumbnail,
+ thumbnailer,
+ t_width,
+ t_height,
+ t_method,
+ t_type,
+ )
if t_byte_source:
try:
@@ -627,15 +658,16 @@ class MediaRepository:
)
return None
- t_byte_source = await defer_to_thread(
- self.hs.get_reactor(),
- self._generate_thumbnail,
- thumbnailer,
- t_width,
- t_height,
- t_method,
- t_type,
- )
+ with thumbnailer:
+ t_byte_source = await defer_to_thread(
+ self.hs.get_reactor(),
+ self._generate_thumbnail,
+ thumbnailer,
+ t_width,
+ t_height,
+ t_method,
+ t_type,
+ )
if t_byte_source:
try:
@@ -719,124 +751,182 @@ class MediaRepository:
)
return None
- m_width = thumbnailer.width
- m_height = thumbnailer.height
+ with thumbnailer:
+ m_width = thumbnailer.width
+ m_height = thumbnailer.height
- if m_width * m_height >= self.max_image_pixels:
- logger.info(
- "Image too large to thumbnail %r x %r > %r",
- m_width,
- m_height,
- self.max_image_pixels,
- )
- return None
-
- if thumbnailer.transpose_method is not None:
- m_width, m_height = await defer_to_thread(
- self.hs.get_reactor(), thumbnailer.transpose
- )
-
- # We deduplicate the thumbnail sizes by ignoring the cropped versions if
- # they have the same dimensions of a scaled one.
- thumbnails: Dict[Tuple[int, int, str], str] = {}
- for requirement in requirements:
- if requirement.method == "crop":
- thumbnails.setdefault(
- (requirement.width, requirement.height, requirement.media_type),
- requirement.method,
- )
- elif requirement.method == "scale":
- t_width, t_height = thumbnailer.aspect(
- requirement.width, requirement.height
+ if m_width * m_height >= self.max_image_pixels:
+ logger.info(
+ "Image too large to thumbnail %r x %r > %r",
+ m_width,
+ m_height,
+ self.max_image_pixels,
)
- t_width = min(m_width, t_width)
- t_height = min(m_height, t_height)
- thumbnails[
- (t_width, t_height, requirement.media_type)
- ] = requirement.method
-
- # Now we generate the thumbnails for each dimension, store it
- for (t_width, t_height, t_type), t_method in thumbnails.items():
- # Generate the thumbnail
- if t_method == "crop":
- t_byte_source = await defer_to_thread(
- self.hs.get_reactor(), thumbnailer.crop, t_width, t_height, t_type
+ return None
+
+ if thumbnailer.transpose_method is not None:
+ m_width, m_height = await defer_to_thread(
+ self.hs.get_reactor(), thumbnailer.transpose
)
- elif t_method == "scale":
- t_byte_source = await defer_to_thread(
- self.hs.get_reactor(), thumbnailer.scale, t_width, t_height, t_type
+
+ # We deduplicate the thumbnail sizes by ignoring the cropped versions if
+ # they have the same dimensions as a scaled one.
+ thumbnails: Dict[Tuple[int, int, str], str] = {}
+ for requirement in requirements:
+ if requirement.method == "crop":
+ thumbnails.setdefault(
+ (requirement.width, requirement.height, requirement.media_type),
+ requirement.method,
+ )
+ elif requirement.method == "scale":
+ t_width, t_height = thumbnailer.aspect(
+ requirement.width, requirement.height
+ )
+ t_width = min(m_width, t_width)
+ t_height = min(m_height, t_height)
+ thumbnails[
+ (t_width, t_height, requirement.media_type)
+ ] = requirement.method
+
+ # Now we generate the thumbnails for each dimension, store it
+ for (t_width, t_height, t_type), t_method in thumbnails.items():
+ # Generate the thumbnail
+ if t_method == "crop":
+ t_byte_source = await defer_to_thread(
+ self.hs.get_reactor(),
+ thumbnailer.crop,
+ t_width,
+ t_height,
+ t_type,
+ )
+ elif t_method == "scale":
+ t_byte_source = await defer_to_thread(
+ self.hs.get_reactor(),
+ thumbnailer.scale,
+ t_width,
+ t_height,
+ t_type,
+ )
+ else:
+ logger.error("Unrecognized method: %r", t_method)
+ continue
+
+ if not t_byte_source:
+ continue
+
+ file_info = FileInfo(
+ server_name=server_name,
+ file_id=file_id,
+ url_cache=url_cache,
+ thumbnail=ThumbnailInfo(
+ width=t_width,
+ height=t_height,
+ method=t_method,
+ type=t_type,
+ ),
)
- else:
- logger.error("Unrecognized method: %r", t_method)
- continue
-
- if not t_byte_source:
- continue
-
- file_info = FileInfo(
- server_name=server_name,
- file_id=file_id,
- url_cache=url_cache,
- thumbnail=ThumbnailInfo(
- width=t_width,
- height=t_height,
- method=t_method,
- type=t_type,
- ),
- )
- with self.media_storage.store_into_file(file_info) as (f, fname, finish):
- try:
- await self.media_storage.write_to_file(t_byte_source, f)
- await finish()
- finally:
- t_byte_source.close()
-
- t_len = os.path.getsize(fname)
-
- # Write to database
- if server_name:
- # Multiple remote media download requests can race (when
- # using multiple media repos), so this may throw a violation
- # constraint exception. If it does we'll delete the newly
- # generated thumbnail from disk (as we're in the ctx
- # manager).
- #
- # However: we've already called `finish()` so we may have
- # also written to the storage providers. This is preferable
- # to the alternative where we call `finish()` *after* this,
- # where we could end up having an entry in the DB but fail
- # to write the files to the storage providers.
+ with self.media_storage.store_into_file(file_info) as (
+ f,
+ fname,
+ finish,
+ ):
try:
- await self.store.store_remote_media_thumbnail(
- server_name,
- media_id,
- file_id,
- t_width,
- t_height,
- t_type,
- t_method,
- t_len,
- )
- except Exception as e:
- thumbnail_exists = await self.store.get_remote_media_thumbnail(
- server_name,
- media_id,
- t_width,
- t_height,
- t_type,
+ await self.media_storage.write_to_file(t_byte_source, f)
+ await finish()
+ finally:
+ t_byte_source.close()
+
+ t_len = os.path.getsize(fname)
+
+ # Write to database
+ if server_name:
+ # Multiple remote media download requests can race (when
+ # using multiple media repos), so this may throw a constraint
+ # violation exception. If it does we'll delete the newly
+ # generated thumbnail from disk (as we're in the ctx
+ # manager).
+ #
+ # However: we've already called `finish()` so we may have
+ # also written to the storage providers. This is preferable
+ # to the alternative where we call `finish()` *after* this,
+ # where we could end up having an entry in the DB but fail
+ # to write the files to the storage providers.
+ try:
+ await self.store.store_remote_media_thumbnail(
+ server_name,
+ media_id,
+ file_id,
+ t_width,
+ t_height,
+ t_type,
+ t_method,
+ t_len,
+ )
+ except Exception as e:
+ thumbnail_exists = (
+ await self.store.get_remote_media_thumbnail(
+ server_name,
+ media_id,
+ t_width,
+ t_height,
+ t_type,
+ )
+ )
+ if not thumbnail_exists:
+ raise e
+ else:
+ await self.store.store_local_thumbnail(
+ media_id, t_width, t_height, t_type, t_method, t_len
)
- if not thumbnail_exists:
- raise e
- else:
- await self.store.store_local_thumbnail(
- media_id, t_width, t_height, t_type, t_method, t_len
- )
return {"width": m_width, "height": m_height}
+ async def _apply_media_retention_rules(self) -> None:
+ """
+ Purge old local and remote media according to the media retention rules
+ defined in the homeserver config.
+ """
+ # Purge remote media
+ if self._media_retention_remote_media_lifetime_ms is not None:
+ # Calculate a threshold timestamp derived from the configured lifetime. Any
+ # media that has not been accessed since this timestamp will be removed.
+ remote_media_threshold_timestamp_ms = (
+ self.clock.time_msec() - self._media_retention_remote_media_lifetime_ms
+ )
+
+ logger.info(
+ "Purging remote media last accessed before"
+ f" {remote_media_threshold_timestamp_ms}"
+ )
+
+ await self.delete_old_remote_media(
+ before_ts=remote_media_threshold_timestamp_ms
+ )
+
+ # And now do the same for local media
+ if self._media_retention_local_media_lifetime_ms is not None:
+ # This works the same as the remote media threshold
+ local_media_threshold_timestamp_ms = (
+ self.clock.time_msec() - self._media_retention_local_media_lifetime_ms
+ )
+
+ logger.info(
+ "Purging local media last accessed before"
+ f" {local_media_threshold_timestamp_ms}"
+ )
+
+ await self.delete_old_local_media(
+ before_ts=local_media_threshold_timestamp_ms,
+ keep_profiles=True,
+ delete_quarantined_media=False,
+ delete_protected_media=False,
+ )
+
async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
- old_media = await self.store.get_remote_media_before(before_ts)
+ old_media = await self.store.get_remote_media_ids(
+ before_ts, include_quarantined_media=False
+ )
deleted = 0
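The deduplication map built earlier in this hunk deserves a standalone illustration: crop requirements are inserted with setdefault, while scale requirements assign directly, so when a crop and a scale land on the same (width, height, type) key the scaled variant wins.

    # Standalone sketch of the thumbnail-size deduplication rule.
    thumbnails: dict = {}
    thumbnails.setdefault((32, 32, "image/png"), "crop")  # crop never overwrites
    thumbnails[(32, 32, "image/png")] = "scale"           # scale always overwrites
    thumbnails.setdefault((32, 32, "image/png"), "crop")  # still "scale"
    assert thumbnails == {(32, 32, "image/png"): "scale"}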
@@ -889,6 +979,8 @@ class MediaRepository:
before_ts: int,
size_gt: int = 0,
keep_profiles: bool = True,
+ delete_quarantined_media: bool = False,
+ delete_protected_media: bool = False,
) -> Tuple[List[str], int]:
"""
Delete local or remote media from this server by size and timestamp. Removes
@@ -896,18 +988,22 @@ class MediaRepository:
Args:
before_ts: Unix timestamp in ms.
- Files that were last used before this timestamp will be deleted
- size_gt: Size of the media in bytes. Files that are larger will be deleted
+ Files that were last used before this timestamp will be deleted.
+ size_gt: Size of the media in bytes. Files that are larger will be deleted.
keep_profiles: Switch to delete also files that are still used in image data
- (e.g user profile, room avatar)
- If false these files will be deleted
+ (e.g. user profile, room avatar). If False, these files will be deleted.
+ delete_quarantined_media: If True, media marked as quarantined will be deleted.
+ delete_protected_media: If True, media marked as protected will be deleted.
+
Returns:
A tuple of (list of deleted media IDs, total deleted media IDs).
"""
- old_media = await self.store.get_local_media_before(
+ old_media = await self.store.get_local_media_ids(
before_ts,
size_gt,
keep_profiles,
+ include_quarantined_media=delete_quarantined_media,
+ include_protected_media=delete_protected_media,
)
return await self._remove_local_media_from_disk(old_media)
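Because the two new parameters default to False, existing callers of delete_old_local_media keep their behaviour, while the retention job passes them explicitly. A minimal, hypothetical caller (media_repo stands in for a real MediaRepository instance; the cutoff value is illustrative):

    # Sketch only: remove local media last used before cutoff_ms, sparing
    # profile images; quarantined and protected media survive thanks to
    # the new defaults.
    async def prune_local_media(media_repo, cutoff_ms: int) -> int:
        media_ids, total = await media_repo.delete_old_local_media(
            before_ts=cutoff_ms,
            keep_profiles=True,
        )
        return total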
diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py
index ca73965f..ed8f21a4 100644
--- a/synapse/rest/media/v1/preview_html.py
+++ b/synapse/rest/media/v1/preview_html.py
@@ -30,6 +30,9 @@ _xml_encoding_match = re.compile(
)
_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I)
+# Certain elements aren't meant for display.
+ARIA_ROLES_TO_IGNORE = {"directory", "menu", "menubar", "toolbar"}
+
def _normalise_encoding(encoding: str) -> Optional[str]:
"""Use the Python codec's name as the normalised entry."""
@@ -174,13 +177,15 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
# "og:video:secure_url": "https://www.youtube.com/v/LXDBoHyjmtw?version=3",
og: Dict[str, Optional[str]] = {}
- for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"):
- if "content" in tag.attrib:
- # if we've got more than 50 tags, someone is taking the piss
- if len(og) >= 50:
- logger.warning("Skipping OG for page with too many 'og:' tags")
- return {}
- og[tag.attrib["property"]] = tag.attrib["content"]
+ for tag in tree.xpath(
+ "//*/meta[starts-with(@property, 'og:')][@content][not(@content='')]"
+ ):
+ # if we've got more than 50 tags, someone is taking the piss
+ if len(og) >= 50:
+ logger.warning("Skipping OG for page with too many 'og:' tags")
+ return {}
+
+ og[tag.attrib["property"]] = tag.attrib["content"]
# TODO: grab article: meta tags too, e.g.:
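The tightened XPath above filters out og: meta tags with a missing or empty content attribute at query time rather than in Python. A standalone check with lxml (the library Synapse uses for preview parsing); the sample HTML is illustrative:

    # Demonstrates the new predicate: tags without content, or with empty
    # content, are no longer returned at all.
    from lxml import etree

    html = (
        "<html><head>"
        "<meta property='og:title' content='Example'/>"
        "<meta property='og:image' content=''/>"
        "<meta property='og:type'/>"
        "</head></html>"
    )
    tree = etree.HTML(html)
    tags = tree.xpath(
        "//*/meta[starts-with(@property, 'og:')][@content][not(@content='')]"
    )
    assert [t.attrib["property"] for t in tags] == ["og:title"]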
@@ -192,21 +197,23 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
# "article:modified_time" content="2016-04-01T18:31:53+00:00" />
if "og:title" not in og:
- # do some basic spidering of the HTML
- title = tree.xpath("(//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1]")
- if title and title[0].text is not None:
- og["og:title"] = title[0].text.strip()
+ # Attempt to find a title from the title tag, or the biggest header on the page.
+ title = tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()")
+ if title:
+ og["og:title"] = title[0].strip()
else:
og["og:title"] = None
if "og:image" not in og:
- # TODO: extract a favicon failing all else
meta_image = tree.xpath(
- "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image']/@content"
+ "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image'][not(@content='')]/@content[1]"
)
+ # If a meta image is found, use it.
if meta_image:
og["og:image"] = meta_image[0]
else:
+ # Try to find images which are larger than 10px by 10px.
+ #
# TODO: consider inlined CSS styles as well as width & height attribs
images = tree.xpath("//img[@src][number(@width)>10][number(@height)>10]")
images = sorted(
@@ -215,17 +222,24 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]:
-1 * float(i.attrib["width"]) * float(i.attrib["height"])
),
)
+ # If no images were found, try to find *any* images.
if not images:
- images = tree.xpath("//img[@src]")
+ images = tree.xpath("//img[@src][1]")
if images:
og["og:image"] = images[0].attrib["src"]
+ # Finally, fallback to the favicon if nothing else.
+ else:
+ favicons = tree.xpath("//link[@href][contains(@rel, 'icon')]/@href[1]")
+ if favicons:
+ og["og:image"] = favicons[0]
+
if "og:description" not in og:
+ # Check the first meta description tag for content.
meta_description = tree.xpath(
- "//*/meta"
- "[translate(@name, 'DESCRIPTION', 'description')='description']"
- "/@content"
+ "//*/meta[translate(@name, 'DESCRIPTION', 'description')='description'][not(@content='')]/@content[1]"
)
+ # If a meta description is found with content, use it.
if meta_description:
og["og:description"] = meta_description[0]
else:
@@ -246,7 +260,9 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
Grabs any text nodes which are inside the <body/> tag, unless they are within
an HTML5 semantic markup tag (<header/>, <nav/>, <aside/>, <footer/>), or
- if they are within a <script/> or <style/> tag.
+ if they are within a <script/>, <svg/> or <style/> tag, or if they are within
+ a tag whose content is usually only shown to old browsers
+ (<iframe/>, <video/>, <canvas/>, <picture/>).
This is a very very very coarse approximation to a plain text render of the page.
@@ -268,6 +284,12 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
"script",
"noscript",
"style",
+ "svg",
+ "iframe",
+ "video",
+ "canvas",
+ "img",
+ "picture",
etree.Comment,
)
@@ -281,7 +303,7 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
def _iterate_over_text(
- tree: "etree.Element", *tags_to_ignore: Iterable[Union[str, "etree.Comment"]]
+ tree: "etree.Element", *tags_to_ignore: Union[str, "etree.Comment"]
) -> Generator[str, None, None]:
"""Iterate over the tree returning text nodes in a depth first fashion,
skipping text nodes inside certain tags.
@@ -298,6 +320,10 @@ def _iterate_over_text(
if isinstance(el, str):
yield el
elif el.tag not in tags_to_ignore:
+ # If the element isn't meant for display, ignore it.
+ if el.get("role") in ARIA_ROLES_TO_IGNORE:
+ continue
+
# el.text is the text before the first child, so we can immediately
# return it if the text exists.
if el.text:
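The new role check means elements marked with navigation-style ARIA roles are dropped from the plain-text description even when their tag is otherwise allowed. A small standalone demonstration of the same filtering idea (note: the real _iterate_over_text prunes the entire subtree on continue; this flat version only illustrates the role check itself):

    # Mimics the new check: skip elements whose role marks them as
    # navigation/toolbar chrome rather than page content.
    from lxml import etree

    ARIA_ROLES_TO_IGNORE = {"directory", "menu", "menubar", "toolbar"}

    tree = etree.HTML("<body><div role='menu'>Skip me</div><p>Keep me</p></body>")
    texts = [
        el.text
        for el in tree.iter()
        if isinstance(el.tag, str)  # skip comments
        and el.get("role") not in ARIA_ROLES_TO_IGNORE
        and el.text and el.text.strip()
    ]
    assert texts == ["Keep me"]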
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 50383bdb..54a849ea 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -586,12 +586,16 @@ class PreviewUrlResource(DirectServeJsonResource):
og: The Open Graph dictionary. This is modified with image information.
"""
# If there's no image or it is blank, there's nothing to do.
- if "og:image" not in og or not og["og:image"]:
+ if "og:image" not in og:
+ return
+
+ # Remove the raw image URL, this will be replaced with an MXC URL, if successful.
+ image_url = og.pop("og:image")
+ if not image_url:
return
# The image URL from the HTML might be relative to the previewed page,
# convert it to an URL which can be requested directly.
- image_url = og["og:image"]
url_parts = urlparse(image_url)
if url_parts.scheme != "data":
image_url = urljoin(media_info.uri, image_url)
@@ -599,7 +603,16 @@ class PreviewUrlResource(DirectServeJsonResource):
# FIXME: it might be cleaner to use the same flow as the main /preview_url
# request itself and benefit from the same caching etc. But for now we
# just rely on the caching on the master request to speed things up.
- image_info = await self._handle_url(image_url, user, allow_data_urls=True)
+ try:
+ image_info = await self._handle_url(image_url, user, allow_data_urls=True)
+ except Exception as e:
+ # Pre-caching the image failed, don't block the entire URL preview.
+ logger.warning(
+ "Pre-caching image failed during URL preview: %s errored with %s",
+ image_url,
+ e,
+ )
+ return
if _is_media(image_info.media_type):
# TODO: make sure we don't choke on white-on-transparent images
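The try/except above turns a failed image fetch into a degraded preview instead of a failed one, and pairs with popping og:image up front so no cleanup is needed on the failure path. The shape of that pattern, as a self-contained sketch (names and URLs hypothetical, not Synapse's API):

    # Sketch: optional enrichment that logs and degrades on failure.
    import asyncio
    import logging

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger(__name__)

    async def fetch_image_info(url: str) -> dict:
        raise RuntimeError("connection reset")  # simulate a failed fetch

    async def build_preview(og: dict) -> dict:
        # The image URL is popped up front and only re-added on success.
        image_url = og.pop("og:image", None)
        if image_url:
            try:
                og["og:image:info"] = await fetch_image_info(image_url)
            except Exception as e:
                logger.warning(
                    "Pre-caching image failed during URL preview: %s errored with %s",
                    image_url,
                    e,
                )
        return og

    assert asyncio.run(
        build_preview({"og:title": "t", "og:image": "http://x/y.png"})
    ) == {"og:title": "t"}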
@@ -611,13 +624,11 @@ class PreviewUrlResource(DirectServeJsonResource):
og["og:image:width"] = dims["width"]
og["og:image:height"] = dims["height"]
else:
- logger.warning("Couldn't get dims for %s", og["og:image"])
+ logger.warning("Couldn't get dims for %s", image_url)
og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
og["og:image:type"] = image_info.media_type
og["matrix:image:size"] = image_info.media_length
- else:
- del og["og:image"]
async def _handle_oembed_response(
self, url: str, media_info: MediaInfo, expiration_ms: int
@@ -668,7 +679,7 @@ class PreviewUrlResource(DirectServeJsonResource):
logger.debug("Running url preview cache expiry")
if not (await self.store.db_pool.updates.has_completed_background_updates()):
- logger.info("Still running DB updates; skipping expiry")
+ logger.debug("Still running DB updates; skipping url preview cache expiry")
return
def try_remove_parent_dirs(dirs: Iterable[str]) -> None:
@@ -688,7 +699,9 @@ class PreviewUrlResource(DirectServeJsonResource):
# Failed, skip deleting the rest of the parent dirs
if e.errno != errno.ENOTEMPTY:
logger.warning(
- "Failed to remove media directory: %r: %s", dir, e
+ "Failed to remove media directory while clearing url preview cache: %r: %s",
+ dir,
+ e,
)
break
@@ -703,7 +716,11 @@ class PreviewUrlResource(DirectServeJsonResource):
except FileNotFoundError:
pass # If the path doesn't exist, meh
except OSError as e:
- logger.warning("Failed to remove media: %r: %s", media_id, e)
+ logger.warning(
+ "Failed to remove media while clearing url preview cache: %r: %s",
+ media_id,
+ e,
+ )
continue
removed_media.append(media_id)
@@ -714,9 +731,11 @@ class PreviewUrlResource(DirectServeJsonResource):
await self.store.delete_url_cache(removed_media)
if removed_media:
- logger.info("Deleted %d entries from url cache", len(removed_media))
+ logger.debug(
+ "Deleted %d entries from url preview cache", len(removed_media)
+ )
else:
- logger.debug("No entries removed from url cache")
+ logger.debug("No entries removed from url preview cache")
# Now we delete old images associated with the url cache.
# These may be cached for a bit on the client (i.e., they
@@ -733,7 +752,9 @@ class PreviewUrlResource(DirectServeJsonResource):
except FileNotFoundError:
pass # If the path doesn't exist, meh
except OSError as e:
- logger.warning("Failed to remove media: %r: %s", media_id, e)
+ logger.warning(
+ "Failed to remove media from url preview cache: %r: %s", media_id, e
+ )
continue
dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
@@ -745,7 +766,9 @@ class PreviewUrlResource(DirectServeJsonResource):
except FileNotFoundError:
pass # If the path doesn't exist, meh
except OSError as e:
- logger.warning("Failed to remove media: %r: %s", media_id, e)
+ logger.warning(
+ "Failed to remove media from url preview cache: %r: %s", media_id, e
+ )
continue
removed_media.append(media_id)
@@ -758,9 +781,9 @@ class PreviewUrlResource(DirectServeJsonResource):
await self.store.delete_url_cache_media(removed_media)
if removed_media:
- logger.info("Deleted %d media from url cache", len(removed_media))
+ logger.debug("Deleted %d media from url preview cache", len(removed_media))
else:
- logger.debug("No media removed from url cache")
+ logger.debug("No media removed from url preview cache")
def _is_media(content_type: str) -> bool:
diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py
index 390491eb..9b93b9b4 100644
--- a/synapse/rest/media/v1/thumbnailer.py
+++ b/synapse/rest/media/v1/thumbnailer.py
@@ -14,7 +14,8 @@
# limitations under the License.
import logging
from io import BytesIO
-from typing import Tuple
+from types import TracebackType
+from typing import Optional, Tuple, Type
from PIL import Image
@@ -45,6 +46,9 @@ class Thumbnailer:
Image.MAX_IMAGE_PIXELS = max_image_pixels
def __init__(self, input_path: str):
+ # Have we closed the image?
+ self._closed = False
+
try:
self.image = Image.open(input_path)
except OSError as e:
@@ -89,7 +93,8 @@ class Thumbnailer:
# Safety: `transpose` takes an int rather than e.g. an IntEnum.
# self.transpose_method is set above to be a value in
# EXIF_TRANSPOSE_MAPPINGS, and that only contains correct values.
- self.image = self.image.transpose(self.transpose_method) # type: ignore[arg-type]
+ with self.image:
+ self.image = self.image.transpose(self.transpose_method) # type: ignore[arg-type]
self.width, self.height = self.image.size
self.transpose_method = None
# We don't need EXIF any more
@@ -122,9 +127,11 @@ class Thumbnailer:
# If the image has transparency, use RGBA instead.
if self.image.mode in ["1", "L", "P"]:
if self.image.info.get("transparency", None) is not None:
- self.image = self.image.convert("RGBA")
+ with self.image:
+ self.image = self.image.convert("RGBA")
else:
- self.image = self.image.convert("RGB")
+ with self.image:
+ self.image = self.image.convert("RGB")
return self.image.resize((width, height), Image.ANTIALIAS)
def scale(self, width: int, height: int, output_type: str) -> BytesIO:
@@ -133,8 +140,8 @@ class Thumbnailer:
Returns:
BytesIO: the bytes of the encoded image ready to be written to disk
"""
- scaled = self._resize(width, height)
- return self._encode_image(scaled, output_type)
+ with self._resize(width, height) as scaled:
+ return self._encode_image(scaled, output_type)
def crop(self, width: int, height: int, output_type: str) -> BytesIO:
"""Rescales and crops the image to the given dimensions preserving
@@ -151,18 +158,21 @@ class Thumbnailer:
BytesIO: the bytes of the encoded image ready to be written to disk
"""
if width * self.height > height * self.width:
+ scaled_width = width
scaled_height = (width * self.height) // self.width
- scaled_image = self._resize(width, scaled_height)
crop_top = (scaled_height - height) // 2
crop_bottom = height + crop_top
- cropped = scaled_image.crop((0, crop_top, width, crop_bottom))
+ crop = (0, crop_top, width, crop_bottom)
else:
scaled_width = (height * self.width) // self.height
- scaled_image = self._resize(scaled_width, height)
+ scaled_height = height
crop_left = (scaled_width - width) // 2
crop_right = width + crop_left
- cropped = scaled_image.crop((crop_left, 0, crop_right, height))
- return self._encode_image(cropped, output_type)
+ crop = (crop_left, 0, crop_right, height)
+
+ with self._resize(scaled_width, scaled_height) as scaled_image:
+ with scaled_image.crop(crop) as cropped:
+ return self._encode_image(cropped, output_type)
def _encode_image(self, output_image: Image.Image, output_type: str) -> BytesIO:
output_bytes_io = BytesIO()
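The refactored crop computes the scaled size and crop box before touching any image, which is what lets the resize and crop be wrapped in with-blocks above. Worked through with illustrative numbers (a 400x300 source cropped to 100x100):

    # Same arithmetic as Thumbnailer.crop, inlined for a concrete case.
    t_w, t_h = 100, 100      # requested thumbnail size
    src_w, src_h = 400, 300  # source image size

    if t_w * src_h > t_h * src_w:  # 30000 > 40000 -> False here
        # Source is taller than the target aspect: scale to width, crop top/bottom.
        scaled_w, scaled_h = t_w, (t_w * src_h) // src_w
        top = (scaled_h - t_h) // 2
        crop = (0, top, t_w, t_h + top)
    else:
        # Source is wider than the target aspect: scale to height, crop the sides.
        scaled_w, scaled_h = (t_h * src_w) // src_h, t_h  # 133 x 100
        left = (scaled_w - t_w) // 2                      # 16
        crop = (left, 0, t_w + left, t_h)                 # (16, 0, 116, 100)

    assert (scaled_w, scaled_h) == (133, 100)
    assert crop == (16, 0, 116, 100)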
@@ -171,3 +181,42 @@ class Thumbnailer:
output_image = output_image.convert("RGB")
output_image.save(output_bytes_io, fmt, quality=80)
return output_bytes_io
+
+ def close(self) -> None:
+ """Closes the underlying image file.
+
+ Once closed no other functions can be called.
+
+ Can be called multiple times.
+ """
+
+ if self._closed:
+ return
+
+ self._closed = True
+
+ # Since we run this on the finalizer then we need to handle `__init__`
+ # raising an exception before it can define `self.image`.
+ image = getattr(self, "image", None)
+ if image is None:
+ return
+
+ image.close()
+
+ def __enter__(self) -> "Thumbnailer":
+ """Make `Thumbnailer` a context manager that calls `close` on
+ `__exit__`.
+ """
+ return self
+
+ def __exit__(
+ self,
+ type: Optional[Type[BaseException]],
+ value: Optional[BaseException],
+ traceback: Optional[TracebackType],
+ ) -> None:
+ self.close()
+
+ def __del__(self) -> None:
+ # Make sure we actually do close the image, rather than leak data.
+ self.close()
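With close, __enter__/__exit__, and the __del__ safety net in place, callers can treat Thumbnailer as an ordinary context manager. A hypothetical usage sketch (the file paths are illustrative):

    # Sketch: the underlying PIL image is closed on normal exit, on error,
    # and, as a last resort, when the object is garbage-collected.
    with Thumbnailer("/tmp/example-input.png") as thumbnailer:
        scaled = thumbnailer.scale(320, 240, "image/png")
        with open("/tmp/example-thumb.png", "wb") as f:
            f.write(scaled.getvalue())
    # thumbnailer.close() has run here, even if scale() raised.

This mirrors how media_repository.py now wraps its defer_to_thread calls in "with thumbnailer:" blocks, so a failed thumbnail generation no longer leaks the open image.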