diff options
author | Andrej Shadura <andrewsh@debian.org> | 2022-06-19 15:20:00 +0200 |
---|---|---|
committer | Andrej Shadura <andrewsh@debian.org> | 2022-06-19 15:20:00 +0200 |
commit | 6dc64c92c6991f09910f3e6db368e6eeb4b1981e (patch) | |
tree | d8bab73ee460e0a96bbda9c5988d8025dbbe2eb3 /synapse/rest/media | |
parent | c2d3cd76c24f663449bfa209ac920305f0501d3a (diff) |
New upstream version 1.61.0
Diffstat (limited to 'synapse/rest/media')
-rw-r--r-- | synapse/rest/media/v1/media_repository.py | 358 | ||||
-rw-r--r-- | synapse/rest/media/v1/preview_html.py | 64 | ||||
-rw-r--r-- | synapse/rest/media/v1/preview_url_resource.py | 53 | ||||
-rw-r--r-- | synapse/rest/media/v1/thumbnailer.py | 71 |
4 files changed, 370 insertions, 176 deletions
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 3e5d6c62..7435fd91 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -65,7 +65,12 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 +# How often to run the background job to update the "recently accessed" +# attribute of local and remote media. +UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 # 1 minute +# How often to run the background job to check for local and remote media +# that should be purged according to the configured media retention settings. +MEDIA_RETENTION_CHECK_PERIOD_MS = 60 * 60 * 1000 # 1 hour class MediaRepository: @@ -122,11 +127,36 @@ class MediaRepository: self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS ) + # Media retention configuration options + self._media_retention_local_media_lifetime_ms = ( + hs.config.media.media_retention_local_media_lifetime_ms + ) + self._media_retention_remote_media_lifetime_ms = ( + hs.config.media.media_retention_remote_media_lifetime_ms + ) + + # Check whether local or remote media retention is configured + if ( + hs.config.media.media_retention_local_media_lifetime_ms is not None + or hs.config.media.media_retention_remote_media_lifetime_ms is not None + ): + # Run the background job to apply media retention rules routinely, + # with the duration between runs dictated by the homeserver config. + self.clock.looping_call( + self._start_apply_media_retention_rules, + MEDIA_RETENTION_CHECK_PERIOD_MS, + ) + def _start_update_recently_accessed(self) -> Deferred: return run_as_background_process( "update_recently_accessed_media", self._update_recently_accessed ) + def _start_apply_media_retention_rules(self) -> Deferred: + return run_as_background_process( + "apply_media_retention_rules", self._apply_media_retention_rules + ) + async def _update_recently_accessed(self) -> None: remote_media = self.recently_accessed_remotes self.recently_accessed_remotes = set() @@ -557,15 +587,16 @@ class MediaRepository: ) return None - t_byte_source = await defer_to_thread( - self.hs.get_reactor(), - self._generate_thumbnail, - thumbnailer, - t_width, - t_height, - t_method, - t_type, - ) + with thumbnailer: + t_byte_source = await defer_to_thread( + self.hs.get_reactor(), + self._generate_thumbnail, + thumbnailer, + t_width, + t_height, + t_method, + t_type, + ) if t_byte_source: try: @@ -627,15 +658,16 @@ class MediaRepository: ) return None - t_byte_source = await defer_to_thread( - self.hs.get_reactor(), - self._generate_thumbnail, - thumbnailer, - t_width, - t_height, - t_method, - t_type, - ) + with thumbnailer: + t_byte_source = await defer_to_thread( + self.hs.get_reactor(), + self._generate_thumbnail, + thumbnailer, + t_width, + t_height, + t_method, + t_type, + ) if t_byte_source: try: @@ -719,124 +751,182 @@ class MediaRepository: ) return None - m_width = thumbnailer.width - m_height = thumbnailer.height + with thumbnailer: + m_width = thumbnailer.width + m_height = thumbnailer.height - if m_width * m_height >= self.max_image_pixels: - logger.info( - "Image too large to thumbnail %r x %r > %r", - m_width, - m_height, - self.max_image_pixels, - ) - return None - - if thumbnailer.transpose_method is not None: - m_width, m_height = await defer_to_thread( - self.hs.get_reactor(), thumbnailer.transpose - ) - - # We deduplicate the thumbnail sizes by ignoring the cropped versions if - # they have the same dimensions of a scaled one. - thumbnails: Dict[Tuple[int, int, str], str] = {} - for requirement in requirements: - if requirement.method == "crop": - thumbnails.setdefault( - (requirement.width, requirement.height, requirement.media_type), - requirement.method, - ) - elif requirement.method == "scale": - t_width, t_height = thumbnailer.aspect( - requirement.width, requirement.height + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r", + m_width, + m_height, + self.max_image_pixels, ) - t_width = min(m_width, t_width) - t_height = min(m_height, t_height) - thumbnails[ - (t_width, t_height, requirement.media_type) - ] = requirement.method - - # Now we generate the thumbnails for each dimension, store it - for (t_width, t_height, t_type), t_method in thumbnails.items(): - # Generate the thumbnail - if t_method == "crop": - t_byte_source = await defer_to_thread( - self.hs.get_reactor(), thumbnailer.crop, t_width, t_height, t_type + return None + + if thumbnailer.transpose_method is not None: + m_width, m_height = await defer_to_thread( + self.hs.get_reactor(), thumbnailer.transpose ) - elif t_method == "scale": - t_byte_source = await defer_to_thread( - self.hs.get_reactor(), thumbnailer.scale, t_width, t_height, t_type + + # We deduplicate the thumbnail sizes by ignoring the cropped versions if + # they have the same dimensions of a scaled one. + thumbnails: Dict[Tuple[int, int, str], str] = {} + for requirement in requirements: + if requirement.method == "crop": + thumbnails.setdefault( + (requirement.width, requirement.height, requirement.media_type), + requirement.method, + ) + elif requirement.method == "scale": + t_width, t_height = thumbnailer.aspect( + requirement.width, requirement.height + ) + t_width = min(m_width, t_width) + t_height = min(m_height, t_height) + thumbnails[ + (t_width, t_height, requirement.media_type) + ] = requirement.method + + # Now we generate the thumbnails for each dimension, store it + for (t_width, t_height, t_type), t_method in thumbnails.items(): + # Generate the thumbnail + if t_method == "crop": + t_byte_source = await defer_to_thread( + self.hs.get_reactor(), + thumbnailer.crop, + t_width, + t_height, + t_type, + ) + elif t_method == "scale": + t_byte_source = await defer_to_thread( + self.hs.get_reactor(), + thumbnailer.scale, + t_width, + t_height, + t_type, + ) + else: + logger.error("Unrecognized method: %r", t_method) + continue + + if not t_byte_source: + continue + + file_info = FileInfo( + server_name=server_name, + file_id=file_id, + url_cache=url_cache, + thumbnail=ThumbnailInfo( + width=t_width, + height=t_height, + method=t_method, + type=t_type, + ), ) - else: - logger.error("Unrecognized method: %r", t_method) - continue - - if not t_byte_source: - continue - - file_info = FileInfo( - server_name=server_name, - file_id=file_id, - url_cache=url_cache, - thumbnail=ThumbnailInfo( - width=t_width, - height=t_height, - method=t_method, - type=t_type, - ), - ) - with self.media_storage.store_into_file(file_info) as (f, fname, finish): - try: - await self.media_storage.write_to_file(t_byte_source, f) - await finish() - finally: - t_byte_source.close() - - t_len = os.path.getsize(fname) - - # Write to database - if server_name: - # Multiple remote media download requests can race (when - # using multiple media repos), so this may throw a violation - # constraint exception. If it does we'll delete the newly - # generated thumbnail from disk (as we're in the ctx - # manager). - # - # However: we've already called `finish()` so we may have - # also written to the storage providers. This is preferable - # to the alternative where we call `finish()` *after* this, - # where we could end up having an entry in the DB but fail - # to write the files to the storage providers. + with self.media_storage.store_into_file(file_info) as ( + f, + fname, + finish, + ): try: - await self.store.store_remote_media_thumbnail( - server_name, - media_id, - file_id, - t_width, - t_height, - t_type, - t_method, - t_len, - ) - except Exception as e: - thumbnail_exists = await self.store.get_remote_media_thumbnail( - server_name, - media_id, - t_width, - t_height, - t_type, + await self.media_storage.write_to_file(t_byte_source, f) + await finish() + finally: + t_byte_source.close() + + t_len = os.path.getsize(fname) + + # Write to database + if server_name: + # Multiple remote media download requests can race (when + # using multiple media repos), so this may throw a violation + # constraint exception. If it does we'll delete the newly + # generated thumbnail from disk (as we're in the ctx + # manager). + # + # However: we've already called `finish()` so we may have + # also written to the storage providers. This is preferable + # to the alternative where we call `finish()` *after* this, + # where we could end up having an entry in the DB but fail + # to write the files to the storage providers. + try: + await self.store.store_remote_media_thumbnail( + server_name, + media_id, + file_id, + t_width, + t_height, + t_type, + t_method, + t_len, + ) + except Exception as e: + thumbnail_exists = ( + await self.store.get_remote_media_thumbnail( + server_name, + media_id, + t_width, + t_height, + t_type, + ) + ) + if not thumbnail_exists: + raise e + else: + await self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len ) - if not thumbnail_exists: - raise e - else: - await self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len - ) return {"width": m_width, "height": m_height} + async def _apply_media_retention_rules(self) -> None: + """ + Purge old local and remote media according to the media retention rules + defined in the homeserver config. + """ + # Purge remote media + if self._media_retention_remote_media_lifetime_ms is not None: + # Calculate a threshold timestamp derived from the configured lifetime. Any + # media that has not been accessed since this timestamp will be removed. + remote_media_threshold_timestamp_ms = ( + self.clock.time_msec() - self._media_retention_remote_media_lifetime_ms + ) + + logger.info( + "Purging remote media last accessed before" + f" {remote_media_threshold_timestamp_ms}" + ) + + await self.delete_old_remote_media( + before_ts=remote_media_threshold_timestamp_ms + ) + + # And now do the same for local media + if self._media_retention_local_media_lifetime_ms is not None: + # This works the same as the remote media threshold + local_media_threshold_timestamp_ms = ( + self.clock.time_msec() - self._media_retention_local_media_lifetime_ms + ) + + logger.info( + "Purging local media last accessed before" + f" {local_media_threshold_timestamp_ms}" + ) + + await self.delete_old_local_media( + before_ts=local_media_threshold_timestamp_ms, + keep_profiles=True, + delete_quarantined_media=False, + delete_protected_media=False, + ) + async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]: - old_media = await self.store.get_remote_media_before(before_ts) + old_media = await self.store.get_remote_media_ids( + before_ts, include_quarantined_media=False + ) deleted = 0 @@ -889,6 +979,8 @@ class MediaRepository: before_ts: int, size_gt: int = 0, keep_profiles: bool = True, + delete_quarantined_media: bool = False, + delete_protected_media: bool = False, ) -> Tuple[List[str], int]: """ Delete local or remote media from this server by size and timestamp. Removes @@ -896,18 +988,22 @@ class MediaRepository: Args: before_ts: Unix timestamp in ms. - Files that were last used before this timestamp will be deleted - size_gt: Size of the media in bytes. Files that are larger will be deleted + Files that were last used before this timestamp will be deleted. + size_gt: Size of the media in bytes. Files that are larger will be deleted. keep_profiles: Switch to delete also files that are still used in image data - (e.g user profile, room avatar) - If false these files will be deleted + (e.g user profile, room avatar). If false these files will be deleted. + delete_quarantined_media: If True, media marked as quarantined will be deleted. + delete_protected_media: If True, media marked as protected will be deleted. + Returns: A tuple of (list of deleted media IDs, total deleted media IDs). """ - old_media = await self.store.get_local_media_before( + old_media = await self.store.get_local_media_ids( before_ts, size_gt, keep_profiles, + include_quarantined_media=delete_quarantined_media, + include_protected_media=delete_protected_media, ) return await self._remove_local_media_from_disk(old_media) diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py index ca73965f..ed8f21a4 100644 --- a/synapse/rest/media/v1/preview_html.py +++ b/synapse/rest/media/v1/preview_html.py @@ -30,6 +30,9 @@ _xml_encoding_match = re.compile( ) _content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I) +# Certain elements aren't meant for display. +ARIA_ROLES_TO_IGNORE = {"directory", "menu", "menubar", "toolbar"} + def _normalise_encoding(encoding: str) -> Optional[str]: """Use the Python codec's name as the normalised entry.""" @@ -174,13 +177,15 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]: # "og:video:secure_url": "https://www.youtube.com/v/LXDBoHyjmtw?version=3", og: Dict[str, Optional[str]] = {} - for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"): - if "content" in tag.attrib: - # if we've got more than 50 tags, someone is taking the piss - if len(og) >= 50: - logger.warning("Skipping OG for page with too many 'og:' tags") - return {} - og[tag.attrib["property"]] = tag.attrib["content"] + for tag in tree.xpath( + "//*/meta[starts-with(@property, 'og:')][@content][not(@content='')]" + ): + # if we've got more than 50 tags, someone is taking the piss + if len(og) >= 50: + logger.warning("Skipping OG for page with too many 'og:' tags") + return {} + + og[tag.attrib["property"]] = tag.attrib["content"] # TODO: grab article: meta tags too, e.g.: @@ -192,21 +197,23 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]: # "article:modified_time" content="2016-04-01T18:31:53+00:00" /> if "og:title" not in og: - # do some basic spidering of the HTML - title = tree.xpath("(//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1]") - if title and title[0].text is not None: - og["og:title"] = title[0].text.strip() + # Attempt to find a title from the title tag, or the biggest header on the page. + title = tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()") + if title: + og["og:title"] = title[0].strip() else: og["og:title"] = None if "og:image" not in og: - # TODO: extract a favicon failing all else meta_image = tree.xpath( - "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image']/@content" + "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image'][not(@content='')]/@content[1]" ) + # If a meta image is found, use it. if meta_image: og["og:image"] = meta_image[0] else: + # Try to find images which are larger than 10px by 10px. + # # TODO: consider inlined CSS styles as well as width & height attribs images = tree.xpath("//img[@src][number(@width)>10][number(@height)>10]") images = sorted( @@ -215,17 +222,24 @@ def parse_html_to_open_graph(tree: "etree.Element") -> Dict[str, Optional[str]]: -1 * float(i.attrib["width"]) * float(i.attrib["height"]) ), ) + # If no images were found, try to find *any* images. if not images: - images = tree.xpath("//img[@src]") + images = tree.xpath("//img[@src][1]") if images: og["og:image"] = images[0].attrib["src"] + # Finally, fallback to the favicon if nothing else. + else: + favicons = tree.xpath("//link[@href][contains(@rel, 'icon')]/@href[1]") + if favicons: + og["og:image"] = favicons[0] + if "og:description" not in og: + # Check the first meta description tag for content. meta_description = tree.xpath( - "//*/meta" - "[translate(@name, 'DESCRIPTION', 'description')='description']" - "/@content" + "//*/meta[translate(@name, 'DESCRIPTION', 'description')='description'][not(@content='')]/@content[1]" ) + # If a meta description is found with content, use it. if meta_description: og["og:description"] = meta_description[0] else: @@ -246,7 +260,9 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]: Grabs any text nodes which are inside the <body/> tag, unless they are within an HTML5 semantic markup tag (<header/>, <nav/>, <aside/>, <footer/>), or - if they are within a <script/> or <style/> tag. + if they are within a <script/>, <svg/> or <style/> tag, or if they are within + a tag whose content is usually only shown to old browsers + (<iframe/>, <video/>, <canvas/>, <picture/>). This is a very very very coarse approximation to a plain text render of the page. @@ -268,6 +284,12 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]: "script", "noscript", "style", + "svg", + "iframe", + "video", + "canvas", + "img", + "picture", etree.Comment, ) @@ -281,7 +303,7 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]: def _iterate_over_text( - tree: "etree.Element", *tags_to_ignore: Iterable[Union[str, "etree.Comment"]] + tree: "etree.Element", *tags_to_ignore: Union[str, "etree.Comment"] ) -> Generator[str, None, None]: """Iterate over the tree returning text nodes in a depth first fashion, skipping text nodes inside certain tags. @@ -298,6 +320,10 @@ def _iterate_over_text( if isinstance(el, str): yield el elif el.tag not in tags_to_ignore: + # If the element isn't meant for display, ignore it. + if el.get("role") in ARIA_ROLES_TO_IGNORE: + continue + # el.text is the text before the first child, so we can immediately # return it if the text exists. if el.text: diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 50383bdb..54a849ea 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -586,12 +586,16 @@ class PreviewUrlResource(DirectServeJsonResource): og: The Open Graph dictionary. This is modified with image information. """ # If there's no image or it is blank, there's nothing to do. - if "og:image" not in og or not og["og:image"]: + if "og:image" not in og: + return + + # Remove the raw image URL, this will be replaced with an MXC URL, if successful. + image_url = og.pop("og:image") + if not image_url: return # The image URL from the HTML might be relative to the previewed page, # convert it to an URL which can be requested directly. - image_url = og["og:image"] url_parts = urlparse(image_url) if url_parts.scheme != "data": image_url = urljoin(media_info.uri, image_url) @@ -599,7 +603,16 @@ class PreviewUrlResource(DirectServeJsonResource): # FIXME: it might be cleaner to use the same flow as the main /preview_url # request itself and benefit from the same caching etc. But for now we # just rely on the caching on the master request to speed things up. - image_info = await self._handle_url(image_url, user, allow_data_urls=True) + try: + image_info = await self._handle_url(image_url, user, allow_data_urls=True) + except Exception as e: + # Pre-caching the image failed, don't block the entire URL preview. + logger.warning( + "Pre-caching image failed during URL preview: %s errored with %s", + image_url, + e, + ) + return if _is_media(image_info.media_type): # TODO: make sure we don't choke on white-on-transparent images @@ -611,13 +624,11 @@ class PreviewUrlResource(DirectServeJsonResource): og["og:image:width"] = dims["width"] og["og:image:height"] = dims["height"] else: - logger.warning("Couldn't get dims for %s", og["og:image"]) + logger.warning("Couldn't get dims for %s", image_url) og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}" og["og:image:type"] = image_info.media_type og["matrix:image:size"] = image_info.media_length - else: - del og["og:image"] async def _handle_oembed_response( self, url: str, media_info: MediaInfo, expiration_ms: int @@ -668,7 +679,7 @@ class PreviewUrlResource(DirectServeJsonResource): logger.debug("Running url preview cache expiry") if not (await self.store.db_pool.updates.has_completed_background_updates()): - logger.info("Still running DB updates; skipping expiry") + logger.debug("Still running DB updates; skipping url preview cache expiry") return def try_remove_parent_dirs(dirs: Iterable[str]) -> None: @@ -688,7 +699,9 @@ class PreviewUrlResource(DirectServeJsonResource): # Failed, skip deleting the rest of the parent dirs if e.errno != errno.ENOTEMPTY: logger.warning( - "Failed to remove media directory: %r: %s", dir, e + "Failed to remove media directory while clearing url preview cache: %r: %s", + dir, + e, ) break @@ -703,7 +716,11 @@ class PreviewUrlResource(DirectServeJsonResource): except FileNotFoundError: pass # If the path doesn't exist, meh except OSError as e: - logger.warning("Failed to remove media: %r: %s", media_id, e) + logger.warning( + "Failed to remove media while clearing url preview cache: %r: %s", + media_id, + e, + ) continue removed_media.append(media_id) @@ -714,9 +731,11 @@ class PreviewUrlResource(DirectServeJsonResource): await self.store.delete_url_cache(removed_media) if removed_media: - logger.info("Deleted %d entries from url cache", len(removed_media)) + logger.debug( + "Deleted %d entries from url preview cache", len(removed_media) + ) else: - logger.debug("No entries removed from url cache") + logger.debug("No entries removed from url preview cache") # Now we delete old images associated with the url cache. # These may be cached for a bit on the client (i.e., they @@ -733,7 +752,9 @@ class PreviewUrlResource(DirectServeJsonResource): except FileNotFoundError: pass # If the path doesn't exist, meh except OSError as e: - logger.warning("Failed to remove media: %r: %s", media_id, e) + logger.warning( + "Failed to remove media from url preview cache: %r: %s", media_id, e + ) continue dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) @@ -745,7 +766,9 @@ class PreviewUrlResource(DirectServeJsonResource): except FileNotFoundError: pass # If the path doesn't exist, meh except OSError as e: - logger.warning("Failed to remove media: %r: %s", media_id, e) + logger.warning( + "Failed to remove media from url preview cache: %r: %s", media_id, e + ) continue removed_media.append(media_id) @@ -758,9 +781,9 @@ class PreviewUrlResource(DirectServeJsonResource): await self.store.delete_url_cache_media(removed_media) if removed_media: - logger.info("Deleted %d media from url cache", len(removed_media)) + logger.debug("Deleted %d media from url preview cache", len(removed_media)) else: - logger.debug("No media removed from url cache") + logger.debug("No media removed from url preview cache") def _is_media(content_type: str) -> bool: diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 390491eb..9b93b9b4 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -14,7 +14,8 @@ # limitations under the License. import logging from io import BytesIO -from typing import Tuple +from types import TracebackType +from typing import Optional, Tuple, Type from PIL import Image @@ -45,6 +46,9 @@ class Thumbnailer: Image.MAX_IMAGE_PIXELS = max_image_pixels def __init__(self, input_path: str): + # Have we closed the image? + self._closed = False + try: self.image = Image.open(input_path) except OSError as e: @@ -89,7 +93,8 @@ class Thumbnailer: # Safety: `transpose` takes an int rather than e.g. an IntEnum. # self.transpose_method is set above to be a value in # EXIF_TRANSPOSE_MAPPINGS, and that only contains correct values. - self.image = self.image.transpose(self.transpose_method) # type: ignore[arg-type] + with self.image: + self.image = self.image.transpose(self.transpose_method) # type: ignore[arg-type] self.width, self.height = self.image.size self.transpose_method = None # We don't need EXIF any more @@ -122,9 +127,11 @@ class Thumbnailer: # If the image has transparency, use RGBA instead. if self.image.mode in ["1", "L", "P"]: if self.image.info.get("transparency", None) is not None: - self.image = self.image.convert("RGBA") + with self.image: + self.image = self.image.convert("RGBA") else: - self.image = self.image.convert("RGB") + with self.image: + self.image = self.image.convert("RGB") return self.image.resize((width, height), Image.ANTIALIAS) def scale(self, width: int, height: int, output_type: str) -> BytesIO: @@ -133,8 +140,8 @@ class Thumbnailer: Returns: BytesIO: the bytes of the encoded image ready to be written to disk """ - scaled = self._resize(width, height) - return self._encode_image(scaled, output_type) + with self._resize(width, height) as scaled: + return self._encode_image(scaled, output_type) def crop(self, width: int, height: int, output_type: str) -> BytesIO: """Rescales and crops the image to the given dimensions preserving @@ -151,18 +158,21 @@ class Thumbnailer: BytesIO: the bytes of the encoded image ready to be written to disk """ if width * self.height > height * self.width: + scaled_width = width scaled_height = (width * self.height) // self.width - scaled_image = self._resize(width, scaled_height) crop_top = (scaled_height - height) // 2 crop_bottom = height + crop_top - cropped = scaled_image.crop((0, crop_top, width, crop_bottom)) + crop = (0, crop_top, width, crop_bottom) else: scaled_width = (height * self.width) // self.height - scaled_image = self._resize(scaled_width, height) + scaled_height = height crop_left = (scaled_width - width) // 2 crop_right = width + crop_left - cropped = scaled_image.crop((crop_left, 0, crop_right, height)) - return self._encode_image(cropped, output_type) + crop = (crop_left, 0, crop_right, height) + + with self._resize(scaled_width, scaled_height) as scaled_image: + with scaled_image.crop(crop) as cropped: + return self._encode_image(cropped, output_type) def _encode_image(self, output_image: Image.Image, output_type: str) -> BytesIO: output_bytes_io = BytesIO() @@ -171,3 +181,42 @@ class Thumbnailer: output_image = output_image.convert("RGB") output_image.save(output_bytes_io, fmt, quality=80) return output_bytes_io + + def close(self) -> None: + """Closes the underlying image file. + + Once closed no other functions can be called. + + Can be called multiple times. + """ + + if self._closed: + return + + self._closed = True + + # Since we run this on the finalizer then we need to handle `__init__` + # raising an exception before it can define `self.image`. + image = getattr(self, "image", None) + if image is None: + return + + image.close() + + def __enter__(self) -> "Thumbnailer": + """Make `Thumbnailer` a context manager that calls `close` on + `__exit__`. + """ + return self + + def __exit__( + self, + type: Optional[Type[BaseException]], + value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> None: + self.close() + + def __del__(self) -> None: + # Make sure we actually do close the image, rather than leak data. + self.close() |