summaryrefslogtreecommitdiff
path: root/tests/rest/media
diff options
context:
space:
mode:
authorAndrej Shadura <andrewsh@debian.org>2022-06-19 15:20:00 +0200
committerAndrej Shadura <andrewsh@debian.org>2022-06-19 15:20:00 +0200
commit6dc64c92c6991f09910f3e6db368e6eeb4b1981e (patch)
treed8bab73ee460e0a96bbda9c5988d8025dbbe2eb3 /tests/rest/media
parentc2d3cd76c24f663449bfa209ac920305f0501d3a (diff)
New upstream version 1.61.0
Diffstat (limited to 'tests/rest/media')
-rw-r--r--tests/rest/media/test_media_retention.py321
-rw-r--r--tests/rest/media/v1/test_html_preview.py37
-rw-r--r--tests/rest/media/v1/test_url_preview.py35
3 files changed, 392 insertions, 1 deletions
diff --git a/tests/rest/media/test_media_retention.py b/tests/rest/media/test_media_retention.py
new file mode 100644
index 00000000..14af07c5
--- /dev/null
+++ b/tests/rest/media/test_media_retention.py
@@ -0,0 +1,321 @@
+# Copyright 2022 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import io
+from typing import Iterable, Optional, Tuple
+
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.rest import admin
+from synapse.rest.client import login, register, room
+from synapse.server import HomeServer
+from synapse.types import UserID
+from synapse.util import Clock
+
+from tests import unittest
+from tests.unittest import override_config
+from tests.utils import MockClock
+
+
+class MediaRetentionTestCase(unittest.HomeserverTestCase):
+
+ ONE_DAY_IN_MS = 24 * 60 * 60 * 1000
+ THIRTY_DAYS_IN_MS = 30 * ONE_DAY_IN_MS
+
+ servlets = [
+ room.register_servlets,
+ login.register_servlets,
+ register.register_servlets,
+ admin.register_servlets_for_client_rest_resource,
+ ]
+
+ def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+ # We need to be able to test advancing time in the homeserver, so we
+ # replace the test homeserver's default clock with a MockClock, which
+ # supports advancing time.
+ return self.setup_test_homeserver(clock=MockClock())
+
+ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+ self.remote_server_name = "remote.homeserver"
+ self.store = hs.get_datastores().main
+
+ # Create a user to upload media with
+ test_user_id = self.register_user("alice", "password")
+
+ # Inject media (recently accessed, old access, never accessed, old access
+ # quarantined media) into both the local store and the remote cache, plus
+ # one additional local media that is marked as protected from quarantine.
+ media_repository = hs.get_media_repository()
+ test_media_content = b"example string"
+
+ def _create_media_and_set_attributes(
+ last_accessed_ms: Optional[int],
+ is_quarantined: Optional[bool] = False,
+ is_protected: Optional[bool] = False,
+ ) -> str:
+ # "Upload" some media to the local media store
+ mxc_uri = self.get_success(
+ media_repository.create_content(
+ media_type="text/plain",
+ upload_name=None,
+ content=io.BytesIO(test_media_content),
+ content_length=len(test_media_content),
+ auth_user=UserID.from_string(test_user_id),
+ )
+ )
+
+ media_id = mxc_uri.split("/")[-1]
+
+ # Set the last recently accessed time for this media
+ if last_accessed_ms is not None:
+ self.get_success(
+ self.store.update_cached_last_access_time(
+ local_media=(media_id,),
+ remote_media=(),
+ time_ms=last_accessed_ms,
+ )
+ )
+
+ if is_quarantined:
+ # Mark this media as quarantined
+ self.get_success(
+ self.store.quarantine_media_by_id(
+ server_name=self.hs.config.server.server_name,
+ media_id=media_id,
+ quarantined_by="@theadmin:test",
+ )
+ )
+
+ if is_protected:
+ # Mark this media as protected from quarantine
+ self.get_success(
+ self.store.mark_local_media_as_safe(
+ media_id=media_id,
+ safe=True,
+ )
+ )
+
+ return media_id
+
+ def _cache_remote_media_and_set_attributes(
+ media_id: str,
+ last_accessed_ms: Optional[int],
+ is_quarantined: Optional[bool] = False,
+ ) -> str:
+ # Pretend to cache some remote media
+ self.get_success(
+ self.store.store_cached_remote_media(
+ origin=self.remote_server_name,
+ media_id=media_id,
+ media_type="text/plain",
+ media_length=1,
+ time_now_ms=clock.time_msec(),
+ upload_name="testfile.txt",
+ filesystem_id="abcdefg12345",
+ )
+ )
+
+ # Set the last recently accessed time for this media
+ if last_accessed_ms is not None:
+ self.get_success(
+ hs.get_datastores().main.update_cached_last_access_time(
+ local_media=(),
+ remote_media=((self.remote_server_name, media_id),),
+ time_ms=last_accessed_ms,
+ )
+ )
+
+ if is_quarantined:
+ # Mark this media as quarantined
+ self.get_success(
+ self.store.quarantine_media_by_id(
+ server_name=self.remote_server_name,
+ media_id=media_id,
+ quarantined_by="@theadmin:test",
+ )
+ )
+
+ return media_id
+
+ # Start with the local media store
+ self.local_recently_accessed_media = _create_media_and_set_attributes(
+ last_accessed_ms=self.THIRTY_DAYS_IN_MS,
+ )
+ self.local_not_recently_accessed_media = _create_media_and_set_attributes(
+ last_accessed_ms=self.ONE_DAY_IN_MS,
+ )
+ self.local_not_recently_accessed_quarantined_media = (
+ _create_media_and_set_attributes(
+ last_accessed_ms=self.ONE_DAY_IN_MS,
+ is_quarantined=True,
+ )
+ )
+ self.local_not_recently_accessed_protected_media = (
+ _create_media_and_set_attributes(
+ last_accessed_ms=self.ONE_DAY_IN_MS,
+ is_protected=True,
+ )
+ )
+ self.local_never_accessed_media = _create_media_and_set_attributes(
+ last_accessed_ms=None,
+ )
+
+ # And now the remote media store
+ self.remote_recently_accessed_media = _cache_remote_media_and_set_attributes(
+ media_id="a",
+ last_accessed_ms=self.THIRTY_DAYS_IN_MS,
+ )
+ self.remote_not_recently_accessed_media = (
+ _cache_remote_media_and_set_attributes(
+ media_id="b",
+ last_accessed_ms=self.ONE_DAY_IN_MS,
+ )
+ )
+ self.remote_not_recently_accessed_quarantined_media = (
+ _cache_remote_media_and_set_attributes(
+ media_id="c",
+ last_accessed_ms=self.ONE_DAY_IN_MS,
+ is_quarantined=True,
+ )
+ )
+ # Remote media will always have a "last accessed" attribute, as it would not
+ # be fetched from the remote homeserver unless instigated by a user.
+
+ @override_config(
+ {
+ "media_retention": {
+ # Enable retention for local media
+ "local_media_lifetime": "30d"
+ # Cached remote media should not be purged
+ }
+ }
+ )
+ def test_local_media_retention(self) -> None:
+ """
+ Tests that local media that have not been accessed recently is purged, while
+ cached remote media is unaffected.
+ """
+ # Advance 31 days (in seconds)
+ self.reactor.advance(31 * 24 * 60 * 60)
+
+ # Check that media has been correctly purged.
+ # Local media accessed <30 days ago should still exist.
+ # Remote media should be unaffected.
+ self._assert_if_mxc_uris_purged(
+ purged=[
+ (
+ self.hs.config.server.server_name,
+ self.local_not_recently_accessed_media,
+ ),
+ (self.hs.config.server.server_name, self.local_never_accessed_media),
+ ],
+ not_purged=[
+ (self.hs.config.server.server_name, self.local_recently_accessed_media),
+ (
+ self.hs.config.server.server_name,
+ self.local_not_recently_accessed_quarantined_media,
+ ),
+ (
+ self.hs.config.server.server_name,
+ self.local_not_recently_accessed_protected_media,
+ ),
+ (self.remote_server_name, self.remote_recently_accessed_media),
+ (self.remote_server_name, self.remote_not_recently_accessed_media),
+ (
+ self.remote_server_name,
+ self.remote_not_recently_accessed_quarantined_media,
+ ),
+ ],
+ )
+
+ @override_config(
+ {
+ "media_retention": {
+ # Enable retention for cached remote media
+ "remote_media_lifetime": "30d"
+ # Local media should not be purged
+ }
+ }
+ )
+ def test_remote_media_cache_retention(self) -> None:
+ """
+ Tests that entries from the remote media cache that have not been accessed
+ recently is purged, while local media is unaffected.
+ """
+ # Advance 31 days (in seconds)
+ self.reactor.advance(31 * 24 * 60 * 60)
+
+ # Check that media has been correctly purged.
+ # Local media should be unaffected.
+ # Remote media accessed <30 days ago should still exist.
+ self._assert_if_mxc_uris_purged(
+ purged=[
+ (self.remote_server_name, self.remote_not_recently_accessed_media),
+ ],
+ not_purged=[
+ (self.remote_server_name, self.remote_recently_accessed_media),
+ (self.hs.config.server.server_name, self.local_recently_accessed_media),
+ (
+ self.hs.config.server.server_name,
+ self.local_not_recently_accessed_media,
+ ),
+ (
+ self.hs.config.server.server_name,
+ self.local_not_recently_accessed_quarantined_media,
+ ),
+ (
+ self.hs.config.server.server_name,
+ self.local_not_recently_accessed_protected_media,
+ ),
+ (
+ self.remote_server_name,
+ self.remote_not_recently_accessed_quarantined_media,
+ ),
+ (self.hs.config.server.server_name, self.local_never_accessed_media),
+ ],
+ )
+
+ def _assert_if_mxc_uris_purged(
+ self, purged: Iterable[Tuple[str, str]], not_purged: Iterable[Tuple[str, str]]
+ ) -> None:
+ def _assert_mxc_uri_purge_state(
+ server_name: str, media_id: str, expect_purged: bool
+ ) -> None:
+ """Given an MXC URI, assert whether it has been purged or not."""
+ if server_name == self.hs.config.server.server_name:
+ found_media_dict = self.get_success(
+ self.store.get_local_media(media_id)
+ )
+ else:
+ found_media_dict = self.get_success(
+ self.store.get_cached_remote_media(server_name, media_id)
+ )
+
+ mxc_uri = f"mxc://{server_name}/{media_id}"
+
+ if expect_purged:
+ self.assertIsNone(
+ found_media_dict, msg=f"{mxc_uri} unexpectedly not purged"
+ )
+ else:
+ self.assertIsNotNone(
+ found_media_dict,
+ msg=f"{mxc_uri} unexpectedly purged",
+ )
+
+ # Assert that the given MXC URIs have either been correctly purged or not.
+ for server_name, media_id in purged:
+ _assert_mxc_uri_purge_state(server_name, media_id, expect_purged=True)
+ for server_name, media_id in not_purged:
+ _assert_mxc_uri_purge_state(server_name, media_id, expect_purged=False)
diff --git a/tests/rest/media/v1/test_html_preview.py b/tests/rest/media/v1/test_html_preview.py
index 62e30881..ea9e5889 100644
--- a/tests/rest/media/v1/test_html_preview.py
+++ b/tests/rest/media/v1/test_html_preview.py
@@ -145,7 +145,7 @@ class SummarizeTestCase(unittest.TestCase):
)
-class CalcOgTestCase(unittest.TestCase):
+class OpenGraphFromHtmlTestCase(unittest.TestCase):
if not lxml:
skip = "url preview feature requires lxml"
@@ -235,6 +235,21 @@ class CalcOgTestCase(unittest.TestCase):
self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
+ # Another variant is a title with no content.
+ html = b"""
+ <html>
+ <head><title></title></head>
+ <body>
+ <h1>Title</h1>
+ </body>
+ </html>
+ """
+
+ tree = decode_body(html, "http://example.com/test.html")
+ og = parse_html_to_open_graph(tree)
+
+ self.assertEqual(og, {"og:title": "Title", "og:description": "Title"})
+
def test_h1_as_title(self) -> None:
html = b"""
<html>
@@ -250,6 +265,26 @@ class CalcOgTestCase(unittest.TestCase):
self.assertEqual(og, {"og:title": "Title", "og:description": "Some text."})
+ def test_empty_description(self) -> None:
+ """Description tags with empty content should be ignored."""
+ html = b"""
+ <html>
+ <meta property="og:description" content=""/>
+ <meta property="og:description"/>
+ <meta name="description" content=""/>
+ <meta name="description"/>
+ <meta name="description" content="Finally!"/>
+ <body>
+ <h1>Title</h1>
+ </body>
+ </html>
+ """
+
+ tree = decode_body(html, "http://example.com/test.html")
+ og = parse_html_to_open_graph(tree)
+
+ self.assertEqual(og, {"og:title": "Title", "og:description": "Finally!"})
+
def test_missing_title_and_broken_h1(self) -> None:
html = b"""
<html>
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
index 3b24d0ac..2c321f8d 100644
--- a/tests/rest/media/v1/test_url_preview.py
+++ b/tests/rest/media/v1/test_url_preview.py
@@ -656,6 +656,41 @@ class URLPreviewTests(unittest.HomeserverTestCase):
server.data,
)
+ def test_nonexistent_image(self) -> None:
+ """If the preview image doesn't exist, ensure some data is returned."""
+ self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+ end_content = (
+ b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
+ )
+
+ channel = self.make_request(
+ "GET",
+ "preview_url?url=http://matrix.org",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+ )
+ % (len(end_content),)
+ + end_content
+ )
+
+ self.pump()
+ self.assertEqual(channel.code, 200)
+
+ # The image should not be in the result.
+ self.assertNotIn("og:image", channel.json_body)
+
def test_data_url(self) -> None:
"""
Requesting to preview a data URL is not supported.