author    Andrej Shadura <andrewsh@debian.org>  2020-08-19 21:17:45 +0200
committer Andrej Shadura <andrewsh@debian.org>  2020-08-19 21:17:45 +0200
commit    84ab8cd16f9d3f03971b5fbb5e8b89480f15c1f5 (patch)
tree      3321b710a139c23925928ca2a7f147173959e765 /synapse/storage/databases
parent    e59dea7b4cbe3528d78b694713505a5da00996b1 (diff)
New upstream version 1.19.0
Diffstat (limited to 'synapse/storage/databases')
-rw-r--r--  synapse/storage/databases/__init__.py | 101
-rw-r--r--  synapse/storage/databases/main/__init__.py | 596
-rw-r--r--  synapse/storage/databases/main/account_data.py | 438
-rw-r--r--  synapse/storage/databases/main/appservice.py | 366
-rw-r--r--  synapse/storage/databases/main/cache.py | 306
-rw-r--r--  synapse/storage/databases/main/censor_events.py | 207
-rw-r--r--  synapse/storage/databases/main/client_ips.py | 573
-rw-r--r--  synapse/storage/databases/main/deviceinbox.py | 479
-rw-r--r--  synapse/storage/databases/main/devices.py | 1339
-rw-r--r--  synapse/storage/databases/main/directory.py | 194
-rw-r--r--  synapse/storage/databases/main/e2e_room_keys.py | 436
-rw-r--r--  synapse/storage/databases/main/end_to_end_keys.py | 750
-rw-r--r--  synapse/storage/databases/main/event_federation.py | 714
-rw-r--r--  synapse/storage/databases/main/event_push_actions.py | 884
-rw-r--r--  synapse/storage/databases/main/events.py | 1481
-rw-r--r--  synapse/storage/databases/main/events_bg_updates.py | 585
-rw-r--r--  synapse/storage/databases/main/events_worker.py | 1370
-rw-r--r--  synapse/storage/databases/main/filtering.py | 74
-rw-r--r--  synapse/storage/databases/main/group_server.py | 1288
-rw-r--r--  synapse/storage/databases/main/keys.py | 210
-rw-r--r--  synapse/storage/databases/main/media_repository.py | 398
-rw-r--r--  synapse/storage/databases/main/metrics.py | 122
-rw-r--r--  synapse/storage/databases/main/monthly_active_users.py | 354
-rw-r--r--  synapse/storage/databases/main/openid.py | 33
-rw-r--r--  synapse/storage/databases/main/presence.py | 183
-rw-r--r--  synapse/storage/databases/main/profile.py | 173
-rw-r--r--  synapse/storage/databases/main/purge_events.py | 400
-rw-r--r--  synapse/storage/databases/main/push_rule.py | 691
-rw-r--r--  synapse/storage/databases/main/pusher.py | 356
-rw-r--r--  synapse/storage/databases/main/receipts.py | 590
-rw-r--r--  synapse/storage/databases/main/registration.py | 1565
-rw-r--r--  synapse/storage/databases/main/rejections.py | 31
-rw-r--r--  synapse/storage/databases/main/relations.py | 328
-rw-r--r--  synapse/storage/databases/main/room.py | 1429
-rw-r--r--  synapse/storage/databases/main/roommember.py | 1072
-rw-r--r--  synapse/storage/databases/main/schema/delta/12/v12.sql | 63
-rw-r--r--  synapse/storage/databases/main/schema/delta/13/v13.sql | 19
-rw-r--r--  synapse/storage/databases/main/schema/delta/14/v14.sql | 23
-rw-r--r--  synapse/storage/databases/main/schema/delta/15/appservice_txns.sql | 31
-rw-r--r--  synapse/storage/databases/main/schema/delta/15/presence_indices.sql | 2
-rw-r--r--  synapse/storage/databases/main/schema/delta/15/v15.sql | 24
-rw-r--r--  synapse/storage/databases/main/schema/delta/16/events_order_index.sql | 4
-rw-r--r--  synapse/storage/databases/main/schema/delta/16/remote_media_cache_index.sql | 2
-rw-r--r--  synapse/storage/databases/main/schema/delta/16/remove_duplicates.sql | 9
-rw-r--r--  synapse/storage/databases/main/schema/delta/16/room_alias_index.sql | 3
-rw-r--r--  synapse/storage/databases/main/schema/delta/16/unique_constraints.sql | 72
-rw-r--r--  synapse/storage/databases/main/schema/delta/16/users.sql | 56
-rw-r--r--  synapse/storage/databases/main/schema/delta/17/drop_indexes.sql | 18
-rw-r--r--  synapse/storage/databases/main/schema/delta/17/server_keys.sql | 24
-rw-r--r--  synapse/storage/databases/main/schema/delta/17/user_threepids.sql | 9
-rw-r--r--  synapse/storage/databases/main/schema/delta/18/server_keys_bigger_ints.sql | 32
-rw-r--r--  synapse/storage/databases/main/schema/delta/19/event_index.sql | 19
-rw-r--r--  synapse/storage/databases/main/schema/delta/20/dummy.sql | 1
-rw-r--r--  synapse/storage/databases/main/schema/delta/20/pushers.py | 88
-rw-r--r--  synapse/storage/databases/main/schema/delta/21/end_to_end_keys.sql | 34
-rw-r--r--  synapse/storage/databases/main/schema/delta/21/receipts.sql | 38
-rw-r--r--  synapse/storage/databases/main/schema/delta/22/receipts_index.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/22/user_threepids_unique.sql | 19
-rw-r--r--  synapse/storage/databases/main/schema/delta/24/stats_reporting.sql | 18
-rw-r--r--  synapse/storage/databases/main/schema/delta/25/fts.py | 80
-rw-r--r--  synapse/storage/databases/main/schema/delta/25/guest_access.sql | 25
-rw-r--r--  synapse/storage/databases/main/schema/delta/25/history_visibility.sql | 25
-rw-r--r--  synapse/storage/databases/main/schema/delta/25/tags.sql | 38
-rw-r--r--  synapse/storage/databases/main/schema/delta/26/account_data.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/27/account_data.sql | 36
-rw-r--r--  synapse/storage/databases/main/schema/delta/27/forgotten_memberships.sql | 26
-rw-r--r--  synapse/storage/databases/main/schema/delta/27/ts.py | 59
-rw-r--r--  synapse/storage/databases/main/schema/delta/28/event_push_actions.sql | 27
-rw-r--r--  synapse/storage/databases/main/schema/delta/28/events_room_stream.sql | 20
-rw-r--r--  synapse/storage/databases/main/schema/delta/28/public_roms_index.sql | 20
-rw-r--r--  synapse/storage/databases/main/schema/delta/28/receipts_user_id_index.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/28/upgrade_times.sql | 21
-rw-r--r--  synapse/storage/databases/main/schema/delta/28/users_is_guest.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/29/push_actions.sql | 35
-rw-r--r--  synapse/storage/databases/main/schema/delta/30/alias_creator.sql | 16
-rw-r--r--  synapse/storage/databases/main/schema/delta/30/as_users.py | 67
-rw-r--r--  synapse/storage/databases/main/schema/delta/30/deleted_pushers.sql | 25
-rw-r--r--  synapse/storage/databases/main/schema/delta/30/presence_stream.sql | 30
-rw-r--r--  synapse/storage/databases/main/schema/delta/30/public_rooms.sql | 23
-rw-r--r--  synapse/storage/databases/main/schema/delta/30/push_rule_stream.sql | 38
-rw-r--r--  synapse/storage/databases/main/schema/delta/30/threepid_guest_access_tokens.sql | 24
-rw-r--r--  synapse/storage/databases/main/schema/delta/31/invites.sql | 42
-rw-r--r--  synapse/storage/databases/main/schema/delta/31/local_media_repository_url_cache.sql | 27
-rw-r--r--  synapse/storage/databases/main/schema/delta/31/pushers.py | 87
-rw-r--r--  synapse/storage/databases/main/schema/delta/31/pushers_index.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/31/search_update.py | 64
-rw-r--r--  synapse/storage/databases/main/schema/delta/32/events.sql | 16
-rw-r--r--  synapse/storage/databases/main/schema/delta/32/openid.sql | 9
-rw-r--r--  synapse/storage/databases/main/schema/delta/32/pusher_throttle.sql | 23
-rw-r--r--  synapse/storage/databases/main/schema/delta/32/remove_indices.sql | 33
-rw-r--r--  synapse/storage/databases/main/schema/delta/32/reports.sql | 25
-rw-r--r--  synapse/storage/databases/main/schema/delta/33/access_tokens_device_index.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/33/devices.sql | 21
-rw-r--r--  synapse/storage/databases/main/schema/delta/33/devices_for_e2e_keys.sql | 19
-rw-r--r--  synapse/storage/databases/main/schema/delta/33/devices_for_e2e_keys_clear_unknown_device.sql | 20
-rw-r--r--  synapse/storage/databases/main/schema/delta/33/event_fields.py | 59
-rw-r--r--  synapse/storage/databases/main/schema/delta/33/remote_media_ts.py | 30
-rw-r--r--  synapse/storage/databases/main/schema/delta/33/user_ips_index.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/34/appservice_stream.sql | 23
-rw-r--r--  synapse/storage/databases/main/schema/delta/34/cache_stream.py | 46
-rw-r--r--  synapse/storage/databases/main/schema/delta/34/device_inbox.sql | 24
-rw-r--r--  synapse/storage/databases/main/schema/delta/34/push_display_name_rename.sql | 20
-rw-r--r--  synapse/storage/databases/main/schema/delta/34/received_txn_purge.py | 32
-rw-r--r--  synapse/storage/databases/main/schema/delta/35/contains_url.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/35/device_outbox.sql | 39
-rw-r--r--  synapse/storage/databases/main/schema/delta/35/device_stream_id.sql | 21
-rw-r--r--  synapse/storage/databases/main/schema/delta/35/event_push_actions_index.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/35/public_room_list_change_stream.sql | 33
-rw-r--r--  synapse/storage/databases/main/schema/delta/35/stream_order_to_extrem.sql | 37
-rw-r--r--  synapse/storage/databases/main/schema/delta/36/readd_public_rooms.sql | 26
-rw-r--r--  synapse/storage/databases/main/schema/delta/37/remove_auth_idx.py | 85
-rw-r--r--  synapse/storage/databases/main/schema/delta/37/user_threepids.sql | 52
-rw-r--r--  synapse/storage/databases/main/schema/delta/38/postgres_fts_gist.sql | 19
-rw-r--r--  synapse/storage/databases/main/schema/delta/39/appservice_room_list.sql | 29
-rw-r--r--  synapse/storage/databases/main/schema/delta/39/device_federation_stream_idx.sql | 16
-rw-r--r--  synapse/storage/databases/main/schema/delta/39/event_push_index.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/39/federation_out_position.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/39/membership_profile.sql | 20
-rw-r--r--  synapse/storage/databases/main/schema/delta/40/current_state_idx.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/40/device_inbox.sql | 21
-rw-r--r--  synapse/storage/databases/main/schema/delta/40/device_list_streams.sql | 60
-rw-r--r--  synapse/storage/databases/main/schema/delta/40/event_push_summary.sql | 37
-rw-r--r--  synapse/storage/databases/main/schema/delta/40/pushers.sql | 39
-rw-r--r--  synapse/storage/databases/main/schema/delta/41/device_list_stream_idx.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/41/device_outbound_index.sql | 16
-rw-r--r--  synapse/storage/databases/main/schema/delta/41/event_search_event_id_idx.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/41/ratelimit.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/42/current_state_delta.sql | 26
-rw-r--r--  synapse/storage/databases/main/schema/delta/42/device_list_last_id.sql | 33
-rw-r--r--  synapse/storage/databases/main/schema/delta/42/event_auth_state_only.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/42/user_dir.py | 84
-rw-r--r--  synapse/storage/databases/main/schema/delta/43/blocked_rooms.sql | 21
-rw-r--r--  synapse/storage/databases/main/schema/delta/43/quarantine_media.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/43/url_cache.sql | 16
-rw-r--r--  synapse/storage/databases/main/schema/delta/43/user_share.sql | 33
-rw-r--r--  synapse/storage/databases/main/schema/delta/44/expire_url_cache.sql | 41
-rw-r--r--  synapse/storage/databases/main/schema/delta/45/group_server.sql | 167
-rw-r--r--  synapse/storage/databases/main/schema/delta/45/profile_cache.sql | 28
-rw-r--r--  synapse/storage/databases/main/schema/delta/46/drop_refresh_tokens.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/46/drop_unique_deleted_pushers.sql | 35
-rw-r--r--  synapse/storage/databases/main/schema/delta/46/group_server.sql | 32
-rw-r--r--  synapse/storage/databases/main/schema/delta/46/local_media_repository_url_idx.sql | 24
-rw-r--r--  synapse/storage/databases/main/schema/delta/46/user_dir_null_room_ids.sql | 35
-rw-r--r--  synapse/storage/databases/main/schema/delta/46/user_dir_typos.sql | 24
-rw-r--r--  synapse/storage/databases/main/schema/delta/47/last_access_media.sql | 16
-rw-r--r--  synapse/storage/databases/main/schema/delta/47/postgres_fts_gin.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/47/push_actions_staging.sql | 28
-rw-r--r--  synapse/storage/databases/main/schema/delta/48/add_user_consent.sql | 18
-rw-r--r--  synapse/storage/databases/main/schema/delta/48/add_user_ips_last_seen_index.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/48/deactivated_users.sql | 25
-rw-r--r--  synapse/storage/databases/main/schema/delta/48/group_unique_indexes.py | 63
-rw-r--r--  synapse/storage/databases/main/schema/delta/48/groups_joinable.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/49/add_user_consent_server_notice_sent.sql | 20
-rw-r--r--  synapse/storage/databases/main/schema/delta/49/add_user_daily_visits.sql | 21
-rw-r--r--  synapse/storage/databases/main/schema/delta/49/add_user_ips_last_seen_only_index.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/50/add_creation_ts_users_index.sql | 19
-rw-r--r--  synapse/storage/databases/main/schema/delta/50/erasure_store.sql | 21
-rw-r--r--  synapse/storage/databases/main/schema/delta/50/make_event_content_nullable.py | 96
-rw-r--r--  synapse/storage/databases/main/schema/delta/51/e2e_room_keys.sql | 39
-rw-r--r--  synapse/storage/databases/main/schema/delta/51/monthly_active_users.sql | 27
-rw-r--r--  synapse/storage/databases/main/schema/delta/52/add_event_to_state_group_index.sql | 19
-rw-r--r--  synapse/storage/databases/main/schema/delta/52/device_list_streams_unique_idx.sql | 36
-rw-r--r--  synapse/storage/databases/main/schema/delta/52/e2e_room_keys.sql | 53
-rw-r--r--  synapse/storage/databases/main/schema/delta/53/add_user_type_to_users.sql | 19
-rw-r--r--  synapse/storage/databases/main/schema/delta/53/drop_sent_transactions.sql | 16
-rw-r--r--  synapse/storage/databases/main/schema/delta/53/event_format_version.sql | 16
-rw-r--r--  synapse/storage/databases/main/schema/delta/53/user_dir_populate.sql | 30
-rw-r--r--  synapse/storage/databases/main/schema/delta/53/user_ips_index.sql | 30
-rw-r--r--  synapse/storage/databases/main/schema/delta/53/user_share.sql | 44
-rw-r--r--  synapse/storage/databases/main/schema/delta/53/user_threepid_id.sql | 29
-rw-r--r--  synapse/storage/databases/main/schema/delta/53/users_in_public_rooms.sql | 28
-rw-r--r--  synapse/storage/databases/main/schema/delta/54/account_validity_with_renewal.sql | 30
-rw-r--r--  synapse/storage/databases/main/schema/delta/54/add_validity_to_server_keys.sql | 23
-rw-r--r--  synapse/storage/databases/main/schema/delta/54/delete_forward_extremities.sql | 23
-rw-r--r--  synapse/storage/databases/main/schema/delta/54/drop_legacy_tables.sql | 30
-rw-r--r--  synapse/storage/databases/main/schema/delta/54/drop_presence_list.sql | 16
-rw-r--r--  synapse/storage/databases/main/schema/delta/54/relations.sql | 27
-rw-r--r--  synapse/storage/databases/main/schema/delta/54/stats.sql | 80
-rw-r--r--  synapse/storage/databases/main/schema/delta/54/stats2.sql | 28
-rw-r--r--  synapse/storage/databases/main/schema/delta/55/access_token_expiry.sql | 18
-rw-r--r--  synapse/storage/databases/main/schema/delta/55/track_threepid_validations.sql | 31
-rw-r--r--  synapse/storage/databases/main/schema/delta/55/users_alter_deactivated.sql | 19
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/add_spans_to_device_lists.sql | 20
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/current_state_events_membership.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/current_state_events_membership_mk2.sql | 24
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/delete_keys_from_deleted_backups.sql | 25
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/destinations_failure_ts.sql | 25
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/destinations_retry_interval_type.sql.postgres | 18
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/device_stream_id_insert.sql | 20
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/devices_last_seen.sql | 24
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/drop_unused_event_tables.sql | 20
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/event_expiry.sql | 21
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/event_labels.sql | 30
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/event_labels_background_update.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/fix_room_keys_index.sql | 18
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/hidden_devices.sql | 18
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/hidden_devices_fix.sql.sqlite | 42
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/nuke_empty_communities_from_db.sql | 29
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/public_room_list_idx.sql | 16
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/redaction_censor.sql | 16
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/redaction_censor2.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/redaction_censor3_fix_update.sql.postgres | 25
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/redaction_censor4.sql | 16
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/remove_tombstoned_rooms_from_directory.sql | 18
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/room_key_etag.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/room_membership_idx.sql | 18
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/room_retention.sql | 33
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/signing_keys.sql | 56
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/signing_keys_nonunique_signatures.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/stats_separated.sql | 156
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/unique_user_filter_index.py | 52
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/user_external_ids.sql | 24
-rw-r--r--  synapse/storage/databases/main/schema/delta/56/users_in_public_rooms_idx.sql | 17
-rw-r--r--  synapse/storage/databases/main/schema/delta/57/delete_old_current_state_events.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/57/device_list_remote_cache_stale.sql | 25
-rw-r--r--  synapse/storage/databases/main/schema/delta/57/local_current_membership.py | 98
-rw-r--r--  synapse/storage/databases/main/schema/delta/57/remove_sent_outbound_pokes.sql | 21
-rw-r--r--  synapse/storage/databases/main/schema/delta/57/rooms_version_column.sql | 24
-rw-r--r--  synapse/storage/databases/main/schema/delta/57/rooms_version_column_2.sql.postgres | 35
-rw-r--r--  synapse/storage/databases/main/schema/delta/57/rooms_version_column_2.sql.sqlite | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/57/rooms_version_column_3.sql.postgres | 39
-rw-r--r--  synapse/storage/databases/main/schema/delta/57/rooms_version_column_3.sql.sqlite | 23
-rw-r--r--  synapse/storage/databases/main/schema/delta/58/02remove_dup_outbound_pokes.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/58/03persist_ui_auth.sql | 36
-rw-r--r--  synapse/storage/databases/main/schema/delta/58/05cache_instance.sql.postgres | 30
-rw-r--r--  synapse/storage/databases/main/schema/delta/58/06dlols_unique_idx.py | 80
-rw-r--r--  synapse/storage/databases/main/schema/delta/58/08_media_safe_from_quarantine.sql.postgres | 18
-rw-r--r--  synapse/storage/databases/main/schema/delta/58/08_media_safe_from_quarantine.sql.sqlite | 18
-rw-r--r--  synapse/storage/databases/main/schema/delta/58/10drop_local_rejections_stream.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/58/10federation_pos_instance_name.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/delta/58/11user_id_seq.py | 34
-rw-r--r--  synapse/storage/databases/main/schema/delta/58/12room_stats.sql | 32
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/application_services.sql | 37
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/event_edges.sql | 70
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/event_signatures.sql | 38
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/im.sql | 120
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/keys.sql | 26
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/media_repository.sql | 68
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/presence.sql | 32
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/profiles.sql | 20
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/push.sql | 74
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/redactions.sql | 22
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/room_aliases.sql | 29
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/state.sql | 40
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/transactions.sql | 44
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/16/users.sql | 42
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/54/full.sql.postgres | 1983
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/54/full.sql.sqlite | 253
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/54/stream_positions.sql | 8
-rw-r--r--  synapse/storage/databases/main/schema/full_schemas/README.md | 21
-rw-r--r--  synapse/storage/databases/main/search.py | 711
-rw-r--r--  synapse/storage/databases/main/signatures.py | 68
-rw-r--r--  synapse/storage/databases/main/state.py | 509
-rw-r--r--  synapse/storage/databases/main/state_deltas.py | 121
-rw-r--r--  synapse/storage/databases/main/stats.py | 886
-rw-r--r--  synapse/storage/databases/main/stream.py | 1064
-rw-r--r--  synapse/storage/databases/main/tags.py | 291
-rw-r--r--  synapse/storage/databases/main/transactions.py | 266
-rw-r--r--  synapse/storage/databases/main/ui_auth.py | 300
-rw-r--r--  synapse/storage/databases/main/user_directory.py | 809
-rw-r--r--  synapse/storage/databases/main/user_erasure_store.py | 113
-rw-r--r--  synapse/storage/databases/state/__init__.py | 16
-rw-r--r--  synapse/storage/databases/state/bg_updates.py | 370
-rw-r--r--  synapse/storage/databases/state/schema/delta/23/drop_state_index.sql | 16
-rw-r--r--  synapse/storage/databases/state/schema/delta/30/state_stream.sql | 33
-rw-r--r--  synapse/storage/databases/state/schema/delta/32/remove_state_indices.sql | 19
-rw-r--r--  synapse/storage/databases/state/schema/delta/35/add_state_index.sql | 17
-rw-r--r--  synapse/storage/databases/state/schema/delta/35/state.sql | 22
-rw-r--r--  synapse/storage/databases/state/schema/delta/35/state_dedupe.sql | 17
-rw-r--r--  synapse/storage/databases/state/schema/delta/47/state_group_seq.py | 34
-rw-r--r--  synapse/storage/databases/state/schema/delta/56/state_group_room_idx.sql | 17
-rw-r--r--  synapse/storage/databases/state/schema/full_schemas/54/full.sql | 37
-rw-r--r--  synapse/storage/databases/state/schema/full_schemas/54/sequence.sql.postgres | 21
-rw-r--r--  synapse/storage/databases/state/store.py | 644
274 files changed, 35407 insertions, 0 deletions
diff --git a/synapse/storage/databases/__init__.py b/synapse/storage/databases/__init__.py
new file mode 100644
index 00000000..4406e582
--- /dev/null
+++ b/synapse/storage/databases/__init__.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from synapse.storage.database import DatabasePool, make_conn
+from synapse.storage.databases.main.events import PersistEventsStore
+from synapse.storage.databases.state import StateGroupDataStore
+from synapse.storage.engines import create_engine
+from synapse.storage.prepare_database import prepare_database
+
+logger = logging.getLogger(__name__)
+
+
+class Databases(object):
+ """The various databases.
+
+ These are low level interfaces to physical databases.
+
+ Attributes:
+ main (DataStore)
+ """
+
+ def __init__(self, main_store_class, hs):
+ # Note we pass in the main store class here as workers use a different main
+ # store.
+
+ self.databases = []
+ main = None
+ state = None
+ persist_events = None
+
+ for database_config in hs.config.database.databases:
+ db_name = database_config.name
+ engine = create_engine(database_config.config)
+
+ with make_conn(database_config, engine) as db_conn:
+ logger.info("Preparing database %r...", db_name)
+
+ engine.check_database(db_conn)
+ prepare_database(
+ db_conn, engine, hs.config, databases=database_config.databases,
+ )
+
+ database = DatabasePool(hs, database_config, engine)
+
+ if "main" in database_config.databases:
+ logger.info("Starting 'main' data store")
+
+ # Sanity check we don't try and configure the main store on
+ # multiple databases.
+ if main:
+ raise Exception("'main' data store already configured")
+
+ main = main_store_class(database, db_conn, hs)
+
+                    # If we're on a process that can persist events, also
+                    # instantiate a `PersistEventsStore`.
+ if hs.config.worker.writers.events == hs.get_instance_name():
+ persist_events = PersistEventsStore(hs, database, main)
+
+ if "state" in database_config.databases:
+ logger.info("Starting 'state' data store")
+
+ # Sanity check we don't try and configure the state store on
+ # multiple databases.
+ if state:
+ raise Exception("'state' data store already configured")
+
+ state = StateGroupDataStore(database, db_conn, hs)
+
+ db_conn.commit()
+
+ self.databases.append(database)
+
+ logger.info("Database %r prepared", db_name)
+
+ # Sanity check that we have actually configured all the required stores.
+ if not main:
+ raise Exception("No 'main' data store configured")
+
+        if not state:
+            raise Exception("No 'state' data store configured")
+
+ # We use local variables here to ensure that the databases do not have
+ # optional types.
+ self.main = main
+ self.state = state
+ self.persist_events = persist_events
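
The constructor above collects each store into a local variable, raises as soon as a store is configured on more than one database, raises again if a required store is missing, and only then promotes the locals to attributes, so the attributes are never optional. A minimal self-contained sketch of that assign-once-then-check pattern (the store classes and the shape of `configs` are hypothetical stand-ins, not Synapse APIs):

    class MainStore:
        pass

    class StateStore:
        pass

    def wire_stores(configs):
        # configs stands in for the per-database store-name lists that
        # Databases.__init__ reads from database_config.databases.
        main = None
        state = None
        for names in configs:  # e.g. [["main"], ["state"]]
            if "main" in names:
                if main:
                    raise Exception("'main' data store already configured")
                main = MainStore()
            if "state" in names:
                if state:
                    raise Exception("'state' data store already configured")
                state = StateStore()
        # Promote to non-optional values only once every required store exists.
        if not main:
            raise Exception("No 'main' data store configured")
        if not state:
            raise Exception("No 'state' data store configured")
        return main, state

    main, state = wire_stores([["main"], ["state"]])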
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
new file mode 100644
index 00000000..17fa4709
--- /dev/null
+++ b/synapse/storage/databases/main/__init__.py
@@ -0,0 +1,596 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import calendar
+import logging
+import time
+
+from synapse.api.constants import PresenceState
+from synapse.config.homeserver import HomeServerConfig
+from synapse.storage.database import DatabasePool
+from synapse.storage.engines import PostgresEngine
+from synapse.storage.util.id_generators import (
+ IdGenerator,
+ MultiWriterIdGenerator,
+ StreamIdGenerator,
+)
+from synapse.util.caches.stream_change_cache import StreamChangeCache
+
+from .account_data import AccountDataStore
+from .appservice import ApplicationServiceStore, ApplicationServiceTransactionStore
+from .cache import CacheInvalidationWorkerStore
+from .censor_events import CensorEventsStore
+from .client_ips import ClientIpStore
+from .deviceinbox import DeviceInboxStore
+from .devices import DeviceStore
+from .directory import DirectoryStore
+from .e2e_room_keys import EndToEndRoomKeyStore
+from .end_to_end_keys import EndToEndKeyStore
+from .event_federation import EventFederationStore
+from .event_push_actions import EventPushActionsStore
+from .events_bg_updates import EventsBackgroundUpdatesStore
+from .filtering import FilteringStore
+from .group_server import GroupServerStore
+from .keys import KeyStore
+from .media_repository import MediaRepositoryStore
+from .metrics import ServerMetricsStore
+from .monthly_active_users import MonthlyActiveUsersStore
+from .openid import OpenIdStore
+from .presence import PresenceStore, UserPresenceState
+from .profile import ProfileStore
+from .purge_events import PurgeEventsStore
+from .push_rule import PushRuleStore
+from .pusher import PusherStore
+from .receipts import ReceiptsStore
+from .registration import RegistrationStore
+from .rejections import RejectionsStore
+from .relations import RelationsStore
+from .room import RoomStore
+from .roommember import RoomMemberStore
+from .search import SearchStore
+from .signatures import SignatureStore
+from .state import StateStore
+from .stats import StatsStore
+from .stream import StreamStore
+from .tags import TagsStore
+from .transactions import TransactionStore
+from .ui_auth import UIAuthStore
+from .user_directory import UserDirectoryStore
+from .user_erasure_store import UserErasureStore
+
+logger = logging.getLogger(__name__)
+
+
+class DataStore(
+ EventsBackgroundUpdatesStore,
+ RoomMemberStore,
+ RoomStore,
+ RegistrationStore,
+ StreamStore,
+ ProfileStore,
+ PresenceStore,
+ TransactionStore,
+ DirectoryStore,
+ KeyStore,
+ StateStore,
+ SignatureStore,
+ ApplicationServiceStore,
+ PurgeEventsStore,
+ EventFederationStore,
+ MediaRepositoryStore,
+ RejectionsStore,
+ FilteringStore,
+ PusherStore,
+ PushRuleStore,
+ ApplicationServiceTransactionStore,
+ ReceiptsStore,
+ EndToEndKeyStore,
+ EndToEndRoomKeyStore,
+ SearchStore,
+ TagsStore,
+ AccountDataStore,
+ EventPushActionsStore,
+ OpenIdStore,
+ ClientIpStore,
+ DeviceStore,
+ DeviceInboxStore,
+ UserDirectoryStore,
+ GroupServerStore,
+ UserErasureStore,
+ MonthlyActiveUsersStore,
+ StatsStore,
+ RelationsStore,
+ CensorEventsStore,
+ UIAuthStore,
+ CacheInvalidationWorkerStore,
+ ServerMetricsStore,
+):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ self.hs = hs
+ self._clock = hs.get_clock()
+ self.database_engine = database.engine
+
+ self._presence_id_gen = StreamIdGenerator(
+ db_conn, "presence_stream", "stream_id"
+ )
+ self._device_inbox_id_gen = StreamIdGenerator(
+ db_conn, "device_inbox", "stream_id"
+ )
+ self._public_room_id_gen = StreamIdGenerator(
+ db_conn, "public_room_list_stream", "stream_id"
+ )
+ self._device_list_id_gen = StreamIdGenerator(
+ db_conn,
+ "device_lists_stream",
+ "stream_id",
+ extra_tables=[
+ ("user_signature_stream", "stream_id"),
+ ("device_lists_outbound_pokes", "stream_id"),
+ ],
+ )
+ self._cross_signing_id_gen = StreamIdGenerator(
+ db_conn, "e2e_cross_signing_keys", "stream_id"
+ )
+
+ self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id")
+ self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id")
+ self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id")
+ self._push_rules_enable_id_gen = IdGenerator(db_conn, "push_rules_enable", "id")
+ self._pushers_id_gen = StreamIdGenerator(
+ db_conn, "pushers", "id", extra_tables=[("deleted_pushers", "stream_id")]
+ )
+ self._group_updates_id_gen = StreamIdGenerator(
+ db_conn, "local_group_updates", "stream_id"
+ )
+
+ if isinstance(self.database_engine, PostgresEngine):
+ self._cache_id_gen = MultiWriterIdGenerator(
+ db_conn,
+ database,
+ instance_name="master",
+ table="cache_invalidation_stream_by_instance",
+ instance_column="instance_name",
+ id_column="stream_id",
+ sequence_name="cache_invalidation_stream_seq",
+ )
+ else:
+ self._cache_id_gen = None
+
+ super(DataStore, self).__init__(database, db_conn, hs)
+
+ self._presence_on_startup = self._get_active_presence(db_conn)
+
+ presence_cache_prefill, min_presence_val = self.db_pool.get_cache_dict(
+ db_conn,
+ "presence_stream",
+ entity_column="user_id",
+ stream_column="stream_id",
+ max_value=self._presence_id_gen.get_current_token(),
+ )
+ self.presence_stream_cache = StreamChangeCache(
+ "PresenceStreamChangeCache",
+ min_presence_val,
+ prefilled_cache=presence_cache_prefill,
+ )
+
+ max_device_inbox_id = self._device_inbox_id_gen.get_current_token()
+ device_inbox_prefill, min_device_inbox_id = self.db_pool.get_cache_dict(
+ db_conn,
+ "device_inbox",
+ entity_column="user_id",
+ stream_column="stream_id",
+ max_value=max_device_inbox_id,
+ limit=1000,
+ )
+ self._device_inbox_stream_cache = StreamChangeCache(
+ "DeviceInboxStreamChangeCache",
+ min_device_inbox_id,
+ prefilled_cache=device_inbox_prefill,
+ )
+        # The federation outbox and the local device inbox use the same
+        # stream_id generator.
+ device_outbox_prefill, min_device_outbox_id = self.db_pool.get_cache_dict(
+ db_conn,
+ "device_federation_outbox",
+ entity_column="destination",
+ stream_column="stream_id",
+ max_value=max_device_inbox_id,
+ limit=1000,
+ )
+ self._device_federation_outbox_stream_cache = StreamChangeCache(
+ "DeviceFederationOutboxStreamChangeCache",
+ min_device_outbox_id,
+ prefilled_cache=device_outbox_prefill,
+ )
+
+ device_list_max = self._device_list_id_gen.get_current_token()
+ self._device_list_stream_cache = StreamChangeCache(
+ "DeviceListStreamChangeCache", device_list_max
+ )
+ self._user_signature_stream_cache = StreamChangeCache(
+ "UserSignatureStreamChangeCache", device_list_max
+ )
+ self._device_list_federation_stream_cache = StreamChangeCache(
+ "DeviceListFederationStreamChangeCache", device_list_max
+ )
+
+ events_max = self._stream_id_gen.get_current_token()
+ curr_state_delta_prefill, min_curr_state_delta_id = self.db_pool.get_cache_dict(
+ db_conn,
+ "current_state_delta_stream",
+ entity_column="room_id",
+ stream_column="stream_id",
+ max_value=events_max, # As we share the stream id with events token
+ limit=1000,
+ )
+ self._curr_state_delta_stream_cache = StreamChangeCache(
+ "_curr_state_delta_stream_cache",
+ min_curr_state_delta_id,
+ prefilled_cache=curr_state_delta_prefill,
+ )
+
+ _group_updates_prefill, min_group_updates_id = self.db_pool.get_cache_dict(
+ db_conn,
+ "local_group_updates",
+ entity_column="user_id",
+ stream_column="stream_id",
+ max_value=self._group_updates_id_gen.get_current_token(),
+ limit=1000,
+ )
+ self._group_updates_stream_cache = StreamChangeCache(
+ "_group_updates_stream_cache",
+ min_group_updates_id,
+ prefilled_cache=_group_updates_prefill,
+ )
+
+ self._stream_order_on_start = self.get_room_max_stream_ordering()
+ self._min_stream_order_on_start = self.get_room_min_stream_ordering()
+
+ # Used in _generate_user_daily_visits to keep track of progress
+ self._last_user_visit_update = self._get_start_of_day()
+
+ def take_presence_startup_info(self):
+ active_on_startup = self._presence_on_startup
+ self._presence_on_startup = None
+ return active_on_startup
+
+ def _get_active_presence(self, db_conn):
+        """Fetch non-offline presence from the database so that we can register
+        the appropriate timeouts.
+ """
+
+ sql = (
+ "SELECT user_id, state, last_active_ts, last_federation_update_ts,"
+ " last_user_sync_ts, status_msg, currently_active FROM presence_stream"
+ " WHERE state != ?"
+ )
+ sql = self.database_engine.convert_param_style(sql)
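+        # convert_param_style rewrites the "?" placeholders into the
+        # engine's native paramstyle (e.g. "%s" for Postgres via psycopg2).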
+
+ txn = db_conn.cursor()
+ txn.execute(sql, (PresenceState.OFFLINE,))
+ rows = self.db_pool.cursor_to_dict(txn)
+ txn.close()
+
+ for row in rows:
+ row["currently_active"] = bool(row["currently_active"])
+
+ return [UserPresenceState(**row) for row in rows]
+
+ def count_daily_users(self):
+ """
+ Counts the number of users who used this homeserver in the last 24 hours.
+ """
+ yesterday = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24)
+ return self.db_pool.runInteraction(
+ "count_daily_users", self._count_users, yesterday
+ )
+
+ def count_monthly_users(self):
+ """
+ Counts the number of users who used this homeserver in the last 30 days.
+ Note this method is intended for phonehome metrics only and is different
+ from the mau figure in synapse.storage.monthly_active_users which,
+ amongst other things, includes a 3 day grace period before a user counts.
+ """
+ thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
+ return self.db_pool.runInteraction(
+ "count_monthly_users", self._count_users, thirty_days_ago
+ )
+
+ def _count_users(self, txn, time_from):
+ """
+ Returns number of users seen in the past time_from period
+ """
+ sql = """
+ SELECT COALESCE(count(*), 0) FROM (
+ SELECT user_id FROM user_ips
+ WHERE last_seen > ?
+ GROUP BY user_id
+ ) u
+ """
+ txn.execute(sql, (time_from,))
+ (count,) = txn.fetchone()
+ return count
+
+ def count_r30_users(self):
+ """
+        Counts the number of 30 day retained users, defined as:
+        * Users who have created their accounts more than 30 days ago
+        * Who were last seen at most 30 days ago
+        * Where account creation and last_seen are > 30 days apart
+
+        Returns counts globally as well as a breakdown by platform.
+ """
+
+ def _count_r30_users(txn):
+ thirty_days_in_secs = 86400 * 30
+ now = int(self._clock.time())
+ thirty_days_ago_in_secs = now - thirty_days_in_secs
+
+ sql = """
+ SELECT platform, COALESCE(count(*), 0) FROM (
+ SELECT
+ users.name, platform, users.creation_ts * 1000,
+ MAX(uip.last_seen)
+ FROM users
+ INNER JOIN (
+ SELECT
+ user_id,
+ last_seen,
+ CASE
+ WHEN user_agent LIKE '%%Android%%' THEN 'android'
+ WHEN user_agent LIKE '%%iOS%%' THEN 'ios'
+ WHEN user_agent LIKE '%%Electron%%' THEN 'electron'
+ WHEN user_agent LIKE '%%Mozilla%%' THEN 'web'
+ WHEN user_agent LIKE '%%Gecko%%' THEN 'web'
+ ELSE 'unknown'
+ END
+ AS platform
+ FROM user_ips
+ ) uip
+ ON users.name = uip.user_id
+ AND users.appservice_id is NULL
+ AND users.creation_ts < ?
+ AND uip.last_seen/1000 > ?
+ AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
+ GROUP BY users.name, platform, users.creation_ts
+ ) u GROUP BY platform
+ """
+
+ results = {}
+ txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))
+
+            for row in txn:
+                # Every platform, including "unknown", is included in the
+                # per-platform breakdown; the overall total is computed
+                # separately below.
+                results[row[0]] = row[1]
+
+ sql = """
+ SELECT COALESCE(count(*), 0) FROM (
+ SELECT users.name, users.creation_ts * 1000,
+ MAX(uip.last_seen)
+ FROM users
+ INNER JOIN (
+ SELECT
+ user_id,
+ last_seen
+ FROM user_ips
+ ) uip
+ ON users.name = uip.user_id
+ AND appservice_id is NULL
+ AND users.creation_ts < ?
+ AND uip.last_seen/1000 > ?
+ AND (uip.last_seen/1000) - users.creation_ts > 86400 * 30
+ GROUP BY users.name, users.creation_ts
+ ) u
+ """
+
+ txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs))
+
+ (count,) = txn.fetchone()
+ results["all"] = count
+
+ return results
+
+ return self.db_pool.runInteraction("count_r30_users", _count_r30_users)
+
+ def _get_start_of_day(self):
+ """
+ Returns millisecond unixtime for start of UTC day.
+ """
+ now = time.gmtime()
+ today_start = calendar.timegm((now.tm_year, now.tm_mon, now.tm_mday, 0, 0, 0))
+ return today_start * 1000
+
+ def generate_user_daily_visits(self):
+ """
+        Generates daily visit data for use in cohort/retention analysis
+ """
+
+ def _generate_user_daily_visits(txn):
+ logger.info("Calling _generate_user_daily_visits")
+ today_start = self._get_start_of_day()
+ a_day_in_milliseconds = 24 * 60 * 60 * 1000
+            now = self._clock.time_msec()
+
+ sql = """
+ INSERT INTO user_daily_visits (user_id, device_id, timestamp)
+ SELECT u.user_id, u.device_id, ?
+ FROM user_ips AS u
+ LEFT JOIN (
+ SELECT user_id, device_id, timestamp FROM user_daily_visits
+ WHERE timestamp = ?
+ ) udv
+ ON u.user_id = udv.user_id AND u.device_id=udv.device_id
+ INNER JOIN users ON users.name=u.user_id
+ WHERE last_seen > ? AND last_seen <= ?
+ AND udv.timestamp IS NULL AND users.is_guest=0
+ AND users.appservice_id IS NULL
+ GROUP BY u.user_id, u.device_id
+ """
+
+ # This means that the day has rolled over but there could still
+ # be entries from the previous day. There is an edge case
+ # where if the user logs in at 23:59 and overwrites their
+ # last_seen at 00:01 then they will not be counted in the
+ # previous day's stats - it is important that the query is run
+ # often to minimise this case.
+ if today_start > self._last_user_visit_update:
+ yesterday_start = today_start - a_day_in_milliseconds
+ txn.execute(
+ sql,
+ (
+ yesterday_start,
+ yesterday_start,
+ self._last_user_visit_update,
+ today_start,
+ ),
+ )
+ self._last_user_visit_update = today_start
+
+ txn.execute(
+ sql, (today_start, today_start, self._last_user_visit_update, now)
+ )
+            # Update _last_user_visit_update to now. The reason to do this
+            # rather than just clamping to the beginning of the day is to
+            # limit the size of the join - meaning that the query can be run
+            # more frequently.
+ self._last_user_visit_update = now
+
+ return self.db_pool.runInteraction(
+ "generate_user_daily_visits", _generate_user_daily_visits
+ )
+
+    def get_users(self):
+        """Retrieve a list of all users in the users table.
+
+        Returns:
+            defer.Deferred: resolves to list[dict[str, Any]]
+        """
+ return self.db_pool.simple_select_list(
+ table="users",
+ keyvalues={},
+ retcols=[
+ "name",
+ "password_hash",
+ "is_guest",
+ "admin",
+ "user_type",
+ "deactivated",
+ ],
+ desc="get_users",
+ )
+
+ def get_users_paginate(
+ self, start, limit, name=None, guests=True, deactivated=False
+ ):
+        """Retrieve a paginated list of users from the users table, together
+        with the total number of users matching the filter criteria.
+
+        Args:
+            start (int): start number to begin the query from
+            limit (int): number of rows to retrieve
+            name (string): filter for user names
+            guests (bool): whether to include guest users
+            deactivated (bool): whether to include deactivated users
+        Returns:
+            defer.Deferred: resolves to list[dict[str, Any]], int
+        """
+
+ def get_users_paginate_txn(txn):
+ filters = []
+ args = []
+
+ if name:
+ filters.append("name LIKE ?")
+ args.append("%" + name + "%")
+
+ if not guests:
+ filters.append("is_guest = 0")
+
+ if not deactivated:
+ filters.append("deactivated = 0")
+
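+            # Precedence note: the concatenation happens first, so this is
+            # ("WHERE " + joined filters) if any filters exist, otherwise "".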
+ where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else ""
+
+ sql = "SELECT COUNT(*) as total_users FROM users %s" % (where_clause)
+ txn.execute(sql, args)
+ count = txn.fetchone()[0]
+
+ args = [self.hs.config.server_name] + args + [limit, start]
+ sql = """
+ SELECT name, user_type, is_guest, admin, deactivated, displayname, avatar_url
+ FROM users as u
+ LEFT JOIN profiles AS p ON u.name = '@' || p.user_id || ':' || ?
+ {}
+ ORDER BY u.name LIMIT ? OFFSET ?
+ """.format(
+ where_clause
+ )
+ txn.execute(sql, args)
+ users = self.db_pool.cursor_to_dict(txn)
+ return users, count
+
+ return self.db_pool.runInteraction(
+ "get_users_paginate_txn", get_users_paginate_txn
+ )
+
+ def search_users(self, term):
+        """Search the users table for one or more users whose name
+        matches the given term.
+
+        Args:
+            term (str): search term
+        Returns:
+            defer.Deferred: resolves to list[dict[str, Any]]
+        """
+ return self.db_pool.simple_search_list(
+ table="users",
+ term=term,
+ col="name",
+ retcols=["name", "password_hash", "is_guest", "admin", "user_type"],
+ desc="search_users",
+ )
+
+
+def check_database_before_upgrade(cur, database_engine, config: HomeServerConfig):
+ """Called before upgrading an existing database to check that it is broadly sane
+ compared with the configuration.
+ """
+ domain = config.server_name
+
+ sql = database_engine.convert_param_style(
+ "SELECT COUNT(*) FROM users WHERE name NOT LIKE ?"
+ )
+ pat = "%:" + domain
+ cur.execute(sql, (pat,))
+ num_not_matching = cur.fetchall()[0][0]
+ if num_not_matching == 0:
+ return
+
+    raise Exception(
+        "Found users in database not native to %s!\n"
+        "You cannot change a Synapse server_name after it has been configured"
+        % (domain,)
+    )
+
+
+__all__ = ["DataStore", "check_database_before_upgrade"]
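
DataStore above is assembled from several dozen store mixins; each mixin's __init__ sets up its own state and then delegates via super(), so the single DataStore construction call threads through every class in MRO order. A toy sketch of this cooperative-initialisation pattern (the class names are illustrative, not Synapse's):

    class BaseStore:
        def __init__(self, db_conn):
            self.db_conn = db_conn

    class AccountDataMixin(BaseStore):
        def __init__(self, db_conn):
            self.account_cache = {}  # mixin-local state, set up first
            super().__init__(db_conn)  # then hand off along the MRO

    class PresenceMixin(BaseStore):
        def __init__(self, db_conn):
            self.presence_cache = {}
            super().__init__(db_conn)

    class ToyDataStore(AccountDataMixin, PresenceMixin):
        def __init__(self, db_conn):
            # One call runs AccountDataMixin, PresenceMixin and BaseStore in turn.
            super().__init__(db_conn)

    store = ToyDataStore(db_conn="connection")
    assert store.account_cache == {} and store.presence_cache == {}

The same ordering is why DataStore.__init__ can create its stream ID generators before calling super(): classes later in the MRO may rely on that state during their own setup.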
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
new file mode 100644
index 00000000..82aac2bb
--- /dev/null
+++ b/synapse/storage/databases/main/account_data.py
@@ -0,0 +1,438 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+import logging
+from typing import List, Optional, Tuple
+
+from twisted.internet import defer
+
+from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.storage.database import DatabasePool
+from synapse.storage.util.id_generators import StreamIdGenerator
+from synapse.types import JsonDict
+from synapse.util import json_encoder
+from synapse.util.caches.descriptors import _CacheContext, cached
+from synapse.util.caches.stream_change_cache import StreamChangeCache
+
+logger = logging.getLogger(__name__)
+
+
+class AccountDataWorkerStore(SQLBaseStore, metaclass=abc.ABCMeta):
+    """This is an abstract base class where subclasses must implement
+    `get_max_account_data_stream_id` which can be called in the initializer.
+    """
+
+    # The ABCMeta metaclass ensures that we cannot be instantiated without
+    # the abstract methods being implemented.
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ account_max = self.get_max_account_data_stream_id()
+ self._account_data_stream_cache = StreamChangeCache(
+ "AccountDataAndTagsChangeCache", account_max
+ )
+
+ super(AccountDataWorkerStore, self).__init__(database, db_conn, hs)
+
+ @abc.abstractmethod
+ def get_max_account_data_stream_id(self):
+        """Get the current max stream ID for the account data stream
+
+ Returns:
+ int
+ """
+ raise NotImplementedError()
+
+ @cached()
+ def get_account_data_for_user(self, user_id):
+ """Get all the client account_data for a user.
+
+ Args:
+ user_id(str): The user to get the account_data for.
+ Returns:
+ A deferred pair of a dict of global account_data and a dict
+ mapping from room_id string to per room account_data dicts.
+ """
+
+ def get_account_data_for_user_txn(txn):
+ rows = self.db_pool.simple_select_list_txn(
+ txn,
+ "account_data",
+ {"user_id": user_id},
+ ["account_data_type", "content"],
+ )
+
+ global_account_data = {
+ row["account_data_type"]: db_to_json(row["content"]) for row in rows
+ }
+
+ rows = self.db_pool.simple_select_list_txn(
+ txn,
+ "room_account_data",
+ {"user_id": user_id},
+ ["room_id", "account_data_type", "content"],
+ )
+
+ by_room = {}
+ for row in rows:
+ room_data = by_room.setdefault(row["room_id"], {})
+ room_data[row["account_data_type"]] = db_to_json(row["content"])
+
+ return global_account_data, by_room
+
+ return self.db_pool.runInteraction(
+ "get_account_data_for_user", get_account_data_for_user_txn
+ )
+
+ @cached(num_args=2, max_entries=5000)
+ async def get_global_account_data_by_type_for_user(
+ self, data_type: str, user_id: str
+ ) -> Optional[JsonDict]:
+        """
+        Returns:
+            The account data of the given type for the user, or None if
+            there isn't any set.
+        """
+ result = await self.db_pool.simple_select_one_onecol(
+ table="account_data",
+ keyvalues={"user_id": user_id, "account_data_type": data_type},
+ retcol="content",
+ desc="get_global_account_data_by_type_for_user",
+ allow_none=True,
+ )
+
+ if result:
+ return db_to_json(result)
+ else:
+ return None
+
+ @cached(num_args=2)
+ def get_account_data_for_room(self, user_id, room_id):
+ """Get all the client account_data for a user for a room.
+
+ Args:
+ user_id(str): The user to get the account_data for.
+ room_id(str): The room to get the account_data for.
+ Returns:
+ A deferred dict of the room account_data
+ """
+
+ def get_account_data_for_room_txn(txn):
+ rows = self.db_pool.simple_select_list_txn(
+ txn,
+ "room_account_data",
+ {"user_id": user_id, "room_id": room_id},
+ ["account_data_type", "content"],
+ )
+
+ return {
+ row["account_data_type"]: db_to_json(row["content"]) for row in rows
+ }
+
+ return self.db_pool.runInteraction(
+ "get_account_data_for_room", get_account_data_for_room_txn
+ )
+
+ @cached(num_args=3, max_entries=5000)
+ def get_account_data_for_room_and_type(self, user_id, room_id, account_data_type):
+ """Get the client account_data of given type for a user for a room.
+
+ Args:
+ user_id(str): The user to get the account_data for.
+ room_id(str): The room to get the account_data for.
+ account_data_type (str): The account data type to get.
+ Returns:
+ A deferred of the room account_data for that type, or None if
+ there isn't any set.
+ """
+
+ def get_account_data_for_room_and_type_txn(txn):
+ content_json = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="room_account_data",
+ keyvalues={
+ "user_id": user_id,
+ "room_id": room_id,
+ "account_data_type": account_data_type,
+ },
+ retcol="content",
+ allow_none=True,
+ )
+
+ return db_to_json(content_json) if content_json else None
+
+ return self.db_pool.runInteraction(
+ "get_account_data_for_room_and_type", get_account_data_for_room_and_type_txn
+ )
+
+ async def get_updated_global_account_data(
+ self, last_id: int, current_id: int, limit: int
+ ) -> List[Tuple[int, str, str]]:
+ """Get the global account_data that has changed, for the account_data stream
+
+ Args:
+ last_id: the last stream_id from the previous batch.
+ current_id: the maximum stream_id to return up to
+ limit: the maximum number of rows to return
+
+ Returns:
+ A list of tuples of stream_id int, user_id string,
+ and type string.
+ """
+ if last_id == current_id:
+ return []
+
+ def get_updated_global_account_data_txn(txn):
+ sql = (
+ "SELECT stream_id, user_id, account_data_type"
+ " FROM account_data WHERE ? < stream_id AND stream_id <= ?"
+ " ORDER BY stream_id ASC LIMIT ?"
+ )
+ txn.execute(sql, (last_id, current_id, limit))
+ return txn.fetchall()
+
+ return await self.db_pool.runInteraction(
+ "get_updated_global_account_data", get_updated_global_account_data_txn
+ )
+
+ async def get_updated_room_account_data(
+ self, last_id: int, current_id: int, limit: int
+ ) -> List[Tuple[int, str, str, str]]:
+        """Get the room account_data that has changed, for the account_data stream
+
+ Args:
+ last_id: the last stream_id from the previous batch.
+ current_id: the maximum stream_id to return up to
+ limit: the maximum number of rows to return
+
+ Returns:
+ A list of tuples of stream_id int, user_id string,
+ room_id string and type string.
+ """
+ if last_id == current_id:
+ return []
+
+ def get_updated_room_account_data_txn(txn):
+ sql = (
+ "SELECT stream_id, user_id, room_id, account_data_type"
+ " FROM room_account_data WHERE ? < stream_id AND stream_id <= ?"
+ " ORDER BY stream_id ASC LIMIT ?"
+ )
+ txn.execute(sql, (last_id, current_id, limit))
+ return txn.fetchall()
+
+ return await self.db_pool.runInteraction(
+ "get_updated_room_account_data", get_updated_room_account_data_txn
+ )
+
+ def get_updated_account_data_for_user(self, user_id, stream_id):
+        """Get all the client account_data that has changed for a user
+
+ Args:
+ user_id(str): The user to get the account_data for.
+ stream_id(int): The point in the stream since which to get updates
+ Returns:
+ A deferred pair of a dict of global account_data and a dict
+ mapping from room_id string to per room account_data dicts.
+ """
+
+ def get_updated_account_data_for_user_txn(txn):
+ sql = (
+ "SELECT account_data_type, content FROM account_data"
+ " WHERE user_id = ? AND stream_id > ?"
+ )
+
+ txn.execute(sql, (user_id, stream_id))
+
+ global_account_data = {row[0]: db_to_json(row[1]) for row in txn}
+
+ sql = (
+ "SELECT room_id, account_data_type, content FROM room_account_data"
+ " WHERE user_id = ? AND stream_id > ?"
+ )
+
+ txn.execute(sql, (user_id, stream_id))
+
+ account_data_by_room = {}
+ for row in txn:
+ room_account_data = account_data_by_room.setdefault(row[0], {})
+ room_account_data[row[1]] = db_to_json(row[2])
+
+ return global_account_data, account_data_by_room
+
+ changed = self._account_data_stream_cache.has_entity_changed(
+ user_id, int(stream_id)
+ )
+ if not changed:
+ return defer.succeed(({}, {}))
+
+ return self.db_pool.runInteraction(
+ "get_updated_account_data_for_user", get_updated_account_data_for_user_txn
+ )
+
+ @cached(num_args=2, cache_context=True, max_entries=5000)
+ async def is_ignored_by(
+ self, ignored_user_id: str, ignorer_user_id: str, cache_context: _CacheContext
+ ) -> bool:
+ ignored_account_data = await self.get_global_account_data_by_type_for_user(
+ "m.ignored_user_list",
+ ignorer_user_id,
+ on_invalidate=cache_context.invalidate,
+ )
+ if not ignored_account_data:
+ return False
+
+ return ignored_user_id in ignored_account_data.get("ignored_users", {})
+
+
+class AccountDataStore(AccountDataWorkerStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ self._account_data_id_gen = StreamIdGenerator(
+ db_conn,
+ "account_data_max_stream_id",
+ "stream_id",
+ extra_tables=[
+ ("room_account_data", "stream_id"),
+ ("room_tags_revisions", "stream_id"),
+ ],
+ )
+
+ super(AccountDataStore, self).__init__(database, db_conn, hs)
+
+ def get_max_account_data_stream_id(self) -> int:
+ """Get the current max stream id for the private user data stream
+
+ Returns:
+ The maximum stream ID.
+ """
+ return self._account_data_id_gen.get_current_token()
+
+ async def add_account_data_to_room(
+ self, user_id: str, room_id: str, account_data_type: str, content: JsonDict
+ ) -> int:
+ """Add some account_data to a room for a user.
+
+ Args:
+            user_id: The user to add the account_data for.
+            room_id: The room to add the account_data for.
+            account_data_type: The type of account_data to add.
+            content: A json object to associate with the account_data.
+
+ Returns:
+ The maximum stream ID.
+ """
+ content_json = json_encoder.encode(content)
+
+ with self._account_data_id_gen.get_next() as next_id:
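+            # get_next() allocates the next stream id; leaving the context
+            # manager marks it as finished so readers can advance past it.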
+ # no need to lock here as room_account_data has a unique constraint
+ # on (user_id, room_id, account_data_type) so simple_upsert will
+ # retry if there is a conflict.
+ await self.db_pool.simple_upsert(
+ desc="add_room_account_data",
+ table="room_account_data",
+ keyvalues={
+ "user_id": user_id,
+ "room_id": room_id,
+ "account_data_type": account_data_type,
+ },
+ values={"stream_id": next_id, "content": content_json},
+ lock=False,
+ )
+
+ # it's theoretically possible for the above to succeed and the
+ # below to fail - in which case we might reuse a stream id on
+ # restart, and the above update might not get propagated. That
+ # doesn't sound any worse than the whole update getting lost,
+ # which is what would happen if we combined the two into one
+ # transaction.
+ await self._update_max_stream_id(next_id)
+
+ self._account_data_stream_cache.entity_has_changed(user_id, next_id)
+ self.get_account_data_for_user.invalidate((user_id,))
+ self.get_account_data_for_room.invalidate((user_id, room_id))
+ self.get_account_data_for_room_and_type.prefill(
+ (user_id, room_id, account_data_type), content
+ )
+
+ return self._account_data_id_gen.get_current_token()
+
+ async def add_account_data_for_user(
+ self, user_id: str, account_data_type: str, content: JsonDict
+ ) -> int:
+        """Add some global account_data for a user.
+
+        Args:
+            user_id: The user to add the account_data for.
+            account_data_type: The type of account_data to add.
+            content: A json object to associate with the account_data.
+
+ Returns:
+ The maximum stream ID.
+ """
+ content_json = json_encoder.encode(content)
+
+ with self._account_data_id_gen.get_next() as next_id:
+ # no need to lock here as account_data has a unique constraint on
+ # (user_id, account_data_type) so simple_upsert will retry if
+ # there is a conflict.
+ await self.db_pool.simple_upsert(
+ desc="add_user_account_data",
+ table="account_data",
+ keyvalues={"user_id": user_id, "account_data_type": account_data_type},
+ values={"stream_id": next_id, "content": content_json},
+ lock=False,
+ )
+
+ # it's theoretically possible for the above to succeed and the
+ # below to fail - in which case we might reuse a stream id on
+ # restart, and the above update might not get propagated. That
+ # doesn't sound any worse than the whole update getting lost,
+ # which is what would happen if we combined the two into one
+ # transaction.
+ #
+ # Note: This is only here for backwards compat to allow admins to
+ # roll back to a previous Synapse version. Next time we update the
+ # database version we can remove this table.
+ await self._update_max_stream_id(next_id)
+
+ self._account_data_stream_cache.entity_has_changed(user_id, next_id)
+ self.get_account_data_for_user.invalidate((user_id,))
+ self.get_global_account_data_by_type_for_user.invalidate(
+ (account_data_type, user_id)
+ )
+
+ return self._account_data_id_gen.get_current_token()
+
+ def _update_max_stream_id(self, next_id: int):
+ """Update the max stream_id
+
+ Args:
+            next_id: The stream_id to advance to.
+ """
+
+ # Note: This is only here for backwards compat to allow admins to
+ # roll back to a previous Synapse version. Next time we update the
+ # database version we can remove this table.
+
+ def _update(txn):
+ update_max_id_sql = (
+ "UPDATE account_data_max_stream_id"
+ " SET stream_id = ?"
+ " WHERE stream_id < ?"
+ )
+ txn.execute(update_max_id_sql, (next_id, next_id))
+
+ return self.db_pool.runInteraction("update_account_data_max_stream_id", _update)
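
One pattern worth noting in get_updated_account_data_for_user above: an in-memory StreamChangeCache is consulted first, and the database interaction is only scheduled if the user may have changed since the given stream position. A simplified sketch of that short-circuit, with a plain dict of last-changed positions standing in for Synapse's StreamChangeCache:

    # user_id -> stream_id of the latest known change. A real StreamChangeCache
    # also tracks the earliest position it knows about and must answer "maybe
    # changed" for queries older than that; this sketch omits that detail.
    last_changed = {}

    def entity_has_changed(user_id, stream_id):
        """Record that user_id changed at stream_id."""
        last_changed[user_id] = max(last_changed.get(user_id, 0), stream_id)

    def get_updates_since(user_id, stream_id, fetch_from_db):
        # Short-circuit: skip the database if nothing changed past stream_id.
        if last_changed.get(user_id, 0) <= stream_id:
            return {}
        return fetch_from_db(user_id, stream_id)

    entity_has_changed("@alice:example.com", 7)
    assert get_updates_since("@alice:example.com", 10, lambda u, s: {"m.tag": {}}) == {}
    assert get_updates_since("@alice:example.com", 5, lambda u, s: {"m.tag": {}}) == {"m.tag": {}}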
diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py
new file mode 100644
index 00000000..5cf1a883
--- /dev/null
+++ b/synapse/storage/databases/main/appservice.py
@@ -0,0 +1,366 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015, 2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import re
+
+from canonicaljson import json
+
+from synapse.appservice import AppServiceTransaction
+from synapse.config.appservice import load_appservices
+from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.storage.database import DatabasePool
+from synapse.storage.databases.main.events_worker import EventsWorkerStore
+
+logger = logging.getLogger(__name__)
+
+
+def _make_exclusive_regex(services_cache):
+ # We precompile a regex constructed from all the regexes that the AS's
+ # have registered for exclusive users.
+ exclusive_user_regexes = [
+ regex.pattern
+ for service in services_cache
+ for regex in service.get_exclusive_user_regexes()
+ ]
+ if exclusive_user_regexes:
+ exclusive_user_regex = "|".join("(" + r + ")" for r in exclusive_user_regexes)
+ exclusive_user_regex = re.compile(exclusive_user_regex)
+ else:
+ # We handle this case specially otherwise the constructed regex
+ # will always match
+ exclusive_user_regex = None
+
+ return exclusive_user_regex
+
+
+class ApplicationServiceWorkerStore(SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ self.services_cache = load_appservices(
+ hs.hostname, hs.config.app_service_config_files
+ )
+ self.exclusive_user_regex = _make_exclusive_regex(self.services_cache)
+
+ super(ApplicationServiceWorkerStore, self).__init__(database, db_conn, hs)
+
+ def get_app_services(self):
+ return self.services_cache
+
+ def get_if_app_services_interested_in_user(self, user_id):
+ """Check if the user is one associated with an app service (exclusively)
+ """
+ if self.exclusive_user_regex:
+ return bool(self.exclusive_user_regex.match(user_id))
+ else:
+ return False
+
+ def get_app_service_by_user_id(self, user_id):
+ """Retrieve an application service from their user ID.
+
+ All application services have associated with them a particular user ID.
+ There is no distinguishing feature on the user ID which indicates it
+ represents an application service. This function allows you to map from
+ a user ID to an application service.
+
+ Args:
+ user_id(str): The user ID to see if it is an application service.
+ Returns:
+ synapse.appservice.ApplicationService or None.
+ """
+ for service in self.services_cache:
+ if service.sender == user_id:
+ return service
+ return None
+
+ def get_app_service_by_token(self, token):
+ """Get the application service with the given appservice token.
+
+ Args:
+ token (str): The application service token.
+ Returns:
+ synapse.appservice.ApplicationService or None.
+ """
+ for service in self.services_cache:
+ if service.token == token:
+ return service
+ return None
+
+ def get_app_service_by_id(self, as_id):
+ """Get the application service with the given appservice ID.
+
+ Args:
+ as_id (str): The application service ID.
+ Returns:
+ synapse.appservice.ApplicationService or None.
+ """
+ for service in self.services_cache:
+ if service.id == as_id:
+ return service
+ return None
+
+
+class ApplicationServiceStore(ApplicationServiceWorkerStore):
+ # This is currently empty due to there not being any AS storage functions
+ # that can't be run on the workers. Since this may change in future, and
+ # to keep consistency with the other stores, we keep this empty class for
+ # now.
+ pass
+
+
+class ApplicationServiceTransactionWorkerStore(
+ ApplicationServiceWorkerStore, EventsWorkerStore
+):
+ async def get_appservices_by_state(self, state):
+ """Get a list of application services based on their state.
+
+ Args:
+ state(ApplicationServiceState): The state to filter on.
+ Returns:
+ A list of ApplicationServices, which may be empty.
+ """
+ results = await self.db_pool.simple_select_list(
+ "application_services_state", {"state": state}, ["as_id"]
+ )
+ # NB: This assumes this class is linked with ApplicationServiceStore
+ as_list = self.get_app_services()
+ services = []
+
+ for res in results:
+ for service in as_list:
+ if service.id == res["as_id"]:
+ services.append(service)
+ return services
+
+ async def get_appservice_state(self, service):
+ """Get the application service state.
+
+ Args:
+ service(ApplicationService): The service whose state to get.
+ Returns:
+ An ApplicationServiceState.
+ """
+ result = await self.db_pool.simple_select_one(
+ "application_services_state",
+ {"as_id": service.id},
+ ["state"],
+ allow_none=True,
+ desc="get_appservice_state",
+ )
+ if result:
+ return result.get("state")
+ return None
+
+ def set_appservice_state(self, service, state):
+ """Set the application service state.
+
+ Args:
+ service(ApplicationService): The service whose state to set.
+ state(ApplicationServiceState): The connectivity state to apply.
+ Returns:
+ A Deferred which resolves when the state was set successfully.
+ """
+ return self.db_pool.simple_upsert(
+ "application_services_state", {"as_id": service.id}, {"state": state}
+ )
+
+ def create_appservice_txn(self, service, events):
+ """Atomically creates a new transaction for this application service
+ with the given list of events.
+
+ Args:
+ service(ApplicationService): The service who the transaction is for.
+ events(list<Event>): A list of events to put in the transaction.
+ Returns:
+ AppServiceTransaction: A new transaction.
+ """
+
+ def _create_appservice_txn(txn):
+ # work out new txn id (highest txn id for this service += 1)
+ # The highest id may be the last one sent (in which case it is last_txn)
+ # or it may be the highest in the txns list (which are waiting to be/are
+ # being sent)
+ last_txn_id = self._get_last_txn(txn, service.id)
+
+ txn.execute(
+ "SELECT MAX(txn_id) FROM application_services_txns WHERE as_id=?",
+ (service.id,),
+ )
+ highest_txn_id = txn.fetchone()[0]
+ if highest_txn_id is None:
+ highest_txn_id = 0
+
+ new_txn_id = max(highest_txn_id, last_txn_id) + 1
+
+ # Insert new txn into txn table
+ event_ids = json.dumps([e.event_id for e in events])
+ txn.execute(
+ "INSERT INTO application_services_txns(as_id, txn_id, event_ids) "
+ "VALUES(?,?,?)",
+ (service.id, new_txn_id, event_ids),
+ )
+ return AppServiceTransaction(service=service, id=new_txn_id, events=events)
+
+ return self.db_pool.runInteraction(
+ "create_appservice_txn", _create_appservice_txn
+ )
+
+ def complete_appservice_txn(self, txn_id, service):
+ """Completes an application service transaction.
+
+ Args:
+ txn_id(str): The transaction ID being completed.
+ service(ApplicationService): The application service which was sent
+ this transaction.
+ Returns:
+ A Deferred which resolves if this transaction was stored
+ successfully.
+ """
+ txn_id = int(txn_id)
+
+ def _complete_appservice_txn(txn):
+ # Debugging query: Make sure the txn being completed is EXACTLY +1 from
+ # what was there before. If it isn't, we've got problems (e.g. the AS
+ # has probably missed some events), so whine loudly but still continue,
+ # since it shouldn't fail completion of the transaction.
+ last_txn_id = self._get_last_txn(txn, service.id)
+ if (last_txn_id + 1) != txn_id:
+ logger.error(
+ "appservice: Completing a transaction which has an ID > 1 from "
+ "the last ID sent to this AS. We've either dropped events or "
+ "sent it to the AS out of order. FIX ME. last_txn=%s "
+ "completing_txn=%s service_id=%s",
+ last_txn_id,
+ txn_id,
+ service.id,
+ )
+
+ # Set current txn_id for AS to 'txn_id'
+ self.db_pool.simple_upsert_txn(
+ txn,
+ "application_services_state",
+ {"as_id": service.id},
+ {"last_txn": txn_id},
+ )
+
+ # Delete txn
+ self.db_pool.simple_delete_txn(
+ txn,
+ "application_services_txns",
+ {"txn_id": txn_id, "as_id": service.id},
+ )
+
+ return self.db_pool.runInteraction(
+ "complete_appservice_txn", _complete_appservice_txn
+ )
+
+ async def get_oldest_unsent_txn(self, service):
+ """Get the oldest transaction which has not been sent for this
+ service.
+
+ Args:
+ service(ApplicationService): The app service to get the oldest transaction for.
+ Returns:
+ An AppServiceTransaction or None.
+ """
+
+ def _get_oldest_unsent_txn(txn):
+ # Monotonically increasing txn ids, so just select the smallest
+ # one in the txns table (we delete them when they are sent)
+ txn.execute(
+ "SELECT * FROM application_services_txns WHERE as_id=?"
+ " ORDER BY txn_id ASC LIMIT 1",
+ (service.id,),
+ )
+ rows = self.db_pool.cursor_to_dict(txn)
+ if not rows:
+ return None
+
+ entry = rows[0]
+
+ return entry
+
+ entry = await self.db_pool.runInteraction(
+ "get_oldest_unsent_appservice_txn", _get_oldest_unsent_txn
+ )
+
+ if not entry:
+ return None
+
+ event_ids = db_to_json(entry["event_ids"])
+
+ events = await self.get_events_as_list(event_ids)
+
+ return AppServiceTransaction(service=service, id=entry["txn_id"], events=events)
+
+ def _get_last_txn(self, txn, service_id):
+ txn.execute(
+ "SELECT last_txn FROM application_services_state WHERE as_id=?",
+ (service_id,),
+ )
+ last_txn_id = txn.fetchone()
+ if last_txn_id is None or last_txn_id[0] is None: # no row exists
+ return 0
+ else:
+ return int(last_txn_id[0]) # select 'last_txn' col
+
+ def set_appservice_last_pos(self, pos):
+ def set_appservice_last_pos_txn(txn):
+ txn.execute(
+ "UPDATE appservice_stream_position SET stream_ordering = ?", (pos,)
+ )
+
+ return self.db_pool.runInteraction(
+ "set_appservice_last_pos", set_appservice_last_pos_txn
+ )
+
+ async def get_new_events_for_appservice(self, current_id, limit):
+ """Get all new evnets"""
+
+ def get_new_events_for_appservice_txn(txn):
+ sql = (
+ "SELECT e.stream_ordering, e.event_id"
+ " FROM events AS e"
+ " WHERE"
+ " (SELECT stream_ordering FROM appservice_stream_position)"
+ " < e.stream_ordering"
+ " AND e.stream_ordering <= ?"
+ " ORDER BY e.stream_ordering ASC"
+ " LIMIT ?"
+ )
+
+ txn.execute(sql, (current_id, limit))
+ rows = txn.fetchall()
+
+ upper_bound = current_id
+ if len(rows) == limit:
+ upper_bound = rows[-1][0]
+
+ return upper_bound, [row[1] for row in rows]
+
+ upper_bound, event_ids = await self.db_pool.runInteraction(
+ "get_new_events_for_appservice", get_new_events_for_appservice_txn
+ )
+
+ events = await self.get_events_as_list(event_ids)
+
+ return upper_bound, events
+
+
+class ApplicationServiceTransactionStore(ApplicationServiceTransactionWorkerStore):
+ # This is currently empty due to there not being any AS storage functions
+ # that can't be run on the workers. Since this may change in future, and
+ # to keep consistency with the other stores, we keep this empty class for
+ # now.
+ pass
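A minimal sketch (not part of the patch above) of the transaction lifecycle these stores support; `store`, `service`, `events` and the `send` callable are assumed to be provided by the appservice handler.

    async def push_events_to_appservice(store, service, events, send) -> None:
        # Allocate the next txn_id for this AS and persist its event IDs.
        txn = await store.create_appservice_txn(service, events)
        if await send(service, txn.events):
            # Success: record last_txn for the AS and delete the pending row.
            await store.complete_appservice_txn(txn.id, service)
        else:
            # Failure: the row stays behind, so a recoverer can retry it later
            # via get_oldest_unsent_txn(service).
            pass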
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py
new file mode 100644
index 00000000..10de4460
--- /dev/null
+++ b/synapse/storage/databases/main/cache.py
@@ -0,0 +1,306 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import itertools
+import logging
+from typing import Any, Iterable, List, Optional, Tuple
+
+from synapse.api.constants import EventTypes
+from synapse.replication.tcp.streams import BackfillStream, CachesStream
+from synapse.replication.tcp.streams.events import (
+ EventsStream,
+ EventsStreamCurrentStateRow,
+ EventsStreamEventRow,
+)
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.database import DatabasePool
+from synapse.storage.engines import PostgresEngine
+from synapse.util.iterutils import batch_iter
+
+logger = logging.getLogger(__name__)
+
+
+# This is a special cache name we use to batch multiple invalidations of caches
+# based on the current state when notifying workers over replication.
+CURRENT_STATE_CACHE_NAME = "cs_cache_fake"
+
+
+class CacheInvalidationWorkerStore(SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super().__init__(database, db_conn, hs)
+
+ self._instance_name = hs.get_instance_name()
+
+ async def get_all_updated_caches(
+ self, instance_name: str, last_id: int, current_id: int, limit: int
+ ) -> Tuple[List[Tuple[int, tuple]], int, bool]:
+ """Get updates for caches replication stream.
+
+ Args:
+ instance_name: The writer we want to fetch updates from. Unused
+ here since there is only ever one writer.
+ last_id: The token to fetch updates from. Exclusive.
+ current_id: The token to fetch updates up to. Inclusive.
+ limit: The requested limit for the number of rows to return. The
+ function may return more or fewer rows.
+
+ Returns:
+ A tuple consisting of: the updates, a token to use to fetch
+ subsequent updates, and whether we returned fewer rows than exists
+ between the requested tokens due to the limit.
+
+ The token returned can be used in a subsequent call to this
+ function to get further updates.
+
+ The updates are a list of 2-tuples of stream ID and the row data
+ """
+
+ if last_id == current_id:
+ return [], current_id, False
+
+ def get_all_updated_caches_txn(txn):
+ # We purposefully don't bound by the current token, as we want to
+ # send across cache invalidations as quickly as possible. Cache
+ # invalidations are idempotent, so duplicates are fine.
+ sql = """
+ SELECT stream_id, cache_func, keys, invalidation_ts
+ FROM cache_invalidation_stream_by_instance
+ WHERE stream_id > ? AND instance_name = ?
+ ORDER BY stream_id ASC
+ LIMIT ?
+ """
+ txn.execute(sql, (last_id, instance_name, limit))
+ updates = [(row[0], row[1:]) for row in txn]
+ limited = False
+ upto_token = current_id
+ if len(updates) >= limit:
+ upto_token = updates[-1][0]
+ limited = True
+
+ return updates, upto_token, limited
+
+ return await self.db_pool.runInteraction(
+ "get_all_updated_caches", get_all_updated_caches_txn
+ )
+
+ def process_replication_rows(self, stream_name, instance_name, token, rows):
+ if stream_name == EventsStream.NAME:
+ for row in rows:
+ self._process_event_stream_row(token, row)
+ elif stream_name == BackfillStream.NAME:
+ for row in rows:
+ self._invalidate_caches_for_event(
+ -token,
+ row.event_id,
+ row.room_id,
+ row.type,
+ row.state_key,
+ row.redacts,
+ row.relates_to,
+ backfilled=True,
+ )
+ elif stream_name == CachesStream.NAME:
+ if self._cache_id_gen:
+ self._cache_id_gen.advance(instance_name, token)
+
+ for row in rows:
+ if row.cache_func == CURRENT_STATE_CACHE_NAME:
+ if row.keys is None:
+ raise Exception(
+ "Can't send an 'invalidate all' for current state cache"
+ )
+
+ room_id = row.keys[0]
+ members_changed = set(row.keys[1:])
+ self._invalidate_state_caches(room_id, members_changed)
+ else:
+ self._attempt_to_invalidate_cache(row.cache_func, row.keys)
+
+ super().process_replication_rows(stream_name, instance_name, token, rows)
+
+ def _process_event_stream_row(self, token, row):
+ data = row.data
+
+ if row.type == EventsStreamEventRow.TypeId:
+ self._invalidate_caches_for_event(
+ token,
+ data.event_id,
+ data.room_id,
+ data.type,
+ data.state_key,
+ data.redacts,
+ data.relates_to,
+ backfilled=False,
+ )
+ elif row.type == EventsStreamCurrentStateRow.TypeId:
+ self._curr_state_delta_stream_cache.entity_has_changed(
+ row.data.room_id, token
+ )
+
+ if data.type == EventTypes.Member:
+ self.get_rooms_for_user_with_stream_ordering.invalidate(
+ (data.state_key,)
+ )
+ else:
+ raise Exception("Unknown events stream row type %s" % (row.type,))
+
+ def _invalidate_caches_for_event(
+ self,
+ stream_ordering,
+ event_id,
+ room_id,
+ etype,
+ state_key,
+ redacts,
+ relates_to,
+ backfilled,
+ ):
+ self._invalidate_get_event_cache(event_id)
+
+ self.get_latest_event_ids_in_room.invalidate((room_id,))
+
+ self.get_unread_event_push_actions_by_room_for_user.invalidate_many((room_id,))
+
+ if not backfilled:
+ self._events_stream_cache.entity_has_changed(room_id, stream_ordering)
+
+ if redacts:
+ self._invalidate_get_event_cache(redacts)
+
+ if etype == EventTypes.Member:
+ self._membership_stream_cache.entity_has_changed(state_key, stream_ordering)
+ self.get_invited_rooms_for_local_user.invalidate((state_key,))
+
+ if relates_to:
+ self.get_relations_for_event.invalidate_many((relates_to,))
+ self.get_aggregation_groups_for_event.invalidate_many((relates_to,))
+ self.get_applicable_edit.invalidate((relates_to,))
+
+ async def invalidate_cache_and_stream(self, cache_name: str, keys: Tuple[Any, ...]):
+ """Invalidates the cache and adds it to the cache stream so slaves
+ will know to invalidate their caches.
+
+ This should only be used to invalidate caches where slaves won't
+ otherwise know from other replication streams that the cache should
+ be invalidated.
+ """
+ cache_func = getattr(self, cache_name, None)
+ if not cache_func:
+ return
+
+ cache_func.invalidate(keys)
+ await self.db_pool.runInteraction(
+ "invalidate_cache_and_stream",
+ self._send_invalidation_to_replication,
+ cache_func.__name__,
+ keys,
+ )
+
+ def _invalidate_cache_and_stream(self, txn, cache_func, keys):
+ """Invalidates the cache and adds it to the cache stream so slaves
+ will know to invalidate their caches.
+
+ This should only be used to invalidate caches where slaves won't
+ otherwise know from other replication streams that the cache should
+ be invalidated.
+ """
+ txn.call_after(cache_func.invalidate, keys)
+ self._send_invalidation_to_replication(txn, cache_func.__name__, keys)
+
+ def _invalidate_all_cache_and_stream(self, txn, cache_func):
+ """Invalidates the entire cache and adds it to the cache stream so slaves
+ will know to invalidate their caches.
+ """
+
+ txn.call_after(cache_func.invalidate_all)
+ self._send_invalidation_to_replication(txn, cache_func.__name__, None)
+
+ def _invalidate_state_caches_and_stream(self, txn, room_id, members_changed):
+ """Special case invalidation of caches based on current state.
+
+ We special case this so that we can batch the cache invalidations into a
+ single replication poke.
+
+ Args:
+ txn
+ room_id (str): Room where state changed
+ members_changed (iterable[str]): The user_ids of members that have changed
+ """
+ txn.call_after(self._invalidate_state_caches, room_id, members_changed)
+
+ if members_changed:
+ # We need to be careful that the size of the `members_changed` list
+ # isn't so large that it causes problems sending over replication, so we
+ # send them in chunks.
+ # Max line length is 16K, and max user ID length is 255, so 50 should
+ # be safe.
+ for chunk in batch_iter(members_changed, 50):
+ keys = itertools.chain([room_id], chunk)
+ self._send_invalidation_to_replication(
+ txn, CURRENT_STATE_CACHE_NAME, keys
+ )
+ else:
+ # if no members changed, we still need to invalidate the other caches.
+ self._send_invalidation_to_replication(
+ txn, CURRENT_STATE_CACHE_NAME, [room_id]
+ )
+
+ def _send_invalidation_to_replication(
+ self, txn, cache_name: str, keys: Optional[Iterable[Any]]
+ ):
+ """Notifies replication that given cache has been invalidated.
+
+ Note that this does *not* invalidate the cache locally.
+
+ Args:
+ txn
+ cache_name
+ keys: Entry to invalidate. If None will invalidate all.
+ """
+
+ if cache_name == CURRENT_STATE_CACHE_NAME and keys is None:
+ raise Exception(
+ "Can't stream invalidate all with magic current state cache"
+ )
+
+ if isinstance(self.database_engine, PostgresEngine):
+ # get_next() returns a context manager which is designed to wrap
+ # the transaction. However, we want to only get an ID when we want
+ # to use it, here, so we need to call __enter__ manually, and have
+ # __exit__ called after the transaction finishes.
+ stream_id = self._cache_id_gen.get_next_txn(txn)
+ txn.call_after(self.hs.get_notifier().on_new_replication_data)
+
+ if keys is not None:
+ keys = list(keys)
+
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="cache_invalidation_stream_by_instance",
+ values={
+ "stream_id": stream_id,
+ "instance_name": self._instance_name,
+ "cache_func": cache_name,
+ "keys": keys,
+ "invalidation_ts": self.clock.time_msec(),
+ },
+ )
+
+ def get_cache_stream_token(self, instance_name):
+ if self._cache_id_gen:
+ return self._cache_id_gen.get_current_token(instance_name)
+ else:
+ return 0
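A minimal sketch (not part of the patch above) of how a store built on CacheInvalidationWorkerStore might invalidate a cached getter both locally and on other workers; `get_account_data_for_user` is only an example of a cached method name.

    async def on_account_data_changed(self, user_id: str) -> None:
        # Invalidates the local cache entry and writes a row to
        # cache_invalidation_stream_by_instance so that replicas do the same.
        await self.invalidate_cache_and_stream(
            "get_account_data_for_user", (user_id,)
        )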
diff --git a/synapse/storage/databases/main/censor_events.py b/synapse/storage/databases/main/censor_events.py
new file mode 100644
index 00000000..f211ddba
--- /dev/null
+++ b/synapse/storage/databases/main/censor_events.py
@@ -0,0 +1,207 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import TYPE_CHECKING
+
+from synapse.events.utils import prune_event_dict
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.database import DatabasePool
+from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
+from synapse.storage.databases.main.events import encode_json
+from synapse.storage.databases.main.events_worker import EventsWorkerStore
+
+if TYPE_CHECKING:
+ from synapse.server import HomeServer
+
+
+logger = logging.getLogger(__name__)
+
+
+class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"):
+ super().__init__(database, db_conn, hs)
+
+ def _censor_redactions():
+ return run_as_background_process(
+ "_censor_redactions", self._censor_redactions
+ )
+
+ if self.hs.config.redaction_retention_period is not None:
+ hs.get_clock().looping_call(_censor_redactions, 5 * 60 * 1000)
+
+ async def _censor_redactions(self):
+ """Censors all redactions older than the configured period that haven't
+ been censored yet.
+
+ By censor we mean update the event_json table with the redacted event.
+ """
+
+ if self.hs.config.redaction_retention_period is None:
+ return
+
+ if not (
+ await self.db_pool.updates.has_completed_background_update(
+ "redactions_have_censored_ts_idx"
+ )
+ ):
+ # We don't want to run this until the appropriate index has been
+ # created.
+ return
+
+ before_ts = self._clock.time_msec() - self.hs.config.redaction_retention_period
+
+ # We fetch all redactions that:
+ # 1. point to an event we have,
+ # 2. have a received_ts from before the cut off, and
+ # 3. we haven't yet censored.
+ #
+ # This is limited to 100 events to ensure that we don't try and do too
+ # much at once. We'll get called again so this should eventually catch
+ # up.
+ sql = """
+ SELECT redactions.event_id, redacts FROM redactions
+ LEFT JOIN events AS original_event ON (
+ redacts = original_event.event_id
+ )
+ WHERE NOT have_censored
+ AND redactions.received_ts <= ?
+ ORDER BY redactions.received_ts ASC
+ LIMIT ?
+ """
+
+ rows = await self.db_pool.execute(
+ "_censor_redactions_fetch", None, sql, before_ts, 100
+ )
+
+ updates = []
+
+ for redaction_id, event_id in rows:
+ redaction_event = await self.get_event(redaction_id, allow_none=True)
+ original_event = await self.get_event(
+ event_id, allow_rejected=True, allow_none=True
+ )
+
+ # The SQL above ensures that we have both the redaction and
+ # original event, so if the `get_event` calls return None it
+ # means that the redaction wasn't allowed. Either way we know that
+ # the result won't change so we mark the fact that we've checked.
+ if (
+ redaction_event
+ and original_event
+ and original_event.internal_metadata.is_redacted()
+ ):
+ # Redaction was allowed
+ pruned_json = encode_json(
+ prune_event_dict(
+ original_event.room_version, original_event.get_dict()
+ )
+ )
+ else:
+ # Redaction wasn't allowed
+ pruned_json = None
+
+ updates.append((redaction_id, event_id, pruned_json))
+
+ def _update_censor_txn(txn):
+ for redaction_id, event_id, pruned_json in updates:
+ if pruned_json:
+ self._censor_event_txn(txn, event_id, pruned_json)
+
+ self.db_pool.simple_update_one_txn(
+ txn,
+ table="redactions",
+ keyvalues={"event_id": redaction_id},
+ updatevalues={"have_censored": True},
+ )
+
+ await self.db_pool.runInteraction("_update_censor_txn", _update_censor_txn)
+
+ def _censor_event_txn(self, txn, event_id, pruned_json):
+ """Censor an event by replacing its JSON in the event_json table with the
+ provided pruned JSON.
+
+ Args:
+ txn (LoggingTransaction): The database transaction.
+ event_id (str): The ID of the event to censor.
+ pruned_json (str): The pruned JSON
+ """
+ self.db_pool.simple_update_one_txn(
+ txn,
+ table="event_json",
+ keyvalues={"event_id": event_id},
+ updatevalues={"json": pruned_json},
+ )
+
+ async def expire_event(self, event_id: str) -> None:
+ """Retrieve and expire an event that has expired, and delete its associated
+ expiry timestamp. If the event can't be retrieved, delete its associated
+ timestamp so we don't try to expire it again in the future.
+
+ Args:
+ event_id: The ID of the event to delete.
+ """
+ # Try to retrieve the event's content from the database or the event cache.
+ event = await self.get_event(event_id)
+
+ def delete_expired_event_txn(txn):
+ # Delete the expiry timestamp associated with this event from the database.
+ self._delete_event_expiry_txn(txn, event_id)
+
+ if not event:
+ # If we can't find the event, log a warning and delete the expiry date
+ # from the database so that we don't try to expire it again in the
+ # future.
+ logger.warning(
+ "Can't expire event %s because we don't have it.", event_id
+ )
+ return
+
+ # Prune the event's dict then convert it to JSON.
+ pruned_json = encode_json(
+ prune_event_dict(event.room_version, event.get_dict())
+ )
+
+ # Update the event_json table to replace the event's JSON with the pruned
+ # JSON.
+ self._censor_event_txn(txn, event.event_id, pruned_json)
+
+ # We need to invalidate the event cache entry for this event because we
+ # changed its content in the database. We can't call
+ # self._invalidate_cache_and_stream because self.get_event_cache isn't of the
+ # right type.
+ txn.call_after(self._get_event_cache.invalidate, (event.event_id,))
+ # Send that invalidation to replication so that other workers also invalidate
+ # the event cache.
+ self._send_invalidation_to_replication(
+ txn, "_get_event_cache", (event.event_id,)
+ )
+
+ await self.db_pool.runInteraction(
+ "delete_expired_event", delete_expired_event_txn
+ )
+
+ def _delete_event_expiry_txn(self, txn, event_id):
+ """Delete the expiry timestamp associated with an event ID without deleting the
+ actual event.
+
+ Args:
+ txn (LoggingTransaction): The transaction to use to perform the deletion.
+ event_id (str): The event ID to delete the associated expiry timestamp of.
+ """
+ return self.db_pool.simple_delete_txn(
+ txn=txn, table="event_expiry", keyvalues={"event_id": event_id}
+ )
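A minimal sketch (not part of the patch above) of the pruning step that _censor_redactions and expire_event share: replace an event's stored JSON with its pruned form. The helper name is illustrative.

    def pruned_event_json(event) -> str:
        # Strips all keys that the event's room version does not require,
        # then re-encodes the result for the event_json table.
        return encode_json(prune_event_dict(event.room_version, event.get_dict()))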
diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py
new file mode 100644
index 00000000..216a5925
--- /dev/null
+++ b/synapse/storage/databases/main/client_ips.py
@@ -0,0 +1,573 @@
+# -*- coding: utf-8 -*-
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Dict, Optional, Tuple
+
+from synapse.metrics.background_process_metrics import wrap_as_background_process
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.database import DatabasePool, make_tuple_comparison_clause
+from synapse.util.caches.descriptors import Cache
+
+logger = logging.getLogger(__name__)
+
+# Number of msec of granularity to store the user IP 'last seen' time. Smaller
+# times give more inserts into the database even for readonly API hits
+# 120 seconds == 2 minutes
+LAST_SEEN_GRANULARITY = 120 * 1000
+
+
+class ClientIpBackgroundUpdateStore(SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(ClientIpBackgroundUpdateStore, self).__init__(database, db_conn, hs)
+
+ self.db_pool.updates.register_background_index_update(
+ "user_ips_device_index",
+ index_name="user_ips_device_id",
+ table="user_ips",
+ columns=["user_id", "device_id", "last_seen"],
+ )
+
+ self.db_pool.updates.register_background_index_update(
+ "user_ips_last_seen_index",
+ index_name="user_ips_last_seen",
+ table="user_ips",
+ columns=["user_id", "last_seen"],
+ )
+
+ self.db_pool.updates.register_background_index_update(
+ "user_ips_last_seen_only_index",
+ index_name="user_ips_last_seen_only",
+ table="user_ips",
+ columns=["last_seen"],
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ "user_ips_analyze", self._analyze_user_ip
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ "user_ips_remove_dupes", self._remove_user_ip_dupes
+ )
+
+ # Register a unique index
+ self.db_pool.updates.register_background_index_update(
+ "user_ips_device_unique_index",
+ index_name="user_ips_user_token_ip_unique_index",
+ table="user_ips",
+ columns=["user_id", "access_token", "ip"],
+ unique=True,
+ )
+
+ # Drop the old non-unique index
+ self.db_pool.updates.register_background_update_handler(
+ "user_ips_drop_nonunique_index", self._remove_user_ip_nonunique
+ )
+
+ # Update the last seen info in devices.
+ self.db_pool.updates.register_background_update_handler(
+ "devices_last_seen", self._devices_last_seen_update
+ )
+
+ async def _remove_user_ip_nonunique(self, progress, batch_size):
+ def f(conn):
+ txn = conn.cursor()
+ txn.execute("DROP INDEX IF EXISTS user_ips_user_ip")
+ txn.close()
+
+ await self.db_pool.runWithConnection(f)
+ await self.db_pool.updates._end_background_update(
+ "user_ips_drop_nonunique_index"
+ )
+ return 1
+
+ async def _analyze_user_ip(self, progress, batch_size):
+ # Background update to analyze user_ips table before we run the
+ # deduplication background update. The table may not have been analyzed
+ # for ages due to the table locks.
+ #
+ # This will lock out the naive upserts to user_ips while it happens, but
+ # the analyze should be quick (28GB table takes ~10s)
+ def user_ips_analyze(txn):
+ txn.execute("ANALYZE user_ips")
+
+ await self.db_pool.runInteraction("user_ips_analyze", user_ips_analyze)
+
+ await self.db_pool.updates._end_background_update("user_ips_analyze")
+
+ return 1
+
+ async def _remove_user_ip_dupes(self, progress, batch_size):
+ # This function works by scanning the user_ips table in batches
+ # based on `last_seen`. For each row in a batch it searches the rest of
+ # the table to see if there are any duplicates, if there are then they
+ # are removed and replaced with a suitable row.
+
+ # Fetch the start of the batch
+ begin_last_seen = progress.get("last_seen", 0)
+
+ def get_last_seen(txn):
+ txn.execute(
+ """
+ SELECT last_seen FROM user_ips
+ WHERE last_seen > ?
+ ORDER BY last_seen
+ LIMIT 1
+ OFFSET ?
+ """,
+ (begin_last_seen, batch_size),
+ )
+ row = txn.fetchone()
+ if row:
+ return row[0]
+ else:
+ return None
+
+ # Get a last seen that has roughly `batch_size` since `begin_last_seen`
+ end_last_seen = await self.db_pool.runInteraction(
+ "user_ips_dups_get_last_seen", get_last_seen
+ )
+
+ # If it returns None, then we're processing the last batch
+ last = end_last_seen is None
+
+ logger.info(
+ "Scanning for duplicate 'user_ips' rows in range: %s <= last_seen < %s",
+ begin_last_seen,
+ end_last_seen,
+ )
+
+ def remove(txn):
+ # This works by looking at all entries in the given time span, and
+ # then for each (user_id, access_token, ip) tuple in that range
+ # checking for any duplicates in the rest of the table (via a join).
+ # It then only returns entries which have duplicates, and the max
+ # last_seen across all duplicates, which can then be used to delete
+ # all other duplicates.
+ # It is efficient due to the existence of (user_id, access_token,
+ # ip) and (last_seen) indices.
+
+ # Define the search space, which requires handling the last batch in
+ # a different way
+ if last:
+ clause = "? <= last_seen"
+ args = (begin_last_seen,)
+ else:
+ clause = "? <= last_seen AND last_seen < ?"
+ args = (begin_last_seen, end_last_seen)
+
+ # (Note: The DISTINCT in the inner query is important to ensure that
+ # the COUNT(*) is accurate, otherwise double counting may happen due
+ # to the join effectively being a cross product)
+ txn.execute(
+ """
+ SELECT user_id, access_token, ip,
+ MAX(device_id), MAX(user_agent), MAX(last_seen),
+ COUNT(*)
+ FROM (
+ SELECT DISTINCT user_id, access_token, ip
+ FROM user_ips
+ WHERE {}
+ ) c
+ INNER JOIN user_ips USING (user_id, access_token, ip)
+ GROUP BY user_id, access_token, ip
+ HAVING count(*) > 1
+ """.format(
+ clause
+ ),
+ args,
+ )
+ res = txn.fetchall()
+
+ # We've got some duplicates
+ for i in res:
+ user_id, access_token, ip, device_id, user_agent, last_seen, count = i
+
+ # We want to delete the duplicates so we end up with only a
+ # single row.
+ #
+ # The naive way of doing this would be just to delete all rows
+ # and reinsert a constructed row. However, if there are a lot of
+ # duplicate rows this can cause the table to grow a lot, which
+ # can be problematic in two ways:
+ # 1. If user_ips is already large then this can cause the
+ # table to rapidly grow, potentially filling the disk.
+ # 2. Reinserting a lot of rows can confuse the table
+ # statistics for postgres, causing it to not use the
+ # correct indices for the query above, resulting in a full
+ # table scan. This is incredibly slow for large tables and
+ # can kill database performance. (This seems to mainly
+ # happen for the last query where the clause is simply `? <
+ # last_seen`)
+ #
+ # So instead we want to delete all but *one* of the duplicate
+ # rows. That is hard to do reliably, so we cheat and do a two
+ # step process:
+ # 1. Delete all rows with a last_seen strictly less than the
+ # max last_seen. This hopefully results in deleting all but
+ # one row the majority of the time, but there may be
+ # duplicate last_seen
+ # 2. If multiple rows remain, we fall back to the naive method
+ # and simply delete all rows and reinsert.
+ #
+ # Note that this relies on no new duplicate rows being inserted,
+ # but if that is happening then this entire process is futile
+ # anyway.
+
+ # Do step 1:
+
+ txn.execute(
+ """
+ DELETE FROM user_ips
+ WHERE user_id = ? AND access_token = ? AND ip = ? AND last_seen < ?
+ """,
+ (user_id, access_token, ip, last_seen),
+ )
+ if txn.rowcount == count - 1:
+ # We deleted all but one of the duplicate rows, i.e. there
+ # is exactly one remaining and so there is nothing left to
+ # do.
+ continue
+ elif txn.rowcount >= count:
+ raise Exception(
+ "We deleted more duplicate rows from 'user_ips' than expected"
+ )
+
+ # The previous step didn't delete enough rows, so we fallback to
+ # step 2:
+
+ # Drop all the duplicates
+ txn.execute(
+ """
+ DELETE FROM user_ips
+ WHERE user_id = ? AND access_token = ? AND ip = ?
+ """,
+ (user_id, access_token, ip),
+ )
+
+ # Add in one to be the last_seen
+ txn.execute(
+ """
+ INSERT INTO user_ips
+ (user_id, access_token, ip, device_id, user_agent, last_seen)
+ VALUES (?, ?, ?, ?, ?, ?)
+ """,
+ (user_id, access_token, ip, device_id, user_agent, last_seen),
+ )
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, "user_ips_remove_dupes", {"last_seen": end_last_seen}
+ )
+
+ await self.db_pool.runInteraction("user_ips_dups_remove", remove)
+
+ if last:
+ await self.db_pool.updates._end_background_update("user_ips_remove_dupes")
+
+ return batch_size
+
+ async def _devices_last_seen_update(self, progress, batch_size):
+ """Background update to insert last seen info into devices table
+ """
+
+ last_user_id = progress.get("last_user_id", "")
+ last_device_id = progress.get("last_device_id", "")
+
+ def _devices_last_seen_update_txn(txn):
+ # This consists of two queries:
+ #
+ # 1. The sub-query searches for the next N devices and joins
+ # against user_ips to find the max last_seen associated with
+ # that device.
+ # 2. The outer query then joins again against user_ips on
+ # user/device/last_seen. This *should* hopefully only
+ # return one row, but if it does return more than one then
+ # we'll just end up updating the same device row multiple
+ # times, which is fine.
+
+ where_clause, where_args = make_tuple_comparison_clause(
+ self.database_engine,
+ [("user_id", last_user_id), ("device_id", last_device_id)],
+ )
+
+ sql = """
+ SELECT
+ last_seen, ip, user_agent, user_id, device_id
+ FROM (
+ SELECT
+ user_id, device_id, MAX(u.last_seen) AS last_seen
+ FROM devices
+ INNER JOIN user_ips AS u USING (user_id, device_id)
+ WHERE %(where_clause)s
+ GROUP BY user_id, device_id
+ ORDER BY user_id ASC, device_id ASC
+ LIMIT ?
+ ) c
+ INNER JOIN user_ips AS u USING (user_id, device_id, last_seen)
+ """ % {
+ "where_clause": where_clause
+ }
+ txn.execute(sql, where_args + [batch_size])
+
+ rows = txn.fetchall()
+ if not rows:
+ return 0
+
+ sql = """
+ UPDATE devices
+ SET last_seen = ?, ip = ?, user_agent = ?
+ WHERE user_id = ? AND device_id = ?
+ """
+ txn.execute_batch(sql, rows)
+
+ _, _, _, user_id, device_id = rows[-1]
+ self.db_pool.updates._background_update_progress_txn(
+ txn,
+ "devices_last_seen",
+ {"last_user_id": user_id, "last_device_id": device_id},
+ )
+
+ return len(rows)
+
+ updated = await self.db_pool.runInteraction(
+ "_devices_last_seen_update", _devices_last_seen_update_txn
+ )
+
+ if not updated:
+ await self.db_pool.updates._end_background_update("devices_last_seen")
+
+ return updated
+
+
+class ClientIpStore(ClientIpBackgroundUpdateStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+
+ self.client_ip_last_seen = Cache(
+ name="client_ip_last_seen", keylen=4, max_entries=50000
+ )
+
+ super(ClientIpStore, self).__init__(database, db_conn, hs)
+
+ self.user_ips_max_age = hs.config.user_ips_max_age
+
+ # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen)
+ self._batch_row_update = {}
+
+ self._client_ip_looper = self._clock.looping_call(
+ self._update_client_ips_batch, 5 * 1000
+ )
+ self.hs.get_reactor().addSystemEventTrigger(
+ "before", "shutdown", self._update_client_ips_batch
+ )
+
+ if self.user_ips_max_age:
+ self._clock.looping_call(self._prune_old_user_ips, 5 * 1000)
+
+ async def insert_client_ip(
+ self, user_id, access_token, ip, user_agent, device_id, now=None
+ ):
+ if not now:
+ now = int(self._clock.time_msec())
+ key = (user_id, access_token, ip)
+
+ try:
+ last_seen = self.client_ip_last_seen.get(key)
+ except KeyError:
+ last_seen = None
+ await self.populate_monthly_active_users(user_id)
+ # Rate-limited inserts
+ if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY:
+ return
+
+ self.client_ip_last_seen.prefill(key, now)
+
+ self._batch_row_update[key] = (user_agent, device_id, now)
+
+ @wrap_as_background_process("update_client_ips")
+ def _update_client_ips_batch(self):
+
+ # If the DB pool has already terminated, don't try updating
+ if not self.db_pool.is_running():
+ return
+
+ to_update = self._batch_row_update
+ self._batch_row_update = {}
+
+ return self.db_pool.runInteraction(
+ "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update
+ )
+
+ def _update_client_ips_batch_txn(self, txn, to_update):
+ if "user_ips" in self.db_pool._unsafe_to_upsert_tables or (
+ not self.database_engine.can_native_upsert
+ ):
+ self.database_engine.lock_table(txn, "user_ips")
+
+ for entry in to_update.items():
+ (user_id, access_token, ip), (user_agent, device_id, last_seen) = entry
+
+ try:
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="user_ips",
+ keyvalues={
+ "user_id": user_id,
+ "access_token": access_token,
+ "ip": ip,
+ },
+ values={
+ "user_agent": user_agent,
+ "device_id": device_id,
+ "last_seen": last_seen,
+ },
+ lock=False,
+ )
+
+ # Technically an access token might not be associated with
+ # a device so we need to check.
+ if device_id:
+ # this is always an update rather than an upsert: the row should
+ # already exist, and if it doesn't, that may be because it has been
+ # deleted, and we don't want to re-create it.
+ self.db_pool.simple_update_txn(
+ txn,
+ table="devices",
+ keyvalues={"user_id": user_id, "device_id": device_id},
+ updatevalues={
+ "user_agent": user_agent,
+ "last_seen": last_seen,
+ "ip": ip,
+ },
+ )
+ except Exception as e:
+ # Failed to upsert, log and continue
+ logger.error("Failed to insert client IP %r: %r", entry, e)
+
+ async def get_last_client_ip_by_device(
+ self, user_id: str, device_id: Optional[str]
+ ) -> Dict[Tuple[str, str], dict]:
+ """For each device_id listed, give the user_ip it was last seen on
+
+ Args:
+ user_id: The user to fetch devices for.
+ device_id: If None fetches all devices for the user
+
+ Returns:
+ A dictionary mapping a tuple of (user_id, device_id) to dicts, with
+ keys giving the column names from the devices table.
+ """
+
+ keyvalues = {"user_id": user_id}
+ if device_id is not None:
+ keyvalues["device_id"] = device_id
+
+ res = await self.db_pool.simple_select_list(
+ table="devices",
+ keyvalues=keyvalues,
+ retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"),
+ )
+
+ ret = {(d["user_id"], d["device_id"]): d for d in res}
+ for key in self._batch_row_update:
+ uid, access_token, ip = key
+ if uid == user_id:
+ user_agent, did, last_seen = self._batch_row_update[key]
+ if not device_id or did == device_id:
+ ret[(user_id, did)] = {
+ "user_id": user_id,
+ "access_token": access_token,
+ "ip": ip,
+ "user_agent": user_agent,
+ "device_id": did,
+ "last_seen": last_seen,
+ }
+ return ret
+
+ async def get_user_ip_and_agents(self, user):
+ user_id = user.to_string()
+ results = {}
+
+ for key in self._batch_row_update:
+ uid, access_token, ip, = key
+ if uid == user_id:
+ user_agent, _, last_seen = self._batch_row_update[key]
+ results[(access_token, ip)] = (user_agent, last_seen)
+
+ rows = await self.db_pool.simple_select_list(
+ table="user_ips",
+ keyvalues={"user_id": user_id},
+ retcols=["access_token", "ip", "user_agent", "last_seen"],
+ desc="get_user_ip_and_agents",
+ )
+
+ results.update(
+ ((row["access_token"], row["ip"]), (row["user_agent"], row["last_seen"]))
+ for row in rows
+ )
+ return [
+ {
+ "access_token": access_token,
+ "ip": ip,
+ "user_agent": user_agent,
+ "last_seen": last_seen,
+ }
+ for (access_token, ip), (user_agent, last_seen) in results.items()
+ ]
+
+ @wrap_as_background_process("prune_old_user_ips")
+ async def _prune_old_user_ips(self):
+ """Removes entries in user IPs older than the configured period.
+ """
+
+ if self.user_ips_max_age is None:
+ # Nothing to do
+ return
+
+ if not await self.db_pool.updates.has_completed_background_update(
+ "devices_last_seen"
+ ):
+ # Only start pruning if we have finished populating the devices
+ # last seen info.
+ return
+
+ # We do a slightly funky SQL delete to ensure we don't try and delete
+ # too much at once (as the table may be very large from before we
+ # started pruning).
+ #
+ # This works by finding the max last_seen that is less than the given
+ # time, but has no more than N rows before it, deleting all rows with
+ # a lesser last_seen time. (We COALESCE so that the sub-SELECT always
+ # returns exactly one row).
+ sql = """
+ DELETE FROM user_ips
+ WHERE last_seen <= (
+ SELECT COALESCE(MAX(last_seen), -1)
+ FROM (
+ SELECT last_seen FROM user_ips
+ WHERE last_seen <= ?
+ ORDER BY last_seen ASC
+ LIMIT 5000
+ ) AS u
+ )
+ """
+
+ timestamp = self.clock.time_msec() - self.user_ips_max_age
+
+ def _prune_old_user_ips_txn(txn):
+ txn.execute(sql, (timestamp,))
+
+ await self.db_pool.runInteraction(
+ "_prune_old_user_ips", _prune_old_user_ips_txn
+ )
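A standalone sketch (not part of the patch above) of the rate-limiting rule insert_client_ip applies before adding to the write batch: a (user_id, access_token, ip) key is only re-recorded once per LAST_SEEN_GRANULARITY window.

    def should_record_client_ip(last_seen_ms, now_ms, granularity_ms=LAST_SEEN_GRANULARITY) -> bool:
        # A key we have never seen is always recorded; otherwise we wait for
        # the granularity window to elapse before writing again.
        return last_seen_ms is None or (now_ms - last_seen_ms) >= granularity_ms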
diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py
new file mode 100644
index 00000000..1f6e995c
--- /dev/null
+++ b/synapse/storage/databases/main/deviceinbox.py
@@ -0,0 +1,479 @@
+# -*- coding: utf-8 -*-
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import List, Tuple
+
+from synapse.logging.opentracing import log_kv, set_tag, trace
+from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
+from synapse.storage.database import DatabasePool
+from synapse.util import json_encoder
+from synapse.util.caches.expiringcache import ExpiringCache
+
+logger = logging.getLogger(__name__)
+
+
+class DeviceInboxWorkerStore(SQLBaseStore):
+ def get_to_device_stream_token(self):
+ return self._device_inbox_id_gen.get_current_token()
+
+ async def get_new_messages_for_device(
+ self,
+ user_id: str,
+ device_id: str,
+ last_stream_id: int,
+ current_stream_id: int,
+ limit: int = 100,
+ ) -> Tuple[List[dict], int]:
+ """
+ Args:
+ user_id: The recipient user_id.
+ device_id: The recipient device_id.
+ last_stream_id: The last stream ID checked.
+ current_stream_id: The current position of the to device
+ message stream.
+ limit: The maximum number of messages to retrieve.
+
+ Returns:
+ A list of messages for the device and where in the stream the messages got to.
+ """
+ has_changed = self._device_inbox_stream_cache.has_entity_changed(
+ user_id, last_stream_id
+ )
+ if not has_changed:
+ return ([], current_stream_id)
+
+ def get_new_messages_for_device_txn(txn):
+ sql = (
+ "SELECT stream_id, message_json FROM device_inbox"
+ " WHERE user_id = ? AND device_id = ?"
+ " AND ? < stream_id AND stream_id <= ?"
+ " ORDER BY stream_id ASC"
+ " LIMIT ?"
+ )
+ txn.execute(
+ sql, (user_id, device_id, last_stream_id, current_stream_id, limit)
+ )
+ messages = []
+ for row in txn:
+ stream_pos = row[0]
+ messages.append(db_to_json(row[1]))
+ if len(messages) < limit:
+ stream_pos = current_stream_id
+ return messages, stream_pos
+
+ return await self.db_pool.runInteraction(
+ "get_new_messages_for_device", get_new_messages_for_device_txn
+ )
+
+ @trace
+ async def delete_messages_for_device(
+ self, user_id: str, device_id: str, up_to_stream_id: int
+ ) -> int:
+ """
+ Args:
+ user_id: The recipient user_id.
+ device_id: The recipient device_id.
+ up_to_stream_id: Where to delete messages up to.
+
+ Returns:
+ The number of messages deleted.
+ """
+ # If we have cached the last stream id we've deleted up to, we can
+ # check if there is likely to be anything that needs deleting
+ last_deleted_stream_id = self._last_device_delete_cache.get(
+ (user_id, device_id), None
+ )
+
+ set_tag("last_deleted_stream_id", last_deleted_stream_id)
+
+ if last_deleted_stream_id:
+ has_changed = self._device_inbox_stream_cache.has_entity_changed(
+ user_id, last_deleted_stream_id
+ )
+ if not has_changed:
+ log_kv({"message": "No changes in cache since last check"})
+ return 0
+
+ def delete_messages_for_device_txn(txn):
+ sql = (
+ "DELETE FROM device_inbox"
+ " WHERE user_id = ? AND device_id = ?"
+ " AND stream_id <= ?"
+ )
+ txn.execute(sql, (user_id, device_id, up_to_stream_id))
+ return txn.rowcount
+
+ count = await self.db_pool.runInteraction(
+ "delete_messages_for_device", delete_messages_for_device_txn
+ )
+
+ log_kv(
+ {"message": "deleted {} messages for device".format(count), "count": count}
+ )
+
+ # Update the cache, ensuring that we only ever increase the value
+ last_deleted_stream_id = self._last_device_delete_cache.get(
+ (user_id, device_id), 0
+ )
+ self._last_device_delete_cache[(user_id, device_id)] = max(
+ last_deleted_stream_id, up_to_stream_id
+ )
+
+ return count
+
+ @trace
+ async def get_new_device_msgs_for_remote(
+ self, destination, last_stream_id, current_stream_id, limit
+ ) -> Tuple[List[dict], int]:
+ """
+ Args:
+ destination(str): The name of the remote server.
+ last_stream_id(int|long): The last position of the device message stream
+ that the server sent up to.
+ current_stream_id(int|long): The current position of the device
+ message stream.
+ limit(int): The maximum number of messages to retrieve.
+ Returns:
+ A list of messages for the destination and where in the stream the messages got to.
+ """
+
+ set_tag("destination", destination)
+ set_tag("last_stream_id", last_stream_id)
+ set_tag("current_stream_id", current_stream_id)
+ set_tag("limit", limit)
+
+ has_changed = self._device_federation_outbox_stream_cache.has_entity_changed(
+ destination, last_stream_id
+ )
+ if not has_changed or last_stream_id == current_stream_id:
+ log_kv({"message": "No new messages in stream"})
+ return ([], current_stream_id)
+
+ if limit <= 0:
+ # This can happen if we run out of room for EDUs in the transaction.
+ return ([], last_stream_id)
+
+ @trace
+ def get_new_messages_for_remote_destination_txn(txn):
+ sql = (
+ "SELECT stream_id, messages_json FROM device_federation_outbox"
+ " WHERE destination = ?"
+ " AND ? < stream_id AND stream_id <= ?"
+ " ORDER BY stream_id ASC"
+ " LIMIT ?"
+ )
+ txn.execute(sql, (destination, last_stream_id, current_stream_id, limit))
+ messages = []
+ for row in txn:
+ stream_pos = row[0]
+ messages.append(db_to_json(row[1]))
+ if len(messages) < limit:
+ log_kv({"message": "Set stream position to current position"})
+ stream_pos = current_stream_id
+ return messages, stream_pos
+
+ return await self.db_pool.runInteraction(
+ "get_new_device_msgs_for_remote",
+ get_new_messages_for_remote_destination_txn,
+ )
+
+ @trace
+ def delete_device_msgs_for_remote(self, destination, up_to_stream_id):
+ """Used to delete messages when the remote destination acknowledges
+ their receipt.
+
+ Args:
+ destination(str): The destination server_name
+ up_to_stream_id(int): Where to delete messages up to.
+ Returns:
+ A deferred that resolves when the messages have been deleted.
+ """
+
+ def delete_messages_for_remote_destination_txn(txn):
+ sql = (
+ "DELETE FROM device_federation_outbox"
+ " WHERE destination = ?"
+ " AND stream_id <= ?"
+ )
+ txn.execute(sql, (destination, up_to_stream_id))
+
+ return self.db_pool.runInteraction(
+ "delete_device_msgs_for_remote", delete_messages_for_remote_destination_txn
+ )
+
+ async def get_all_new_device_messages(
+ self, instance_name: str, last_id: int, current_id: int, limit: int
+ ) -> Tuple[List[Tuple[int, tuple]], int, bool]:
+ """Get updates for to device replication stream.
+
+ Args:
+ instance_name: The writer we want to fetch updates from. Unused
+ here since there is only ever one writer.
+ last_id: The token to fetch updates from. Exclusive.
+ current_id: The token to fetch updates up to. Inclusive.
+ limit: The requested limit for the number of rows to return. The
+ function may return more or fewer rows.
+
+ Returns:
+ A tuple consisting of: the updates, a token to use to fetch
+ subsequent updates, and whether we returned fewer rows than exists
+ between the requested tokens due to the limit.
+
+ The token returned can be used in a subsequent call to this
+ function to get further updates.
+
+ The updates are a list of 2-tuples of stream ID and the row data
+ """
+
+ if last_id == current_id:
+ return [], current_id, False
+
+ def get_all_new_device_messages_txn(txn):
+ # We limit like this as we might have multiple rows per stream_id, and
+ # we want to make sure we always get all entries for any stream_id
+ # we return.
+ upper_pos = min(current_id, last_id + limit)
+ sql = (
+ "SELECT max(stream_id), user_id"
+ " FROM device_inbox"
+ " WHERE ? < stream_id AND stream_id <= ?"
+ " GROUP BY user_id"
+ )
+ txn.execute(sql, (last_id, upper_pos))
+ updates = [(row[0], row[1:]) for row in txn]
+
+ sql = (
+ "SELECT max(stream_id), destination"
+ " FROM device_federation_outbox"
+ " WHERE ? < stream_id AND stream_id <= ?"
+ " GROUP BY destination"
+ )
+ txn.execute(sql, (last_id, upper_pos))
+ updates.extend((row[0], row[1:]) for row in txn)
+
+ # Order by ascending stream ordering
+ updates.sort()
+
+ limited = False
+ upto_token = current_id
+ if len(updates) >= limit:
+ upto_token = updates[-1][0]
+ limited = True
+
+ return updates, upto_token, limited
+
+ return await self.db_pool.runInteraction(
+ "get_all_new_device_messages", get_all_new_device_messages_txn
+ )
+
+
+class DeviceInboxBackgroundUpdateStore(SQLBaseStore):
+ DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop"
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(DeviceInboxBackgroundUpdateStore, self).__init__(database, db_conn, hs)
+
+ self.db_pool.updates.register_background_index_update(
+ "device_inbox_stream_index",
+ index_name="device_inbox_stream_id_user_id",
+ table="device_inbox",
+ columns=["stream_id", "user_id"],
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox
+ )
+
+ async def _background_drop_index_device_inbox(self, progress, batch_size):
+ def reindex_txn(conn):
+ txn = conn.cursor()
+ txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id")
+ txn.close()
+
+ await self.db_pool.runWithConnection(reindex_txn)
+
+ await self.db_pool.updates._end_background_update(self.DEVICE_INBOX_STREAM_ID)
+
+ return 1
+
+
+class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore):
+ DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop"
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(DeviceInboxStore, self).__init__(database, db_conn, hs)
+
+ # Map of (user_id, device_id) to the last stream_id that has been
+ # deleted up to. This is so that we can no op deletions.
+ self._last_device_delete_cache = ExpiringCache(
+ cache_name="last_device_delete_cache",
+ clock=self._clock,
+ max_len=10000,
+ expiry_ms=30 * 60 * 1000,
+ )
+
+ @trace
+ async def add_messages_to_device_inbox(
+ self,
+ local_messages_by_user_then_device: dict,
+ remote_messages_by_destination: dict,
+ ) -> int:
+ """Used to send messages from this server.
+
+ Args:
+ local_messages_by_user_then_device:
+ Dictionary of user_id to device_id to message.
+ remote_messages_by_destination:
+ Dictionary of destination server_name to the EDU JSON to send.
+
+ Returns:
+ The new stream_id.
+ """
+
+ def add_messages_txn(txn, now_ms, stream_id):
+ # Add the local messages directly to the local inbox.
+ self._add_messages_to_local_device_inbox_txn(
+ txn, stream_id, local_messages_by_user_then_device
+ )
+
+ # Add the remote messages to the federation outbox.
+ # We'll send them to a remote server when we next send a
+ # federation transaction to that destination.
+ sql = (
+ "INSERT INTO device_federation_outbox"
+ " (destination, stream_id, queued_ts, messages_json)"
+ " VALUES (?,?,?,?)"
+ )
+ rows = []
+ for destination, edu in remote_messages_by_destination.items():
+ edu_json = json_encoder.encode(edu)
+ rows.append((destination, stream_id, now_ms, edu_json))
+ txn.executemany(sql, rows)
+
+ with self._device_inbox_id_gen.get_next() as stream_id:
+            now_ms = self._clock.time_msec()
+ await self.db_pool.runInteraction(
+ "add_messages_to_device_inbox", add_messages_txn, now_ms, stream_id
+ )
+ for user_id in local_messages_by_user_then_device.keys():
+ self._device_inbox_stream_cache.entity_has_changed(user_id, stream_id)
+ for destination in remote_messages_by_destination.keys():
+ self._device_federation_outbox_stream_cache.entity_has_changed(
+ destination, stream_id
+ )
+
+ return self._device_inbox_id_gen.get_current_token()
+
+ async def add_messages_from_remote_to_device_inbox(
+ self, origin: str, message_id: str, local_messages_by_user_then_device: dict
+ ) -> int:
+ def add_messages_txn(txn, now_ms, stream_id):
+ # Check if we've already inserted a matching message_id for that
+ # origin. This can happen if the origin doesn't receive our
+ # acknowledgement from the first time we received the message.
+ already_inserted = self.db_pool.simple_select_one_txn(
+ txn,
+ table="device_federation_inbox",
+ keyvalues={"origin": origin, "message_id": message_id},
+ retcols=("message_id",),
+ allow_none=True,
+ )
+ if already_inserted is not None:
+ return
+
+ # Add an entry for this message_id so that we know we've processed
+ # it.
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="device_federation_inbox",
+ values={
+ "origin": origin,
+ "message_id": message_id,
+ "received_ts": now_ms,
+ },
+ )
+
+            # Add the messages to the appropriate local device inboxes so that
+ # they'll be sent to the devices when they next sync.
+ self._add_messages_to_local_device_inbox_txn(
+ txn, stream_id, local_messages_by_user_then_device
+ )
+
+ with self._device_inbox_id_gen.get_next() as stream_id:
+            now_ms = self._clock.time_msec()
+ await self.db_pool.runInteraction(
+ "add_messages_from_remote_to_device_inbox",
+ add_messages_txn,
+ now_ms,
+ stream_id,
+ )
+ for user_id in local_messages_by_user_then_device.keys():
+ self._device_inbox_stream_cache.entity_has_changed(user_id, stream_id)
+
+ return stream_id
+
+ def _add_messages_to_local_device_inbox_txn(
+ self, txn, stream_id, messages_by_user_then_device
+ ):
+ local_by_user_then_device = {}
+ for user_id, messages_by_device in messages_by_user_then_device.items():
+ messages_json_for_user = {}
+ devices = list(messages_by_device.keys())
+ if len(devices) == 1 and devices[0] == "*":
+ # Handle wildcard device_ids.
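+                # For example (hypothetical payload), messages_by_device may be
+                # {"*": content}, in which case the same content is queued for
+                # every device this user has registered on this server.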
+ sql = "SELECT device_id FROM devices WHERE user_id = ?"
+ txn.execute(sql, (user_id,))
+ message_json = json_encoder.encode(messages_by_device["*"])
+ for row in txn:
+ # Add the message for all devices for this user on this
+ # server.
+ device = row[0]
+ messages_json_for_user[device] = message_json
+ else:
+ if not devices:
+ continue
+
+ clause, args = make_in_list_sql_clause(
+ txn.database_engine, "device_id", devices
+ )
+ sql = "SELECT device_id FROM devices WHERE user_id = ? AND " + clause
+
+ # TODO: Maybe this needs to be done in batches if there are
+ # too many local devices for a given user.
+ txn.execute(sql, [user_id] + list(args))
+ for row in txn:
+ # Only insert into the local inbox if the device exists on
+ # this server
+ device = row[0]
+ message_json = json_encoder.encode(messages_by_device[device])
+ messages_json_for_user[device] = message_json
+
+ if messages_json_for_user:
+ local_by_user_then_device[user_id] = messages_json_for_user
+
+ if not local_by_user_then_device:
+ return
+
+ sql = (
+ "INSERT INTO device_inbox"
+ " (user_id, device_id, stream_id, message_json)"
+ " VALUES (?,?,?,?)"
+ )
+ rows = []
+ for user_id, messages_by_device in local_by_user_then_device.items():
+ for device_id, message_json in messages_by_device.items():
+ rows.append((user_id, device_id, stream_id, message_json))
+
+ txn.executemany(sql, rows)
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
new file mode 100644
index 00000000..2b330604
--- /dev/null
+++ b/synapse/storage/databases/main/devices.py
@@ -0,0 +1,1339 @@
+# -*- coding: utf-8 -*-
+# Copyright 2016 OpenMarket Ltd
+# Copyright 2019 New Vector Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import Dict, Iterable, List, Optional, Set, Tuple
+
+from synapse.api.errors import Codes, StoreError
+from synapse.logging.opentracing import (
+ get_active_span_text_map,
+ set_tag,
+ trace,
+ whitelisted_homeserver,
+)
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
+from synapse.storage.database import (
+ DatabasePool,
+ LoggingTransaction,
+ make_tuple_comparison_clause,
+)
+from synapse.types import Collection, JsonDict, get_verify_key_from_cross_signing_key
+from synapse.util import json_encoder
+from synapse.util.caches.descriptors import Cache, cached, cachedList
+from synapse.util.iterutils import batch_iter
+from synapse.util.stringutils import shortstr
+
+logger = logging.getLogger(__name__)
+
+DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = (
+ "drop_device_list_streams_non_unique_indexes"
+)
+
+BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES = "remove_dup_outbound_pokes"
+
+
+class DeviceWorkerStore(SQLBaseStore):
+ def get_device(self, user_id: str, device_id: str):
+ """Retrieve a device. Only returns devices that are not marked as
+ hidden.
+
+ Args:
+ user_id: The ID of the user which owns the device
+ device_id: The ID of the device to retrieve
+ Returns:
+ defer.Deferred for a dict containing the device information
+ Raises:
+ StoreError: if the device is not found
+ """
+ return self.db_pool.simple_select_one(
+ table="devices",
+ keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False},
+ retcols=("user_id", "device_id", "display_name"),
+ desc="get_device",
+ )
+
+ async def get_devices_by_user(self, user_id: str) -> Dict[str, Dict[str, str]]:
+ """Retrieve all of a user's registered devices. Only returns devices
+ that are not marked as hidden.
+
+ Args:
+ user_id:
+ Returns:
+ A mapping from device_id to a dict containing "device_id", "user_id"
+ and "display_name" for each device.
+ """
+ devices = await self.db_pool.simple_select_list(
+ table="devices",
+ keyvalues={"user_id": user_id, "hidden": False},
+ retcols=("user_id", "device_id", "display_name"),
+ desc="get_devices_by_user",
+ )
+
+ return {d["device_id"]: d for d in devices}
+
+ @trace
+ async def get_device_updates_by_remote(
+ self, destination: str, from_stream_id: int, limit: int
+ ) -> Tuple[int, List[Tuple[str, dict]]]:
+ """Get a stream of device updates to send to the given remote server.
+
+ Args:
+ destination: The host the device updates are intended for
+ from_stream_id: The minimum stream_id to filter updates by, exclusive
+ limit: Maximum number of device updates to return
+
+ Returns:
+            A tuple of the current stream id (ie, the stream id of the last
+            update included in the response) and the list of updates, where
+            each update is a pair of EDU type and EDU contents.
+ """
+ now_stream_id = self._device_list_id_gen.get_current_token()
+
+ has_changed = self._device_list_federation_stream_cache.has_entity_changed(
+ destination, int(from_stream_id)
+ )
+ if not has_changed:
+ return now_stream_id, []
+
+ updates = await self.db_pool.runInteraction(
+ "get_device_updates_by_remote",
+ self._get_device_updates_by_remote_txn,
+ destination,
+ from_stream_id,
+ now_stream_id,
+ limit,
+ )
+
+ # Return an empty list if there are no updates
+ if not updates:
+ return now_stream_id, []
+
+ # get the cross-signing keys of the users in the list, so that we can
+ # determine which of the device changes were cross-signing keys
+ users = {r[0] for r in updates}
+ master_key_by_user = {}
+ self_signing_key_by_user = {}
+ for user in users:
+ cross_signing_key = await self.get_e2e_cross_signing_key(user, "master")
+ if cross_signing_key:
+ key_id, verify_key = get_verify_key_from_cross_signing_key(
+ cross_signing_key
+ )
+ # verify_key is a VerifyKey from signedjson, which uses
+ # .version to denote the portion of the key ID after the
+ # algorithm and colon, which is the device ID
+ master_key_by_user[user] = {
+ "key_info": cross_signing_key,
+ "device_id": verify_key.version,
+ }
+
+ cross_signing_key = await self.get_e2e_cross_signing_key(
+ user, "self_signing"
+ )
+ if cross_signing_key:
+ key_id, verify_key = get_verify_key_from_cross_signing_key(
+ cross_signing_key
+ )
+ self_signing_key_by_user[user] = {
+ "key_info": cross_signing_key,
+ "device_id": verify_key.version,
+ }
+
+ # Perform the equivalent of a GROUP BY
+ #
+ # Iterate through the updates list and copy non-duplicate
+ # (user_id, device_id) entries into a map, with the value being
+ # the max stream_id across each set of duplicate entries
+ #
+ # maps (user_id, device_id) -> (stream_id, opentracing_context)
+ #
+ # opentracing_context contains the opentracing metadata for the request
+ # that created the poke
+ #
+ # The most recent request's opentracing_context is used as the
+ # context which created the Edu.
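+        #
+        # For example (hypothetical values), after the loop below query_map
+        # might look like:
+        #   {("@user:example.org", "DEVICEID"): (stream_id, opentracing_context)}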
+
+ query_map = {}
+ cross_signing_keys_by_user = {}
+ for user_id, device_id, update_stream_id, update_context in updates:
+ if (
+ user_id in master_key_by_user
+ and device_id == master_key_by_user[user_id]["device_id"]
+ ):
+ result = cross_signing_keys_by_user.setdefault(user_id, {})
+ result["master_key"] = master_key_by_user[user_id]["key_info"]
+ elif (
+ user_id in self_signing_key_by_user
+ and device_id == self_signing_key_by_user[user_id]["device_id"]
+ ):
+ result = cross_signing_keys_by_user.setdefault(user_id, {})
+ result["self_signing_key"] = self_signing_key_by_user[user_id][
+ "key_info"
+ ]
+ else:
+ key = (user_id, device_id)
+
+ previous_update_stream_id, _ = query_map.get(key, (0, None))
+
+ if update_stream_id > previous_update_stream_id:
+ query_map[key] = (update_stream_id, update_context)
+
+ results = await self._get_device_update_edus_by_remote(
+ destination, from_stream_id, query_map
+ )
+
+ # add the updated cross-signing keys to the results list
+ for user_id, result in cross_signing_keys_by_user.items():
+ result["user_id"] = user_id
+ # FIXME: switch to m.signing_key_update when MSC1756 is merged into the spec
+ results.append(("org.matrix.signing_key_update", result))
+
+ return now_stream_id, results
+
+ def _get_device_updates_by_remote_txn(
+ self,
+ txn: LoggingTransaction,
+ destination: str,
+ from_stream_id: int,
+ now_stream_id: int,
+ limit: int,
+ ):
+ """Return device update information for a given remote destination
+
+ Args:
+ txn: The transaction to execute
+ destination: The host the device updates are intended for
+ from_stream_id: The minimum stream_id to filter updates by, exclusive
+ now_stream_id: The maximum stream_id to filter updates by, inclusive
+ limit: Maximum number of device updates to return
+
+ Returns:
+            A list of device update rows
+ """
+ # get the list of device updates that need to be sent
+ sql = """
+ SELECT user_id, device_id, stream_id, opentracing_context FROM device_lists_outbound_pokes
+ WHERE destination = ? AND ? < stream_id AND stream_id <= ?
+ ORDER BY stream_id
+ LIMIT ?
+ """
+ txn.execute(sql, (destination, from_stream_id, now_stream_id, limit))
+
+ return list(txn)
+
+ async def _get_device_update_edus_by_remote(
+ self,
+ destination: str,
+ from_stream_id: int,
+ query_map: Dict[Tuple[str, str], Tuple[int, Optional[str]]],
+ ) -> List[Tuple[str, dict]]:
+ """Returns a list of device update EDUs as well as E2EE keys
+
+ Args:
+ destination: The host the device updates are intended for
+ from_stream_id: The minimum stream_id to filter updates by, exclusive
+ query_map (Dict[(str, str): (int, str|None)]): Dictionary mapping
+ user_id/device_id to update stream_id and the relevant json-encoded
+ opentracing context
+
+ Returns:
+            List of objects representing a device update EDU
+ """
+ devices = (
+ await self.db_pool.runInteraction(
+ "_get_e2e_device_keys_txn",
+ self._get_e2e_device_keys_txn,
+ query_map.keys(),
+ include_all_devices=True,
+ include_deleted_devices=True,
+ )
+ if query_map
+ else {}
+ )
+
+ results = []
+ for user_id, user_devices in devices.items():
+ # The prev_id for the first row is always the last row before
+ # `from_stream_id`
+ prev_id = await self._get_last_device_update_for_remote_user(
+ destination, user_id, from_stream_id
+ )
+
+ # make sure we go through the devices in stream order
+ device_ids = sorted(
+ user_devices.keys(), key=lambda i: query_map[(user_id, i)][0],
+ )
+
+ for device_id in device_ids:
+ device = user_devices[device_id]
+ stream_id, opentracing_context = query_map[(user_id, device_id)]
+ result = {
+ "user_id": user_id,
+ "device_id": device_id,
+ "prev_id": [prev_id] if prev_id else [],
+ "stream_id": stream_id,
+ "org.matrix.opentracing_context": opentracing_context,
+ }
+
+ prev_id = stream_id
+
+ if device is not None:
+ key_json = device.get("key_json", None)
+ if key_json:
+ result["keys"] = db_to_json(key_json)
+
+ if "signatures" in device:
+ for sig_user_id, sigs in device["signatures"].items():
+ result["keys"].setdefault("signatures", {}).setdefault(
+ sig_user_id, {}
+ ).update(sigs)
+
+ device_display_name = device.get("device_display_name", None)
+ if device_display_name:
+ result["device_display_name"] = device_display_name
+ else:
+ result["deleted"] = True
+
+ results.append(("m.device_list_update", result))
+
+ return results
+
+ def _get_last_device_update_for_remote_user(
+ self, destination: str, user_id: str, from_stream_id: int
+ ):
+ def f(txn):
+ prev_sent_id_sql = """
+ SELECT coalesce(max(stream_id), 0) as stream_id
+ FROM device_lists_outbound_last_success
+ WHERE destination = ? AND user_id = ? AND stream_id <= ?
+ """
+ txn.execute(prev_sent_id_sql, (destination, user_id, from_stream_id))
+ rows = txn.fetchall()
+ return rows[0][0]
+
+ return self.db_pool.runInteraction("get_last_device_update_for_remote_user", f)
+
+ def mark_as_sent_devices_by_remote(self, destination: str, stream_id: int):
+ """Mark that updates have successfully been sent to the destination.
+ """
+ return self.db_pool.runInteraction(
+ "mark_as_sent_devices_by_remote",
+ self._mark_as_sent_devices_by_remote_txn,
+ destination,
+ stream_id,
+ )
+
+ def _mark_as_sent_devices_by_remote_txn(
+ self, txn: LoggingTransaction, destination: str, stream_id: int
+ ) -> None:
+ # We update the device_lists_outbound_last_success with the successfully
+ # poked users.
+ sql = """
+ SELECT user_id, coalesce(max(o.stream_id), 0)
+ FROM device_lists_outbound_pokes as o
+ WHERE destination = ? AND o.stream_id <= ?
+ GROUP BY user_id
+ """
+ txn.execute(sql, (destination, stream_id))
+ rows = txn.fetchall()
+
+ self.db_pool.simple_upsert_many_txn(
+ txn=txn,
+ table="device_lists_outbound_last_success",
+ key_names=("destination", "user_id"),
+ key_values=((destination, user_id) for user_id, _ in rows),
+ value_names=("stream_id",),
+ value_values=((stream_id,) for _, stream_id in rows),
+ )
+
+ # Delete all sent outbound pokes
+ sql = """
+ DELETE FROM device_lists_outbound_pokes
+ WHERE destination = ? AND stream_id <= ?
+ """
+ txn.execute(sql, (destination, stream_id))
+
+ async def add_user_signature_change_to_streams(
+ self, from_user_id: str, user_ids: List[str]
+ ) -> int:
+ """Persist that a user has made new signatures
+
+ Args:
+ from_user_id: the user who made the signatures
+ user_ids: the users who were signed
+
+ Returns:
+            The new stream ID.
+ """
+
+ with self._device_list_id_gen.get_next() as stream_id:
+ await self.db_pool.runInteraction(
+ "add_user_sig_change_to_streams",
+ self._add_user_signature_change_txn,
+ from_user_id,
+ user_ids,
+ stream_id,
+ )
+ return stream_id
+
+ def _add_user_signature_change_txn(
+ self,
+ txn: LoggingTransaction,
+ from_user_id: str,
+ user_ids: List[str],
+ stream_id: int,
+ ) -> None:
+ txn.call_after(
+ self._user_signature_stream_cache.entity_has_changed,
+ from_user_id,
+ stream_id,
+ )
+ self.db_pool.simple_insert_txn(
+ txn,
+ "user_signature_stream",
+ values={
+ "stream_id": stream_id,
+ "from_user_id": from_user_id,
+ "user_ids": json_encoder.encode(user_ids),
+ },
+ )
+
+ def get_device_stream_token(self) -> int:
+ return self._device_list_id_gen.get_current_token()
+
+ @trace
+ async def get_user_devices_from_cache(
+ self, query_list: List[Tuple[str, str]]
+ ) -> Tuple[Set[str], Dict[str, Dict[str, JsonDict]]]:
+ """Get the devices (and keys if any) for remote users from the cache.
+
+ Args:
+ query_list: List of (user_id, device_ids), if device_ids is
+ falsey then return all device ids for that user.
+
+ Returns:
+ A tuple of (user_ids_not_in_cache, results_map), where
+ user_ids_not_in_cache is a set of user_ids and results_map is a
+ mapping of user_id -> device_id -> device_info.
+ """
+ user_ids = {user_id for user_id, _ in query_list}
+ user_map = await self.get_device_list_last_stream_id_for_remotes(list(user_ids))
+
+ # We go and check if any of the users need to have their device lists
+ # resynced. If they do then we remove them from the cached list.
+ users_needing_resync = await self.get_user_ids_requiring_device_list_resync(
+ user_ids
+ )
+ user_ids_in_cache = {
+ user_id for user_id, stream_id in user_map.items() if stream_id
+ } - users_needing_resync
+ user_ids_not_in_cache = user_ids - user_ids_in_cache
+
+ results = {}
+ for user_id, device_id in query_list:
+ if user_id not in user_ids_in_cache:
+ continue
+
+ if device_id:
+ device = await self._get_cached_user_device(user_id, device_id)
+ results.setdefault(user_id, {})[device_id] = device
+ else:
+ results[user_id] = await self.get_cached_devices_for_user(user_id)
+
+ set_tag("in_cache", results)
+ set_tag("not_in_cache", user_ids_not_in_cache)
+
+ return user_ids_not_in_cache, results
+
+ @cached(num_args=2, tree=True)
+ async def _get_cached_user_device(self, user_id: str, device_id: str) -> JsonDict:
+ content = await self.db_pool.simple_select_one_onecol(
+ table="device_lists_remote_cache",
+ keyvalues={"user_id": user_id, "device_id": device_id},
+ retcol="content",
+ desc="_get_cached_user_device",
+ )
+ return db_to_json(content)
+
+ @cached()
+ async def get_cached_devices_for_user(self, user_id: str) -> Dict[str, JsonDict]:
+ devices = await self.db_pool.simple_select_list(
+ table="device_lists_remote_cache",
+ keyvalues={"user_id": user_id},
+ retcols=("device_id", "content"),
+ desc="get_cached_devices_for_user",
+ )
+ return {
+ device["device_id"]: db_to_json(device["content"]) for device in devices
+ }
+
+ def get_devices_with_keys_by_user(self, user_id: str):
+ """Get all devices (with any device keys) for a user
+
+ Returns:
+ Deferred which resolves to (stream_id, devices)
+ """
+ return self.db_pool.runInteraction(
+ "get_devices_with_keys_by_user",
+ self._get_devices_with_keys_by_user_txn,
+ user_id,
+ )
+
+ def _get_devices_with_keys_by_user_txn(
+ self, txn: LoggingTransaction, user_id: str
+ ) -> Tuple[int, List[JsonDict]]:
+ now_stream_id = self._device_list_id_gen.get_current_token()
+
+ devices = self._get_e2e_device_keys_txn(
+ txn, [(user_id, None)], include_all_devices=True
+ )
+
+ if devices:
+ user_devices = devices[user_id]
+ results = []
+ for device_id, device in user_devices.items():
+ result = {"device_id": device_id}
+
+ key_json = device.get("key_json", None)
+ if key_json:
+ result["keys"] = db_to_json(key_json)
+
+ if "signatures" in device:
+ for sig_user_id, sigs in device["signatures"].items():
+ result["keys"].setdefault("signatures", {}).setdefault(
+ sig_user_id, {}
+ ).update(sigs)
+
+ device_display_name = device.get("device_display_name", None)
+ if device_display_name:
+ result["device_display_name"] = device_display_name
+
+ results.append(result)
+
+ return now_stream_id, results
+
+ return now_stream_id, []
+
+ async def get_users_whose_devices_changed(
+ self, from_key: str, user_ids: Iterable[str]
+ ) -> Set[str]:
+ """Get set of users whose devices have changed since `from_key` that
+ are in the given list of user_ids.
+
+ Args:
+ from_key: The device lists stream token
+ user_ids: The user IDs to query for devices.
+
+ Returns:
+ The set of user_ids whose devices have changed since `from_key`
+ """
+ from_key = int(from_key)
+
+ # Get set of users who *may* have changed. Users not in the returned
+ # list have definitely not changed.
+ to_check = self._device_list_stream_cache.get_entities_changed(
+ user_ids, from_key
+ )
+
+ if not to_check:
+ return set()
+
+ def _get_users_whose_devices_changed_txn(txn):
+ changes = set()
+
+ sql = """
+ SELECT DISTINCT user_id FROM device_lists_stream
+ WHERE stream_id > ?
+ AND
+ """
+
+ for chunk in batch_iter(to_check, 100):
+ clause, args = make_in_list_sql_clause(
+ txn.database_engine, "user_id", chunk
+ )
+ txn.execute(sql + clause, (from_key,) + tuple(args))
+ changes.update(user_id for user_id, in txn)
+
+ return changes
+
+ return await self.db_pool.runInteraction(
+ "get_users_whose_devices_changed", _get_users_whose_devices_changed_txn
+ )
+
+ async def get_users_whose_signatures_changed(
+ self, user_id: str, from_key: str
+ ) -> Set[str]:
+ """Get the users who have new cross-signing signatures made by `user_id` since
+ `from_key`.
+
+ Args:
+ user_id: the user who made the signatures
+ from_key: The device lists stream token
+
+ Returns:
+ A set of user IDs with updated signatures.
+ """
+ from_key = int(from_key)
+ if self._user_signature_stream_cache.has_entity_changed(user_id, from_key):
+ sql = """
+ SELECT DISTINCT user_ids FROM user_signature_stream
+ WHERE from_user_id = ? AND stream_id > ?
+ """
+ rows = await self.db_pool.execute(
+ "get_users_whose_signatures_changed", None, sql, user_id, from_key
+ )
+ return {user for row in rows for user in db_to_json(row[0])}
+ else:
+ return set()
+
+ async def get_all_device_list_changes_for_remotes(
+ self, instance_name: str, last_id: int, current_id: int, limit: int
+ ) -> Tuple[List[Tuple[int, tuple]], int, bool]:
+ """Get updates for device lists replication stream.
+
+ Args:
+ instance_name: The writer we want to fetch updates from. Unused
+ here since there is only ever one writer.
+ last_id: The token to fetch updates from. Exclusive.
+ current_id: The token to fetch updates up to. Inclusive.
+ limit: The requested limit for the number of rows to return. The
+ function may return more or fewer rows.
+
+ Returns:
+ A tuple consisting of: the updates, a token to use to fetch
+            subsequent updates, and whether we returned fewer rows than exist
+ between the requested tokens due to the limit.
+
+ The token returned can be used in a subsequent call to this
+ function to get further updates.
+
+ The updates are a list of 2-tuples of stream ID and the row data
+ """
+
+ if last_id == current_id:
+ return [], current_id, False
+
+ def _get_all_device_list_changes_for_remotes(txn):
+ # This query Does The Right Thing where it'll correctly apply the
+ # bounds to the inner queries.
+ sql = """
+ SELECT stream_id, entity FROM (
+ SELECT stream_id, user_id AS entity FROM device_lists_stream
+ UNION ALL
+ SELECT stream_id, destination AS entity FROM device_lists_outbound_pokes
+ ) AS e
+ WHERE ? < stream_id AND stream_id <= ?
+ LIMIT ?
+ """
+
+ txn.execute(sql, (last_id, current_id, limit))
+ updates = [(row[0], row[1:]) for row in txn]
+ limited = False
+ upto_token = current_id
+ if len(updates) >= limit:
+ upto_token = updates[-1][0]
+ limited = True
+
+ return updates, upto_token, limited
+
+ return await self.db_pool.runInteraction(
+ "get_all_device_list_changes_for_remotes",
+ _get_all_device_list_changes_for_remotes,
+ )
+
+ @cached(max_entries=10000)
+ def get_device_list_last_stream_id_for_remote(self, user_id: str):
+ """Get the last stream_id we got for a user. May be None if we haven't
+ got any information for them.
+ """
+ return self.db_pool.simple_select_one_onecol(
+ table="device_lists_remote_extremeties",
+ keyvalues={"user_id": user_id},
+ retcol="stream_id",
+ desc="get_device_list_last_stream_id_for_remote",
+ allow_none=True,
+ )
+
+ @cachedList(
+ cached_method_name="get_device_list_last_stream_id_for_remote",
+ list_name="user_ids",
+ inlineCallbacks=True,
+ )
+ def get_device_list_last_stream_id_for_remotes(self, user_ids: str):
+ rows = yield self.db_pool.simple_select_many_batch(
+ table="device_lists_remote_extremeties",
+ column="user_id",
+ iterable=user_ids,
+ retcols=("user_id", "stream_id"),
+ desc="get_device_list_last_stream_id_for_remotes",
+ )
+
+ results = {user_id: None for user_id in user_ids}
+ results.update({row["user_id"]: row["stream_id"] for row in rows})
+
+ return results
+
+ async def get_user_ids_requiring_device_list_resync(
+ self, user_ids: Optional[Collection[str]] = None,
+ ) -> Set[str]:
+ """Given a list of remote users return the list of users that we
+ should resync the device lists for. If None is given instead of a list,
+ return every user that we should resync the device lists for.
+
+ Returns:
+ The IDs of users whose device lists need resync.
+ """
+ if user_ids:
+ rows = await self.db_pool.simple_select_many_batch(
+ table="device_lists_remote_resync",
+ column="user_id",
+ iterable=user_ids,
+ retcols=("user_id",),
+ desc="get_user_ids_requiring_device_list_resync_with_iterable",
+ )
+ else:
+ rows = await self.db_pool.simple_select_list(
+ table="device_lists_remote_resync",
+ keyvalues=None,
+ retcols=("user_id",),
+ desc="get_user_ids_requiring_device_list_resync",
+ )
+
+ return {row["user_id"] for row in rows}
+
+ def mark_remote_user_device_cache_as_stale(self, user_id: str):
+ """Records that the server has reason to believe the cache of the devices
+        for the remote user is out of date.
+ """
+ return self.db_pool.simple_upsert(
+ table="device_lists_remote_resync",
+ keyvalues={"user_id": user_id},
+ values={},
+ insertion_values={"added_ts": self._clock.time_msec()},
+ desc="make_remote_user_device_cache_as_stale",
+ )
+
+ def mark_remote_user_device_list_as_unsubscribed(self, user_id: str):
+ """Mark that we no longer track device lists for remote user.
+ """
+
+ def _mark_remote_user_device_list_as_unsubscribed_txn(txn):
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="device_lists_remote_extremeties",
+ keyvalues={"user_id": user_id},
+ )
+ self._invalidate_cache_and_stream(
+ txn, self.get_device_list_last_stream_id_for_remote, (user_id,)
+ )
+
+ return self.db_pool.runInteraction(
+ "mark_remote_user_device_list_as_unsubscribed",
+ _mark_remote_user_device_list_as_unsubscribed_txn,
+ )
+
+
+class DeviceBackgroundUpdateStore(SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(DeviceBackgroundUpdateStore, self).__init__(database, db_conn, hs)
+
+ self.db_pool.updates.register_background_index_update(
+ "device_lists_stream_idx",
+ index_name="device_lists_stream_user_id",
+ table="device_lists_stream",
+ columns=["user_id", "device_id"],
+ )
+
+ # create a unique index on device_lists_remote_cache
+ self.db_pool.updates.register_background_index_update(
+ "device_lists_remote_cache_unique_idx",
+ index_name="device_lists_remote_cache_unique_id",
+ table="device_lists_remote_cache",
+ columns=["user_id", "device_id"],
+ unique=True,
+ )
+
+ # And one on device_lists_remote_extremeties
+ self.db_pool.updates.register_background_index_update(
+ "device_lists_remote_extremeties_unique_idx",
+ index_name="device_lists_remote_extremeties_unique_idx",
+ table="device_lists_remote_extremeties",
+ columns=["user_id"],
+ unique=True,
+ )
+
+ # once they complete, we can remove the old non-unique indexes.
+ self.db_pool.updates.register_background_update_handler(
+ DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES,
+ self._drop_device_list_streams_non_unique_indexes,
+ )
+
+ # clear out duplicate device list outbound pokes
+ self.db_pool.updates.register_background_update_handler(
+ BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES, self._remove_duplicate_outbound_pokes,
+ )
+
+ # a pair of background updates that were added during the 1.14 release cycle,
+ # but replaced with 58/06dlols_unique_idx.py
+ self.db_pool.updates.register_noop_background_update(
+ "device_lists_outbound_last_success_unique_idx",
+ )
+ self.db_pool.updates.register_noop_background_update(
+ "drop_device_lists_outbound_last_success_non_unique_idx",
+ )
+
+ async def _drop_device_list_streams_non_unique_indexes(self, progress, batch_size):
+ def f(conn):
+ txn = conn.cursor()
+ txn.execute("DROP INDEX IF EXISTS device_lists_remote_cache_id")
+ txn.execute("DROP INDEX IF EXISTS device_lists_remote_extremeties_id")
+ txn.close()
+
+ await self.db_pool.runWithConnection(f)
+ await self.db_pool.updates._end_background_update(
+ DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES
+ )
+ return 1
+
+ async def _remove_duplicate_outbound_pokes(self, progress, batch_size):
+ # for some reason, we have accumulated duplicate entries in
+ # device_lists_outbound_pokes, which makes prune_outbound_device_list_pokes less
+ # efficient.
+ #
+ # For each duplicate, we delete all the existing rows and put one back.
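+        #
+        # For example, if three rows share the same (stream_id, destination,
+        # user_id, device_id), all three are deleted and a single row is
+        # re-inserted with sent=False and the most recent ts.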
+
+ KEY_COLS = ["stream_id", "destination", "user_id", "device_id"]
+ last_row = progress.get(
+ "last_row",
+ {"stream_id": 0, "destination": "", "user_id": "", "device_id": ""},
+ )
+
+ def _txn(txn):
+ clause, args = make_tuple_comparison_clause(
+ self.db_pool.engine, [(x, last_row[x]) for x in KEY_COLS]
+ )
+ sql = """
+ SELECT stream_id, destination, user_id, device_id, MAX(ts) AS ts
+ FROM device_lists_outbound_pokes
+ WHERE %s
+ GROUP BY %s
+ HAVING count(*) > 1
+ ORDER BY %s
+ LIMIT ?
+ """ % (
+ clause, # WHERE
+ ",".join(KEY_COLS), # GROUP BY
+ ",".join(KEY_COLS), # ORDER BY
+ )
+ txn.execute(sql, args + [batch_size])
+ rows = self.db_pool.cursor_to_dict(txn)
+
+ row = None
+ for row in rows:
+ self.db_pool.simple_delete_txn(
+ txn, "device_lists_outbound_pokes", {x: row[x] for x in KEY_COLS},
+ )
+
+ row["sent"] = False
+ self.db_pool.simple_insert_txn(
+ txn, "device_lists_outbound_pokes", row,
+ )
+
+ if row:
+ self.db_pool.updates._background_update_progress_txn(
+ txn, BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES, {"last_row": row},
+ )
+
+ return len(rows)
+
+ rows = await self.db_pool.runInteraction(
+ BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES, _txn
+ )
+
+ if not rows:
+ await self.db_pool.updates._end_background_update(
+ BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES
+ )
+
+ return rows
+
+
+class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(DeviceStore, self).__init__(database, db_conn, hs)
+
+    # Map of (user_id, device_id) -> bool. If there is an entry then it
+    # implies the device exists.
+ self.device_id_exists_cache = Cache(
+ name="device_id_exists", keylen=2, max_entries=10000
+ )
+
+ self._clock.looping_call(self._prune_old_outbound_device_pokes, 60 * 60 * 1000)
+
+ async def store_device(
+ self, user_id: str, device_id: str, initial_device_display_name: str
+ ) -> bool:
+ """Ensure the given device is known; add it to the store if not
+
+ Args:
+ user_id: id of user associated with the device
+ device_id: id of device
+ initial_device_display_name: initial displayname of the device.
+ Ignored if device exists.
+
+ Returns:
+            Whether the device was inserted (True), or False if a device already existed with that ID.
+
+ Raises:
+ StoreError: if the device is already in use
+ """
+ key = (user_id, device_id)
+ if self.device_id_exists_cache.get(key, None):
+ return False
+
+ try:
+ inserted = await self.db_pool.simple_insert(
+ "devices",
+ values={
+ "user_id": user_id,
+ "device_id": device_id,
+ "display_name": initial_device_display_name,
+ "hidden": False,
+ },
+ desc="store_device",
+ or_ignore=True,
+ )
+ if not inserted:
+ # if the device already exists, check if it's a real device, or
+ # if the device ID is reserved by something else
+ hidden = await self.db_pool.simple_select_one_onecol(
+ "devices",
+ keyvalues={"user_id": user_id, "device_id": device_id},
+ retcol="hidden",
+ )
+ if hidden:
+ raise StoreError(400, "The device ID is in use", Codes.FORBIDDEN)
+ self.device_id_exists_cache.prefill(key, True)
+ return inserted
+ except StoreError:
+ raise
+ except Exception as e:
+ logger.error(
+ "store_device with device_id=%s(%r) user_id=%s(%r)"
+ " display_name=%s(%r) failed: %s",
+ type(device_id).__name__,
+ device_id,
+ type(user_id).__name__,
+ user_id,
+ type(initial_device_display_name).__name__,
+ initial_device_display_name,
+ e,
+ )
+ raise StoreError(500, "Problem storing device.")
+
+ async def delete_device(self, user_id: str, device_id: str) -> None:
+ """Delete a device.
+
+ Args:
+ user_id: The ID of the user which owns the device
+ device_id: The ID of the device to delete
+ """
+ await self.db_pool.simple_delete_one(
+ table="devices",
+ keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False},
+ desc="delete_device",
+ )
+
+ self.device_id_exists_cache.invalidate((user_id, device_id))
+
+ async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
+ """Deletes several devices.
+
+ Args:
+ user_id: The ID of the user which owns the devices
+ device_ids: The IDs of the devices to delete
+ """
+ await self.db_pool.simple_delete_many(
+ table="devices",
+ column="device_id",
+ iterable=device_ids,
+ keyvalues={"user_id": user_id, "hidden": False},
+ desc="delete_devices",
+ )
+ for device_id in device_ids:
+ self.device_id_exists_cache.invalidate((user_id, device_id))
+
+ async def update_device(
+ self, user_id: str, device_id: str, new_display_name: Optional[str] = None
+ ) -> None:
+ """Update a device. Only updates the device if it is not marked as
+ hidden.
+
+ Args:
+ user_id: The ID of the user which owns the device
+ device_id: The ID of the device to update
+ new_display_name: new displayname for device; None to leave unchanged
+ Raises:
+ StoreError: if the device is not found
+ """
+ updates = {}
+ if new_display_name is not None:
+ updates["display_name"] = new_display_name
+ if not updates:
+ return None
+ await self.db_pool.simple_update_one(
+ table="devices",
+ keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False},
+ updatevalues=updates,
+ desc="update_device",
+ )
+
+ def update_remote_device_list_cache_entry(
+ self, user_id: str, device_id: str, content: JsonDict, stream_id: int
+ ):
+ """Updates a single device in the cache of a remote user's devicelist.
+
+ Note: assumes that we are the only thread that can be updating this user's
+ device list.
+
+ Args:
+ user_id: User to update device list for
+            device_id: ID of the device being updated
+ content: new data on this device
+ stream_id: the version of the device list
+
+ Returns:
+ Deferred[None]
+ """
+ return self.db_pool.runInteraction(
+ "update_remote_device_list_cache_entry",
+ self._update_remote_device_list_cache_entry_txn,
+ user_id,
+ device_id,
+ content,
+ stream_id,
+ )
+
+ def _update_remote_device_list_cache_entry_txn(
+ self,
+ txn: LoggingTransaction,
+ user_id: str,
+ device_id: str,
+ content: JsonDict,
+ stream_id: int,
+ ) -> None:
+ if content.get("deleted"):
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="device_lists_remote_cache",
+ keyvalues={"user_id": user_id, "device_id": device_id},
+ )
+
+ txn.call_after(self.device_id_exists_cache.invalidate, (user_id, device_id))
+ else:
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="device_lists_remote_cache",
+ keyvalues={"user_id": user_id, "device_id": device_id},
+ values={"content": json_encoder.encode(content)},
+ # we don't need to lock, because we assume we are the only thread
+ # updating this user's devices.
+ lock=False,
+ )
+
+ txn.call_after(self._get_cached_user_device.invalidate, (user_id, device_id))
+ txn.call_after(self.get_cached_devices_for_user.invalidate, (user_id,))
+ txn.call_after(
+ self.get_device_list_last_stream_id_for_remote.invalidate, (user_id,)
+ )
+
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="device_lists_remote_extremeties",
+ keyvalues={"user_id": user_id},
+ values={"stream_id": stream_id},
+ # again, we can assume we are the only thread updating this user's
+ # extremity.
+ lock=False,
+ )
+
+ def update_remote_device_list_cache(
+ self, user_id: str, devices: List[dict], stream_id: int
+ ):
+ """Replace the entire cache of the remote user's devices.
+
+ Note: assumes that we are the only thread that can be updating this user's
+ device list.
+
+ Args:
+ user_id: User to update device list for
+ devices: list of device objects supplied over federation
+ stream_id: the version of the device list
+
+ Returns:
+ Deferred[None]
+ """
+ return self.db_pool.runInteraction(
+ "update_remote_device_list_cache",
+ self._update_remote_device_list_cache_txn,
+ user_id,
+ devices,
+ stream_id,
+ )
+
+ def _update_remote_device_list_cache_txn(
+ self, txn: LoggingTransaction, user_id: str, devices: List[dict], stream_id: int
+ ):
+ self.db_pool.simple_delete_txn(
+ txn, table="device_lists_remote_cache", keyvalues={"user_id": user_id}
+ )
+
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="device_lists_remote_cache",
+ values=[
+ {
+ "user_id": user_id,
+ "device_id": content["device_id"],
+ "content": json_encoder.encode(content),
+ }
+ for content in devices
+ ],
+ )
+
+ txn.call_after(self.get_cached_devices_for_user.invalidate, (user_id,))
+ txn.call_after(self._get_cached_user_device.invalidate_many, (user_id,))
+ txn.call_after(
+ self.get_device_list_last_stream_id_for_remote.invalidate, (user_id,)
+ )
+
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="device_lists_remote_extremeties",
+ keyvalues={"user_id": user_id},
+ values={"stream_id": stream_id},
+ # we don't need to lock, because we can assume we are the only thread
+ # updating this user's extremity.
+ lock=False,
+ )
+
+ # If we're replacing the remote user's device list cache presumably
+ # we've done a full resync, so we remove the entry that says we need
+ # to resync
+ self.db_pool.simple_delete_txn(
+ txn, table="device_lists_remote_resync", keyvalues={"user_id": user_id},
+ )
+
+ async def add_device_change_to_streams(
+ self, user_id: str, device_ids: Collection[str], hosts: List[str]
+ ):
+ """Persist that a user's devices have been updated, and which hosts
+ (if any) should be poked.
+ """
+ if not device_ids:
+ return
+
+ with self._device_list_id_gen.get_next_mult(len(device_ids)) as stream_ids:
+ await self.db_pool.runInteraction(
+ "add_device_change_to_stream",
+ self._add_device_change_to_stream_txn,
+ user_id,
+ device_ids,
+ stream_ids,
+ )
+
+ if not hosts:
+ return stream_ids[-1]
+
+ context = get_active_span_text_map()
+ with self._device_list_id_gen.get_next_mult(
+ len(hosts) * len(device_ids)
+ ) as stream_ids:
+ await self.db_pool.runInteraction(
+ "add_device_outbound_poke_to_stream",
+ self._add_device_outbound_poke_to_stream_txn,
+ user_id,
+ device_ids,
+ hosts,
+ stream_ids,
+ context,
+ )
+
+ return stream_ids[-1]
+
+ def _add_device_change_to_stream_txn(
+ self,
+ txn: LoggingTransaction,
+ user_id: str,
+ device_ids: Collection[str],
+        stream_ids: List[int],
+ ):
+ txn.call_after(
+ self._device_list_stream_cache.entity_has_changed, user_id, stream_ids[-1],
+ )
+
+ min_stream_id = stream_ids[0]
+
+ # Delete older entries in the table, as we really only care about
+ # when the latest change happened.
+ txn.executemany(
+ """
+ DELETE FROM device_lists_stream
+ WHERE user_id = ? AND device_id = ? AND stream_id < ?
+ """,
+ [(user_id, device_id, min_stream_id) for device_id in device_ids],
+ )
+
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="device_lists_stream",
+ values=[
+ {"stream_id": stream_id, "user_id": user_id, "device_id": device_id}
+ for stream_id, device_id in zip(stream_ids, device_ids)
+ ],
+ )
+
+ def _add_device_outbound_poke_to_stream_txn(
+ self,
+ txn: LoggingTransaction,
+ user_id: str,
+ device_ids: Collection[str],
+ hosts: List[str],
+        stream_ids: List[int],
+ context: Dict[str, str],
+ ):
+ for host in hosts:
+ txn.call_after(
+ self._device_list_federation_stream_cache.entity_has_changed,
+ host,
+ stream_ids[-1],
+ )
+
+ now = self._clock.time_msec()
+ next_stream_id = iter(stream_ids)
+
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="device_lists_outbound_pokes",
+ values=[
+ {
+ "destination": destination,
+ "stream_id": next(next_stream_id),
+ "user_id": user_id,
+ "device_id": device_id,
+ "sent": False,
+ "ts": now,
+ "opentracing_context": json_encoder.encode(context)
+ if whitelisted_homeserver(destination)
+ else "{}",
+ }
+ for destination in hosts
+ for device_id in device_ids
+ ],
+ )
+
+ def _prune_old_outbound_device_pokes(self, prune_age: int = 24 * 60 * 60 * 1000):
+ """Delete old entries out of the device_lists_outbound_pokes to ensure
+ that we don't fill up due to dead servers.
+
+ Normally, we try to send device updates as a delta since a previous known point:
+ this is done by setting the prev_id in the m.device_list_update EDU. However,
+ for that to work, we have to have a complete record of each change to
+ each device, which can add up to quite a lot of data.
+
+ An alternative mechanism is that, if the remote server sees that it has missed
+ an entry in the stream_id sequence for a given user, it will request a full
+ list of that user's devices. Hence, we can reduce the amount of data we have to
+ store (and transmit in some future transaction), by clearing almost everything
+ for a given destination out of the database, and having the remote server
+ resync.
+
+ All we need to do is make sure we keep at least one row for each
+ (user, destination) pair, to remind us to send a m.device_list_update EDU for
+ that user when the destination comes back. It doesn't matter which device
+ we keep.
+ """
+ yesterday = self._clock.time_msec() - prune_age
+
+ def _prune_txn(txn):
+ # look for (user, destination) pairs which have an update older than
+ # the cutoff.
+ #
+ # For each pair, we also need to know the most recent stream_id, and
+ # an arbitrary device_id at that stream_id.
+ select_sql = """
+ SELECT
+ dlop1.destination,
+ dlop1.user_id,
+ MAX(dlop1.stream_id) AS stream_id,
+ (SELECT MIN(dlop2.device_id) AS device_id FROM
+ device_lists_outbound_pokes dlop2
+ WHERE dlop2.destination = dlop1.destination AND
+ dlop2.user_id=dlop1.user_id AND
+ dlop2.stream_id=MAX(dlop1.stream_id)
+ )
+ FROM device_lists_outbound_pokes dlop1
+ GROUP BY destination, user_id
+ HAVING min(ts) < ? AND count(*) > 1
+ """
+
+ txn.execute(select_sql, (yesterday,))
+ rows = txn.fetchall()
+
+ if not rows:
+ return
+
+ logger.info(
+ "Pruning old outbound device list updates for %i users/destinations: %s",
+ len(rows),
+ shortstr((row[0], row[1]) for row in rows),
+ )
+
+ # we want to keep the update with the highest stream_id for each user.
+ #
+ # there might be more than one update (with different device_ids) with the
+ # same stream_id, so we also delete all but one rows with the max stream id.
+ delete_sql = """
+ DELETE FROM device_lists_outbound_pokes
+ WHERE destination = ? AND user_id = ? AND (
+ stream_id < ? OR
+ (stream_id = ? AND device_id != ?)
+ )
+ """
+ count = 0
+ for (destination, user_id, stream_id, device_id) in rows:
+ txn.execute(
+ delete_sql, (destination, user_id, stream_id, stream_id, device_id)
+ )
+ count += txn.rowcount
+
+ # Since we've deleted unsent deltas, we need to remove the entry
+ # of last successful sent so that the prev_ids are correctly set.
+ sql = """
+ DELETE FROM device_lists_outbound_last_success
+ WHERE destination = ? AND user_id = ?
+ """
+ txn.executemany(sql, ((row[0], row[1]) for row in rows))
+
+ logger.info("Pruned %d device list outbound pokes", count)
+
+ return run_as_background_process(
+ "prune_old_outbound_device_pokes",
+ self.db_pool.runInteraction,
+ "_prune_old_outbound_device_pokes",
+ _prune_txn,
+ )
diff --git a/synapse/storage/databases/main/directory.py b/synapse/storage/databases/main/directory.py
new file mode 100644
index 00000000..037e0260
--- /dev/null
+++ b/synapse/storage/databases/main/directory.py
@@ -0,0 +1,194 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import namedtuple
+from typing import Iterable, Optional
+
+from synapse.api.errors import SynapseError
+from synapse.storage._base import SQLBaseStore
+from synapse.types import RoomAlias
+from synapse.util.caches.descriptors import cached
+
+RoomAliasMapping = namedtuple("RoomAliasMapping", ("room_id", "room_alias", "servers"))
+
+
+class DirectoryWorkerStore(SQLBaseStore):
+ async def get_association_from_room_alias(
+ self, room_alias: RoomAlias
+ ) -> Optional[RoomAliasMapping]:
+ """Gets the room_id and server list for a given room_alias
+
+ Args:
+ room_alias: The alias to translate to an ID.
+
+ Returns:
+ The room alias mapping or None if no association can be found.
+ """
+ room_id = await self.db_pool.simple_select_one_onecol(
+ "room_aliases",
+ {"room_alias": room_alias.to_string()},
+ "room_id",
+ allow_none=True,
+ desc="get_association_from_room_alias",
+ )
+
+ if not room_id:
+ return None
+
+ servers = await self.db_pool.simple_select_onecol(
+ "room_alias_servers",
+ {"room_alias": room_alias.to_string()},
+ "server",
+ desc="get_association_from_room_alias",
+ )
+
+ if not servers:
+ return None
+
+ return RoomAliasMapping(room_id, room_alias.to_string(), servers)
+
+ def get_room_alias_creator(self, room_alias):
+ return self.db_pool.simple_select_one_onecol(
+ table="room_aliases",
+ keyvalues={"room_alias": room_alias},
+ retcol="creator",
+ desc="get_room_alias_creator",
+ )
+
+ @cached(max_entries=5000)
+ def get_aliases_for_room(self, room_id):
+ return self.db_pool.simple_select_onecol(
+ "room_aliases",
+ {"room_id": room_id},
+ "room_alias",
+ desc="get_aliases_for_room",
+ )
+
+
+class DirectoryStore(DirectoryWorkerStore):
+ async def create_room_alias_association(
+ self,
+ room_alias: RoomAlias,
+ room_id: str,
+ servers: Iterable[str],
+ creator: Optional[str] = None,
+ ) -> None:
+ """ Creates an association between a room alias and room_id/servers
+
+ Args:
+ room_alias: The alias to create.
+ room_id: The target of the alias.
+ servers: A list of servers through which it may be possible to join the room
+ creator: Optional user_id of creator.
+ """
+
+ def alias_txn(txn):
+ self.db_pool.simple_insert_txn(
+ txn,
+ "room_aliases",
+ {
+ "room_alias": room_alias.to_string(),
+ "room_id": room_id,
+ "creator": creator,
+ },
+ )
+
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="room_alias_servers",
+ values=[
+ {"room_alias": room_alias.to_string(), "server": server}
+ for server in servers
+ ],
+ )
+
+ self._invalidate_cache_and_stream(
+ txn, self.get_aliases_for_room, (room_id,)
+ )
+
+ try:
+ await self.db_pool.runInteraction(
+ "create_room_alias_association", alias_txn
+ )
+ except self.database_engine.module.IntegrityError:
+ raise SynapseError(
+ 409, "Room alias %s already exists" % room_alias.to_string()
+ )
+
+    async def delete_room_alias(self, room_alias: RoomAlias) -> Optional[str]:
+ room_id = await self.db_pool.runInteraction(
+ "delete_room_alias", self._delete_room_alias_txn, room_alias
+ )
+
+ return room_id
+
+    def _delete_room_alias_txn(self, txn, room_alias: RoomAlias) -> Optional[str]:
+ txn.execute(
+ "SELECT room_id FROM room_aliases WHERE room_alias = ?",
+ (room_alias.to_string(),),
+ )
+
+ res = txn.fetchone()
+ if res:
+ room_id = res[0]
+ else:
+ return None
+
+ txn.execute(
+ "DELETE FROM room_aliases WHERE room_alias = ?", (room_alias.to_string(),)
+ )
+
+ txn.execute(
+ "DELETE FROM room_alias_servers WHERE room_alias = ?",
+ (room_alias.to_string(),),
+ )
+
+ self._invalidate_cache_and_stream(txn, self.get_aliases_for_room, (room_id,))
+
+ return room_id
+
+ def update_aliases_for_room(
+ self, old_room_id: str, new_room_id: str, creator: Optional[str] = None,
+ ):
+ """Repoint all of the aliases for a given room, to a different room.
+
+ Args:
+ old_room_id:
+ new_room_id:
+ creator: The user to record as the creator of the new mapping.
+ If None, the creator will be left unchanged.
+ """
+
+ def _update_aliases_for_room_txn(txn):
+ update_creator_sql = ""
+ sql_params = (new_room_id, old_room_id)
+ if creator:
+ update_creator_sql = ", creator = ?"
+ sql_params = (new_room_id, creator, old_room_id)
+
+ sql = "UPDATE room_aliases SET room_id = ? %s WHERE room_id = ?" % (
+ update_creator_sql,
+ )
+ txn.execute(sql, sql_params)
+ self._invalidate_cache_and_stream(
+ txn, self.get_aliases_for_room, (old_room_id,)
+ )
+ self._invalidate_cache_and_stream(
+ txn, self.get_aliases_for_room, (new_room_id,)
+ )
+
+ return self.db_pool.runInteraction(
+ "_update_aliases_for_room_txn", _update_aliases_for_room_txn
+ )
diff --git a/synapse/storage/databases/main/e2e_room_keys.py b/synapse/storage/databases/main/e2e_room_keys.py
new file mode 100644
index 00000000..2eeb9f97
--- /dev/null
+++ b/synapse/storage/databases/main/e2e_room_keys.py
@@ -0,0 +1,436 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 New Vector Ltd
+# Copyright 2019 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.api.errors import StoreError
+from synapse.logging.opentracing import log_kv, trace
+from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.util import json_encoder
+
+
+class EndToEndRoomKeyStore(SQLBaseStore):
+ async def update_e2e_room_key(
+ self, user_id, version, room_id, session_id, room_key
+ ):
+ """Replaces the encrypted E2E room key for a given session in a given backup
+
+ Args:
+ user_id(str): the user whose backup we're setting
+ version(str): the version ID of the backup we're updating
+ room_id(str): the ID of the room whose keys we're setting
+ session_id(str): the session whose room_key we're setting
+ room_key(dict): the room_key being set
+ Raises:
+ StoreError
+ """
+
+ await self.db_pool.simple_update_one(
+ table="e2e_room_keys",
+ keyvalues={
+ "user_id": user_id,
+ "version": version,
+ "room_id": room_id,
+ "session_id": session_id,
+ },
+ updatevalues={
+ "first_message_index": room_key["first_message_index"],
+ "forwarded_count": room_key["forwarded_count"],
+ "is_verified": room_key["is_verified"],
+ "session_data": json_encoder.encode(room_key["session_data"]),
+ },
+ desc="update_e2e_room_key",
+ )
+
+ async def add_e2e_room_keys(self, user_id, version, room_keys):
+ """Bulk add room keys to a given backup.
+
+ Args:
+ user_id (str): the user whose backup we're adding to
+ version (str): the version ID of the backup for the set of keys we're adding to
+ room_keys (iterable[(str, str, dict)]): the keys to add, in the form
+ (roomID, sessionID, keyData)
+ """
+
+ values = []
+ for (room_id, session_id, room_key) in room_keys:
+ values.append(
+ {
+ "user_id": user_id,
+ "version": version,
+ "room_id": room_id,
+ "session_id": session_id,
+ "first_message_index": room_key["first_message_index"],
+ "forwarded_count": room_key["forwarded_count"],
+ "is_verified": room_key["is_verified"],
+ "session_data": json_encoder.encode(room_key["session_data"]),
+ }
+ )
+ log_kv(
+ {
+ "message": "Set room key",
+ "room_id": room_id,
+ "session_id": session_id,
+ "room_key": room_key,
+ }
+ )
+
+ await self.db_pool.simple_insert_many(
+ table="e2e_room_keys", values=values, desc="add_e2e_room_keys"
+ )
+
+ @trace
+ async def get_e2e_room_keys(self, user_id, version, room_id=None, session_id=None):
+ """Bulk get the E2E room keys for a given backup, optionally filtered to a given
+ room, or a given session.
+
+ Args:
+ user_id (str): the user whose backup we're querying
+ version (str): the version ID of the backup for the set of keys we're querying
+ room_id (str): Optional. the ID of the room whose keys we're querying, if any.
+ If not specified, we return the keys for all the rooms in the backup.
+ session_id (str): Optional. the session whose room_key we're querying, if any.
+ If specified, we also require the room_id to be specified.
+ If not specified, we return all the keys in this version of
+ the backup (or for the specified room)
+
+ Returns:
+            A dict giving the session_data and message metadata for these
+            room keys, keyed by room ID and session ID.
+ """
+
+ try:
+ version = int(version)
+ except ValueError:
+ return {"rooms": {}}
+
+ keyvalues = {"user_id": user_id, "version": version}
+ if room_id:
+ keyvalues["room_id"] = room_id
+ if session_id:
+ keyvalues["session_id"] = session_id
+
+ rows = await self.db_pool.simple_select_list(
+ table="e2e_room_keys",
+ keyvalues=keyvalues,
+ retcols=(
+ "user_id",
+ "room_id",
+ "session_id",
+ "first_message_index",
+ "forwarded_count",
+ "is_verified",
+ "session_data",
+ ),
+ desc="get_e2e_room_keys",
+ )
+
+ sessions = {"rooms": {}}
+ for row in rows:
+ room_entry = sessions["rooms"].setdefault(row["room_id"], {"sessions": {}})
+ room_entry["sessions"][row["session_id"]] = {
+ "first_message_index": row["first_message_index"],
+ "forwarded_count": row["forwarded_count"],
+ # is_verified must be returned to the client as a boolean
+ "is_verified": bool(row["is_verified"]),
+ "session_data": db_to_json(row["session_data"]),
+ }
+
+ return sessions
+
+ def get_e2e_room_keys_multi(self, user_id, version, room_keys):
+ """Get multiple room keys at a time. The difference between this function and
+ get_e2e_room_keys is that this function can be used to retrieve
+ multiple specific keys at a time, whereas get_e2e_room_keys is used for
+ getting all the keys in a backup version, all the keys for a room, or a
+ specific key.
+
+ Args:
+ user_id (str): the user whose backup we're querying
+ version (str): the version ID of the backup we're querying about
+ room_keys (dict[str, dict[str, iterable[str]]]): a map from
+ room ID -> {"session": [session ids]} indicating the session IDs
+ that we want to query
+
+ Returns:
+ Deferred[dict[str, dict[str, dict]]]: a map of room IDs to session IDs to room key
+ """
+
+ return self.db_pool.runInteraction(
+ "get_e2e_room_keys_multi",
+ self._get_e2e_room_keys_multi_txn,
+ user_id,
+ version,
+ room_keys,
+ )
+
+ @staticmethod
+ def _get_e2e_room_keys_multi_txn(txn, user_id, version, room_keys):
+ if not room_keys:
+ return {}
+
+ where_clauses = []
+ params = [user_id, version]
+ for room_id, room in room_keys.items():
+ sessions = list(room["sessions"])
+ if not sessions:
+ continue
+ params.append(room_id)
+ params.extend(sessions)
+ where_clauses.append(
+ "(room_id = ? AND session_id IN (%s))"
+ % (",".join(["?" for _ in sessions]),)
+ )
+
+ # check if we're actually querying something
+ if not where_clauses:
+ return {}
+
+ sql = """
+ SELECT room_id, session_id, first_message_index, forwarded_count,
+ is_verified, session_data
+ FROM e2e_room_keys
+ WHERE user_id = ? AND version = ? AND (%s)
+ """ % (
+ " OR ".join(where_clauses)
+ )
+
+ txn.execute(sql, params)
+
+ ret = {}
+
+ for row in txn:
+ room_id = row[0]
+ session_id = row[1]
+ ret.setdefault(room_id, {})
+ ret[room_id][session_id] = {
+ "first_message_index": row[2],
+ "forwarded_count": row[3],
+ "is_verified": row[4],
+ "session_data": db_to_json(row[5]),
+ }
+
+ return ret
+
+ def count_e2e_room_keys(self, user_id, version):
+ """Get the number of keys in a backup version.
+
+ Args:
+ user_id (str): the user whose backup we're querying
+ version (str): the version ID of the backup we're querying about
+ """
+
+ return self.db_pool.simple_select_one_onecol(
+ table="e2e_room_keys",
+ keyvalues={"user_id": user_id, "version": version},
+ retcol="COUNT(*)",
+ desc="count_e2e_room_keys",
+ )
+
+ @trace
+ async def delete_e2e_room_keys(
+ self, user_id, version, room_id=None, session_id=None
+ ):
+ """Bulk delete the E2E room keys for a given backup, optionally filtered to a given
+ room or a given session.
+
+ Args:
+ user_id(str): the user whose backup we're deleting from
+ version(str): the version ID of the backup for the set of keys we're deleting
+ room_id(str): Optional. the ID of the room whose keys we're deleting, if any.
+ If not specified, we delete the keys for all the rooms in the backup.
+ session_id(str): Optional. the session whose room_key we're deleting, if any.
+ If specified, we also require the room_id to be specified.
+ If not specified, we delete all the keys in this version of
+ the backup (or for the specified room)
+ """
+
+ keyvalues = {"user_id": user_id, "version": int(version)}
+ if room_id:
+ keyvalues["room_id"] = room_id
+ if session_id:
+ keyvalues["session_id"] = session_id
+
+ await self.db_pool.simple_delete(
+ table="e2e_room_keys", keyvalues=keyvalues, desc="delete_e2e_room_keys"
+ )
+
+ @staticmethod
+ def _get_current_version(txn, user_id):
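+ """Fetch the current (highest non-deleted) backup version for the user.
+
+ Raises:
+ StoreError: with code 404 if the query returns no row.
+ """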
+ txn.execute(
+ "SELECT MAX(version) FROM e2e_room_keys_versions "
+ "WHERE user_id=? AND deleted=0",
+ (user_id,),
+ )
+ row = txn.fetchone()
+ if not row:
+ raise StoreError(404, "No current backup version")
+ return row[0]
+
+ def get_e2e_room_keys_version_info(self, user_id, version=None):
+ """Get info metadata about a version of our room_keys backup.
+
+ Args:
+ user_id(str): the user whose backup we're querying
+ version(str): Optional. the version ID of the backup we're querying about
+ If missing, we return the information about the current version.
+ Raises:
+ StoreError: with code 404 if there are no e2e_room_keys_versions present
+ Returns:
+ A deferred dict giving the info metadata for this backup version, with
+ fields including:
+ version(str)
+ algorithm(str)
+ auth_data(object): opaque dict supplied by the client
+ etag(int): tag of the keys in the backup
+ """
+
+ def _get_e2e_room_keys_version_info_txn(txn):
+ if version is None:
+ this_version = self._get_current_version(txn, user_id)
+ else:
+ try:
+ this_version = int(version)
+ except ValueError:
+ # Our versions are all ints so if we can't convert it to an integer,
+ # it isn't there.
+ raise StoreError(404, "No row found")
+
+ result = self.db_pool.simple_select_one_txn(
+ txn,
+ table="e2e_room_keys_versions",
+ keyvalues={"user_id": user_id, "version": this_version, "deleted": 0},
+ retcols=("version", "algorithm", "auth_data", "etag"),
+ )
+ result["auth_data"] = db_to_json(result["auth_data"])
+ result["version"] = str(result["version"])
+ if result["etag"] is None:
+ result["etag"] = 0
+ return result
+
+ return self.db_pool.runInteraction(
+ "get_e2e_room_keys_version_info", _get_e2e_room_keys_version_info_txn
+ )
+
+ @trace
+ def create_e2e_room_keys_version(self, user_id, info):
+ """Atomically creates a new version of this user's e2e_room_keys store
+ with the given version info.
+
+ Args:
+ user_id(str): the user for whose backup we're creating a version
+ info(dict): the info about the backup version to be created
+
+ Returns:
+ A deferred string for the newly created version ID
+ """
+
+ def _create_e2e_room_keys_version_txn(txn):
+ txn.execute(
+ "SELECT MAX(version) FROM e2e_room_keys_versions WHERE user_id=?",
+ (user_id,),
+ )
+ current_version = txn.fetchone()[0]
+ if current_version is None:
+ current_version = "0"
+
+ new_version = str(int(current_version) + 1)
+
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="e2e_room_keys_versions",
+ values={
+ "user_id": user_id,
+ "version": new_version,
+ "algorithm": info["algorithm"],
+ "auth_data": json_encoder.encode(info["auth_data"]),
+ },
+ )
+
+ return new_version
+
+ return self.db_pool.runInteraction(
+ "create_e2e_room_keys_version_txn", _create_e2e_room_keys_version_txn
+ )
+
+ @trace
+ def update_e2e_room_keys_version(
+ self, user_id, version, info=None, version_etag=None
+ ):
+ """Update a given backup version
+
+ Args:
+ user_id(str): the user whose backup version we're updating
+ version(str): the version ID of the backup version we're updating
+ info (dict): the new backup version info to store. If None, then
+ the backup version info is not updated
+ version_etag (Optional[int]): etag of the keys in the backup. If
+ None, then the etag is not updated
+ """
+ updatevalues = {}
+
+ if info is not None and "auth_data" in info:
+ updatevalues["auth_data"] = json_encoder.encode(info["auth_data"])
+ if version_etag is not None:
+ updatevalues["etag"] = version_etag
+
+ if updatevalues:
+ return self.db_pool.simple_update(
+ table="e2e_room_keys_versions",
+ keyvalues={"user_id": user_id, "version": version},
+ updatevalues=updatevalues,
+ desc="update_e2e_room_keys_version",
+ )
+
+ @trace
+ def delete_e2e_room_keys_version(self, user_id, version=None):
+ """Delete a given backup version of the user's room keys.
+ Doesn't delete their actual key data.
+
+ Args:
+ user_id(str): the user whose backup version we're deleting
+ version(str): Optional. the version ID of the backup version we're deleting
+ If missing, we delete the current backup version info.
+ Raises:
+ StoreError: with code 404 if there are no e2e_room_keys_versions present,
+ or if the version requested doesn't exist.
+ """
+
+ def _delete_e2e_room_keys_version_txn(txn):
+ if version is None:
+ this_version = self._get_current_version(txn, user_id)
+ if this_version is None:
+ raise StoreError(404, "No current backup version")
+ else:
+ this_version = version
+
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="e2e_room_keys",
+ keyvalues={"user_id": user_id, "version": this_version},
+ )
+
+ return self.db_pool.simple_update_one_txn(
+ txn,
+ table="e2e_room_keys_versions",
+ keyvalues={"user_id": user_id, "version": this_version},
+ updatevalues={"deleted": 1},
+ )
+
+ return self.db_pool.runInteraction(
+ "delete_e2e_room_keys_version", _delete_e2e_room_keys_version_txn
+ )
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py
new file mode 100644
index 00000000..f93e0d32
--- /dev/null
+++ b/synapse/storage/databases/main/end_to_end_keys.py
@@ -0,0 +1,750 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015, 2016 OpenMarket Ltd
+# Copyright 2019 New Vector Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Dict, Iterable, List, Optional, Tuple
+
+from canonicaljson import encode_canonical_json
+
+from twisted.enterprise.adbapi import Connection
+
+from synapse.logging.opentracing import log_kv, set_tag, trace
+from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.storage.database import make_in_list_sql_clause
+from synapse.util import json_encoder
+from synapse.util.caches.descriptors import cached, cachedList
+from synapse.util.iterutils import batch_iter
+
+
+class EndToEndKeyWorkerStore(SQLBaseStore):
+ @trace
+ async def get_e2e_device_keys(
+ self, query_list, include_all_devices=False, include_deleted_devices=False
+ ):
+ """Fetch a list of device keys.
+ Args:
+ query_list(list): List of pairs of user_ids and device_ids.
+ include_all_devices (bool): whether to include entries for devices
+ that don't have device keys
+ include_deleted_devices (bool): whether to include null entries for
+ devices which no longer exist (but were in the query_list).
+ This option only takes effect if include_all_devices is true.
+ Returns:
+ Dict mapping from user-id to dict mapping from device_id to
+ key data. The key data will be a dict in the same format as the
+ DeviceKeys type returned by POST /_matrix/client/r0/keys/query.
+ """
+ set_tag("query_list", query_list)
+ if not query_list:
+ return {}
+
+ results = await self.db_pool.runInteraction(
+ "get_e2e_device_keys",
+ self._get_e2e_device_keys_txn,
+ query_list,
+ include_all_devices,
+ include_deleted_devices,
+ )
+
+ # Build the result structure, un-jsonify the results, and add the
+ # "unsigned" section
+ rv = {}
+ for user_id, device_keys in results.items():
+ rv[user_id] = {}
+ for device_id, device_info in device_keys.items():
+ r = db_to_json(device_info.pop("key_json"))
+ r["unsigned"] = {}
+ display_name = device_info["device_display_name"]
+ if display_name is not None:
+ r["unsigned"]["device_display_name"] = display_name
+ if "signatures" in device_info:
+ for sig_user_id, sigs in device_info["signatures"].items():
+ r.setdefault("signatures", {}).setdefault(
+ sig_user_id, {}
+ ).update(sigs)
+ rv[user_id][device_id] = r
+
+ return rv
+
+ @trace
+ def _get_e2e_device_keys_txn(
+ self, txn, query_list, include_all_devices=False, include_deleted_devices=False
+ ):
+ set_tag("include_all_devices", include_all_devices)
+ set_tag("include_deleted_devices", include_deleted_devices)
+
+ query_clauses = []
+ query_params = []
+ signature_query_clauses = []
+ signature_query_params = []
+
+ if include_all_devices is False:
+ include_deleted_devices = False
+
+ if include_deleted_devices:
+ deleted_devices = set(query_list)
+
+ for (user_id, device_id) in query_list:
+ query_clause = "user_id = ?"
+ query_params.append(user_id)
+ signature_query_clause = "target_user_id = ?"
+ signature_query_params.append(user_id)
+
+ if device_id is not None:
+ query_clause += " AND device_id = ?"
+ query_params.append(device_id)
+ signature_query_clause += " AND target_device_id = ?"
+ signature_query_params.append(device_id)
+
+ signature_query_clause += " AND user_id = ?"
+ signature_query_params.append(user_id)
+
+ query_clauses.append(query_clause)
+ signature_query_clauses.append(signature_query_clause)
+
+ sql = (
+ "SELECT user_id, device_id, "
+ " d.display_name AS device_display_name, "
+ " k.key_json"
+ " FROM devices d"
+ " %s JOIN e2e_device_keys_json k USING (user_id, device_id)"
+ " WHERE %s AND NOT d.hidden"
+ ) % (
+ "LEFT" if include_all_devices else "INNER",
+ " OR ".join("(" + q + ")" for q in query_clauses),
+ )
+
+ txn.execute(sql, query_params)
+ rows = self.db_pool.cursor_to_dict(txn)
+
+ result = {}
+ for row in rows:
+ if include_deleted_devices:
+ deleted_devices.remove((row["user_id"], row["device_id"]))
+ result.setdefault(row["user_id"], {})[row["device_id"]] = row
+
+ if include_deleted_devices:
+ for user_id, device_id in deleted_devices:
+ result.setdefault(user_id, {})[device_id] = None
+
+ # get signatures on the device
+ signature_sql = ("SELECT * FROM e2e_cross_signing_signatures WHERE %s") % (
+ " OR ".join("(" + q + ")" for q in signature_query_clauses)
+ )
+
+ txn.execute(signature_sql, signature_query_params)
+ rows = self.db_pool.cursor_to_dict(txn)
+
+ # add each cross-signing signature to the correct device in the result dict.
+ for row in rows:
+ signing_user_id = row["user_id"]
+ signing_key_id = row["key_id"]
+ target_user_id = row["target_user_id"]
+ target_device_id = row["target_device_id"]
+ signature = row["signature"]
+
+ target_user_result = result.get(target_user_id)
+ if not target_user_result:
+ continue
+
+ target_device_result = target_user_result.get(target_device_id)
+ if not target_device_result:
+ # note that target_device_result will be None for deleted devices.
+ continue
+
+ target_device_signatures = target_device_result.setdefault("signatures", {})
+ signing_user_signatures = target_device_signatures.setdefault(
+ signing_user_id, {}
+ )
+ signing_user_signatures[signing_key_id] = signature
+
+ log_kv(result)
+ return result
+
+ async def get_e2e_one_time_keys(
+ self, user_id: str, device_id: str, key_ids: List[str]
+ ) -> Dict[Tuple[str, str], str]:
+ """Retrieve a number of one-time keys for a user
+
+ Args:
+ user_id(str): id of user to get keys for
+ device_id(str): id of device to get keys for
+ key_ids(list[str]): list of key ids (excluding algorithm) to
+ retrieve
+
+ Returns:
+ A map from (algorithm, key_id) to json string for key
+ """
+
+ rows = await self.db_pool.simple_select_many_batch(
+ table="e2e_one_time_keys_json",
+ column="key_id",
+ iterable=key_ids,
+ retcols=("algorithm", "key_id", "key_json"),
+ keyvalues={"user_id": user_id, "device_id": device_id},
+ desc="add_e2e_one_time_keys_check",
+ )
+ result = {(row["algorithm"], row["key_id"]): row["key_json"] for row in rows}
+ log_kv({"message": "Fetched one time keys for user", "one_time_keys": result})
+ return result
+
+ async def add_e2e_one_time_keys(
+ self,
+ user_id: str,
+ device_id: str,
+ time_now: int,
+ new_keys: Iterable[Tuple[str, str, str]],
+ ) -> None:
+ """Insert some new one time keys for a device. Errors if any of the
+ keys already exist.
+
+ Args:
+ user_id: id of user to get keys for
+ device_id: id of device to get keys for
+ time_now: insertion time to record (ms since epoch)
+ new_keys: keys to add - each a tuple of (algorithm, key_id, key json)
+ """
+
+ def _add_e2e_one_time_keys(txn):
+ set_tag("user_id", user_id)
+ set_tag("device_id", device_id)
+ set_tag("new_keys", new_keys)
+ # We are protected from race between lookup and insertion due to
+ # a unique constraint. If there is a race of two calls to
+ # `add_e2e_one_time_keys` then they'll conflict and we will only
+ # insert one set.
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="e2e_one_time_keys_json",
+ values=[
+ {
+ "user_id": user_id,
+ "device_id": device_id,
+ "algorithm": algorithm,
+ "key_id": key_id,
+ "ts_added_ms": time_now,
+ "key_json": json_bytes,
+ }
+ for algorithm, key_id, json_bytes in new_keys
+ ],
+ )
+ self._invalidate_cache_and_stream(
+ txn, self.count_e2e_one_time_keys, (user_id, device_id)
+ )
+
+ await self.db_pool.runInteraction(
+ "add_e2e_one_time_keys_insert", _add_e2e_one_time_keys
+ )
+
+ @cached(max_entries=10000)
+ def count_e2e_one_time_keys(self, user_id, device_id):
+ """ Count the number of one time keys the server has for a device
+ Returns:
+ Dict mapping from algorithm to number of keys for that algorithm.
+ """
+
+ def _count_e2e_one_time_keys(txn):
+ sql = (
+ "SELECT algorithm, COUNT(key_id) FROM e2e_one_time_keys_json"
+ " WHERE user_id = ? AND device_id = ?"
+ " GROUP BY algorithm"
+ )
+ txn.execute(sql, (user_id, device_id))
+ result = {}
+ for algorithm, key_count in txn:
+ result[algorithm] = key_count
+ return result
+
+ return self.db_pool.runInteraction(
+ "count_e2e_one_time_keys", _count_e2e_one_time_keys
+ )
+
+ async def get_e2e_cross_signing_key(
+ self, user_id: str, key_type: str, from_user_id: Optional[str] = None
+ ) -> Optional[dict]:
+ """Returns a user's cross-signing key.
+
+ Args:
+ user_id: the user whose key is being requested
+ key_type: the type of key that is being requested: either 'master'
+ for a master key, 'self_signing' for a self-signing key, or
+ 'user_signing' for a user-signing key
+ from_user_id: if specified, signatures made by this user on
+ the self-signing key will be included in the result
+
+ Returns:
+ dict of the key data or None if not found
+ """
+ res = await self.get_e2e_cross_signing_keys_bulk([user_id], from_user_id)
+ user_keys = res.get(user_id)
+ if not user_keys:
+ return None
+ return user_keys.get(key_type)
+
+ @cached(num_args=1)
+ def _get_bare_e2e_cross_signing_keys(self, user_id):
+ """Dummy function. Only used to make a cache for
+ _get_bare_e2e_cross_signing_keys_bulk.
+ """
+ raise NotImplementedError()
+
+ @cachedList(
+ cached_method_name="_get_bare_e2e_cross_signing_keys",
+ list_name="user_ids",
+ num_args=1,
+ )
+ def _get_bare_e2e_cross_signing_keys_bulk(
+ self, user_ids: List[str]
+ ) -> Dict[str, Dict[str, dict]]:
+ """Returns the cross-signing keys for a set of users. The output of this
+ function should be passed to _get_e2e_cross_signing_signatures_txn if
+ the signatures for the calling user need to be fetched.
+
+ Args:
+ user_ids (list[str]): the users whose keys are being requested
+
+ Returns:
+ dict[str, dict[str, dict]]: mapping from user ID to key type to key
+ data. If a user's cross-signing keys were not found, either
+ their user ID will not be in the dict, or their user ID will map
+ to None.
+
+ """
+ return self.db_pool.runInteraction(
+ "get_bare_e2e_cross_signing_keys_bulk",
+ self._get_bare_e2e_cross_signing_keys_bulk_txn,
+ user_ids,
+ )
+
+ def _get_bare_e2e_cross_signing_keys_bulk_txn(
+ self, txn: Connection, user_ids: List[str],
+ ) -> Dict[str, Dict[str, dict]]:
+ """Returns the cross-signing keys for a set of users. The output of this
+ function should be passed to _get_e2e_cross_signing_signatures_txn if
+ the signatures for the calling user need to be fetched.
+
+ Args:
+ txn (twisted.enterprise.adbapi.Connection): db connection
+ user_ids (list[str]): the users whose keys are being requested
+
+ Returns:
+ dict[str, dict[str, dict]]: mapping from user ID to key type to key
+ data. If a user's cross-signing keys were not found, their user
+ ID will not be in the dict.
+
+ """
+ result = {}
+
+ for user_chunk in batch_iter(user_ids, 100):
+ clause, params = make_in_list_sql_clause(
+ txn.database_engine, "k.user_id", user_chunk
+ )
+ sql = (
+ """
+ SELECT k.user_id, k.keytype, k.keydata, k.stream_id
+ FROM e2e_cross_signing_keys k
+ INNER JOIN (SELECT user_id, keytype, MAX(stream_id) AS stream_id
+ FROM e2e_cross_signing_keys
+ GROUP BY user_id, keytype) s
+ USING (user_id, stream_id, keytype)
+ WHERE
+ """
+ + clause
+ )
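+ # The inner SELECT picks the highest stream_id per (user_id, keytype),
+ # so only the most recently uploaded key of each type is returned.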
+
+ txn.execute(sql, params)
+ rows = self.db_pool.cursor_to_dict(txn)
+
+ for row in rows:
+ user_id = row["user_id"]
+ key_type = row["keytype"]
+ key = db_to_json(row["keydata"])
+ user_info = result.setdefault(user_id, {})
+ user_info[key_type] = key
+
+ return result
+
+ def _get_e2e_cross_signing_signatures_txn(
+ self, txn: Connection, keys: Dict[str, Dict[str, dict]], from_user_id: str,
+ ) -> Dict[str, Dict[str, dict]]:
+ """Returns the cross-signing signatures made by a user on a set of keys.
+
+ Args:
+ txn (twisted.enterprise.adbapi.Connection): db connection
+ keys (dict[str, dict[str, dict]]): a map of user ID to key type to
+ key data. This dict will be modified to add signatures.
+ from_user_id (str): fetch the signatures made by this user
+
+ Returns:
+ dict[str, dict[str, dict]]: mapping from user ID to key type to key
+ data. The return value will be the same as the keys argument,
+ with the modifications included.
+ """
+
+ # find out what cross-signing keys (a.k.a. devices) we need to get
+ # signatures for. This is a map of (user_id, device_id) to key type
+ # (device_id is the key's public part).
+ devices = {}
+
+ for user_id, user_info in keys.items():
+ if user_info is None:
+ continue
+ for key_type, key in user_info.items():
+ device_id = None
+ for k in key["keys"].values():
+ device_id = k
+ devices[(user_id, device_id)] = key_type
+
+ for batch in batch_iter(devices.keys(), size=100):
+ sql = """
+ SELECT target_user_id, target_device_id, key_id, signature
+ FROM e2e_cross_signing_signatures
+ WHERE user_id = ?
+ AND (%s)
+ """ % (
+ " OR ".join(
+ "(target_user_id = ? AND target_device_id = ?)" for _ in batch
+ )
+ )
+ query_params = [from_user_id]
+ for item in batch:
+ # item is a (user_id, device_id) tuple
+ query_params.extend(item)
+
+ txn.execute(sql, query_params)
+ rows = self.db_pool.cursor_to_dict(txn)
+
+ # and add the signatures to the appropriate keys
+ for row in rows:
+ key_id = row["key_id"]
+ target_user_id = row["target_user_id"]
+ target_device_id = row["target_device_id"]
+ key_type = devices[(target_user_id, target_device_id)]
+ # We need to copy everything, because the result may have come
+ # from the cache. dict.copy only does a shallow copy, so we
+ # need to recursively copy the dicts that will be modified.
+ user_info = keys[target_user_id] = keys[target_user_id].copy()
+ target_user_key = user_info[key_type] = user_info[key_type].copy()
+ if "signatures" in target_user_key:
+ signatures = target_user_key["signatures"] = target_user_key[
+ "signatures"
+ ].copy()
+ if from_user_id in signatures:
+ user_sigs = signatures[from_user_id] = signatures[from_user_id].copy()
+ user_sigs[key_id] = row["signature"]
+ else:
+ signatures[from_user_id] = {key_id: row["signature"]}
+ else:
+ target_user_key["signatures"] = {
+ from_user_id: {key_id: row["signature"]}
+ }
+
+ return keys
+
+ async def get_e2e_cross_signing_keys_bulk(
+ self, user_ids: List[str], from_user_id: Optional[str] = None
+ ) -> Dict[str, Dict[str, dict]]:
+ """Returns the cross-signing keys for a set of users.
+
+ Args:
+ user_ids: the users whose keys are being requested
+ from_user_id: if specified, signatures made by this user on
+ the self-signing keys will be included in the result
+
+ Returns:
+ A map of user ID to key type to key data. If a user's cross-signing
+ keys were not found, either their user ID will not be in the dict,
+ or their user ID will map to None.
+ """
+
+ result = await self._get_bare_e2e_cross_signing_keys_bulk(user_ids)
+
+ if from_user_id:
+ result = await self.db_pool.runInteraction(
+ "get_e2e_cross_signing_signatures",
+ self._get_e2e_cross_signing_signatures_txn,
+ result,
+ from_user_id,
+ )
+
+ return result
+
+ async def get_all_user_signature_changes_for_remotes(
+ self, instance_name: str, last_id: int, current_id: int, limit: int
+ ) -> Tuple[List[Tuple[int, tuple]], int, bool]:
+ """Get updates for groups replication stream.
+
+ Note that the user signature stream represents when a user signs their
+ device with their user-signing key, which is not published to other
+ users or servers, so no `destination` is needed in the returned
+ list. However, the stream is still needed to poke workers.
+
+ Args:
+ instance_name: The writer we want to fetch updates from. Unused
+ here since there is only ever one writer.
+ last_id: The token to fetch updates from. Exclusive.
+ current_id: The token to fetch updates up to. Inclusive.
+ limit: The requested limit for the number of rows to return. The
+ function may return more or fewer rows.
+
+ Returns:
+ A tuple consisting of: the updates, a token to use to fetch
+ subsequent updates, and whether we returned fewer rows than exists
+ between the requested tokens due to the limit.
+
+ The token returned can be used in a subsequent call to this
+ function to get further updates.
+
+ The updates are a list of 2-tuples of stream ID and the row data.
+ """
+
+ if last_id == current_id:
+ return [], current_id, False
+
+ def _get_all_user_signature_changes_for_remotes_txn(txn):
+ sql = """
+ SELECT stream_id, from_user_id AS user_id
+ FROM user_signature_stream
+ WHERE ? < stream_id AND stream_id <= ?
+ ORDER BY stream_id ASC
+ LIMIT ?
+ """
+ txn.execute(sql, (last_id, current_id, limit))
+
+ updates = [(row[0], (row[1:])) for row in txn]
+
+ limited = False
+ upto_token = current_id
+ if len(updates) >= limit:
+ upto_token = updates[-1][0]
+ limited = True
+
+ return updates, upto_token, limited
+
+ return await self.db_pool.runInteraction(
+ "get_all_user_signature_changes_for_remotes",
+ _get_all_user_signature_changes_for_remotes_txn,
+ )
+
+
+class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore):
+ def set_e2e_device_keys(self, user_id, device_id, time_now, device_keys):
+ """Stores device keys for a device. Returns whether there was a change
+ or the keys were already in the database.
+ """
+
+ def _set_e2e_device_keys_txn(txn):
+ set_tag("user_id", user_id)
+ set_tag("device_id", device_id)
+ set_tag("time_now", time_now)
+ set_tag("device_keys", device_keys)
+
+ old_key_json = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="e2e_device_keys_json",
+ keyvalues={"user_id": user_id, "device_id": device_id},
+ retcol="key_json",
+ allow_none=True,
+ )
+
+ # In py3 we need old_key_json to match new_key_json type. The DB
+ # returns unicode while encode_canonical_json returns bytes.
+ new_key_json = encode_canonical_json(device_keys).decode("utf-8")
+
+ if old_key_json == new_key_json:
+ log_kv({"Message": "Device key already stored."})
+ return False
+
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="e2e_device_keys_json",
+ keyvalues={"user_id": user_id, "device_id": device_id},
+ values={"ts_added_ms": time_now, "key_json": new_key_json},
+ )
+ log_kv({"message": "Device keys stored."})
+ return True
+
+ return self.db_pool.runInteraction(
+ "set_e2e_device_keys", _set_e2e_device_keys_txn
+ )
+
+ def claim_e2e_one_time_keys(self, query_list):
+ """Take a list of one time keys out of the database"""
+
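+ # `query_list` holds (user_id, device_id, algorithm) triples, e.g.
+ # (hypothetical IDs): [("@alice:example.com", "ABCDEFGH", "signed_curve25519")];
+ # the result maps user -> device -> "algorithm:key_id" -> key JSON.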
+ @trace
+ def _claim_e2e_one_time_keys(txn):
+ sql = (
+ "SELECT key_id, key_json FROM e2e_one_time_keys_json"
+ " WHERE user_id = ? AND device_id = ? AND algorithm = ?"
+ " LIMIT 1"
+ )
+ result = {}
+ delete = []
+ for user_id, device_id, algorithm in query_list:
+ user_result = result.setdefault(user_id, {})
+ device_result = user_result.setdefault(device_id, {})
+ txn.execute(sql, (user_id, device_id, algorithm))
+ for key_id, key_json in txn:
+ device_result[algorithm + ":" + key_id] = key_json
+ delete.append((user_id, device_id, algorithm, key_id))
+ sql = (
+ "DELETE FROM e2e_one_time_keys_json"
+ " WHERE user_id = ? AND device_id = ? AND algorithm = ?"
+ " AND key_id = ?"
+ )
+ for user_id, device_id, algorithm, key_id in delete:
+ log_kv(
+ {
+ "message": "Executing claim e2e_one_time_keys transaction on database."
+ }
+ )
+ txn.execute(sql, (user_id, device_id, algorithm, key_id))
+ log_kv({"message": "finished executing and invalidating cache"})
+ self._invalidate_cache_and_stream(
+ txn, self.count_e2e_one_time_keys, (user_id, device_id)
+ )
+ return result
+
+ return self.db_pool.runInteraction(
+ "claim_e2e_one_time_keys", _claim_e2e_one_time_keys
+ )
+
+ def delete_e2e_keys_by_device(self, user_id, device_id):
+ def delete_e2e_keys_by_device_txn(txn):
+ log_kv(
+ {
+ "message": "Deleting keys for device",
+ "device_id": device_id,
+ "user_id": user_id,
+ }
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="e2e_device_keys_json",
+ keyvalues={"user_id": user_id, "device_id": device_id},
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="e2e_one_time_keys_json",
+ keyvalues={"user_id": user_id, "device_id": device_id},
+ )
+ self._invalidate_cache_and_stream(
+ txn, self.count_e2e_one_time_keys, (user_id, device_id)
+ )
+
+ return self.db_pool.runInteraction(
+ "delete_e2e_keys_by_device", delete_e2e_keys_by_device_txn
+ )
+
+ def _set_e2e_cross_signing_key_txn(self, txn, user_id, key_type, key):
+ """Set a user's cross-signing key.
+
+ Args:
+ txn (twisted.enterprise.adbapi.Connection): db connection
+ user_id (str): the user to set the signing key for
+ key_type (str): the type of key that is being set: either 'master'
+ for a master key, 'self_signing' for a self-signing key, or
+ 'user_signing' for a user-signing key
+ key (dict): the key data
+ """
+ # the 'key' dict will look something like:
+ # {
+ # "user_id": "@alice:example.com",
+ # "usage": ["self_signing"],
+ # "keys": {
+ # "ed25519:base64+self+signing+public+key": "base64+self+signing+public+key",
+ # },
+ # "signatures": {
+ # "@alice:example.com": {
+ # "ed25519:base64+master+public+key": "base64+signature"
+ # }
+ # }
+ # }
+ # The "keys" property must only have one entry, which will be the public
+ # key, so we just grab the first value in there
+ pubkey = next(iter(key["keys"].values()))
+
+ # The cross-signing keys need to occupy the same namespace as devices,
+ # since signatures are identified by device ID. So add an entry to the
+ # device table to make sure that we don't have a collision with device
+ # IDs.
+ # We only need to do this for local users, since remote servers should be
+ # responsible for checking this for their own users.
+ if self.hs.is_mine_id(user_id):
+ self.db_pool.simple_insert_txn(
+ txn,
+ "devices",
+ values={
+ "user_id": user_id,
+ "device_id": pubkey,
+ "display_name": key_type + " signing key",
+ "hidden": True,
+ },
+ )
+
+ # and finally, store the key itself
+ with self._cross_signing_id_gen.get_next() as stream_id:
+ self.db_pool.simple_insert_txn(
+ txn,
+ "e2e_cross_signing_keys",
+ values={
+ "user_id": user_id,
+ "keytype": key_type,
+ "keydata": json_encoder.encode(key),
+ "stream_id": stream_id,
+ },
+ )
+
+ self._invalidate_cache_and_stream(
+ txn, self._get_bare_e2e_cross_signing_keys, (user_id,)
+ )
+
+ def set_e2e_cross_signing_key(self, user_id, key_type, key):
+ """Set a user's cross-signing key.
+
+ Args:
+ user_id (str): the user to set the cross-signing key for
+ key_type (str): the type of cross-signing key to set
+ key (dict): the key data
+ """
+ return self.db_pool.runInteraction(
+ "add_e2e_cross_signing_key",
+ self._set_e2e_cross_signing_key_txn,
+ user_id,
+ key_type,
+ key,
+ )
+
+ def store_e2e_cross_signing_signatures(self, user_id, signatures):
+ """Stores cross-signing signatures.
+
+ Args:
+ user_id (str): the user who made the signatures
+ signatures (iterable[SignatureListItem]): signatures to add
+ """
+ return self.db_pool.simple_insert_many(
+ "e2e_cross_signing_signatures",
+ [
+ {
+ "user_id": user_id,
+ "key_id": item.signing_key_id,
+ "target_user_id": item.target_user_id,
+ "target_device_id": item.target_device_id,
+ "signature": item.signature,
+ }
+ for item in signatures
+ ],
+ "add_e2e_signing_key",
+ )
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
new file mode 100644
index 00000000..484875f9
--- /dev/null
+++ b/synapse/storage/databases/main/event_federation.py
@@ -0,0 +1,714 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import itertools
+import logging
+from queue import Empty, PriorityQueue
+from typing import Dict, Iterable, List, Optional, Set, Tuple
+
+from synapse.api.errors import StoreError
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
+from synapse.storage.database import DatabasePool
+from synapse.storage.databases.main.events_worker import EventsWorkerStore
+from synapse.storage.databases.main.signatures import SignatureWorkerStore
+from synapse.util.caches.descriptors import cached
+from synapse.util.iterutils import batch_iter
+
+logger = logging.getLogger(__name__)
+
+
+class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBaseStore):
+ def get_auth_chain(self, event_ids, include_given=False):
+ """Get auth events for given event_ids. The events *must* be state events.
+
+ Args:
+ event_ids (list): state events
+ include_given (bool): include the given events in result
+
+ Returns:
+ list of events
+ """
+ return self.get_auth_chain_ids(
+ event_ids, include_given=include_given
+ ).addCallback(self.get_events_as_list)
+
+ def get_auth_chain_ids(
+ self,
+ event_ids: List[str],
+ include_given: bool = False,
+ ignore_events: Optional[Set[str]] = None,
+ ):
+ """Get auth events for given event_ids. The events *must* be state events.
+
+ Args:
+ event_ids: state events
+ include_given: include the given events in result
+ ignore_events: Set of events to exclude from the returned auth
+ chain. This is useful if the caller will just discard the
+ given events anyway, and saves us from figuring out their auth
+ chains if not required.
+
+ Returns:
+ list of event_ids
+ """
+ return self.db_pool.runInteraction(
+ "get_auth_chain_ids",
+ self._get_auth_chain_ids_txn,
+ event_ids,
+ include_given,
+ ignore_events,
+ )
+
+ def _get_auth_chain_ids_txn(self, txn, event_ids, include_given, ignore_events):
+ if ignore_events is None:
+ ignore_events = set()
+
+ if include_given:
+ results = set(event_ids)
+ else:
+ results = set()
+
+ base_sql = "SELECT auth_id FROM event_auth WHERE "
+
+ front = set(event_ids)
+ while front:
+ new_front = set()
+ for chunk in batch_iter(front, 100):
+ clause, args = make_in_list_sql_clause(
+ txn.database_engine, "event_id", chunk
+ )
+ txn.execute(base_sql + clause, args)
+ new_front.update(r[0] for r in txn)
+
+ new_front -= ignore_events
+ new_front -= results
+
+ front = new_front
+ results.update(front)
+
+ return list(results)
+
+ def get_auth_chain_difference(self, state_sets: List[Set[str]]):
+ """Given sets of state events figure out the auth chain difference (as
+ per state res v2 algorithm).
+
+ This is equivalent to fetching the full auth chain for each set of state
+ and returning the events that don't appear in each and every auth
+ chain.
+
+ Returns:
+ Deferred[Set[str]]
+ """
+
+ return self.db_pool.runInteraction(
+ "get_auth_chain_difference",
+ self._get_auth_chain_difference_txn,
+ state_sets,
+ )
+
+ def _get_auth_chain_difference_txn(
+ self, txn, state_sets: List[Set[str]]
+ ) -> Set[str]:
+
+ # Algorithm Description
+ # ~~~~~~~~~~~~~~~~~~~~~
+ #
+ # The idea here is to basically walk the auth graph of each state set in
+ # tandem, keeping track of which auth events are reachable by each state
+ # set. If we reach an auth event we've already visited (via a different
+ # state set) then we mark that auth event and all ancestors as reachable
+ # by the state set. This requires that we keep track of the auth chains
+ # in memory.
+ #
+ # Doing it in such a way means that we can stop early if all auth
+ # events we're currently walking are reachable by all state sets.
+ #
+ # *Note*: We can't stop walking an event's auth chain if it is reachable
+ # by all state sets. This is because other auth chains we're walking
+ # might be reachable only via the original auth chain. For example,
+ # given the following auth chain:
+ #
+ # A -> C -> D -> E
+ # / /
+ # B -´---------´
+ #
+ # and state sets {A} and {B} then walking the auth chains of A and B
+ # would immediately show that C is reachable by both. However, if we
+ # stopped at C then we'd only reach E via the auth chain of B and so E
+ # would erroneously get included in the returned difference.
+ #
+ # The other thing that we do is limit the number of auth chains we walk
+ # at once, due to practical limits (i.e. we can only query the database
+ # with a limited set of parameters). We pick the auth chains we walk
+ # each iteration based on their depth, in the hope that events with a
+ # lower depth are likely reachable by those with higher depths.
+ #
+ # We could use any ordering that we believe would give a rough
+ # topological ordering, e.g. origin server timestamp. If the ordering
+ # chosen is not topological then the algorithm still produces the right
+ # result, but perhaps a bit more inefficiently. This is why it is safe
+ # to use "depth" here.
+
+ initial_events = set(state_sets[0]).union(*state_sets[1:])
+
+ # Dict from events in auth chains to which sets *cannot* reach them.
+ # I.e. if the set is empty then all sets can reach the event.
+ event_to_missing_sets = {
+ event_id: {i for i, a in enumerate(state_sets) if event_id not in a}
+ for event_id in initial_events
+ }
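+ # e.g. with state_sets = [{"A"}, {"B"}] this starts as
+ # {"A": {1}, "B": {0}}: so far A is unreachable from state set 1 and
+ # B is unreachable from state set 0.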
+
+ # The sorted list of events whose auth chains we should walk.
+ search = [] # type: List[Tuple[int, str]]
+
+ # We need to get the depth of the initial events for sorting purposes.
+ sql = """
+ SELECT depth, event_id FROM events
+ WHERE %s
+ """
+ # the list can be huge, so let's avoid looking them all up in one massive
+ # query.
+ for batch in batch_iter(initial_events, 1000):
+ clause, args = make_in_list_sql_clause(
+ txn.database_engine, "event_id", batch
+ )
+ txn.execute(sql % (clause,), args)
+
+ # I think building a temporary list with fetchall is more efficient than
+ # just `search.extend(txn)`, but this is unconfirmed
+ search.extend(txn.fetchall())
+
+ # sort by depth
+ search.sort()
+
+ # Map from event to its auth events
+ event_to_auth_events = {} # type: Dict[str, Set[str]]
+
+ base_sql = """
+ SELECT a.event_id, auth_id, depth
+ FROM event_auth AS a
+ INNER JOIN events AS e ON (e.event_id = a.auth_id)
+ WHERE
+ """
+
+ while search:
+ # Check whether all our current walks are reachable by all state
+ # sets. If so we can bail.
+ if all(not event_to_missing_sets[eid] for _, eid in search):
+ break
+
+ # Fetch the auth events and their depths of the N last events we're
+ # currently walking
+ search, chunk = search[:-100], search[-100:]
+ clause, args = make_in_list_sql_clause(
+ txn.database_engine, "a.event_id", [e_id for _, e_id in chunk]
+ )
+ txn.execute(base_sql + clause, args)
+
+ for event_id, auth_event_id, auth_event_depth in txn:
+ event_to_auth_events.setdefault(event_id, set()).add(auth_event_id)
+
+ sets = event_to_missing_sets.get(auth_event_id)
+ if sets is None:
+ # First time we're seeing this event, so we add it to the
+ # queue of things to fetch.
+ search.append((auth_event_depth, auth_event_id))
+
+ # Assume that this event is unreachable from any of the
+ # state sets until proven otherwise
+ sets = event_to_missing_sets[auth_event_id] = set(
+ range(len(state_sets))
+ )
+ else:
+ # We've previously seen this event, so look up its auth
+ # events and recursively mark all ancestors as reachable
+ # by the current event's state set.
+ a_ids = event_to_auth_events.get(auth_event_id)
+ while a_ids:
+ new_aids = set()
+ for a_id in a_ids:
+ event_to_missing_sets[a_id].intersection_update(
+ event_to_missing_sets[event_id]
+ )
+
+ b = event_to_auth_events.get(a_id)
+ if b:
+ new_aids.update(b)
+
+ a_ids = new_aids
+
+ # Mark that the auth event is reachable by the appropriate sets.
+ sets.intersection_update(event_to_missing_sets[event_id])
+
+ search.sort()
+
+ # Return all events where not all sets can reach them.
+ return {eid for eid, n in event_to_missing_sets.items() if n}
+
+ def get_oldest_events_in_room(self, room_id):
+ return self.db_pool.runInteraction(
+ "get_oldest_events_in_room", self._get_oldest_events_in_room_txn, room_id
+ )
+
+ def get_oldest_events_with_depth_in_room(self, room_id):
+ return self.db_pool.runInteraction(
+ "get_oldest_events_with_depth_in_room",
+ self.get_oldest_events_with_depth_in_room_txn,
+ room_id,
+ )
+
+ def get_oldest_events_with_depth_in_room_txn(self, txn, room_id):
+ sql = (
+ "SELECT b.event_id, MAX(e.depth) FROM events as e"
+ " INNER JOIN event_edges as g"
+ " ON g.event_id = e.event_id"
+ " INNER JOIN event_backward_extremities as b"
+ " ON g.prev_event_id = b.event_id"
+ " WHERE b.room_id = ? AND g.is_state is ?"
+ " GROUP BY b.event_id"
+ )
+
+ txn.execute(sql, (room_id, False))
+
+ return dict(txn)
+
+ async def get_max_depth_of(self, event_ids: List[str]) -> int:
+ """Returns the max depth of a set of event IDs
+
+ Args:
+ event_ids: The event IDs to calculate the max depth of.
+ """
+ rows = await self.db_pool.simple_select_many_batch(
+ table="events",
+ column="event_id",
+ iterable=event_ids,
+ retcols=("depth",),
+ desc="get_max_depth_of",
+ )
+
+ if not rows:
+ return 0
+ else:
+ return max(row["depth"] for row in rows)
+
+ def _get_oldest_events_in_room_txn(self, txn, room_id):
+ return self.db_pool.simple_select_onecol_txn(
+ txn,
+ table="event_backward_extremities",
+ keyvalues={"room_id": room_id},
+ retcol="event_id",
+ )
+
+ def get_prev_events_for_room(self, room_id: str):
+ """
+ Gets a subset of the current forward extremities in the given room.
+
+ Limits the result to 10 extremities, so that we can avoid creating
+ events which refer to hundreds of prev_events.
+
+ Args:
+ room_id (str): room_id
+
+ Returns:
+ Deferred[List[str]]: the event ids of the forward extremities
+
+ """
+
+ return self.db_pool.runInteraction(
+ "get_prev_events_for_room", self._get_prev_events_for_room_txn, room_id
+ )
+
+ def _get_prev_events_for_room_txn(self, txn, room_id: str):
+ # we just use the 10 newest events. Older events will become
+ # prev_events of future events.
+
+ sql = """
+ SELECT e.event_id FROM event_forward_extremities AS f
+ INNER JOIN events AS e USING (event_id)
+ WHERE f.room_id = ?
+ ORDER BY e.depth DESC
+ LIMIT 10
+ """
+
+ txn.execute(sql, (room_id,))
+
+ return [row[0] for row in txn]
+
+ def get_rooms_with_many_extremities(self, min_count, limit, room_id_filter):
+ """Get the top rooms with at least N extremities.
+
+ Args:
+ min_count (int): The minimum number of extremities
+ limit (int): The maximum number of rooms to return.
+ room_id_filter (iterable[str]): room_ids to exclude from the results
+
+ Returns:
+ Deferred[list]: At most `limit` room IDs that have at least
+ `min_count` extremities, sorted by extremity count.
+ """
+
+ def _get_rooms_with_many_extremities_txn(txn):
+ where_clause = "1=1"
+ if room_id_filter:
+ where_clause = "room_id NOT IN (%s)" % (
+ ",".join("?" for _ in room_id_filter),
+ )
+
+ sql = """
+ SELECT room_id FROM event_forward_extremities
+ WHERE %s
+ GROUP BY room_id
+ HAVING count(*) > ?
+ ORDER BY count(*) DESC
+ LIMIT ?
+ """ % (
+ where_clause,
+ )
+
+ query_args = list(itertools.chain(room_id_filter, [min_count, limit]))
+ txn.execute(sql, query_args)
+ return [room_id for room_id, in txn]
+
+ return self.db_pool.runInteraction(
+ "get_rooms_with_many_extremities", _get_rooms_with_many_extremities_txn
+ )
+
+ @cached(max_entries=5000, iterable=True)
+ def get_latest_event_ids_in_room(self, room_id):
+ return self.db_pool.simple_select_onecol(
+ table="event_forward_extremities",
+ keyvalues={"room_id": room_id},
+ retcol="event_id",
+ desc="get_latest_event_ids_in_room",
+ )
+
+ def get_min_depth(self, room_id):
+ """ For hte given room, get the minimum depth we have seen for it.
+ """
+ return self.db_pool.runInteraction(
+ "get_min_depth", self._get_min_depth_interaction, room_id
+ )
+
+ def _get_min_depth_interaction(self, txn, room_id):
+ min_depth = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="room_depth",
+ keyvalues={"room_id": room_id},
+ retcol="min_depth",
+ allow_none=True,
+ )
+
+ return int(min_depth) if min_depth is not None else None
+
+ def get_forward_extremeties_for_room(self, room_id, stream_ordering):
+ """For a given room_id and stream_ordering, return the forward
+ extremeties of the room at that point in "time".
+
+ Throws a StoreError if we have since purged the index for
+ stream_orderings from that point.
+
+ Args:
+ room_id (str):
+ stream_ordering (int):
+
+ Returns:
+ deferred, which resolves to a list of event_ids
+ """
+ # We want to make the cache more effective, so we clamp to the last
+ # change before the given ordering.
+ last_change = self._events_stream_cache.get_max_pos_of_last_change(room_id)
+
+ # We don't always have a full stream_to_exterm_id table, e.g. after
+ # the upgrade that introduced it, so we make sure we never ask for a
+ # stream_ordering from before a restart
+ last_change = max(self._stream_order_on_start, last_change)
+
+ # provided the last_change is recent enough, we now clamp the requested
+ # stream_ordering to it.
+ if last_change > self.stream_ordering_month_ago:
+ stream_ordering = min(last_change, stream_ordering)
+
+ return self._get_forward_extremeties_for_room(room_id, stream_ordering)
+
+ @cached(max_entries=5000, num_args=2)
+ def _get_forward_extremeties_for_room(self, room_id, stream_ordering):
+ """For a given room_id and stream_ordering, return the forward
+ extremeties of the room at that point in "time".
+
+ Throws a StoreError if we have since purged the index for
+ stream_orderings from that point.
+ """
+
+ if stream_ordering <= self.stream_ordering_month_ago:
+ raise StoreError(400, "stream_ordering too old")
+
+ sql = """
+ SELECT event_id FROM stream_ordering_to_exterm
+ INNER JOIN (
+ SELECT room_id, MAX(stream_ordering) AS stream_ordering
+ FROM stream_ordering_to_exterm
+ WHERE stream_ordering <= ? GROUP BY room_id
+ ) AS rms USING (room_id, stream_ordering)
+ WHERE room_id = ?
+ """
+
+ def get_forward_extremeties_for_room_txn(txn):
+ txn.execute(sql, (stream_ordering, room_id))
+ return [event_id for event_id, in txn]
+
+ return self.db_pool.runInteraction(
+ "get_forward_extremeties_for_room", get_forward_extremeties_for_room_txn
+ )
+
+ def get_backfill_events(self, room_id, event_list, limit):
+ """Get a list of Events for a given topic that occurred before (and
+ including) the events in event_list. Return a list of max size `limit`
+
+ Args:
+ txn
+ room_id (str)
+ event_list (list)
+ limit (int)
+ """
+ return (
+ self.db_pool.runInteraction(
+ "get_backfill_events",
+ self._get_backfill_events,
+ room_id,
+ event_list,
+ limit,
+ )
+ .addCallback(self.get_events_as_list)
+ .addCallback(lambda l: sorted(l, key=lambda e: -e.depth))
+ )
+
+ def _get_backfill_events(self, txn, room_id, event_list, limit):
+ logger.debug("_get_backfill_events: %s, %r, %s", room_id, event_list, limit)
+
+ event_results = set()
+
+ # We want to make sure that we do a breadth-first, "depth" ordered
+ # search.
+
+ query = (
+ "SELECT depth, prev_event_id FROM event_edges"
+ " INNER JOIN events"
+ " ON prev_event_id = events.event_id"
+ " WHERE event_edges.event_id = ?"
+ " AND event_edges.is_state = ?"
+ " LIMIT ?"
+ )
+
+ queue = PriorityQueue()
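+ # The queue holds (-depth, event_id) tuples: PriorityQueue pops the
+ # smallest item first, so negating the depth yields deepest-first order.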
+
+ for event_id in event_list:
+ depth = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="events",
+ keyvalues={"event_id": event_id, "room_id": room_id},
+ retcol="depth",
+ allow_none=True,
+ )
+
+ if depth:
+ queue.put((-depth, event_id))
+
+ while not queue.empty() and len(event_results) < limit:
+ try:
+ _, event_id = queue.get_nowait()
+ except Empty:
+ break
+
+ if event_id in event_results:
+ continue
+
+ event_results.add(event_id)
+
+ txn.execute(query, (event_id, False, limit - len(event_results)))
+
+ for row in txn:
+ if row[1] not in event_results:
+ queue.put((-row[0], row[1]))
+
+ return event_results
+
+ async def get_missing_events(self, room_id, earliest_events, latest_events, limit):
+ ids = await self.db_pool.runInteraction(
+ "get_missing_events",
+ self._get_missing_events,
+ room_id,
+ earliest_events,
+ latest_events,
+ limit,
+ )
+ events = await self.get_events_as_list(ids)
+ return events
+
+ def _get_missing_events(self, txn, room_id, earliest_events, latest_events, limit):
+
+ seen_events = set(earliest_events)
+ front = set(latest_events) - seen_events
+ event_results = []
+
+ query = (
+ "SELECT prev_event_id FROM event_edges "
+ "WHERE room_id = ? AND event_id = ? AND is_state = ? "
+ "LIMIT ?"
+ )
+
+ while front and len(event_results) < limit:
+ new_front = set()
+ for event_id in front:
+ txn.execute(
+ query, (room_id, event_id, False, limit - len(event_results))
+ )
+
+ new_results = {t[0] for t in txn} - seen_events
+
+ new_front |= new_results
+ seen_events |= new_results
+ event_results.extend(new_results)
+
+ front = new_front
+
+ # we built the list working backwards from latest_events; we now need to
+ # reverse it so that the events are approximately chronological.
+ event_results.reverse()
+ return event_results
+
+ async def get_successor_events(self, event_ids: Iterable[str]) -> List[str]:
+ """Fetch all events that have the given events as a prev event
+
+ Args:
+ event_ids: The events to use as the previous events.
+ """
+ rows = await self.db_pool.simple_select_many_batch(
+ table="event_edges",
+ column="prev_event_id",
+ iterable=event_ids,
+ retcols=("event_id",),
+ desc="get_successor_events",
+ )
+
+ return [row["event_id"] for row in rows]
+
+
+class EventFederationStore(EventFederationWorkerStore):
+ """ Responsible for storing and serving up the various graphs associated
+ with an event. Including the main event graph and the auth chains for an
+ event.
+
+ Also has methods for getting the front (latest) and back (oldest) edges
+ of the event graphs. These are used to generate the parents for new events
+ and backfilling from another server respectively.
+ """
+
+ EVENT_AUTH_STATE_ONLY = "event_auth_state_only"
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(EventFederationStore, self).__init__(database, db_conn, hs)
+
+ self.db_pool.updates.register_background_update_handler(
+ self.EVENT_AUTH_STATE_ONLY, self._background_delete_non_state_event_auth
+ )
+
+ hs.get_clock().looping_call(
+ self._delete_old_forward_extrem_cache, 60 * 60 * 1000
+ )
+
+ def _delete_old_forward_extrem_cache(self):
+ def _delete_old_forward_extrem_cache_txn(txn):
+ # Delete entries older than a month, while making sure we don't delete
+ # the only entries for a room.
+ sql = """
+ DELETE FROM stream_ordering_to_exterm
+ WHERE
+ room_id IN (
+ SELECT room_id
+ FROM stream_ordering_to_exterm
+ WHERE stream_ordering > ?
+ ) AND stream_ordering < ?
+ """
+ txn.execute(
+ sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago)
+ )
+
+ return run_as_background_process(
+ "delete_old_forward_extrem_cache",
+ self.db_pool.runInteraction,
+ "_delete_old_forward_extrem_cache",
+ _delete_old_forward_extrem_cache_txn,
+ )
+
+ def clean_room_for_join(self, room_id):
+ return self.db_pool.runInteraction(
+ "clean_room_for_join", self._clean_room_for_join_txn, room_id
+ )
+
+ def _clean_room_for_join_txn(self, txn, room_id):
+ query = "DELETE FROM event_forward_extremities WHERE room_id = ?"
+
+ txn.execute(query, (room_id,))
+ txn.call_after(self.get_latest_event_ids_in_room.invalidate, (room_id,))
+
+ async def _background_delete_non_state_event_auth(self, progress, batch_size):
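+ """Background update which deletes event_auth rows for non-state
+ events, working backwards through the events table in batches.
+ """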
+ def delete_event_auth(txn):
+ target_min_stream_id = progress.get("target_min_stream_id_inclusive")
+ max_stream_id = progress.get("max_stream_id_exclusive")
+
+ if not target_min_stream_id or not max_stream_id:
+ txn.execute("SELECT COALESCE(MIN(stream_ordering), 0) FROM events")
+ rows = txn.fetchall()
+ target_min_stream_id = rows[0][0]
+
+ txn.execute("SELECT COALESCE(MAX(stream_ordering), 0) FROM events")
+ rows = txn.fetchall()
+ max_stream_id = rows[0][0]
+
+ min_stream_id = max_stream_id - batch_size
+
+ sql = """
+ DELETE FROM event_auth
+ WHERE event_id IN (
+ SELECT event_id FROM events
+ LEFT JOIN state_events USING (room_id, event_id)
+ WHERE ? <= stream_ordering AND stream_ordering < ?
+ AND state_key IS null
+ )
+ """
+
+ txn.execute(sql, (min_stream_id, max_stream_id))
+
+ new_progress = {
+ "target_min_stream_id_inclusive": target_min_stream_id,
+ "max_stream_id_exclusive": min_stream_id,
+ }
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, self.EVENT_AUTH_STATE_ONLY, new_progress
+ )
+
+ return min_stream_id >= target_min_stream_id
+
+ result = await self.db_pool.runInteraction(
+ self.EVENT_AUTH_STATE_ONLY, delete_event_auth
+ )
+
+ if not result:
+ await self.db_pool.updates._end_background_update(
+ self.EVENT_AUTH_STATE_ONLY
+ )
+
+ return batch_size
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
new file mode 100644
index 00000000..7c246d3e
--- /dev/null
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -0,0 +1,884 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import List
+
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage._base import LoggingTransaction, SQLBaseStore, db_to_json
+from synapse.storage.database import DatabasePool
+from synapse.util import json_encoder
+from synapse.util.caches.descriptors import cachedInlineCallbacks
+
+logger = logging.getLogger(__name__)
+
+
+DEFAULT_NOTIF_ACTION = ["notify", {"set_tweak": "highlight", "value": False}]
+DEFAULT_HIGHLIGHT_ACTION = [
+ "notify",
+ {"set_tweak": "sound", "value": "default"},
+ {"set_tweak": "highlight"},
+]
+
+
+def _serialize_action(actions, is_highlight):
+ """Custom serializer for actions. This allows us to "compress" common actions.
+
+ We use the fact that most users have the same actions for notifs (and for
+ highlights).
+ We store these default actions as the empty string rather than the full JSON.
+ Since the empty string isn't valid JSON, there is no risk of this clashing with
+ any real JSON actions.
+ """
+ if is_highlight:
+ if actions == DEFAULT_HIGHLIGHT_ACTION:
+ return "" # We use empty string as the column is non-NULL
+ else:
+ if actions == DEFAULT_NOTIF_ACTION:
+ return ""
+ return json_encoder.encode(actions)
+
+
+def _deserialize_action(actions, is_highlight):
+ """Custom deserializer for actions. This allows us to "compress" common actions
+ """
+ if actions:
+ return db_to_json(actions)
+
+ if is_highlight:
+ return DEFAULT_HIGHLIGHT_ACTION
+ else:
+ return DEFAULT_NOTIF_ACTION
+
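+# Round-trip sanity check (illustrative): the defaults compress to the
+# empty string and decompress back to the same action lists:
+#     _serialize_action(DEFAULT_NOTIF_ACTION, False) == ""
+#     _deserialize_action("", False) == DEFAULT_NOTIF_ACTION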
+
+class EventPushActionsWorkerStore(SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(EventPushActionsWorkerStore, self).__init__(database, db_conn, hs)
+
+ # These get correctly set by _find_stream_orderings_for_times_txn
+ self.stream_ordering_month_ago = None
+ self.stream_ordering_day_ago = None
+
+ cur = LoggingTransaction(
+ db_conn.cursor(),
+ name="_find_stream_orderings_for_times_txn",
+ database_engine=self.database_engine,
+ )
+ self._find_stream_orderings_for_times_txn(cur)
+ cur.close()
+
+ self.find_stream_orderings_looping_call = self._clock.looping_call(
+ self._find_stream_orderings_for_times, 10 * 60 * 1000
+ )
+ self._rotate_delay = 3
+ self._rotate_count = 10000
+
+ @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000)
+ def get_unread_event_push_actions_by_room_for_user(
+ self, room_id, user_id, last_read_event_id
+ ):
+ ret = yield self.db_pool.runInteraction(
+ "get_unread_event_push_actions_by_room",
+ self._get_unread_counts_by_receipt_txn,
+ room_id,
+ user_id,
+ last_read_event_id,
+ )
+ return ret
+
+ def _get_unread_counts_by_receipt_txn(
+ self, txn, room_id, user_id, last_read_event_id
+ ):
+ sql = (
+ "SELECT stream_ordering"
+ " FROM events"
+ " WHERE room_id = ? AND event_id = ?"
+ )
+ txn.execute(sql, (room_id, last_read_event_id))
+ results = txn.fetchall()
+ if len(results) == 0:
+ return {"notify_count": 0, "highlight_count": 0}
+
+ stream_ordering = results[0][0]
+
+ return self._get_unread_counts_by_pos_txn(
+ txn, room_id, user_id, stream_ordering
+ )
+
+ def _get_unread_counts_by_pos_txn(self, txn, room_id, user_id, stream_ordering):
+
+ # First get number of notifications.
+ # We don't need to put a notif=1 clause as all rows always have
+ # notif=1
+ sql = (
+ "SELECT count(*)"
+ " FROM event_push_actions ea"
+ " WHERE"
+ " user_id = ?"
+ " AND room_id = ?"
+ " AND stream_ordering > ?"
+ )
+
+ txn.execute(sql, (user_id, room_id, stream_ordering))
+ row = txn.fetchone()
+ notify_count = row[0] if row else 0
+
+ txn.execute(
+ """
+ SELECT notif_count FROM event_push_summary
+ WHERE room_id = ? AND user_id = ? AND stream_ordering > ?
+ """,
+ (room_id, user_id, stream_ordering),
+ )
+ rows = txn.fetchall()
+ if rows:
+ notify_count += rows[0][0]
+
+ # Now get the number of highlights
+ sql = (
+ "SELECT count(*)"
+ " FROM event_push_actions ea"
+ " WHERE"
+ " highlight = 1"
+ " AND user_id = ?"
+ " AND room_id = ?"
+ " AND stream_ordering > ?"
+ )
+
+ txn.execute(sql, (user_id, room_id, stream_ordering))
+ row = txn.fetchone()
+ highlight_count = row[0] if row else 0
+
+ return {"notify_count": notify_count, "highlight_count": highlight_count}
+
+ async def get_push_action_users_in_range(
+ self, min_stream_ordering, max_stream_ordering
+ ):
+ def f(txn):
+ sql = (
+ "SELECT DISTINCT(user_id) FROM event_push_actions WHERE"
+ " stream_ordering >= ? AND stream_ordering <= ?"
+ )
+ txn.execute(sql, (min_stream_ordering, max_stream_ordering))
+ return [r[0] for r in txn]
+
+ ret = await self.db_pool.runInteraction("get_push_action_users_in_range", f)
+ return ret
+
+ async def get_unread_push_actions_for_user_in_range_for_http(
+ self,
+ user_id: str,
+ min_stream_ordering: int,
+ max_stream_ordering: int,
+ limit: int = 20,
+ ) -> List[dict]:
+ """Get a list of the most recent unread push actions for a given user,
+ within the given stream ordering range. Called by the httppusher.
+
+ Args:
+ user_id: The user to fetch push actions for.
+ min_stream_ordering: The exclusive lower bound on the
+ stream ordering of event push actions to fetch.
+ max_stream_ordering: The inclusive upper bound on the
+ stream ordering of event push actions to fetch.
+ limit: The maximum number of rows to return.
+ Returns:
+ A list of dicts with the keys "event_id", "room_id", "stream_ordering", "actions".
+ The list will be ordered by ascending stream_ordering.
+            The list will have between 0 and `limit` entries.
+ """
+ # find rooms that have a read receipt in them and return the next
+ # push actions
+ def get_after_receipt(txn):
+ # find rooms that have a read receipt in them and return the next
+ # push actions
+ sql = (
+ "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,"
+ " ep.highlight "
+ " FROM ("
+ " SELECT room_id,"
+ " MAX(stream_ordering) as stream_ordering"
+ " FROM events"
+ " INNER JOIN receipts_linearized USING (room_id, event_id)"
+ " WHERE receipt_type = 'm.read' AND user_id = ?"
+ " GROUP BY room_id"
+ ") AS rl,"
+ " event_push_actions AS ep"
+ " WHERE"
+ " ep.room_id = rl.room_id"
+ " AND ep.stream_ordering > rl.stream_ordering"
+ " AND ep.user_id = ?"
+ " AND ep.stream_ordering > ?"
+ " AND ep.stream_ordering <= ?"
+ " ORDER BY ep.stream_ordering ASC LIMIT ?"
+ )
+ args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit]
+ txn.execute(sql, args)
+ return txn.fetchall()
+
+ after_read_receipt = await self.db_pool.runInteraction(
+ "get_unread_push_actions_for_user_in_range_http_arr", get_after_receipt
+ )
+
+        # There are rooms with push actions in them but no read receipt in
+        # them, e.g. rooms you've been invited to, so fetch push actions for
+        # rooms which do not have read receipts in them too.
+ def get_no_receipt(txn):
+ sql = (
+ "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,"
+ " ep.highlight "
+ " FROM event_push_actions AS ep"
+ " INNER JOIN events AS e USING (room_id, event_id)"
+ " WHERE"
+ " ep.room_id NOT IN ("
+ " SELECT room_id FROM receipts_linearized"
+ " WHERE receipt_type = 'm.read' AND user_id = ?"
+ " GROUP BY room_id"
+ " )"
+ " AND ep.user_id = ?"
+ " AND ep.stream_ordering > ?"
+ " AND ep.stream_ordering <= ?"
+ " ORDER BY ep.stream_ordering ASC LIMIT ?"
+ )
+ args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit]
+ txn.execute(sql, args)
+ return txn.fetchall()
+
+ no_read_receipt = await self.db_pool.runInteraction(
+ "get_unread_push_actions_for_user_in_range_http_nrr", get_no_receipt
+ )
+
+ notifs = [
+ {
+ "event_id": row[0],
+ "room_id": row[1],
+ "stream_ordering": row[2],
+ "actions": _deserialize_action(row[3], row[4]),
+ }
+ for row in after_read_receipt + no_read_receipt
+ ]
+
+ # Now sort it so it's ordered correctly, since currently it will
+ # contain results from the first query, correctly ordered, followed
+ # by results from the second query, but we want them all ordered
+ # by stream_ordering, oldest first.
+ notifs.sort(key=lambda r: r["stream_ordering"])
+
+ # Take only up to the limit. We have to stop at the limit because
+ # one of the subqueries may have hit the limit.
+ return notifs[:limit]
+
+ async def get_unread_push_actions_for_user_in_range_for_email(
+ self,
+ user_id: str,
+ min_stream_ordering: int,
+ max_stream_ordering: int,
+ limit: int = 20,
+ ) -> List[dict]:
+ """Get a list of the most recent unread push actions for a given user,
+ within the given stream ordering range. Called by the emailpusher
+
+ Args:
+ user_id: The user to fetch push actions for.
+ min_stream_ordering: The exclusive lower bound on the
+ stream ordering of event push actions to fetch.
+ max_stream_ordering: The inclusive upper bound on the
+ stream ordering of event push actions to fetch.
+ limit: The maximum number of rows to return.
+ Returns:
+ A list of dicts with the keys "event_id", "room_id", "stream_ordering", "actions", "received_ts".
+ The list will be ordered by descending received_ts.
+            The list will have between 0 and `limit` entries.
+ """
+ # find rooms that have a read receipt in them and return the most recent
+ # push actions
+ def get_after_receipt(txn):
+ sql = (
+ "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,"
+ " ep.highlight, e.received_ts"
+ " FROM ("
+ " SELECT room_id,"
+ " MAX(stream_ordering) as stream_ordering"
+ " FROM events"
+ " INNER JOIN receipts_linearized USING (room_id, event_id)"
+ " WHERE receipt_type = 'm.read' AND user_id = ?"
+ " GROUP BY room_id"
+ ") AS rl,"
+ " event_push_actions AS ep"
+ " INNER JOIN events AS e USING (room_id, event_id)"
+ " WHERE"
+ " ep.room_id = rl.room_id"
+ " AND ep.stream_ordering > rl.stream_ordering"
+ " AND ep.user_id = ?"
+ " AND ep.stream_ordering > ?"
+ " AND ep.stream_ordering <= ?"
+ " ORDER BY ep.stream_ordering DESC LIMIT ?"
+ )
+ args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit]
+ txn.execute(sql, args)
+ return txn.fetchall()
+
+ after_read_receipt = await self.db_pool.runInteraction(
+ "get_unread_push_actions_for_user_in_range_email_arr", get_after_receipt
+ )
+
+        # There are rooms with push actions in them but no read receipt in
+        # them, e.g. rooms you've been invited to, so fetch push actions for
+        # rooms which do not have read receipts in them too.
+ def get_no_receipt(txn):
+ sql = (
+ "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions,"
+ " ep.highlight, e.received_ts"
+ " FROM event_push_actions AS ep"
+ " INNER JOIN events AS e USING (room_id, event_id)"
+ " WHERE"
+ " ep.room_id NOT IN ("
+ " SELECT room_id FROM receipts_linearized"
+ " WHERE receipt_type = 'm.read' AND user_id = ?"
+ " GROUP BY room_id"
+ " )"
+ " AND ep.user_id = ?"
+ " AND ep.stream_ordering > ?"
+ " AND ep.stream_ordering <= ?"
+ " ORDER BY ep.stream_ordering DESC LIMIT ?"
+ )
+ args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit]
+ txn.execute(sql, args)
+ return txn.fetchall()
+
+ no_read_receipt = await self.db_pool.runInteraction(
+ "get_unread_push_actions_for_user_in_range_email_nrr", get_no_receipt
+ )
+
+ # Make a list of dicts from the two sets of results.
+ notifs = [
+ {
+ "event_id": row[0],
+ "room_id": row[1],
+ "stream_ordering": row[2],
+ "actions": _deserialize_action(row[3], row[4]),
+ "received_ts": row[5],
+ }
+ for row in after_read_receipt + no_read_receipt
+ ]
+
+ # Now sort it so it's ordered correctly, since currently it will
+ # contain results from the first query, correctly ordered, followed
+ # by results from the second query, but we want them all ordered
+ # by received_ts (most recent first)
+ notifs.sort(key=lambda r: -(r["received_ts"] or 0))
+
+ # Now return the first `limit`
+ return notifs[:limit]
+
+ def get_if_maybe_push_in_range_for_user(self, user_id, min_stream_ordering):
+ """A fast check to see if there might be something to push for the
+ user since the given stream ordering. May return false positives.
+
+ Useful to know whether to bother starting a pusher on start up or not.
+
+ Args:
+ user_id (str)
+ min_stream_ordering (int)
+
+ Returns:
+ Deferred[bool]: True if there may be push to process, False if
+ there definitely isn't.
+ """
+
+ def _get_if_maybe_push_in_range_for_user_txn(txn):
+ sql = """
+ SELECT 1 FROM event_push_actions
+ WHERE user_id = ? AND stream_ordering > ?
+ LIMIT 1
+ """
+
+ txn.execute(sql, (user_id, min_stream_ordering))
+ return bool(txn.fetchone())
+
+ return self.db_pool.runInteraction(
+ "get_if_maybe_push_in_range_for_user",
+ _get_if_maybe_push_in_range_for_user_txn,
+ )
+
+ async def add_push_actions_to_staging(self, event_id, user_id_actions):
+ """Add the push actions for the event to the push action staging area.
+
+ Args:
+ event_id (str)
+            user_id_actions (dict[str, list[dict|str]]): A dictionary mapping
+ user_id to list of push actions, where an action can either be
+ a string or dict.
+ """
+
+ if not user_id_actions:
+ return
+
+        # This is a helper function for generating the necessary tuple that
+        # can be used to insert into the `event_push_actions_staging` table.
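+        # Illustrative: a highlighting entry comes out as
+        # (event_id, user_id, <serialised actions>, 1, 1), with the actions
+        # column collapsing to "" when it matches a DEFAULT_* constant.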
+ def _gen_entry(user_id, actions):
+ is_highlight = 1 if _action_has_highlight(actions) else 0
+ return (
+ event_id, # event_id column
+ user_id, # user_id column
+ _serialize_action(actions, is_highlight), # actions column
+ 1, # notif column
+ is_highlight, # highlight column
+ )
+
+ def _add_push_actions_to_staging_txn(txn):
+ # We don't use simple_insert_many here to avoid the overhead
+ # of generating lists of dicts.
+
+ sql = """
+ INSERT INTO event_push_actions_staging
+ (event_id, user_id, actions, notif, highlight)
+ VALUES (?, ?, ?, ?, ?)
+ """
+
+ txn.executemany(
+ sql,
+ (
+ _gen_entry(user_id, actions)
+ for user_id, actions in user_id_actions.items()
+ ),
+ )
+
+ return await self.db_pool.runInteraction(
+ "add_push_actions_to_staging", _add_push_actions_to_staging_txn
+ )
+
+ async def remove_push_actions_from_staging(self, event_id: str) -> None:
+ """Called if we failed to persist the event to ensure that stale push
+ actions don't build up in the DB
+ """
+
+ try:
+ res = await self.db_pool.simple_delete(
+ table="event_push_actions_staging",
+ keyvalues={"event_id": event_id},
+ desc="remove_push_actions_from_staging",
+ )
+ return res
+ except Exception:
+ # this method is called from an exception handler, so propagating
+ # another exception here really isn't helpful - there's nothing
+ # the caller can do about it. Just log the exception and move on.
+ logger.exception(
+ "Error removing push actions after event persistence failure"
+ )
+
+ def _find_stream_orderings_for_times(self):
+ return run_as_background_process(
+ "event_push_action_stream_orderings",
+ self.db_pool.runInteraction,
+ "_find_stream_orderings_for_times",
+ self._find_stream_orderings_for_times_txn,
+ )
+
+ def _find_stream_orderings_for_times_txn(self, txn):
+ logger.info("Searching for stream ordering 1 month ago")
+ self.stream_ordering_month_ago = self._find_first_stream_ordering_after_ts_txn(
+ txn, self._clock.time_msec() - 30 * 24 * 60 * 60 * 1000
+ )
+ logger.info(
+ "Found stream ordering 1 month ago: it's %d", self.stream_ordering_month_ago
+ )
+ logger.info("Searching for stream ordering 1 day ago")
+ self.stream_ordering_day_ago = self._find_first_stream_ordering_after_ts_txn(
+ txn, self._clock.time_msec() - 24 * 60 * 60 * 1000
+ )
+ logger.info(
+ "Found stream ordering 1 day ago: it's %d", self.stream_ordering_day_ago
+ )
+
+ def find_first_stream_ordering_after_ts(self, ts):
+ """Gets the stream ordering corresponding to a given timestamp.
+
+ Specifically, finds the stream_ordering of the first event that was
+ received on or after the timestamp. This is done by a binary search on
+ the events table, since there is no index on received_ts, so is
+ relatively slow.
+
+ Args:
+ ts (int): timestamp in millis
+
+ Returns:
+ Deferred[int]: stream ordering of the first event received on/after
+ the timestamp
+ """
+ return self.db_pool.runInteraction(
+ "_find_first_stream_ordering_after_ts_txn",
+ self._find_first_stream_ordering_after_ts_txn,
+ ts,
+ )
+
+ @staticmethod
+ def _find_first_stream_ordering_after_ts_txn(txn, ts):
+ """
+ Find the stream_ordering of the first event that was received on or
+ after a given timestamp. This is relatively slow as there is no index
+ on received_ts but we can then use this to delete push actions before
+ this.
+
+ received_ts must necessarily be in the same order as stream_ordering
+ and stream_ordering is indexed, so we manually binary search using
+ stream_ordering
+
+ Args:
+ txn (twisted.enterprise.adbapi.Transaction):
+ ts (int): timestamp to search for
+
+ Returns:
+ int: stream ordering
+ """
+ txn.execute("SELECT MAX(stream_ordering) FROM events")
+ max_stream_ordering = txn.fetchone()[0]
+
+ if max_stream_ordering is None:
+ return 0
+
+ # We want the first stream_ordering in which received_ts is greater
+ # than or equal to ts. Call this point X.
+ #
+ # We maintain the invariants:
+ #
+ # range_start <= X <= range_end
+ #
+ range_start = 0
+ range_end = max_stream_ordering + 1
+
+ # Given a stream_ordering, look up the timestamp at that
+ # stream_ordering.
+ #
+        # The array may be sparse (we may be missing some stream_orderings).
+        # We treat a gap as having the same value as the preceding entry,
+        # because we will pick the lowest stream_ordering which satisfies our
+        # requirement of received_ts >= ts.
+ #
+ # For example, if our array of events indexed by stream_ordering is
+ # [10, <none>, 20], we should treat this as being equivalent to
+ # [10, 10, 20].
+ #
+ sql = (
+ "SELECT received_ts FROM events"
+ " WHERE stream_ordering <= ?"
+ " ORDER BY stream_ordering DESC"
+ " LIMIT 1"
+ )
+
+ while range_end - range_start > 0:
+ middle = (range_end + range_start) // 2
+ txn.execute(sql, (middle,))
+ row = txn.fetchone()
+ if row is None:
+ # no rows with stream_ordering<=middle
+ range_start = middle + 1
+ continue
+
+ middle_ts = row[0]
+ if ts > middle_ts:
+ # we got a timestamp lower than the one we were looking for.
+ # definitely need to look higher: X > middle.
+ range_start = middle + 1
+ else:
+ # we got a timestamp higher than (or the same as) the one we
+ # were looking for. We aren't yet sure about the point we
+ # looked up, but we can be sure that X <= middle.
+ range_end = middle
+
+ return range_end
+
+ async def get_time_of_last_push_action_before(self, stream_ordering):
+ def f(txn):
+ sql = (
+ "SELECT e.received_ts"
+ " FROM event_push_actions AS ep"
+ " JOIN events e ON ep.room_id = e.room_id AND ep.event_id = e.event_id"
+ " WHERE ep.stream_ordering > ?"
+ " ORDER BY ep.stream_ordering ASC"
+ " LIMIT 1"
+ )
+ txn.execute(sql, (stream_ordering,))
+ return txn.fetchone()
+
+ result = await self.db_pool.runInteraction(
+ "get_time_of_last_push_action_before", f
+ )
+ return result[0] if result else None
+
+
+class EventPushActionsStore(EventPushActionsWorkerStore):
+ EPA_HIGHLIGHT_INDEX = "epa_highlight_index"
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(EventPushActionsStore, self).__init__(database, db_conn, hs)
+
+ self.db_pool.updates.register_background_index_update(
+ self.EPA_HIGHLIGHT_INDEX,
+ index_name="event_push_actions_u_highlight",
+ table="event_push_actions",
+ columns=["user_id", "stream_ordering"],
+ )
+
+ self.db_pool.updates.register_background_index_update(
+ "event_push_actions_highlights_index",
+ index_name="event_push_actions_highlights_index",
+ table="event_push_actions",
+ columns=["user_id", "room_id", "topological_ordering", "stream_ordering"],
+ where_clause="highlight=1",
+ )
+
+ self._doing_notif_rotation = False
+ self._rotate_notif_loop = self._clock.looping_call(
+ self._start_rotate_notifs, 30 * 60 * 1000
+ )
+
+ async def get_push_actions_for_user(
+ self, user_id, before=None, limit=50, only_highlight=False
+ ):
+ def f(txn):
+ before_clause = ""
+ if before:
+ before_clause = "AND epa.stream_ordering < ?"
+ args = [user_id, before, limit]
+ else:
+ args = [user_id, limit]
+
+ if only_highlight:
+ if len(before_clause) > 0:
+ before_clause += " "
+ before_clause += "AND epa.highlight = 1"
+
+ # NB. This assumes event_ids are globally unique since
+ # it makes the query easier to index
+ sql = (
+ "SELECT epa.event_id, epa.room_id,"
+ " epa.stream_ordering, epa.topological_ordering,"
+ " epa.actions, epa.highlight, epa.profile_tag, e.received_ts"
+ " FROM event_push_actions epa, events e"
+ " WHERE epa.event_id = e.event_id"
+ " AND epa.user_id = ? %s"
+ " ORDER BY epa.stream_ordering DESC"
+ " LIMIT ?" % (before_clause,)
+ )
+ txn.execute(sql, args)
+ return self.db_pool.cursor_to_dict(txn)
+
+ push_actions = await self.db_pool.runInteraction("get_push_actions_for_user", f)
+ for pa in push_actions:
+ pa["actions"] = _deserialize_action(pa["actions"], pa["highlight"])
+ return push_actions
+
+ async def get_latest_push_action_stream_ordering(self):
+ def f(txn):
+ txn.execute("SELECT MAX(stream_ordering) FROM event_push_actions")
+ return txn.fetchone()
+
+ result = await self.db_pool.runInteraction(
+ "get_latest_push_action_stream_ordering", f
+ )
+ return result[0] or 0
+
+ def _remove_old_push_actions_before_txn(
+ self, txn, room_id, user_id, stream_ordering
+ ):
+ """
+ Purges old push actions for a user and room before a given
+ stream_ordering.
+
+        However, we keep a month's worth of highlighted notifications, so that
+        users can still get a list of recent highlights.
+
+ Args:
+            txn: The transaction
+ room_id: Room ID to delete from
+ user_id: user ID to delete for
+ stream_ordering: The lowest stream ordering which will
+ not be deleted.
+ """
+ txn.call_after(
+ self.get_unread_event_push_actions_by_room_for_user.invalidate_many,
+ (room_id, user_id),
+ )
+
+        # We need to join on the events table to get the received_ts for
+        # event_push_actions, and sqlite won't let us use a join in a delete so
+        # we can't just delete where received_ts < x. Furthermore we can only
+        # identify event_push_actions by a tuple of room_id, event_id, so we
+        # can't use a subquery.
+        # Instead, we look up the stream ordering for the last event in that
+        # room received before the threshold time and delete event_push_actions
+        # in the room with a stream_ordering before that.
+ txn.execute(
+ "DELETE FROM event_push_actions "
+ " WHERE user_id = ? AND room_id = ? AND "
+ " stream_ordering <= ?"
+ " AND ((stream_ordering < ? AND highlight = 1) or highlight = 0)",
+ (user_id, room_id, stream_ordering, self.stream_ordering_month_ago),
+ )
+
+ txn.execute(
+ """
+ DELETE FROM event_push_summary
+ WHERE room_id = ? AND user_id = ? AND stream_ordering <= ?
+ """,
+ (room_id, user_id, stream_ordering),
+ )
+
+ def _start_rotate_notifs(self):
+ return run_as_background_process("rotate_notifs", self._rotate_notifs)
+
+ async def _rotate_notifs(self):
+ if self._doing_notif_rotation or self.stream_ordering_day_ago is None:
+ return
+ self._doing_notif_rotation = True
+
+ try:
+ while True:
+ logger.info("Rotating notifications")
+
+ caught_up = await self.db_pool.runInteraction(
+ "_rotate_notifs", self._rotate_notifs_txn
+ )
+ if caught_up:
+ break
+ await self.hs.get_clock().sleep(self._rotate_delay)
+ finally:
+ self._doing_notif_rotation = False
+
+ def _rotate_notifs_txn(self, txn):
+ """Archives older notifications into event_push_summary. Returns whether
+ the archiving process has caught up or not.
+ """
+
+ old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="event_push_summary_stream_ordering",
+ keyvalues={},
+ retcol="stream_ordering",
+ )
+
+        # We don't want to try to rotate millions of rows at once, so we cap
+        # the maximum stream ordering we'll rotate before.
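+        # e.g. with _rotate_count = 10000 this finds the stream ordering
+        # 10000 rows past the last rotation point, if that many rows exist.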
+ txn.execute(
+ """
+ SELECT stream_ordering FROM event_push_actions
+ WHERE stream_ordering > ?
+ ORDER BY stream_ordering ASC LIMIT 1 OFFSET ?
+ """,
+ (old_rotate_stream_ordering, self._rotate_count),
+ )
+ stream_row = txn.fetchone()
+ if stream_row:
+ (offset_stream_ordering,) = stream_row
+ rotate_to_stream_ordering = min(
+ self.stream_ordering_day_ago, offset_stream_ordering
+ )
+ caught_up = offset_stream_ordering >= self.stream_ordering_day_ago
+ else:
+ rotate_to_stream_ordering = self.stream_ordering_day_ago
+ caught_up = True
+
+ logger.info("Rotating notifications up to: %s", rotate_to_stream_ordering)
+
+ self._rotate_notifs_before_txn(txn, rotate_to_stream_ordering)
+
+ # We have caught up iff we were limited by `stream_ordering_day_ago`
+ return caught_up
+
+ def _rotate_notifs_before_txn(self, txn, rotate_to_stream_ordering):
+ old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="event_push_summary_stream_ordering",
+ keyvalues={},
+ retcol="stream_ordering",
+ )
+
+ # Calculate the new counts that should be upserted into event_push_summary
+ sql = """
+ SELECT user_id, room_id,
+ coalesce(old.notif_count, 0) + upd.notif_count,
+ upd.stream_ordering,
+ old.user_id
+ FROM (
+ SELECT user_id, room_id, count(*) as notif_count,
+ max(stream_ordering) as stream_ordering
+ FROM event_push_actions
+ WHERE ? <= stream_ordering AND stream_ordering < ?
+ AND highlight = 0
+ GROUP BY user_id, room_id
+ ) AS upd
+ LEFT JOIN event_push_summary AS old USING (user_id, room_id)
+ """
+
+ txn.execute(sql, (old_rotate_stream_ordering, rotate_to_stream_ordering))
+ rows = txn.fetchall()
+
+ logger.info("Rotating notifications, handling %d rows", len(rows))
+
+        # If the `old.user_id` above is NULL then we know there isn't already an
+        # entry in the table, so we simply insert it. Otherwise we update the
+        # existing entry.
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="event_push_summary",
+ values=[
+ {
+ "user_id": row[0],
+ "room_id": row[1],
+ "notif_count": row[2],
+ "stream_ordering": row[3],
+ }
+ for row in rows
+ if row[4] is None
+ ],
+ )
+
+ txn.executemany(
+ """
+ UPDATE event_push_summary SET notif_count = ?, stream_ordering = ?
+ WHERE user_id = ? AND room_id = ?
+ """,
+ ((row[2], row[3], row[0], row[1]) for row in rows if row[4] is not None),
+ )
+
+ txn.execute(
+ "DELETE FROM event_push_actions"
+ " WHERE ? <= stream_ordering AND stream_ordering < ? AND highlight = 0",
+ (old_rotate_stream_ordering, rotate_to_stream_ordering),
+ )
+
+ logger.info("Rotating notifications, deleted %s push actions", txn.rowcount)
+
+ txn.execute(
+ "UPDATE event_push_summary_stream_ordering SET stream_ordering = ?",
+ (rotate_to_stream_ordering,),
+ )
+
+
+def _action_has_highlight(actions):
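+    # e.g. ["notify", {"set_tweak": "highlight"}] -> True (the tweak's value
+    # defaults to True), while ["notify"] -> False.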
+ for action in actions:
+ try:
+ if action.get("set_tweak", None) == "highlight":
+ return action.get("value", True)
+ except AttributeError:
+ pass
+
+ return False
diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
new file mode 100644
index 00000000..1a68bf32
--- /dev/null
+++ b/synapse/storage/databases/main/events.py
@@ -0,0 +1,1481 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018-2019 New Vector Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import itertools
+import logging
+from collections import OrderedDict, namedtuple
+from typing import TYPE_CHECKING, Dict, Iterable, List, Tuple
+
+import attr
+from prometheus_client import Counter
+
+from twisted.internet import defer
+
+import synapse.metrics
+from synapse.api.constants import EventContentFields, EventTypes, RelationTypes
+from synapse.api.room_versions import RoomVersions
+from synapse.crypto.event_signing import compute_event_reference_hash
+from synapse.events import EventBase # noqa: F401
+from synapse.events.snapshot import EventContext # noqa: F401
+from synapse.logging.utils import log_function
+from synapse.storage._base import db_to_json, make_in_list_sql_clause
+from synapse.storage.database import DatabasePool, LoggingTransaction
+from synapse.storage.databases.main.search import SearchEntry
+from synapse.storage.util.id_generators import StreamIdGenerator
+from synapse.types import StateMap, get_domain_from_id
+from synapse.util.frozenutils import frozendict_json_encoder
+from synapse.util.iterutils import batch_iter
+
+if TYPE_CHECKING:
+ from synapse.server import HomeServer
+ from synapse.storage.databases.main import DataStore
+
+
+logger = logging.getLogger(__name__)
+
+persist_event_counter = Counter("synapse_storage_events_persisted_events", "")
+event_counter = Counter(
+ "synapse_storage_events_persisted_events_sep",
+ "",
+ ["type", "origin_type", "origin_entity"],
+)
+
+
+def encode_json(json_object):
+ """
+ Encode a Python object as JSON and return it in a Unicode string.
+ """
+ out = frozendict_json_encoder.encode(json_object)
+ if isinstance(out, bytes):
+ out = out.decode("utf8")
+ return out
+
+
+_EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event"))
+
+
+@attr.s(slots=True)
+class DeltaState:
+ """Deltas to use to update the `current_state_events` table.
+
+ Attributes:
+ to_delete: List of type/state_keys to delete from current state
+ to_insert: Map of state to upsert into current state
+        no_longer_in_room: The server is no longer in the room, so the room
+            should e.g. be removed from the `current_state_events` table.
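+
+    An illustrative delta (hypothetical IDs) for a single membership change:
+
+        DeltaState(
+            to_delete=[],
+            to_insert={("m.room.member", "@user:example.org"): "$event_id"},
+        )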
+ """
+
+ to_delete = attr.ib(type=List[Tuple[str, str]])
+ to_insert = attr.ib(type=StateMap[str])
+ no_longer_in_room = attr.ib(type=bool, default=False)
+
+
+class PersistEventsStore:
+ """Contains all the functions for writing events to the database.
+
+ Should only be instantiated on one process (when using a worker mode setup).
+
+ Note: This is not part of the `DataStore` mixin.
+ """
+
+ def __init__(
+ self, hs: "HomeServer", db: DatabasePool, main_data_store: "DataStore"
+ ):
+ self.hs = hs
+ self.db_pool = db
+ self.store = main_data_store
+ self.database_engine = db.engine
+ self._clock = hs.get_clock()
+
+ self._ephemeral_messages_enabled = hs.config.enable_ephemeral_messages
+ self.is_mine_id = hs.is_mine_id
+
+ # Ideally we'd move these ID gens here, unfortunately some other ID
+ # generators are chained off them so doing so is a bit of a PITA.
+ self._backfill_id_gen = self.store._backfill_id_gen # type: StreamIdGenerator
+ self._stream_id_gen = self.store._stream_id_gen # type: StreamIdGenerator
+
+ # This should only exist on instances that are configured to write
+ assert (
+ hs.config.worker.writers.events == hs.get_instance_name()
+ ), "Can only instantiate EventsStore on master"
+
+ @defer.inlineCallbacks
+ def _persist_events_and_state_updates(
+ self,
+ events_and_contexts: List[Tuple[EventBase, EventContext]],
+ current_state_for_room: Dict[str, StateMap[str]],
+ state_delta_for_room: Dict[str, DeltaState],
+ new_forward_extremeties: Dict[str, List[str]],
+ backfilled: bool = False,
+ ):
+ """Persist a set of events alongside updates to the current state and
+ forward extremities tables.
+
+ Args:
+ events_and_contexts:
+ current_state_for_room: Map from room_id to the current state of
+ the room based on forward extremities
+ state_delta_for_room: Map from room_id to the delta to apply to
+ room state
+            new_forward_extremeties: Map from room_id to list of event IDs
+ that are the new forward extremities of the room.
+ backfilled
+
+ Returns:
+ Deferred: resolves when the events have been persisted
+ """
+
+ # We want to calculate the stream orderings as late as possible, as
+ # we only notify after all events with a lesser stream ordering have
+ # been persisted. I.e. if we spend 10s inside the with block then
+ # that will delay all subsequent events from being notified about.
+        # Hence we do it down here rather than wrapping the entire
+ # function.
+ #
+        # It's safe to do this after calculating the state deltas etc as we
+ # only need to protect the *persistence* of the events. This is to
+ # ensure that queries of the form "fetch events since X" don't
+ # return events and stream positions after events that are still in
+ # flight, as otherwise subsequent requests "fetch event since Y"
+ # will not return those events.
+ #
+ # Note: Multiple instances of this function cannot be in flight at
+ # the same time for the same room.
+ if backfilled:
+ stream_ordering_manager = self._backfill_id_gen.get_next_mult(
+ len(events_and_contexts)
+ )
+ else:
+ stream_ordering_manager = self._stream_id_gen.get_next_mult(
+ len(events_and_contexts)
+ )
+
+ with stream_ordering_manager as stream_orderings:
+ for (event, context), stream in zip(events_and_contexts, stream_orderings):
+ event.internal_metadata.stream_ordering = stream
+
+ yield self.db_pool.runInteraction(
+ "persist_events",
+ self._persist_events_txn,
+ events_and_contexts=events_and_contexts,
+ backfilled=backfilled,
+ state_delta_for_room=state_delta_for_room,
+ new_forward_extremeties=new_forward_extremeties,
+ )
+ persist_event_counter.inc(len(events_and_contexts))
+
+ if not backfilled:
+ # backfilled events have negative stream orderings, so we don't
+ # want to set the event_persisted_position to that.
+ synapse.metrics.event_persisted_position.set(
+ events_and_contexts[-1][0].internal_metadata.stream_ordering
+ )
+
+ for event, context in events_and_contexts:
+ if context.app_service:
+ origin_type = "local"
+ origin_entity = context.app_service.id
+ elif self.hs.is_mine_id(event.sender):
+ origin_type = "local"
+ origin_entity = "*client*"
+ else:
+ origin_type = "remote"
+ origin_entity = get_domain_from_id(event.sender)
+
+ event_counter.labels(event.type, origin_type, origin_entity).inc()
+
+ for room_id, new_state in current_state_for_room.items():
+ self.store.get_current_state_ids.prefill((room_id,), new_state)
+
+ for room_id, latest_event_ids in new_forward_extremeties.items():
+ self.store.get_latest_event_ids_in_room.prefill(
+ (room_id,), list(latest_event_ids)
+ )
+
+ @defer.inlineCallbacks
+ def _get_events_which_are_prevs(self, event_ids):
+ """Filter the supplied list of event_ids to get those which are prev_events of
+ existing (non-outlier/rejected) events.
+
+ Args:
+ event_ids (Iterable[str]): event ids to filter
+
+ Returns:
+ Deferred[List[str]]: filtered event ids
+ """
+ results = []
+
+ def _get_events_which_are_prevs_txn(txn, batch):
+ sql = """
+ SELECT prev_event_id, internal_metadata
+ FROM event_edges
+ INNER JOIN events USING (event_id)
+ LEFT JOIN rejections USING (event_id)
+ LEFT JOIN event_json USING (event_id)
+ WHERE
+ NOT events.outlier
+ AND rejections.event_id IS NULL
+ AND
+ """
+
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "prev_event_id", batch
+ )
+
+ txn.execute(sql + clause, args)
+ results.extend(r[0] for r in txn if not db_to_json(r[1]).get("soft_failed"))
+
+ for chunk in batch_iter(event_ids, 100):
+ yield self.db_pool.runInteraction(
+ "_get_events_which_are_prevs", _get_events_which_are_prevs_txn, chunk
+ )
+
+ return results
+
+ @defer.inlineCallbacks
+ def _get_prevs_before_rejected(self, event_ids):
+ """Get soft-failed ancestors to remove from the extremities.
+
+ Given a set of events, find all those that have been soft-failed or
+ rejected. Returns those soft failed/rejected events and their prev
+ events (whether soft-failed/rejected or not), and recurses up the
+ prev-event graph until it finds no more soft-failed/rejected events.
+
+ This is used to find extremities that are ancestors of new events, but
+ are separated by soft failed events.
+
+ Args:
+ event_ids (Iterable[str]): Events to find prev events for. Note
+ that these must have already been persisted.
+
+ Returns:
+ Deferred[set[str]]
+ """
+
+ # The set of event_ids to return. This includes all soft-failed events
+ # and their prev events.
+ existing_prevs = set()
+
+ def _get_prevs_before_rejected_txn(txn, batch):
+ to_recursively_check = batch
+
+ while to_recursively_check:
+ sql = """
+ SELECT
+ event_id, prev_event_id, internal_metadata,
+ rejections.event_id IS NOT NULL
+ FROM event_edges
+ INNER JOIN events USING (event_id)
+ LEFT JOIN rejections USING (event_id)
+ LEFT JOIN event_json USING (event_id)
+ WHERE
+ NOT events.outlier
+ AND
+ """
+
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "event_id", to_recursively_check
+ )
+
+ txn.execute(sql + clause, args)
+ to_recursively_check = []
+
+ for event_id, prev_event_id, metadata, rejected in txn:
+ if prev_event_id in existing_prevs:
+ continue
+
+ soft_failed = db_to_json(metadata).get("soft_failed")
+ if soft_failed or rejected:
+ to_recursively_check.append(prev_event_id)
+ existing_prevs.add(prev_event_id)
+
+ for chunk in batch_iter(event_ids, 100):
+ yield self.db_pool.runInteraction(
+ "_get_prevs_before_rejected", _get_prevs_before_rejected_txn, chunk
+ )
+
+ return existing_prevs
+
+ @log_function
+ def _persist_events_txn(
+ self,
+ txn: LoggingTransaction,
+ events_and_contexts: List[Tuple[EventBase, EventContext]],
+ backfilled: bool,
+ state_delta_for_room: Dict[str, DeltaState] = {},
+ new_forward_extremeties: Dict[str, List[str]] = {},
+ ):
+ """Insert some number of room events into the necessary database tables.
+
+        Rejected events are only inserted into the events table, the event_json table,
+        and the rejections table. Things reading from those tables will need to check
+ whether the event was rejected.
+
+ Args:
+ txn
+ events_and_contexts: events to persist
+ backfilled: True if the events were backfilled
+            state_delta_for_room: The current-state delta for each room.
+            new_forward_extremeties: The new forward extremities for each room.
+ For each room, a list of the event ids which are the forward
+ extremities.
+
+ """
+ all_events_and_contexts = events_and_contexts
+
+ min_stream_order = events_and_contexts[0][0].internal_metadata.stream_ordering
+ max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering
+
+ self._update_forward_extremities_txn(
+ txn,
+ new_forward_extremities=new_forward_extremeties,
+ max_stream_order=max_stream_order,
+ )
+
+ # Ensure that we don't have the same event twice.
+ events_and_contexts = self._filter_events_and_contexts_for_duplicates(
+ events_and_contexts
+ )
+
+ self._update_room_depths_txn(
+ txn, events_and_contexts=events_and_contexts, backfilled=backfilled
+ )
+
+ # _update_outliers_txn filters out any events which have already been
+ # persisted, and returns the filtered list.
+ events_and_contexts = self._update_outliers_txn(
+ txn, events_and_contexts=events_and_contexts
+ )
+
+ # From this point onwards the events are only events that we haven't
+ # seen before.
+
+ self._store_event_txn(txn, events_and_contexts=events_and_contexts)
+
+ # Insert into event_to_state_groups.
+ self._store_event_state_mappings_txn(txn, events_and_contexts)
+
+ # We want to store event_auth mappings for rejected events, as they're
+ # used in state res v2.
+ # This is only necessary if the rejected event appears in an accepted
+        # event's auth chain, but it's easier for now just to store them (and
+ # it doesn't take much storage compared to storing the entire event
+ # anyway).
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="event_auth",
+ values=[
+ {
+ "event_id": event.event_id,
+ "room_id": event.room_id,
+ "auth_id": auth_id,
+ }
+ for event, _ in events_and_contexts
+ for auth_id in event.auth_event_ids()
+ if event.is_state()
+ ],
+ )
+
+ # _store_rejected_events_txn filters out any events which were
+ # rejected, and returns the filtered list.
+ events_and_contexts = self._store_rejected_events_txn(
+ txn, events_and_contexts=events_and_contexts
+ )
+
+ # From this point onwards the events are only ones that weren't
+ # rejected.
+
+ self._update_metadata_tables_txn(
+ txn,
+ events_and_contexts=events_and_contexts,
+ all_events_and_contexts=all_events_and_contexts,
+ backfilled=backfilled,
+ )
+
+ # We call this last as it assumes we've inserted the events into
+ # room_memberships, where applicable.
+ self._update_current_state_txn(txn, state_delta_for_room, min_stream_order)
+
+ def _update_current_state_txn(
+ self,
+ txn: LoggingTransaction,
+ state_delta_by_room: Dict[str, DeltaState],
+ stream_id: int,
+ ):
+ for room_id, delta_state in state_delta_by_room.items():
+ to_delete = delta_state.to_delete
+ to_insert = delta_state.to_insert
+
+ if delta_state.no_longer_in_room:
+ # Server is no longer in the room so we delete the room from
+                # current_state_events, being careful that we've already updated
+ # rooms.room_version column (which gets populated in a
+ # background task).
+ self._upsert_room_version_txn(txn, room_id)
+
+ # Before deleting we populate the current_state_delta_stream
+ # so that async background tasks get told what happened.
+ sql = """
+ INSERT INTO current_state_delta_stream
+ (stream_id, room_id, type, state_key, event_id, prev_event_id)
+ SELECT ?, room_id, type, state_key, null, event_id
+ FROM current_state_events
+ WHERE room_id = ?
+ """
+ txn.execute(sql, (stream_id, room_id))
+
+ self.db_pool.simple_delete_txn(
+ txn, table="current_state_events", keyvalues={"room_id": room_id},
+ )
+ else:
+ # We're still in the room, so we update the current state as normal.
+
+ # First we add entries to the current_state_delta_stream. We
+ # do this before updating the current_state_events table so
+ # that we can use it to calculate the `prev_event_id`. (This
+ # allows us to not have to pull out the existing state
+ # unnecessarily).
+ #
+ # The stream_id for the update is chosen to be the minimum of the stream_ids
+ # for the batch of the events that we are persisting; that means we do not
+ # end up in a situation where workers see events before the
+ # current_state_delta updates.
+ #
+ sql = """
+ INSERT INTO current_state_delta_stream
+ (stream_id, room_id, type, state_key, event_id, prev_event_id)
+ SELECT ?, ?, ?, ?, ?, (
+ SELECT event_id FROM current_state_events
+ WHERE room_id = ? AND type = ? AND state_key = ?
+ )
+ """
+ txn.executemany(
+ sql,
+ (
+ (
+ stream_id,
+ room_id,
+ etype,
+ state_key,
+ to_insert.get((etype, state_key)),
+ room_id,
+ etype,
+ state_key,
+ )
+ for etype, state_key in itertools.chain(to_delete, to_insert)
+ ),
+ )
+ # Now we actually update the current_state_events table
+
+ txn.executemany(
+ "DELETE FROM current_state_events"
+ " WHERE room_id = ? AND type = ? AND state_key = ?",
+ (
+ (room_id, etype, state_key)
+ for etype, state_key in itertools.chain(to_delete, to_insert)
+ ),
+ )
+
+ # We include the membership in the current state table, hence we do
+ # a lookup when we insert. This assumes that all events have already
+ # been inserted into room_memberships.
+ txn.executemany(
+ """INSERT INTO current_state_events
+ (room_id, type, state_key, event_id, membership)
+ VALUES (?, ?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
+ """,
+ [
+ (room_id, key[0], key[1], ev_id, ev_id)
+ for key, ev_id in to_insert.items()
+ ],
+ )
+
+ # We now update `local_current_membership`. We do this regardless
+ # of whether we're still in the room or not to handle the case where
+ # e.g. we just got banned (where we need to record that fact here).
+
+ # Note: Do we really want to delete rows here (that we do not
+ # subsequently reinsert below)? While technically correct it means
+ # we have no record of the fact the user *was* a member of the
+ # room but got, say, state reset out of it.
+ if to_delete or to_insert:
+ txn.executemany(
+ "DELETE FROM local_current_membership"
+ " WHERE room_id = ? AND user_id = ?",
+ (
+ (room_id, state_key)
+ for etype, state_key in itertools.chain(to_delete, to_insert)
+ if etype == EventTypes.Member and self.is_mine_id(state_key)
+ ),
+ )
+
+ if to_insert:
+ txn.executemany(
+ """INSERT INTO local_current_membership
+ (room_id, user_id, event_id, membership)
+ VALUES (?, ?, ?, (SELECT membership FROM room_memberships WHERE event_id = ?))
+ """,
+ [
+ (room_id, key[1], ev_id, ev_id)
+ for key, ev_id in to_insert.items()
+ if key[0] == EventTypes.Member and self.is_mine_id(key[1])
+ ],
+ )
+
+ txn.call_after(
+ self.store._curr_state_delta_stream_cache.entity_has_changed,
+ room_id,
+ stream_id,
+ )
+
+ # Invalidate the various caches
+
+ # Figure out the changes of membership to invalidate the
+ # `get_rooms_for_user` cache.
+ # We find out which membership events we may have deleted
+            # and which we have added, then we invalidate the caches for all
+ # those users.
+ members_changed = {
+ state_key
+ for ev_type, state_key in itertools.chain(to_delete, to_insert)
+ if ev_type == EventTypes.Member
+ }
+
+ for member in members_changed:
+ txn.call_after(
+ self.store.get_rooms_for_user_with_stream_ordering.invalidate,
+ (member,),
+ )
+
+ self.store._invalidate_state_caches_and_stream(
+ txn, room_id, members_changed
+ )
+
+ def _upsert_room_version_txn(self, txn: LoggingTransaction, room_id: str):
+ """Update the room version in the database based off current state
+ events.
+
+ This is used when we're about to delete current state and we want to
+ ensure that the `rooms.room_version` column is up to date.
+ """
+
+ sql = """
+ SELECT json FROM event_json
+ INNER JOIN current_state_events USING (room_id, event_id)
+ WHERE room_id = ? AND type = ? AND state_key = ?
+ """
+ txn.execute(sql, (room_id, EventTypes.Create, ""))
+ row = txn.fetchone()
+ if row:
+ event_json = db_to_json(row[0])
+ content = event_json.get("content", {})
+ creator = content.get("creator")
+ room_version_id = content.get("room_version", RoomVersions.V1.identifier)
+
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="rooms",
+ keyvalues={"room_id": room_id},
+ values={"room_version": room_version_id},
+ insertion_values={"is_public": False, "creator": creator},
+ )
+
+ def _update_forward_extremities_txn(
+ self, txn, new_forward_extremities, max_stream_order
+ ):
+ for room_id, new_extrem in new_forward_extremities.items():
+ self.db_pool.simple_delete_txn(
+ txn, table="event_forward_extremities", keyvalues={"room_id": room_id}
+ )
+ txn.call_after(
+ self.store.get_latest_event_ids_in_room.invalidate, (room_id,)
+ )
+
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="event_forward_extremities",
+ values=[
+ {"event_id": ev_id, "room_id": room_id}
+ for room_id, new_extrem in new_forward_extremities.items()
+ for ev_id in new_extrem
+ ],
+ )
+        # We now insert into stream_ordering_to_exterm a mapping from room_id,
+        # new stream_ordering to new forward extremities in the room.
+        # This allows us to later efficiently look up the forward extremities
+        # for a room before a given stream_ordering.
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="stream_ordering_to_exterm",
+ values=[
+ {
+ "room_id": room_id,
+ "event_id": event_id,
+ "stream_ordering": max_stream_order,
+ }
+ for room_id, new_extrem in new_forward_extremities.items()
+ for event_id in new_extrem
+ ],
+ )
+
+ @classmethod
+ def _filter_events_and_contexts_for_duplicates(cls, events_and_contexts):
+ """Ensure that we don't have the same event twice.
+
+ Pick the earliest non-outlier if there is one, else the earliest one.
+
+ Args:
+ events_and_contexts (list[(EventBase, EventContext)]):
+ Returns:
+ list[(EventBase, EventContext)]: filtered list
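+
+        For example (illustrative): given [(A_outlier, ctx1), (A, ctx2)] for
+        the same event_id, the non-outlier copy (A, ctx2) wins and appears
+        once in the result.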
+ """
+ new_events_and_contexts = OrderedDict()
+ for event, context in events_and_contexts:
+ prev_event_context = new_events_and_contexts.get(event.event_id)
+ if prev_event_context:
+ if not event.internal_metadata.is_outlier():
+ if prev_event_context[0].internal_metadata.is_outlier():
+ # To ensure correct ordering we pop, as OrderedDict is
+ # ordered by first insertion.
+ new_events_and_contexts.pop(event.event_id, None)
+ new_events_and_contexts[event.event_id] = (event, context)
+ else:
+ new_events_and_contexts[event.event_id] = (event, context)
+ return list(new_events_and_contexts.values())
+
+ def _update_room_depths_txn(self, txn, events_and_contexts, backfilled):
+ """Update min_depth for each room
+
+ Args:
+ txn (twisted.enterprise.adbapi.Connection): db connection
+ events_and_contexts (list[(EventBase, EventContext)]): events
+ we are persisting
+ backfilled (bool): True if the events were backfilled
+ """
+ depth_updates = {}
+ for event, context in events_and_contexts:
+            # Remove any existing cache entries for the event_ids
+ txn.call_after(self.store._invalidate_get_event_cache, event.event_id)
+ if not backfilled:
+ txn.call_after(
+ self.store._events_stream_cache.entity_has_changed,
+ event.room_id,
+ event.internal_metadata.stream_ordering,
+ )
+
+ if not event.internal_metadata.is_outlier() and not context.rejected:
+ depth_updates[event.room_id] = max(
+ event.depth, depth_updates.get(event.room_id, event.depth)
+ )
+
+ for room_id, depth in depth_updates.items():
+ self._update_min_depth_for_room_txn(txn, room_id, depth)
+
+ def _update_outliers_txn(self, txn, events_and_contexts):
+ """Update any outliers with new event info.
+
+ This turns outliers into ex-outliers (unless the new event was
+ rejected).
+
+ Args:
+ txn (twisted.enterprise.adbapi.Connection): db connection
+ events_and_contexts (list[(EventBase, EventContext)]): events
+ we are persisting
+
+ Returns:
+ list[(EventBase, EventContext)] new list, without events which
+ are already in the events table.
+ """
+ txn.execute(
+ "SELECT event_id, outlier FROM events WHERE event_id in (%s)"
+ % (",".join(["?"] * len(events_and_contexts)),),
+ [event.event_id for event, _ in events_and_contexts],
+ )
+
+ have_persisted = {event_id: outlier for event_id, outlier in txn}
+
+ to_remove = set()
+ for event, context in events_and_contexts:
+ if event.event_id not in have_persisted:
+ continue
+
+ to_remove.add(event)
+
+ if context.rejected:
+ # If the event is rejected then we don't care if the event
+ # was an outlier or not.
+ continue
+
+ outlier_persisted = have_persisted[event.event_id]
+ if not event.internal_metadata.is_outlier() and outlier_persisted:
+ # We received a copy of an event that we had already stored as
+                # an outlier in the database. We now have some state at that
+                # event, so we need to update the state_groups table with
+                # that state.
+
+ # insert into event_to_state_groups.
+ try:
+ self._store_event_state_mappings_txn(txn, ((event, context),))
+ except Exception:
+                    logger.exception(
+                        "Failed to store state mappings for ex-outlier event"
+                    )
+ raise
+
+ metadata_json = encode_json(event.internal_metadata.get_dict())
+
+ sql = "UPDATE event_json SET internal_metadata = ? WHERE event_id = ?"
+ txn.execute(sql, (metadata_json, event.event_id))
+
+ # Add an entry to the ex_outlier_stream table to replicate the
+ # change in outlier status to our workers.
+ stream_order = event.internal_metadata.stream_ordering
+ state_group_id = context.state_group
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="ex_outlier_stream",
+ values={
+ "event_stream_ordering": stream_order,
+ "event_id": event.event_id,
+ "state_group": state_group_id,
+ },
+ )
+
+ sql = "UPDATE events SET outlier = ? WHERE event_id = ?"
+ txn.execute(sql, (False, event.event_id))
+
+ # Update the event_backward_extremities table now that this
+ # event isn't an outlier any more.
+ self._update_backward_extremeties(txn, [event])
+
+ return [ec for ec in events_and_contexts if ec[0] not in to_remove]
+
+ def _store_event_txn(self, txn, events_and_contexts):
+ """Insert new events into the event and event_json tables
+
+ Args:
+ txn (twisted.enterprise.adbapi.Connection): db connection
+ events_and_contexts (list[(EventBase, EventContext)]): events
+ we are persisting
+ """
+
+ if not events_and_contexts:
+ # nothing to do here
+ return
+
+ def event_dict(event):
+ d = event.get_dict()
+ d.pop("redacted", None)
+ d.pop("redacted_because", None)
+ return d
+
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="event_json",
+ values=[
+ {
+ "event_id": event.event_id,
+ "room_id": event.room_id,
+ "internal_metadata": encode_json(
+ event.internal_metadata.get_dict()
+ ),
+ "json": encode_json(event_dict(event)),
+ "format_version": event.format_version,
+ }
+ for event, _ in events_and_contexts
+ ],
+ )
+
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="events",
+ values=[
+ {
+ "stream_ordering": event.internal_metadata.stream_ordering,
+ "topological_ordering": event.depth,
+ "depth": event.depth,
+ "event_id": event.event_id,
+ "room_id": event.room_id,
+ "type": event.type,
+ "processed": True,
+ "outlier": event.internal_metadata.is_outlier(),
+ "origin_server_ts": int(event.origin_server_ts),
+ "received_ts": self._clock.time_msec(),
+ "sender": event.sender,
+ "contains_url": (
+ "url" in event.content and isinstance(event.content["url"], str)
+ ),
+ }
+ for event, _ in events_and_contexts
+ ],
+ )
+
+ for event, _ in events_and_contexts:
+ if not event.internal_metadata.is_redacted():
+ # If we're persisting an unredacted event we go and ensure
+ # that we mark any redactions that reference this event as
+ # requiring censoring.
+ self.db_pool.simple_update_txn(
+ txn,
+ table="redactions",
+ keyvalues={"redacts": event.event_id},
+ updatevalues={"have_censored": False},
+ )
+
+ def _store_rejected_events_txn(self, txn, events_and_contexts):
+ """Add rows to the 'rejections' table for received events which were
+ rejected
+
+ Args:
+ txn (twisted.enterprise.adbapi.Connection): db connection
+ events_and_contexts (list[(EventBase, EventContext)]): events
+ we are persisting
+
+ Returns:
+ list[(EventBase, EventContext)] new list, without the rejected
+ events.
+ """
+        # Remove the rejected events from the list now that we've added them
+        # to the events table and the event_json table.
+ to_remove = set()
+ for event, context in events_and_contexts:
+ if context.rejected:
+ # Insert the event_id into the rejections table
+ self._store_rejections_txn(txn, event.event_id, context.rejected)
+ to_remove.add(event)
+
+ return [ec for ec in events_and_contexts if ec[0] not in to_remove]
+
+ def _update_metadata_tables_txn(
+ self, txn, events_and_contexts, all_events_and_contexts, backfilled
+ ):
+ """Update all the miscellaneous tables for new events
+
+ Args:
+ txn (twisted.enterprise.adbapi.Connection): db connection
+ events_and_contexts (list[(EventBase, EventContext)]): events
+ we are persisting
+ all_events_and_contexts (list[(EventBase, EventContext)]): all
+ events that we were going to persist. This includes events
+ we've already persisted, etc, that wouldn't appear in
+                events_and_contexts.
+ backfilled (bool): True if the events were backfilled
+ """
+
+ # Insert all the push actions into the event_push_actions table.
+ self._set_push_actions_for_event_and_users_txn(
+ txn,
+ events_and_contexts=events_and_contexts,
+ all_events_and_contexts=all_events_and_contexts,
+ )
+
+ if not events_and_contexts:
+ # nothing to do here
+ return
+
+ for event, context in events_and_contexts:
+ if event.type == EventTypes.Redaction and event.redacts is not None:
+ # Remove the entries in the event_push_actions table for the
+ # redacted event.
+ self._remove_push_actions_for_event_id_txn(
+ txn, event.room_id, event.redacts
+ )
+
+ # Remove from relations table.
+ self._handle_redaction(txn, event.redacts)
+
+ # Update the event_forward_extremities, event_backward_extremities and
+ # event_edges tables.
+ self._handle_mult_prev_events(
+ txn, events=[event for event, _ in events_and_contexts]
+ )
+
+ for event, _ in events_and_contexts:
+ if event.type == EventTypes.Name:
+ # Insert into the event_search table.
+ self._store_room_name_txn(txn, event)
+ elif event.type == EventTypes.Topic:
+ # Insert into the event_search table.
+ self._store_room_topic_txn(txn, event)
+ elif event.type == EventTypes.Message:
+ # Insert into the event_search table.
+ self._store_room_message_txn(txn, event)
+ elif event.type == EventTypes.Redaction and event.redacts is not None:
+ # Insert into the redactions table.
+ self._store_redaction(txn, event)
+ elif event.type == EventTypes.Retention:
+ # Update the room_retention table.
+ self._store_retention_policy_for_room_txn(txn, event)
+
+ self._handle_event_relations(txn, event)
+
+ # Store the labels for this event.
+ labels = event.content.get(EventContentFields.LABELS)
+ if labels:
+ self.insert_labels_for_event_txn(
+ txn, event.event_id, labels, event.room_id, event.depth
+ )
+
+ if self._ephemeral_messages_enabled:
+ # If there's an expiry timestamp on the event, store it.
+ expiry_ts = event.content.get(EventContentFields.SELF_DESTRUCT_AFTER)
+ if isinstance(expiry_ts, int) and not event.is_state():
+ self._insert_event_expiry_txn(txn, event.event_id, expiry_ts)
+
+ # Insert into the room_memberships table.
+ self._store_room_members_txn(
+ txn,
+ [
+ event
+ for event, _ in events_and_contexts
+ if event.type == EventTypes.Member
+ ],
+ backfilled=backfilled,
+ )
+
+ # Insert event_reference_hashes table.
+ self._store_event_reference_hashes_txn(
+ txn, [event for event, _ in events_and_contexts]
+ )
+
+ state_events_and_contexts = [
+ ec for ec in events_and_contexts if ec[0].is_state()
+ ]
+
+ state_values = []
+ for event, context in state_events_and_contexts:
+ vals = {
+ "event_id": event.event_id,
+ "room_id": event.room_id,
+ "type": event.type,
+ "state_key": event.state_key,
+ }
+
+ # TODO: How does this work with backfilling?
+ if hasattr(event, "replaces_state"):
+ vals["prev_state"] = event.replaces_state
+
+ state_values.append(vals)
+
+ self.db_pool.simple_insert_many_txn(
+ txn, table="state_events", values=state_values
+ )
+
+ # Prefill the event cache
+ self._add_to_cache(txn, events_and_contexts)
+
+ def _add_to_cache(self, txn, events_and_contexts):
+ to_prefill = []
+
+ rows = []
+ N = 200
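+        # Look up redaction/rejection flags in batches of N event IDs so the
+        # generated SQL clause stays bounded.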
+ for i in range(0, len(events_and_contexts), N):
+ ev_map = {e[0].event_id: e[0] for e in events_and_contexts[i : i + N]}
+ if not ev_map:
+ break
+
+ sql = (
+ "SELECT "
+ " e.event_id as event_id, "
+ " r.redacts as redacts,"
+ " rej.event_id as rejects "
+ " FROM events as e"
+ " LEFT JOIN rejections as rej USING (event_id)"
+ " LEFT JOIN redactions as r ON e.event_id = r.redacts"
+ " WHERE "
+ )
+
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "e.event_id", list(ev_map)
+ )
+
+ txn.execute(sql + clause, args)
+ rows = self.db_pool.cursor_to_dict(txn)
+ for row in rows:
+ event = ev_map[row["event_id"]]
+ if not row["rejects"] and not row["redacts"]:
+ to_prefill.append(
+ _EventCacheEntry(event=event, redacted_event=None)
+ )
+
+ def prefill():
+ for cache_entry in to_prefill:
+ self.store._get_event_cache.prefill(
+ (cache_entry[0].event_id,), cache_entry
+ )
+
+ txn.call_after(prefill)
+
+ def _store_redaction(self, txn, event):
+ # invalidate the cache for the redacted event
+ txn.call_after(self.store._invalidate_get_event_cache, event.redacts)
+
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="redactions",
+ values={
+ "event_id": event.event_id,
+ "redacts": event.redacts,
+ "received_ts": self._clock.time_msec(),
+ },
+ )
+
+ def insert_labels_for_event_txn(
+ self, txn, event_id, labels, room_id, topological_ordering
+ ):
+ """Store the mapping between an event's ID and its labels, with one row per
+ (event_id, label) tuple.
+
+ Args:
+ txn (LoggingTransaction): The transaction to execute.
+ event_id (str): The event's ID.
+ labels (list[str]): A list of text labels.
+ room_id (str): The ID of the room the event was sent to.
+ topological_ordering (int): The position of the event in the room's topology.
+ """
+ return self.db_pool.simple_insert_many_txn(
+ txn=txn,
+ table="event_labels",
+ values=[
+ {
+ "event_id": event_id,
+ "label": label,
+ "room_id": room_id,
+ "topological_ordering": topological_ordering,
+ }
+ for label in labels
+ ],
+ )
+
+ def _insert_event_expiry_txn(self, txn, event_id, expiry_ts):
+ """Save the expiry timestamp associated with a given event ID.
+
+ Args:
+ txn (LoggingTransaction): The database transaction to use.
+ event_id (str): The event ID the expiry timestamp is associated with.
+ expiry_ts (int): The timestamp at which to expire (delete) the event.
+ """
+ return self.db_pool.simple_insert_txn(
+ txn=txn,
+ table="event_expiry",
+ values={"event_id": event_id, "expiry_ts": expiry_ts},
+ )
+
+ def _store_event_reference_hashes_txn(self, txn, events):
+ """Store a hash for a PDU
+ Args:
+ txn (cursor):
+ events (list): list of Events.
+ """
+
+ vals = []
+ for event in events:
+ ref_alg, ref_hash_bytes = compute_event_reference_hash(event)
+ vals.append(
+ {
+ "event_id": event.event_id,
+ "algorithm": ref_alg,
+ "hash": memoryview(ref_hash_bytes),
+ }
+ )
+
+ self.db_pool.simple_insert_many_txn(
+ txn, table="event_reference_hashes", values=vals
+ )
+
+ def _store_room_members_txn(self, txn, events, backfilled):
+ """Store a room member in the database.
+ """
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="room_memberships",
+ values=[
+ {
+ "event_id": event.event_id,
+ "user_id": event.state_key,
+ "sender": event.user_id,
+ "room_id": event.room_id,
+ "membership": event.membership,
+ "display_name": event.content.get("displayname", None),
+ "avatar_url": event.content.get("avatar_url", None),
+ }
+ for event in events
+ ],
+ )
+
+ for event in events:
+ txn.call_after(
+ self.store._membership_stream_cache.entity_has_changed,
+ event.state_key,
+ event.internal_metadata.stream_ordering,
+ )
+ txn.call_after(
+ self.store.get_invited_rooms_for_local_user.invalidate,
+ (event.state_key,),
+ )
+
+ # We update the local_current_membership table only if the event is
+            # "current", i.e., it's something that has just happened.
+ #
+ # This will usually get updated by the `current_state_events` handling,
+            # unless it's an outlier, and an outlier is only "current" if it's an "out of
+ # band membership", like a remote invite or a rejection of a remote invite.
+ if (
+ self.is_mine_id(event.state_key)
+ and not backfilled
+ and event.internal_metadata.is_outlier()
+ and event.internal_metadata.is_out_of_band_membership()
+ ):
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="local_current_membership",
+ keyvalues={"room_id": event.room_id, "user_id": event.state_key},
+ values={
+ "event_id": event.event_id,
+ "membership": event.membership,
+ },
+ )
+
+ def _handle_event_relations(self, txn, event):
+        """Handles inserting relation data during persistence of events
+
+ Args:
+ txn
+ event (EventBase)
+ """
+ relation = event.content.get("m.relates_to")
+ if not relation:
+ # No relations
+ return
+
+ rel_type = relation.get("rel_type")
+ if rel_type not in (
+ RelationTypes.ANNOTATION,
+ RelationTypes.REFERENCE,
+ RelationTypes.REPLACE,
+ ):
+ # Unknown relation type
+ return
+
+ parent_id = relation.get("event_id")
+ if not parent_id:
+ # Invalid relation
+ return
+
+ aggregation_key = relation.get("key")
+
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="event_relations",
+ values={
+ "event_id": event.event_id,
+ "relates_to_id": parent_id,
+ "relation_type": rel_type,
+ "aggregation_key": aggregation_key,
+ },
+ )
+
+ txn.call_after(self.store.get_relations_for_event.invalidate_many, (parent_id,))
+ txn.call_after(
+ self.store.get_aggregation_groups_for_event.invalidate_many, (parent_id,)
+ )
+
+ if rel_type == RelationTypes.REPLACE:
+ txn.call_after(self.store.get_applicable_edit.invalidate, (parent_id,))
+
+ def _handle_redaction(self, txn, redacted_event_id):
+ """Handles receiving a redaction and checking whether we need to remove
+ any redacted relations from the database.
+
+ Args:
+ txn
+ redacted_event_id (str): The event that was redacted.
+ """
+
+ self.db_pool.simple_delete_txn(
+ txn, table="event_relations", keyvalues={"event_id": redacted_event_id}
+ )
+
+ def _store_room_topic_txn(self, txn, event):
+ if hasattr(event, "content") and "topic" in event.content:
+ self.store_event_search_txn(
+ txn, event, "content.topic", event.content["topic"]
+ )
+
+ def _store_room_name_txn(self, txn, event):
+ if hasattr(event, "content") and "name" in event.content:
+ self.store_event_search_txn(
+ txn, event, "content.name", event.content["name"]
+ )
+
+ def _store_room_message_txn(self, txn, event):
+ if hasattr(event, "content") and "body" in event.content:
+ self.store_event_search_txn(
+ txn, event, "content.body", event.content["body"]
+ )
+
+ def _store_retention_policy_for_room_txn(self, txn, event):
+ if hasattr(event, "content") and (
+ "min_lifetime" in event.content or "max_lifetime" in event.content
+ ):
+ if (
+ "min_lifetime" in event.content
+ and not isinstance(event.content.get("min_lifetime"), int)
+ ) or (
+ "max_lifetime" in event.content
+ and not isinstance(event.content.get("max_lifetime"), int)
+ ):
+                # Ignore the event if one of the values isn't an integer.
+ return
+
+ self.db_pool.simple_insert_txn(
+ txn=txn,
+ table="room_retention",
+ values={
+ "room_id": event.room_id,
+ "event_id": event.event_id,
+ "min_lifetime": event.content.get("min_lifetime"),
+ "max_lifetime": event.content.get("max_lifetime"),
+ },
+ )
+
+ self.store._invalidate_cache_and_stream(
+ txn, self.store.get_retention_policy_for_room, (event.room_id,)
+ )
+
+ def store_event_search_txn(self, txn, event, key, value):
+ """Add event to the search table
+
+ Args:
+ txn (cursor):
+ event (EventBase):
+ key (str):
+ value (str):
+ """
+ self.store.store_search_entries_txn(
+ txn,
+ (
+ SearchEntry(
+ key=key,
+ value=value,
+ event_id=event.event_id,
+ room_id=event.room_id,
+ stream_ordering=event.internal_metadata.stream_ordering,
+ origin_server_ts=event.origin_server_ts,
+ ),
+ ),
+ )
+
+ def _set_push_actions_for_event_and_users_txn(
+ self, txn, events_and_contexts, all_events_and_contexts
+ ):
+        """Handles moving push actions from the staging table to the main
+        event_push_actions table for all events in `events_and_contexts`.
+
+ Also ensures that all events in `all_events_and_contexts` are removed
+ from the push action staging area.
+
+ Args:
+ events_and_contexts (list[(EventBase, EventContext)]): events
+ we are persisting
+ all_events_and_contexts (list[(EventBase, EventContext)]): all
+ events that we were going to persist. This includes events
+                we've already persisted, etc., that wouldn't appear in
+                events_and_contexts.
+ """
+
+ sql = """
+ INSERT INTO event_push_actions (
+ room_id, event_id, user_id, actions, stream_ordering,
+ topological_ordering, notif, highlight
+ )
+ SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight
+ FROM event_push_actions_staging
+ WHERE event_id = ?
+ """
+
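+        # The bound parameters below fill the placeholders in order: the
+        # room_id, the stream_ordering, the topological_ordering (we use the
+        # event's depth) and the event_id used to select the staged rows.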
+ if events_and_contexts:
+ txn.executemany(
+ sql,
+ (
+ (
+ event.room_id,
+ event.internal_metadata.stream_ordering,
+ event.depth,
+ event.event_id,
+ )
+ for event, _ in events_and_contexts
+ ),
+ )
+
+ for event, _ in events_and_contexts:
+ user_ids = self.db_pool.simple_select_onecol_txn(
+ txn,
+ table="event_push_actions_staging",
+ keyvalues={"event_id": event.event_id},
+ retcol="user_id",
+ )
+
+ for uid in user_ids:
+ txn.call_after(
+ self.store.get_unread_event_push_actions_by_room_for_user.invalidate_many,
+ (event.room_id, uid),
+ )
+
+ # Now we delete the staging area for *all* events that were being
+ # persisted.
+ txn.executemany(
+ "DELETE FROM event_push_actions_staging WHERE event_id = ?",
+ ((event.event_id,) for event, _ in all_events_and_contexts),
+ )
+
+ def _remove_push_actions_for_event_id_txn(self, txn, room_id, event_id):
+ # Sad that we have to blow away the cache for the whole room here
+ txn.call_after(
+ self.store.get_unread_event_push_actions_by_room_for_user.invalidate_many,
+ (room_id,),
+ )
+ txn.execute(
+ "DELETE FROM event_push_actions WHERE room_id = ? AND event_id = ?",
+ (room_id, event_id),
+ )
+
+ def _store_rejections_txn(self, txn, event_id, reason):
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="rejections",
+ values={
+ "event_id": event_id,
+ "reason": reason,
+ "last_check": self._clock.time_msec(),
+ },
+ )
+
+ def _store_event_state_mappings_txn(
+ self, txn, events_and_contexts: Iterable[Tuple[EventBase, EventContext]]
+ ):
+ state_groups = {}
+ for event, context in events_and_contexts:
+ if event.internal_metadata.is_outlier():
+ continue
+
+ # if the event was rejected, just give it the same state as its
+ # predecessor.
+ if context.rejected:
+ state_groups[event.event_id] = context.state_group_before_event
+ continue
+
+ state_groups[event.event_id] = context.state_group
+
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="event_to_state_groups",
+ values=[
+ {"state_group": state_group_id, "event_id": event_id}
+ for event_id, state_group_id in state_groups.items()
+ ],
+ )
+
+ for event_id, state_group_id in state_groups.items():
+ txn.call_after(
+ self.store._get_state_group_for_event.prefill,
+ (event_id,),
+ state_group_id,
+ )
+
+ def _update_min_depth_for_room_txn(self, txn, room_id, depth):
+ min_depth = self.store._get_min_depth_interaction(txn, room_id)
+
+ if min_depth is not None and depth >= min_depth:
+ return
+
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="room_depth",
+ keyvalues={"room_id": room_id},
+ values={"min_depth": depth},
+ )
+
+ def _handle_mult_prev_events(self, txn, events):
+ """
+        For the given events, update the event edges table and the forward and
+ backward extremities tables.
+ """
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="event_edges",
+ values=[
+ {
+ "event_id": ev.event_id,
+ "prev_event_id": e_id,
+ "room_id": ev.room_id,
+ "is_state": False,
+ }
+ for ev in events
+ for e_id in ev.prev_event_ids()
+ ],
+ )
+
+ self._update_backward_extremeties(txn, events)
+
+ def _update_backward_extremeties(self, txn, events):
+        """Updates the event_backward_extremities table based on the new/updated
+ events being persisted.
+
+ This is called for new events *and* for events that were outliers, but
+ are now being persisted as non-outliers.
+
+ Forward extremities are handled when we first start persisting the events.
+ """
+ events_by_room = {}
+ for ev in events:
+ events_by_room.setdefault(ev.room_id, []).append(ev)
+
+ query = (
+ "INSERT INTO event_backward_extremities (event_id, room_id)"
+ " SELECT ?, ? WHERE NOT EXISTS ("
+ " SELECT 1 FROM event_backward_extremities"
+ " WHERE event_id = ? AND room_id = ?"
+ " )"
+ " AND NOT EXISTS ("
+ " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? "
+ " AND outlier = ?"
+ " )"
+ )
+
+ txn.executemany(
+ query,
+ [
+ (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False)
+ for ev in events
+ for e_id in ev.prev_event_ids()
+ if not ev.internal_metadata.is_outlier()
+ ],
+ )
+
+ query = (
+ "DELETE FROM event_backward_extremities"
+ " WHERE event_id = ? AND room_id = ?"
+ )
+ txn.executemany(
+ query,
+ [
+ (ev.event_id, ev.room_id)
+ for ev in events
+ if not ev.internal_metadata.is_outlier()
+ ],
+ )
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
new file mode 100644
index 00000000..35a0e09e
--- /dev/null
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -0,0 +1,585 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventContentFields
+from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
+from synapse.storage.database import DatabasePool
+
+logger = logging.getLogger(__name__)
+
+
+class EventsBackgroundUpdatesStore(SQLBaseStore):
+
+ EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts"
+ EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url"
+ DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities"
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(EventsBackgroundUpdatesStore, self).__init__(database, db_conn, hs)
+
+ self.db_pool.updates.register_background_update_handler(
+ self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts
+ )
+ self.db_pool.updates.register_background_update_handler(
+ self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME,
+ self._background_reindex_fields_sender,
+ )
+
+ self.db_pool.updates.register_background_index_update(
+ "event_contains_url_index",
+ index_name="event_contains_url_index",
+ table="events",
+ columns=["room_id", "topological_ordering", "stream_ordering"],
+ where_clause="contains_url = true AND outlier = false",
+ )
+
+        # an event_id index on event_search is useful for the purge_history
+        # API. Plus it means we get to enforce some integrity with a UNIQUE
+        # clause.
+ self.db_pool.updates.register_background_index_update(
+ "event_search_event_id_idx",
+ index_name="event_search_event_id_idx",
+ table="event_search",
+ columns=["event_id"],
+ unique=True,
+ psql_only=True,
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ self.DELETE_SOFT_FAILED_EXTREMITIES, self._cleanup_extremities_bg_update
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ "redactions_received_ts", self._redactions_received_ts
+ )
+
+ # This index gets deleted in `event_fix_redactions_bytes` update
+ self.db_pool.updates.register_background_index_update(
+ "event_fix_redactions_bytes_create_index",
+ index_name="redactions_censored_redacts",
+ table="redactions",
+ columns=["redacts"],
+ where_clause="have_censored",
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ "event_fix_redactions_bytes", self._event_fix_redactions_bytes
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ "event_store_labels", self._event_store_labels
+ )
+
+ self.db_pool.updates.register_background_index_update(
+ "redactions_have_censored_ts_idx",
+ index_name="redactions_have_censored_ts",
+ table="redactions",
+ columns=["received_ts"],
+ where_clause="NOT have_censored",
+ )
+
+ @defer.inlineCallbacks
+ def _background_reindex_fields_sender(self, progress, batch_size):
+ target_min_stream_id = progress["target_min_stream_id_inclusive"]
+ max_stream_id = progress["max_stream_id_exclusive"]
+ rows_inserted = progress.get("rows_inserted", 0)
+
+ INSERT_CLUMP_SIZE = 1000
+
+ def reindex_txn(txn):
+ sql = (
+ "SELECT stream_ordering, event_id, json FROM events"
+ " INNER JOIN event_json USING (event_id)"
+ " WHERE ? <= stream_ordering AND stream_ordering < ?"
+ " ORDER BY stream_ordering DESC"
+ " LIMIT ?"
+ )
+
+ txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
+
+ rows = txn.fetchall()
+ if not rows:
+ return 0
+
+ min_stream_id = rows[-1][0]
+
+ update_rows = []
+ for row in rows:
+ try:
+ event_id = row[1]
+ event_json = db_to_json(row[2])
+ sender = event_json["sender"]
+ content = event_json["content"]
+
+ contains_url = "url" in content
+ if contains_url:
+ contains_url &= isinstance(content["url"], str)
+ except (KeyError, AttributeError):
+ # If the event is missing a necessary field then
+ # skip over it.
+ continue
+
+ update_rows.append((sender, contains_url, event_id))
+
+ sql = "UPDATE events SET sender = ?, contains_url = ? WHERE event_id = ?"
+
+ for index in range(0, len(update_rows), INSERT_CLUMP_SIZE):
+ clump = update_rows[index : index + INSERT_CLUMP_SIZE]
+ txn.executemany(sql, clump)
+
+ progress = {
+ "target_min_stream_id_inclusive": target_min_stream_id,
+ "max_stream_id_exclusive": min_stream_id,
+ "rows_inserted": rows_inserted + len(rows),
+ }
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, progress
+ )
+
+ return len(rows)
+
+ result = yield self.db_pool.runInteraction(
+ self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME, reindex_txn
+ )
+
+ if not result:
+ yield self.db_pool.updates._end_background_update(
+ self.EVENT_FIELDS_SENDER_URL_UPDATE_NAME
+ )
+
+ return result
+
+ @defer.inlineCallbacks
+ def _background_reindex_origin_server_ts(self, progress, batch_size):
+ target_min_stream_id = progress["target_min_stream_id_inclusive"]
+ max_stream_id = progress["max_stream_id_exclusive"]
+ rows_inserted = progress.get("rows_inserted", 0)
+
+ INSERT_CLUMP_SIZE = 1000
+
+ def reindex_search_txn(txn):
+ sql = (
+ "SELECT stream_ordering, event_id FROM events"
+ " WHERE ? <= stream_ordering AND stream_ordering < ?"
+ " ORDER BY stream_ordering DESC"
+ " LIMIT ?"
+ )
+
+ txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
+
+ rows = txn.fetchall()
+ if not rows:
+ return 0
+
+ min_stream_id = rows[-1][0]
+ event_ids = [row[1] for row in rows]
+
+ rows_to_update = []
+
+ chunks = [event_ids[i : i + 100] for i in range(0, len(event_ids), 100)]
+ for chunk in chunks:
+ ev_rows = self.db_pool.simple_select_many_txn(
+ txn,
+ table="event_json",
+ column="event_id",
+ iterable=chunk,
+ retcols=["event_id", "json"],
+ keyvalues={},
+ )
+
+ for row in ev_rows:
+ event_id = row["event_id"]
+ event_json = db_to_json(row["json"])
+ try:
+ origin_server_ts = event_json["origin_server_ts"]
+ except (KeyError, AttributeError):
+ # If the event is missing a necessary field then
+ # skip over it.
+ continue
+
+ rows_to_update.append((origin_server_ts, event_id))
+
+ sql = "UPDATE events SET origin_server_ts = ? WHERE event_id = ?"
+
+ for index in range(0, len(rows_to_update), INSERT_CLUMP_SIZE):
+ clump = rows_to_update[index : index + INSERT_CLUMP_SIZE]
+ txn.executemany(sql, clump)
+
+ progress = {
+ "target_min_stream_id_inclusive": target_min_stream_id,
+ "max_stream_id_exclusive": min_stream_id,
+ "rows_inserted": rows_inserted + len(rows_to_update),
+ }
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, self.EVENT_ORIGIN_SERVER_TS_NAME, progress
+ )
+
+ return len(rows_to_update)
+
+ result = yield self.db_pool.runInteraction(
+ self.EVENT_ORIGIN_SERVER_TS_NAME, reindex_search_txn
+ )
+
+ if not result:
+ yield self.db_pool.updates._end_background_update(
+ self.EVENT_ORIGIN_SERVER_TS_NAME
+ )
+
+ return result
+
+ @defer.inlineCallbacks
+ def _cleanup_extremities_bg_update(self, progress, batch_size):
+ """Background update to clean out extremities that should have been
+ deleted previously.
+
+ Mainly used to deal with the aftermath of #5269.
+ """
+
+ # This works by first copying all existing forward extremities into the
+ # `_extremities_to_check` table at start up, and then checking each
+ # event in that table whether we have any descendants that are not
+ # soft-failed/rejected. If that is the case then we delete that event
+ # from the forward extremities table.
+ #
+ # For efficiency, we do this in batches by recursively pulling out all
+ # descendants of a batch until we find the non soft-failed/rejected
+ # events, i.e. the set of descendants whose chain of prev events back
+ # to the batch of extremities are all soft-failed or rejected.
+ # Typically, we won't find any such events as extremities will rarely
+ # have any descendants, but if they do then we should delete those
+ # extremities.
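+        #
+        # For example, if extremity E has a soft-failed child C1 whose own
+        # child C2 is neither soft-failed nor rejected, then C2 is a
+        # non-rejected leaf; walking back up the graph (C2 -> C1 -> E) marks E
+        # for deletion from the forward extremities table.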
+
+ def _cleanup_extremities_bg_update_txn(txn):
+ # The set of extremity event IDs that we're checking this round
+ original_set = set()
+
+            # A dict[str, set[str]] mapping each event ID to its prev events.
+ graph = {}
+
+ # The set of descendants of the original set that are not rejected
+ # nor soft-failed. Ancestors of these events should be removed
+ # from the forward extremities table.
+ non_rejected_leaves = set()
+
+ # Set of event IDs that have been soft failed, and for which we
+ # should check if they have descendants which haven't been soft
+ # failed.
+ soft_failed_events_to_lookup = set()
+
+ # First, we get `batch_size` events from the table, pulling out
+ # their successor events, if any, and the successor events'
+ # rejection status.
+ txn.execute(
+ """SELECT prev_event_id, event_id, internal_metadata,
+ rejections.event_id IS NOT NULL, events.outlier
+ FROM (
+ SELECT event_id AS prev_event_id
+ FROM _extremities_to_check
+ LIMIT ?
+ ) AS f
+ LEFT JOIN event_edges USING (prev_event_id)
+ LEFT JOIN events USING (event_id)
+ LEFT JOIN event_json USING (event_id)
+ LEFT JOIN rejections USING (event_id)
+ """,
+ (batch_size,),
+ )
+
+ for prev_event_id, event_id, metadata, rejected, outlier in txn:
+ original_set.add(prev_event_id)
+
+ if not event_id or outlier:
+ # Common case where the forward extremity doesn't have any
+ # descendants.
+ continue
+
+ graph.setdefault(event_id, set()).add(prev_event_id)
+
+ soft_failed = False
+ if metadata:
+ soft_failed = db_to_json(metadata).get("soft_failed")
+
+ if soft_failed or rejected:
+ soft_failed_events_to_lookup.add(event_id)
+ else:
+ non_rejected_leaves.add(event_id)
+
+ # Now we recursively check all the soft-failed descendants we
+ # found above in the same way, until we have nothing left to
+ # check.
+ while soft_failed_events_to_lookup:
+                # We only want to do 100 at a time, so we split the given list
+                # in two.
+ batch = list(soft_failed_events_to_lookup)
+ to_check, to_defer = batch[:100], batch[100:]
+ soft_failed_events_to_lookup = set(to_defer)
+
+ sql = """SELECT prev_event_id, event_id, internal_metadata,
+ rejections.event_id IS NOT NULL
+ FROM event_edges
+ INNER JOIN events USING (event_id)
+ INNER JOIN event_json USING (event_id)
+ LEFT JOIN rejections USING (event_id)
+ WHERE
+ NOT events.outlier
+ AND
+ """
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "prev_event_id", to_check
+ )
+ txn.execute(sql + clause, list(args))
+
+ for prev_event_id, event_id, metadata, rejected in txn:
+ if event_id in graph:
+ # Already handled this event previously, but we still
+ # want to record the edge.
+ graph[event_id].add(prev_event_id)
+ continue
+
+ graph[event_id] = {prev_event_id}
+
+ soft_failed = db_to_json(metadata).get("soft_failed")
+ if soft_failed or rejected:
+ soft_failed_events_to_lookup.add(event_id)
+ else:
+ non_rejected_leaves.add(event_id)
+
+ # We have a set of non-soft-failed descendants, so we recurse up
+ # the graph to find all ancestors and add them to the set of event
+ # IDs that we can delete from forward extremities table.
+ to_delete = set()
+ while non_rejected_leaves:
+ event_id = non_rejected_leaves.pop()
+ prev_event_ids = graph.get(event_id, set())
+ non_rejected_leaves.update(prev_event_ids)
+ to_delete.update(prev_event_ids)
+
+ to_delete.intersection_update(original_set)
+
+ deleted = self.db_pool.simple_delete_many_txn(
+ txn=txn,
+ table="event_forward_extremities",
+ column="event_id",
+ iterable=to_delete,
+ keyvalues={},
+ )
+
+ logger.info(
+ "Deleted %d forward extremities of %d checked, to clean up #5269",
+ deleted,
+ len(original_set),
+ )
+
+ if deleted:
+ # We now need to invalidate the caches of these rooms
+ rows = self.db_pool.simple_select_many_txn(
+ txn,
+ table="events",
+ column="event_id",
+ iterable=to_delete,
+ keyvalues={},
+ retcols=("room_id",),
+ )
+ room_ids = {row["room_id"] for row in rows}
+ for room_id in room_ids:
+ txn.call_after(
+ self.get_latest_event_ids_in_room.invalidate, (room_id,)
+ )
+
+ self.db_pool.simple_delete_many_txn(
+ txn=txn,
+ table="_extremities_to_check",
+ column="event_id",
+ iterable=original_set,
+ keyvalues={},
+ )
+
+ return len(original_set)
+
+ num_handled = yield self.db_pool.runInteraction(
+ "_cleanup_extremities_bg_update", _cleanup_extremities_bg_update_txn
+ )
+
+ if not num_handled:
+ yield self.db_pool.updates._end_background_update(
+ self.DELETE_SOFT_FAILED_EXTREMITIES
+ )
+
+ def _drop_table_txn(txn):
+ txn.execute("DROP TABLE _extremities_to_check")
+
+ yield self.db_pool.runInteraction(
+ "_cleanup_extremities_bg_update_drop_table", _drop_table_txn
+ )
+
+ return num_handled
+
+ @defer.inlineCallbacks
+ def _redactions_received_ts(self, progress, batch_size):
+ """Handles filling out the `received_ts` column in redactions.
+ """
+ last_event_id = progress.get("last_event_id", "")
+
+ def _redactions_received_ts_txn(txn):
+ # Fetch the set of event IDs that we want to update
+ sql = """
+ SELECT event_id FROM redactions
+ WHERE event_id > ?
+ ORDER BY event_id ASC
+ LIMIT ?
+ """
+
+ txn.execute(sql, (last_event_id, batch_size))
+
+ rows = txn.fetchall()
+ if not rows:
+ return 0
+
+ (upper_event_id,) = rows[-1]
+
+ # Update the redactions with the received_ts.
+ #
+ # Note: Not all events have an associated received_ts, so we
+            # fall back to using origin_server_ts. If for some reason we don't
+            # have an origin_server_ts, let's just use the current timestamp.
+ #
+ # We don't want to leave it null, as then we'll never try and
+ # censor those redactions.
+ sql = """
+ UPDATE redactions
+ SET received_ts = (
+ SELECT COALESCE(received_ts, origin_server_ts, ?) FROM events
+ WHERE events.event_id = redactions.event_id
+ )
+ WHERE ? <= event_id AND event_id <= ?
+ """
+
+ txn.execute(sql, (self._clock.time_msec(), last_event_id, upper_event_id))
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, "redactions_received_ts", {"last_event_id": upper_event_id}
+ )
+
+ return len(rows)
+
+ count = yield self.db_pool.runInteraction(
+ "_redactions_received_ts", _redactions_received_ts_txn
+ )
+
+ if not count:
+ yield self.db_pool.updates._end_background_update("redactions_received_ts")
+
+ return count
+
+ @defer.inlineCallbacks
+ def _event_fix_redactions_bytes(self, progress, batch_size):
+ """Undoes hex encoded censored redacted event JSON.
+ """
+
+ def _event_fix_redactions_bytes_txn(txn):
+            # This update is quite fast thanks to the new index.
+ txn.execute(
+ """
+ UPDATE event_json
+ SET
+ json = convert_from(json::bytea, 'utf8')
+ FROM redactions
+ WHERE
+ redactions.have_censored
+ AND event_json.event_id = redactions.redacts
+ AND json NOT LIKE '{%';
+ """
+ )
+
+ txn.execute("DROP INDEX redactions_censored_redacts")
+
+ yield self.db_pool.runInteraction(
+ "_event_fix_redactions_bytes", _event_fix_redactions_bytes_txn
+ )
+
+ yield self.db_pool.updates._end_background_update("event_fix_redactions_bytes")
+
+ return 1
+
+ @defer.inlineCallbacks
+ def _event_store_labels(self, progress, batch_size):
+ """Background update handler which will store labels for existing events."""
+ last_event_id = progress.get("last_event_id", "")
+
+ def _event_store_labels_txn(txn):
+ txn.execute(
+ """
+ SELECT event_id, json FROM event_json
+ LEFT JOIN event_labels USING (event_id)
+ WHERE event_id > ? AND label IS NULL
+ ORDER BY event_id LIMIT ?
+ """,
+ (last_event_id, batch_size),
+ )
+
+ results = list(txn)
+
+ nbrows = 0
+ last_row_event_id = ""
+ for (event_id, event_json_raw) in results:
+ try:
+ event_json = db_to_json(event_json_raw)
+
+ self.db_pool.simple_insert_many_txn(
+ txn=txn,
+ table="event_labels",
+ values=[
+ {
+ "event_id": event_id,
+ "label": label,
+ "room_id": event_json["room_id"],
+ "topological_ordering": event_json["depth"],
+ }
+ for label in event_json["content"].get(
+ EventContentFields.LABELS, []
+ )
+ if isinstance(label, str)
+ ],
+ )
+ except Exception as e:
+ logger.warning(
+ "Unable to load event %s (no labels will be imported): %s",
+ event_id,
+ e,
+ )
+
+ nbrows += 1
+ last_row_event_id = event_id
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, "event_store_labels", {"last_event_id": last_row_event_id}
+ )
+
+ return nbrows
+
+ num_rows = yield self.db_pool.runInteraction(
+ desc="event_store_labels", func=_event_store_labels_txn
+ )
+
+ if not num_rows:
+ yield self.db_pool.updates._end_background_update("event_store_labels")
+
+ return num_rows
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
new file mode 100644
index 00000000..755b7a2a
--- /dev/null
+++ b/synapse/storage/databases/main/events_worker.py
@@ -0,0 +1,1370 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+
+import itertools
+import logging
+import threading
+from collections import namedtuple
+from typing import List, Optional, Tuple
+
+from constantly import NamedConstant, Names
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventTypes
+from synapse.api.errors import NotFoundError, SynapseError
+from synapse.api.room_versions import (
+ KNOWN_ROOM_VERSIONS,
+ EventFormatVersions,
+ RoomVersions,
+)
+from synapse.events import make_event_from_dict
+from synapse.events.utils import prune_event
+from synapse.logging.context import PreserveLoggingContext, current_context
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
+from synapse.replication.tcp.streams import BackfillStream
+from synapse.replication.tcp.streams.events import EventsStream
+from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
+from synapse.storage.database import DatabasePool
+from synapse.storage.util.id_generators import StreamIdGenerator
+from synapse.types import get_domain_from_id
+from synapse.util.caches.descriptors import Cache, cached, cachedInlineCallbacks
+from synapse.util.iterutils import batch_iter
+from synapse.util.metrics import Measure
+
+logger = logging.getLogger(__name__)
+
+
+# These values are used in the `_enqueue_events` and `_do_fetch` methods to
+# control how we batch/bulk fetch events from the database.
+# The values are plucked out of thin air to make initial sync run faster
+# on jki.re
+# TODO: Make these configurable.
+EVENT_QUEUE_THREADS = 3 # Max number of threads that will fetch events
+EVENT_QUEUE_ITERATIONS = 3 # No. times we block waiting for requests for events
+EVENT_QUEUE_TIMEOUT_S = 0.1 # Timeout when waiting for requests for events
+
+
+_EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event"))
+
+
+class EventRedactBehaviour(Names):
+ """
+ What to do when retrieving a redacted event from the database.
+ """
+
+ AS_IS = NamedConstant()
+ REDACT = NamedConstant()
+ BLOCK = NamedConstant()
+
+
+class EventsWorkerStore(SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(EventsWorkerStore, self).__init__(database, db_conn, hs)
+
+ if hs.config.worker.writers.events == hs.get_instance_name():
+ # We are the process in charge of generating stream ids for events,
+ # so instantiate ID generators based on the database
+ self._stream_id_gen = StreamIdGenerator(
+ db_conn, "events", "stream_ordering",
+ )
+ self._backfill_id_gen = StreamIdGenerator(
+ db_conn,
+ "events",
+ "stream_ordering",
+ step=-1,
+ extra_tables=[("ex_outlier_stream", "event_stream_ordering")],
+ )
+ else:
+ # Another process is in charge of persisting events and generating
+ # stream IDs: rely on the replication streams to let us know which
+ # IDs we can process.
+ self._stream_id_gen = SlavedIdTracker(db_conn, "events", "stream_ordering")
+ self._backfill_id_gen = SlavedIdTracker(
+ db_conn, "events", "stream_ordering", step=-1
+ )
+
+ self._get_event_cache = Cache(
+ "*getEvent*",
+ keylen=3,
+ max_entries=hs.config.caches.event_cache_size,
+ apply_cache_factor_from_config=False,
+ )
+
+ self._event_fetch_lock = threading.Condition()
+ self._event_fetch_list = []
+ self._event_fetch_ongoing = 0
+
+ def process_replication_rows(self, stream_name, instance_name, token, rows):
+ if stream_name == EventsStream.NAME:
+ self._stream_id_gen.advance(token)
+ elif stream_name == BackfillStream.NAME:
+ self._backfill_id_gen.advance(-token)
+
+ super().process_replication_rows(stream_name, instance_name, token, rows)
+
+ def get_received_ts(self, event_id):
+ """Get received_ts (when it was persisted) for the event.
+
+ Raises an exception for unknown events.
+
+ Args:
+ event_id (str)
+
+ Returns:
+ Deferred[int|None]: Timestamp in milliseconds, or None for events
+ that were persisted before received_ts was implemented.
+ """
+ return self.db_pool.simple_select_one_onecol(
+ table="events",
+ keyvalues={"event_id": event_id},
+ retcol="received_ts",
+ desc="get_received_ts",
+ )
+
+ def get_received_ts_by_stream_pos(self, stream_ordering):
+ """Given a stream ordering get an approximate timestamp of when it
+ happened.
+
+ This is done by simply taking the received ts of the first event that
+ has a stream ordering greater than or equal to the given stream pos.
+ If none exists returns the current time, on the assumption that it must
+ have happened recently.
+
+ Args:
+ stream_ordering (int)
+
+ Returns:
+ Deferred[int]
+ """
+
+ def _get_approximate_received_ts_txn(txn):
+ sql = """
+ SELECT received_ts FROM events
+ WHERE stream_ordering >= ?
+ LIMIT 1
+ """
+
+ txn.execute(sql, (stream_ordering,))
+ row = txn.fetchone()
+ if row and row[0]:
+ ts = row[0]
+ else:
+ ts = self.clock.time_msec()
+
+ return ts
+
+ return self.db_pool.runInteraction(
+ "get_approximate_received_ts", _get_approximate_received_ts_txn
+ )
+
+ @defer.inlineCallbacks
+ def get_event(
+ self,
+ event_id: str,
+ redact_behaviour: EventRedactBehaviour = EventRedactBehaviour.REDACT,
+ get_prev_content: bool = False,
+ allow_rejected: bool = False,
+ allow_none: bool = False,
+ check_room_id: Optional[str] = None,
+ ):
+ """Get an event from the database by event_id.
+
+ Args:
+ event_id: The event_id of the event to fetch
+
+ redact_behaviour: Determine what to do with a redacted event. Possible values:
+ * AS_IS - Return the full event body with no redacted content
+ * REDACT - Return the event but with a redacted body
+                * BLOCK - Do not return redacted events (behave as per allow_none
+                    if the event is redacted)
+
+ get_prev_content: If True and event is a state event,
+ include the previous states content in the unsigned field.
+
+ allow_rejected: If True, return rejected events. Otherwise,
+ behave as per allow_none.
+
+ allow_none: If True, return None if no event found, if
+ False throw a NotFoundError
+
+ check_room_id: if not None, check the room of the found event.
+ If there is a mismatch, behave as per allow_none.
+
+ Returns:
+ Deferred[EventBase|None]
+ """
+ if not isinstance(event_id, str):
+ raise TypeError("Invalid event event_id %r" % (event_id,))
+
+ events = yield self.get_events_as_list(
+ [event_id],
+ redact_behaviour=redact_behaviour,
+ get_prev_content=get_prev_content,
+ allow_rejected=allow_rejected,
+ )
+
+ event = events[0] if events else None
+
+ if event is not None and check_room_id is not None:
+ if event.room_id != check_room_id:
+ event = None
+
+ if event is None and not allow_none:
+ raise NotFoundError("Could not find event %s" % (event_id,))
+
+ return event
+
+ @defer.inlineCallbacks
+ def get_events(
+ self,
+ event_ids: List[str],
+ redact_behaviour: EventRedactBehaviour = EventRedactBehaviour.REDACT,
+ get_prev_content: bool = False,
+ allow_rejected: bool = False,
+ ):
+ """Get events from the database
+
+ Args:
+ event_ids: The event_ids of the events to fetch
+
+ redact_behaviour: Determine what to do with a redacted event. Possible
+ values:
+ * AS_IS - Return the full event body with no redacted content
+ * REDACT - Return the event but with a redacted body
+                * BLOCK - Do not return redacted events (omit them from the response)
+
+ get_prev_content: If True and event is a state event,
+ include the previous states content in the unsigned field.
+
+ allow_rejected: If True, return rejected events. Otherwise,
+                omits rejected events from the response.
+
+ Returns:
+            Deferred: Dict from event_id to event.
+ """
+ events = yield self.get_events_as_list(
+ event_ids,
+ redact_behaviour=redact_behaviour,
+ get_prev_content=get_prev_content,
+ allow_rejected=allow_rejected,
+ )
+
+ return {e.event_id: e for e in events}
+
+ @defer.inlineCallbacks
+ def get_events_as_list(
+ self,
+ event_ids: List[str],
+ redact_behaviour: EventRedactBehaviour = EventRedactBehaviour.REDACT,
+ get_prev_content: bool = False,
+ allow_rejected: bool = False,
+ ):
+ """Get events from the database and return in a list in the same order
+ as given by `event_ids` arg.
+
+ Unknown events will be omitted from the response.
+
+ Args:
+ event_ids: The event_ids of the events to fetch
+
+ redact_behaviour: Determine what to do with a redacted event. Possible values:
+ * AS_IS - Return the full event body with no redacted content
+ * REDACT - Return the event but with a redacted body
+                * BLOCK - Do not return redacted events (omit them from the response)
+
+ get_prev_content: If True and event is a state event,
+ include the previous states content in the unsigned field.
+
+ allow_rejected: If True, return rejected events. Otherwise,
+ omits rejected events from the response.
+
+ Returns:
+ Deferred[list[EventBase]]: List of events fetched from the database. The
+ events are in the same order as `event_ids` arg.
+
+ Note that the returned list may be smaller than the list of event
+ IDs if not all events could be fetched.
+ """
+
+ if not event_ids:
+ return []
+
+ # there may be duplicates so we cast the list to a set
+ event_entry_map = yield self._get_events_from_cache_or_db(
+ set(event_ids), allow_rejected=allow_rejected
+ )
+
+ events = []
+ for event_id in event_ids:
+ entry = event_entry_map.get(event_id, None)
+ if not entry:
+ continue
+
+ if not allow_rejected:
+ assert not entry.event.rejected_reason, (
+ "rejected event returned from _get_events_from_cache_or_db despite "
+ "allow_rejected=False"
+ )
+
+ # We may not have had the original event when we received a redaction, so
+ # we have to recheck auth now.
+
+ if not allow_rejected and entry.event.type == EventTypes.Redaction:
+ if entry.event.redacts is None:
+ # A redacted redaction doesn't have a `redacts` key, in
+ # which case lets just withhold the event.
+ #
+                    # Note: Most of the time if the redaction has been
+ # redacted we still have the un-redacted event in the DB
+ # and so we'll still see the `redacts` key. However, this
+ # isn't always true e.g. if we have censored the event.
+ logger.debug(
+ "Withholding redaction event %s as we don't have redacts key",
+ event_id,
+ )
+ continue
+
+ redacted_event_id = entry.event.redacts
+ event_map = yield self._get_events_from_cache_or_db([redacted_event_id])
+ original_event_entry = event_map.get(redacted_event_id)
+ if not original_event_entry:
+ # we don't have the redacted event (or it was rejected).
+ #
+ # We assume that the redaction isn't authorized for now; if the
+ # redacted event later turns up, the redaction will be re-checked,
+ # and if it is found valid, the original will get redacted before it
+ # is served to the client.
+ logger.debug(
+ "Withholding redaction event %s since we don't (yet) have the "
+ "original %s",
+ event_id,
+ redacted_event_id,
+ )
+ continue
+
+ original_event = original_event_entry.event
+ if original_event.type == EventTypes.Create:
+ # we never serve redactions of Creates to clients.
+ logger.info(
+ "Withholding redaction %s of create event %s",
+ event_id,
+ redacted_event_id,
+ )
+ continue
+
+ if original_event.room_id != entry.event.room_id:
+ logger.info(
+ "Withholding redaction %s of event %s from a different room",
+ event_id,
+ redacted_event_id,
+ )
+ continue
+
+ if entry.event.internal_metadata.need_to_check_redaction():
+ original_domain = get_domain_from_id(original_event.sender)
+ redaction_domain = get_domain_from_id(entry.event.sender)
+ if original_domain != redaction_domain:
+ # the senders don't match, so this is forbidden
+ logger.info(
+ "Withholding redaction %s whose sender domain %s doesn't "
+ "match that of redacted event %s %s",
+ event_id,
+ redaction_domain,
+ redacted_event_id,
+ original_domain,
+ )
+ continue
+
+ # Update the cache to save doing the checks again.
+ entry.event.internal_metadata.recheck_redaction = False
+
+ event = entry.event
+
+ if entry.redacted_event:
+ if redact_behaviour == EventRedactBehaviour.BLOCK:
+ # Skip this event
+ continue
+ elif redact_behaviour == EventRedactBehaviour.REDACT:
+ event = entry.redacted_event
+
+ events.append(event)
+
+ if get_prev_content:
+ if "replaces_state" in event.unsigned:
+ prev = yield self.get_event(
+ event.unsigned["replaces_state"],
+ get_prev_content=False,
+ allow_none=True,
+ )
+ if prev:
+ event.unsigned = dict(event.unsigned)
+ event.unsigned["prev_content"] = prev.content
+ event.unsigned["prev_sender"] = prev.sender
+
+ return events
+
+ @defer.inlineCallbacks
+ def _get_events_from_cache_or_db(self, event_ids, allow_rejected=False):
+ """Fetch a bunch of events from the cache or the database.
+
+ If events are pulled from the database, they will be cached for future lookups.
+
+ Unknown events are omitted from the response.
+
+ Args:
+
+ event_ids (Iterable[str]): The event_ids of the events to fetch
+
+ allow_rejected (bool): Whether to include rejected events. If False,
+ rejected events are omitted from the response.
+
+ Returns:
+ Deferred[Dict[str, _EventCacheEntry]]:
+ map from event id to result
+ """
+ event_entry_map = self._get_events_from_cache(
+ event_ids, allow_rejected=allow_rejected
+ )
+
+ missing_events_ids = [e for e in event_ids if e not in event_entry_map]
+
+ if missing_events_ids:
+ log_ctx = current_context()
+ log_ctx.record_event_fetch(len(missing_events_ids))
+
+ # Note that _get_events_from_db is also responsible for turning db rows
+ # into FrozenEvents (via _get_event_from_row), which involves seeing if
+ # the events have been redacted, and if so pulling the redaction event out
+ # of the database to check it.
+ #
+ missing_events = yield self._get_events_from_db(
+ missing_events_ids, allow_rejected=allow_rejected
+ )
+
+ event_entry_map.update(missing_events)
+
+ return event_entry_map
+
+ def _invalidate_get_event_cache(self, event_id):
+ self._get_event_cache.invalidate((event_id,))
+
+ def _get_events_from_cache(self, events, allow_rejected, update_metrics=True):
+ """Fetch events from the caches
+
+ Args:
+ events (Iterable[str]): list of event_ids to fetch
+ allow_rejected (bool): Whether to return events that were rejected
+ update_metrics (bool): Whether to update the cache hit ratio metrics
+
+ Returns:
+ dict of event_id -> _EventCacheEntry for each event_id in cache. If
+ allow_rejected is `False` then there will still be an entry but it
+ will be `None`
+ """
+ event_map = {}
+
+ for event_id in events:
+ ret = self._get_event_cache.get(
+ (event_id,), None, update_metrics=update_metrics
+ )
+ if not ret:
+ continue
+
+ if allow_rejected or not ret.event.rejected_reason:
+ event_map[event_id] = ret
+ else:
+ event_map[event_id] = None
+
+ return event_map
+
+ def _do_fetch(self, conn):
+ """Takes a database connection and waits for requests for events from
+ the _event_fetch_list queue.
+ """
+ i = 0
+ while True:
+ with self._event_fetch_lock:
+ event_list = self._event_fetch_list
+ self._event_fetch_list = []
+
+ if not event_list:
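+                    # Nothing is waiting to be fetched: either stop this
+                    # fetcher thread (if the engine is single-threaded, or we
+                    # have already idled past EVENT_QUEUE_ITERATIONS rounds),
+                    # or wait briefly for new work to be queued.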
+ single_threaded = self.database_engine.single_threaded
+ if single_threaded or i > EVENT_QUEUE_ITERATIONS:
+ self._event_fetch_ongoing -= 1
+ return
+ else:
+ self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S)
+ i += 1
+ continue
+ i = 0
+
+ self._fetch_event_list(conn, event_list)
+
+ def _fetch_event_list(self, conn, event_list):
+ """Handle a load of requests from the _event_fetch_list queue
+
+ Args:
+ conn (twisted.enterprise.adbapi.Connection): database connection
+
+ event_list (list[Tuple[list[str], Deferred]]):
+ The fetch requests. Each entry consists of a list of event
+ ids to be fetched, and a deferred to be completed once the
+ events have been fetched.
+
+ The deferreds are callbacked with a dictionary mapping from event id
+ to event row. Note that it may well contain additional events that
+ were not part of this request.
+ """
+ with Measure(self._clock, "_fetch_event_list"):
+ try:
+ events_to_fetch = {
+ event_id for events, _ in event_list for event_id in events
+ }
+
+ row_dict = self.db_pool.new_transaction(
+ conn, "do_fetch", [], [], self._fetch_event_rows, events_to_fetch
+ )
+
+ # We only want to resolve deferreds from the main thread
+ def fire():
+ for _, d in event_list:
+ d.callback(row_dict)
+
+ with PreserveLoggingContext():
+ self.hs.get_reactor().callFromThread(fire)
+ except Exception as e:
+ logger.exception("do_fetch")
+
+ # We only want to resolve deferreds from the main thread
+ def fire(evs, exc):
+ for _, d in evs:
+ if not d.called:
+ with PreserveLoggingContext():
+ d.errback(exc)
+
+ with PreserveLoggingContext():
+ self.hs.get_reactor().callFromThread(fire, event_list, e)
+
+ @defer.inlineCallbacks
+ def _get_events_from_db(self, event_ids, allow_rejected=False):
+ """Fetch a bunch of events from the database.
+
+ Returned events will be added to the cache for future lookups.
+
+ Unknown events are omitted from the response.
+
+ Args:
+ event_ids (Iterable[str]): The event_ids of the events to fetch
+
+ allow_rejected (bool): Whether to include rejected events. If False,
+ rejected events are omitted from the response.
+
+ Returns:
+ Deferred[Dict[str, _EventCacheEntry]]:
+ map from event id to result. May return extra events which
+ weren't asked for.
+ """
+ fetched_events = {}
+ events_to_fetch = event_ids
+
+ while events_to_fetch:
+ row_map = yield self._enqueue_events(events_to_fetch)
+
+ # we need to recursively fetch any redactions of those events
+ redaction_ids = set()
+ for event_id in events_to_fetch:
+ row = row_map.get(event_id)
+ fetched_events[event_id] = row
+ if row:
+ redaction_ids.update(row["redactions"])
+
+ events_to_fetch = redaction_ids.difference(fetched_events.keys())
+ if events_to_fetch:
+ logger.debug("Also fetching redaction events %s", events_to_fetch)
+
+ # build a map from event_id to EventBase
+ event_map = {}
+ for event_id, row in fetched_events.items():
+ if not row:
+ continue
+ assert row["event_id"] == event_id
+
+ rejected_reason = row["rejected_reason"]
+
+ if not allow_rejected and rejected_reason:
+ continue
+
+ d = db_to_json(row["json"])
+ internal_metadata = db_to_json(row["internal_metadata"])
+
+ format_version = row["format_version"]
+ if format_version is None:
+ # This means that we stored the event before we had the concept
+                # of an event format version, so it must be a V1 event.
+ format_version = EventFormatVersions.V1
+
+ room_version_id = row["room_version_id"]
+
+ if not room_version_id:
+ # this should only happen for out-of-band membership events
+ if not internal_metadata.get("out_of_band_membership"):
+ logger.warning(
+ "Room %s for event %s is unknown", d["room_id"], event_id
+ )
+ continue
+
+ # take a wild stab at the room version based on the event format
+ if format_version == EventFormatVersions.V1:
+ room_version = RoomVersions.V1
+ elif format_version == EventFormatVersions.V2:
+ room_version = RoomVersions.V3
+ else:
+ room_version = RoomVersions.V5
+ else:
+ room_version = KNOWN_ROOM_VERSIONS.get(room_version_id)
+ if not room_version:
+ logger.warning(
+ "Event %s in room %s has unknown room version %s",
+ event_id,
+ d["room_id"],
+ room_version_id,
+ )
+ continue
+
+ if room_version.event_format != format_version:
+ logger.error(
+ "Event %s in room %s with version %s has wrong format: "
+ "expected %s, was %s",
+ event_id,
+ d["room_id"],
+ room_version_id,
+ room_version.event_format,
+ format_version,
+ )
+ continue
+
+ original_ev = make_event_from_dict(
+ event_dict=d,
+ room_version=room_version,
+ internal_metadata_dict=internal_metadata,
+ rejected_reason=rejected_reason,
+ )
+
+ event_map[event_id] = original_ev
+
+ # finally, we can decide whether each one needs redacting, and build
+ # the cache entries.
+ result_map = {}
+ for event_id, original_ev in event_map.items():
+ redactions = fetched_events[event_id]["redactions"]
+ redacted_event = self._maybe_redact_event_row(
+ original_ev, redactions, event_map
+ )
+
+ cache_entry = _EventCacheEntry(
+ event=original_ev, redacted_event=redacted_event
+ )
+
+ self._get_event_cache.prefill((event_id,), cache_entry)
+ result_map[event_id] = cache_entry
+
+ return result_map
+
+ @defer.inlineCallbacks
+ def _enqueue_events(self, events):
+ """Fetches events from the database using the _event_fetch_list. This
+ allows batch and bulk fetching of events - it allows us to fetch events
+ without having to create a new transaction for each request for events.
+
+ Args:
+ events (Iterable[str]): events to be fetched.
+
+ Returns:
+ Deferred[Dict[str, Dict]]: map from event id to row data from the database.
+ May contain events that weren't requested.
+ """
+
+ events_d = defer.Deferred()
+ with self._event_fetch_lock:
+ self._event_fetch_list.append((events, events_d))
+
+ self._event_fetch_lock.notify()
+
+ if self._event_fetch_ongoing < EVENT_QUEUE_THREADS:
+ self._event_fetch_ongoing += 1
+ should_start = True
+ else:
+ should_start = False
+
+ if should_start:
+ run_as_background_process(
+ "fetch_events", self.db_pool.runWithConnection, self._do_fetch
+ )
+
+ logger.debug("Loading %d events: %s", len(events), events)
+ with PreserveLoggingContext():
+ row_map = yield events_d
+ logger.debug("Loaded %d events (%d rows)", len(events), len(row_map))
+
+ return row_map
+
+ def _fetch_event_rows(self, txn, event_ids):
+ """Fetch event rows from the database
+
+ Events which are not found are omitted from the result.
+
+ The returned per-event dicts contain the following keys:
+
+ * event_id (str)
+
+ * json (str): json-encoded event structure
+
+ * internal_metadata (str): json-encoded internal metadata dict
+
+ * format_version (int|None): The format of the event. Hopefully one
+ of EventFormatVersions. 'None' means the event predates
+ EventFormatVersions (so the event is format V1).
+
+ * room_version_id (str|None): The version of the room which contains the event.
+ Hopefully one of RoomVersions.
+
+ Due to historical reasons, there may be a few events in the database which
+ do not have an associated room; in this case None will be returned here.
+
+ * rejected_reason (str|None): if the event was rejected, the reason
+ why.
+
+ * redactions (List[str]): a list of event-ids which (claim to) redact
+ this event.
+
+ Args:
+ txn (twisted.enterprise.adbapi.Connection):
+ event_ids (Iterable[str]): event IDs to fetch
+
+ Returns:
+ Dict[str, Dict]: a map from event id to event info.
+ """
+ event_dict = {}
+ for evs in batch_iter(event_ids, 200):
+ sql = """\
+ SELECT
+ e.event_id,
+ e.internal_metadata,
+ e.json,
+ e.format_version,
+ r.room_version,
+ rej.reason
+ FROM event_json as e
+ LEFT JOIN rooms r USING (room_id)
+ LEFT JOIN rejections as rej USING (event_id)
+ WHERE """
+
+ clause, args = make_in_list_sql_clause(
+ txn.database_engine, "e.event_id", evs
+ )
+
+ txn.execute(sql + clause, args)
+
+ for row in txn:
+ event_id = row[0]
+ event_dict[event_id] = {
+ "event_id": event_id,
+ "internal_metadata": row[1],
+ "json": row[2],
+ "format_version": row[3],
+ "room_version_id": row[4],
+ "rejected_reason": row[5],
+ "redactions": [],
+ }
+
+ # check for redactions
+ redactions_sql = "SELECT event_id, redacts FROM redactions WHERE "
+
+ clause, args = make_in_list_sql_clause(txn.database_engine, "redacts", evs)
+
+ txn.execute(redactions_sql + clause, args)
+
+ for (redacter, redacted) in txn:
+ d = event_dict.get(redacted)
+ if d:
+ d["redactions"].append(redacter)
+
+ return event_dict
+
+ def _maybe_redact_event_row(self, original_ev, redactions, event_map):
+ """Given an event object and a list of possible redacting event ids,
+ determine whether to honour any of those redactions and if so return a redacted
+ event.
+
+ Args:
+ original_ev (EventBase):
+ redactions (iterable[str]): list of event ids of potential redaction events
+ event_map (dict[str, EventBase]): other events which have been fetched, in
+                which we can look up the redaction events. Map from event id to event.
+
+ Returns:
+ Deferred[EventBase|None]: if the event should be redacted, a pruned
+ event object. Otherwise, None.
+ """
+ if original_ev.type == "m.room.create":
+ # we choose to ignore redactions of m.room.create events.
+ return None
+
+ for redaction_id in redactions:
+ redaction_event = event_map.get(redaction_id)
+ if not redaction_event or redaction_event.rejected_reason:
+ # we don't have the redaction event, or the redaction event was not
+ # authorized.
+ logger.debug(
+ "%s was redacted by %s but redaction not found/authed",
+ original_ev.event_id,
+ redaction_id,
+ )
+ continue
+
+ if redaction_event.room_id != original_ev.room_id:
+ logger.debug(
+ "%s was redacted by %s but redaction was in a different room!",
+ original_ev.event_id,
+ redaction_id,
+ )
+ continue
+
+ # Starting in room version v3, some redactions need to be
+ # rechecked if we didn't have the redacted event at the
+ # time, so we recheck on read instead.
+ if redaction_event.internal_metadata.need_to_check_redaction():
+ expected_domain = get_domain_from_id(original_ev.sender)
+ if get_domain_from_id(redaction_event.sender) == expected_domain:
+ # This redaction event is allowed. Mark as not needing a recheck.
+ redaction_event.internal_metadata.recheck_redaction = False
+ else:
+ # Senders don't match, so the event isn't actually redacted
+ logger.debug(
+ "%s was redacted by %s but the senders don't match",
+ original_ev.event_id,
+ redaction_id,
+ )
+ continue
+
+ logger.debug("Redacting %s due to %s", original_ev.event_id, redaction_id)
+
+ # we found a good redaction event. Redact!
+ redacted_event = prune_event(original_ev)
+ redacted_event.unsigned["redacted_by"] = redaction_id
+
+ # It's fine to add the event directly, since get_pdu_json
+ # will serialise this field correctly
+ redacted_event.unsigned["redacted_because"] = redaction_event
+
+ return redacted_event
+
+ # no valid redaction found for this event
+ return None
+
+ @defer.inlineCallbacks
+ def have_events_in_timeline(self, event_ids):
+ """Given a list of event ids, check if we have already processed and
+ stored them as non outliers.
+ """
+ rows = yield self.db_pool.simple_select_many_batch(
+ table="events",
+ retcols=("event_id",),
+ column="event_id",
+ iterable=list(event_ids),
+ keyvalues={"outlier": False},
+ desc="have_events_in_timeline",
+ )
+
+ return {r["event_id"] for r in rows}
+
+ @defer.inlineCallbacks
+ def have_seen_events(self, event_ids):
+ """Given a list of event ids, check if we have already processed them.
+
+ Args:
+ event_ids (iterable[str]):
+
+ Returns:
+ Deferred[set[str]]: The events we have already seen.
+ """
+ results = set()
+
+ def have_seen_events_txn(txn, chunk):
+ sql = "SELECT event_id FROM events as e WHERE "
+ clause, args = make_in_list_sql_clause(
+ txn.database_engine, "e.event_id", chunk
+ )
+ txn.execute(sql + clause, args)
+ for (event_id,) in txn:
+ results.add(event_id)
+
+ # break the input up into chunks of 100
+ input_iterator = iter(event_ids)
+ for chunk in iter(lambda: list(itertools.islice(input_iterator, 100)), []):
+ yield self.db_pool.runInteraction(
+ "have_seen_events", have_seen_events_txn, chunk
+ )
+ return results
+
+ def _get_total_state_event_counts_txn(self, txn, room_id):
+ """
+ See get_total_state_event_counts.
+ """
+ # We join against the events table as that has an index on room_id
+ sql = """
+ SELECT COUNT(*) FROM state_events
+ INNER JOIN events USING (room_id, event_id)
+ WHERE room_id=?
+ """
+ txn.execute(sql, (room_id,))
+ row = txn.fetchone()
+ return row[0] if row else 0
+
+ def get_total_state_event_counts(self, room_id):
+ """
+ Gets the total number of state events in a room.
+
+ Args:
+ room_id (str)
+
+ Returns:
+ Deferred[int]
+ """
+ return self.db_pool.runInteraction(
+ "get_total_state_event_counts",
+ self._get_total_state_event_counts_txn,
+ room_id,
+ )
+
+ def _get_current_state_event_counts_txn(self, txn, room_id):
+ """
+ See get_current_state_event_counts.
+ """
+ sql = "SELECT COUNT(*) FROM current_state_events WHERE room_id=?"
+ txn.execute(sql, (room_id,))
+ row = txn.fetchone()
+ return row[0] if row else 0
+
+ def get_current_state_event_counts(self, room_id):
+ """
+ Gets the current number of state events in a room.
+
+ Args:
+ room_id (str)
+
+ Returns:
+ Deferred[int]
+ """
+ return self.db_pool.runInteraction(
+ "get_current_state_event_counts",
+ self._get_current_state_event_counts_txn,
+ room_id,
+ )
+
+ @defer.inlineCallbacks
+ def get_room_complexity(self, room_id):
+ """
+ Get a rough approximation of the complexity of the room. This is used by
+ remote servers to decide whether they wish to join the room or not.
+        A higher complexity value indicates that being in the room will consume
+ more resources.
+
+ Args:
+ room_id (str)
+
+ Returns:
+            Deferred[dict[str, int]] mapping complexity version to complexity.
+ """
+ state_events = yield self.get_current_state_event_counts(room_id)
+
+ # Call this one "v1", so we can introduce new ones as we want to develop
+ # it.
+ complexity_v1 = round(state_events / 500, 2)
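+        # For example, a room with 1,000 current state events gets a v1
+        # complexity of 2.0.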
+
+ return {"v1": complexity_v1}
+
+ def get_current_backfill_token(self):
+ """The current minimum token that backfilled events have reached"""
+ return -self._backfill_id_gen.get_current_token()
+
+ def get_current_events_token(self):
+ """The current maximum token that events have reached"""
+ return self._stream_id_gen.get_current_token()
+
+ def get_all_new_forward_event_rows(self, last_id, current_id, limit):
+ """Returns new events, for the Events replication stream
+
+ Args:
+ last_id: the last stream_id from the previous batch.
+ current_id: the maximum stream_id to return up to
+ limit: the maximum number of rows to return
+
+ Returns: Deferred[List[Tuple]]
+ a list of events stream rows. Each tuple consists of a stream id as
+ the first element, followed by fields suitable for casting into an
+ EventsStreamRow.
+ """
+
+ def get_all_new_forward_event_rows(txn):
+ sql = (
+ "SELECT e.stream_ordering, e.event_id, e.room_id, e.type,"
+ " state_key, redacts, relates_to_id"
+ " FROM events AS e"
+ " LEFT JOIN redactions USING (event_id)"
+ " LEFT JOIN state_events USING (event_id)"
+ " LEFT JOIN event_relations USING (event_id)"
+ " WHERE ? < stream_ordering AND stream_ordering <= ?"
+ " ORDER BY stream_ordering ASC"
+ " LIMIT ?"
+ )
+ txn.execute(sql, (last_id, current_id, limit))
+ return txn.fetchall()
+
+ return self.db_pool.runInteraction(
+ "get_all_new_forward_event_rows", get_all_new_forward_event_rows
+ )
+
+ def get_ex_outlier_stream_rows(self, last_id, current_id):
+ """Returns de-outliered events, for the Events replication stream
+
+ Args:
+ last_id: the last stream_id from the previous batch.
+ current_id: the maximum stream_id to return up to
+
+ Returns: Deferred[List[Tuple]]
+ a list of events stream rows. Each tuple consists of a stream id as
+ the first element, followed by fields suitable for casting into an
+ EventsStreamRow.
+ """
+
+ def get_ex_outlier_stream_rows_txn(txn):
+ sql = (
+ "SELECT event_stream_ordering, e.event_id, e.room_id, e.type,"
+ " state_key, redacts, relates_to_id"
+ " FROM events AS e"
+ " INNER JOIN ex_outlier_stream USING (event_id)"
+ " LEFT JOIN redactions USING (event_id)"
+ " LEFT JOIN state_events USING (event_id)"
+ " LEFT JOIN event_relations USING (event_id)"
+ " WHERE ? < event_stream_ordering"
+ " AND event_stream_ordering <= ?"
+ " ORDER BY event_stream_ordering ASC"
+ )
+
+ txn.execute(sql, (last_id, current_id))
+ return txn.fetchall()
+
+ return self.db_pool.runInteraction(
+ "get_ex_outlier_stream_rows", get_ex_outlier_stream_rows_txn
+ )
+
+ async def get_all_new_backfill_event_rows(
+ self, instance_name: str, last_id: int, current_id: int, limit: int
+ ) -> Tuple[List[Tuple[int, list]], int, bool]:
+ """Get updates for backfill replication stream, including all new
+ backfilled events and events that have gone from being outliers to not.
+
+ Args:
+ instance_name: The writer we want to fetch updates from. Unused
+ here since there is only ever one writer.
+ last_id: The token to fetch updates from. Exclusive.
+ current_id: The token to fetch updates up to. Inclusive.
+ limit: The requested limit for the number of rows to return. The
+ function may return more or fewer rows.
+
+ Returns:
+ A tuple consisting of: the updates, a token to use to fetch
+ subsequent updates, and whether we returned fewer rows than exist
+ between the requested tokens due to the limit.
+
+ The token returned can be used in a subsequent call to this
+ function to get further updates.
+
+ The updates are a list of 2-tuples of stream ID and the row data
+ """
+ if last_id == current_id:
+ return [], current_id, False
+
+ def get_all_new_backfill_event_rows(txn):
+ sql = (
+ "SELECT -e.stream_ordering, e.event_id, e.room_id, e.type,"
+ " state_key, redacts, relates_to_id"
+ " FROM events AS e"
+ " LEFT JOIN redactions USING (event_id)"
+ " LEFT JOIN state_events USING (event_id)"
+ " LEFT JOIN event_relations USING (event_id)"
+ " WHERE ? > stream_ordering AND stream_ordering >= ?"
+ " ORDER BY stream_ordering ASC"
+ " LIMIT ?"
+ )
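+ # Backfill rows are stored with negative stream orderings, so the (positive)
+ # replication tokens are negated before being compared against them.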
+ txn.execute(sql, (-last_id, -current_id, limit))
+ new_event_updates = [(row[0], row[1:]) for row in txn]
+
+ limited = False
+ if len(new_event_updates) == limit:
+ upper_bound = new_event_updates[-1][0]
+ limited = True
+ else:
+ upper_bound = current_id
+
+ sql = (
+ "SELECT -event_stream_ordering, e.event_id, e.room_id, e.type,"
+ " state_key, redacts, relates_to_id"
+ " FROM events AS e"
+ " INNER JOIN ex_outlier_stream USING (event_id)"
+ " LEFT JOIN redactions USING (event_id)"
+ " LEFT JOIN state_events USING (event_id)"
+ " LEFT JOIN event_relations USING (event_id)"
+ " WHERE ? > event_stream_ordering"
+ " AND event_stream_ordering >= ?"
+ " ORDER BY event_stream_ordering DESC"
+ )
+ txn.execute(sql, (-last_id, -upper_bound))
+ new_event_updates.extend((row[0], row[1:]) for row in txn)
+
+ if len(new_event_updates) >= limit:
+ upper_bound = new_event_updates[-1][0]
+ limited = True
+
+ return new_event_updates, upper_bound, limited
+
+ return await self.db_pool.runInteraction(
+ "get_all_new_backfill_event_rows", get_all_new_backfill_event_rows
+ )
+
+ async def get_all_updated_current_state_deltas(
+ self, from_token: int, to_token: int, target_row_count: int
+ ) -> Tuple[List[Tuple], int, bool]:
+ """Fetch updates from current_state_delta_stream
+
+ Args:
+ from_token: The previous stream token. Updates from this stream id will
+ be excluded.
+
+ to_token: The current stream token (ie the upper limit). Updates up to this
+ stream id will be included (modulo the 'limit' param)
+
+ target_row_count: The number of rows to try to return. If more rows are
+ available, we will set 'limited' in the result. In the event of a large
+ batch, we may return more rows than this.
+ Returns:
+ A triplet `(updates, new_last_token, limited)`, where:
+ * `updates` is a list of database tuples.
+ * `new_last_token` is the new position in stream.
+ * `limited` is whether there are more updates to fetch.
+ """
+
+ def get_all_updated_current_state_deltas_txn(txn):
+ sql = """
+ SELECT stream_id, room_id, type, state_key, event_id
+ FROM current_state_delta_stream
+ WHERE ? < stream_id AND stream_id <= ?
+ ORDER BY stream_id ASC LIMIT ?
+ """
+ txn.execute(sql, (from_token, to_token, target_row_count))
+ return txn.fetchall()
+
+ def get_deltas_for_stream_id_txn(txn, stream_id):
+ sql = """
+ SELECT stream_id, room_id, type, state_key, event_id
+ FROM current_state_delta_stream
+ WHERE stream_id = ?
+ """
+ txn.execute(sql, [stream_id])
+ return txn.fetchall()
+
+ # we need to make sure that, for every stream id in the results, we get *all*
+ # the rows with that stream id.
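+ # Otherwise a replication consumer could resume from a token that splits a
+ # stream id and silently miss some of its rows.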
+
+ rows = await self.db_pool.runInteraction(
+ "get_all_updated_current_state_deltas",
+ get_all_updated_current_state_deltas_txn,
+ ) # type: List[Tuple]
+
+ # if we've got fewer rows than the limit, we're good
+ if len(rows) < target_row_count:
+ return rows, to_token, False
+
+ # we hit the limit, so reduce the upper limit so that we exclude the stream id
+ # of the last row in the result.
+ assert rows[-1][0] <= to_token
+ to_token = rows[-1][0] - 1
+
+ # search backwards through the list for the point to truncate
+ for idx in range(len(rows) - 1, 0, -1):
+ if rows[idx - 1][0] <= to_token:
+ return rows[:idx], to_token, True
+
+ # bother. We didn't get a full set of changes for even a single
+ # stream id. let's run the query again, without a row limit, but for
+ # just one stream id.
+ to_token += 1
+ rows = await self.db_pool.runInteraction(
+ "get_deltas_for_stream_id", get_deltas_for_stream_id_txn, to_token
+ )
+
+ return rows, to_token, True
+
+ @cached(num_args=5, max_entries=10)
+ def get_all_new_events(
+ self,
+ last_backfill_id,
+ last_forward_id,
+ current_backfill_id,
+ current_forward_id,
+ limit,
+ ):
+ """Get all the new events that have arrived at the server either as
+ new events or as backfilled events"""
+ have_backfill_events = last_backfill_id != current_backfill_id
+ have_forward_events = last_forward_id != current_forward_id
+
+ if not have_backfill_events and not have_forward_events:
+ return defer.succeed(AllNewEventsResult([], [], [], []))
+
+ def get_all_new_events_txn(txn):
+ sql = (
+ "SELECT e.stream_ordering, e.event_id, e.room_id, e.type,"
+ " state_key, redacts"
+ " FROM events AS e"
+ " LEFT JOIN redactions USING (event_id)"
+ " LEFT JOIN state_events USING (event_id)"
+ " WHERE ? < stream_ordering AND stream_ordering <= ?"
+ " ORDER BY stream_ordering ASC"
+ " LIMIT ?"
+ )
+ if have_forward_events:
+ txn.execute(sql, (last_forward_id, current_forward_id, limit))
+ new_forward_events = txn.fetchall()
+
+ if len(new_forward_events) == limit:
+ upper_bound = new_forward_events[-1][0]
+ else:
+ upper_bound = current_forward_id
+
+ sql = (
+ "SELECT event_stream_ordering, event_id, state_group"
+ " FROM ex_outlier_stream"
+ " WHERE ? > event_stream_ordering"
+ " AND event_stream_ordering >= ?"
+ " ORDER BY event_stream_ordering DESC"
+ )
+ txn.execute(sql, (last_forward_id, upper_bound))
+ forward_ex_outliers = txn.fetchall()
+ else:
+ new_forward_events = []
+ forward_ex_outliers = []
+
+ sql = (
+ "SELECT -e.stream_ordering, e.event_id, e.room_id, e.type,"
+ " state_key, redacts"
+ " FROM events AS e"
+ " LEFT JOIN redactions USING (event_id)"
+ " LEFT JOIN state_events USING (event_id)"
+ " WHERE ? > stream_ordering AND stream_ordering >= ?"
+ " ORDER BY stream_ordering DESC"
+ " LIMIT ?"
+ )
+ if have_backfill_events:
+ txn.execute(sql, (-last_backfill_id, -current_backfill_id, limit))
+ new_backfill_events = txn.fetchall()
+
+ if len(new_backfill_events) == limit:
+ upper_bound = new_backfill_events[-1][0]
+ else:
+ upper_bound = current_backfill_id
+
+ sql = (
+ "SELECT -event_stream_ordering, event_id, state_group"
+ " FROM ex_outlier_stream"
+ " WHERE ? > event_stream_ordering"
+ " AND event_stream_ordering >= ?"
+ " ORDER BY event_stream_ordering DESC"
+ )
+ txn.execute(sql, (-last_backfill_id, -upper_bound))
+ backward_ex_outliers = txn.fetchall()
+ else:
+ new_backfill_events = []
+ backward_ex_outliers = []
+
+ return AllNewEventsResult(
+ new_forward_events,
+ new_backfill_events,
+ forward_ex_outliers,
+ backward_ex_outliers,
+ )
+
+ return self.db_pool.runInteraction("get_all_new_events", get_all_new_events_txn)
+
+ async def is_event_after(self, event_id1, event_id2):
+ """Returns True if event_id1 is after event_id2 in the stream
+ """
+ to_1, so_1 = await self.get_event_ordering(event_id1)
+ to_2, so_2 = await self.get_event_ordering(event_id2)
+ return (to_1, so_1) > (to_2, so_2)
+
+ @cachedInlineCallbacks(max_entries=5000)
+ def get_event_ordering(self, event_id):
+ res = yield self.db_pool.simple_select_one(
+ table="events",
+ retcols=["topological_ordering", "stream_ordering"],
+ keyvalues={"event_id": event_id},
+ allow_none=True,
+ )
+
+ if not res:
+ raise SynapseError(404, "Could not find event %s" % (event_id,))
+
+ return (int(res["topological_ordering"]), int(res["stream_ordering"]))
+
+ def get_next_event_to_expire(self):
+ """Retrieve the entry with the lowest expiry timestamp in the event_expiry
+ table, or None if there are no more events to expire.
+
+ Returns: Deferred[Optional[Tuple[str, int]]]
+ A tuple containing the event ID as its first element and an expiry timestamp
+ as its second one, if there's at least one row in the event_expiry table.
+ None otherwise.
+ """
+
+ def get_next_event_to_expire_txn(txn):
+ txn.execute(
+ """
+ SELECT event_id, expiry_ts FROM event_expiry
+ ORDER BY expiry_ts ASC LIMIT 1
+ """
+ )
+
+ return txn.fetchone()
+
+ return self.db_pool.runInteraction(
+ desc="get_next_event_to_expire", func=get_next_event_to_expire_txn
+ )
+
+
+AllNewEventsResult = namedtuple(
+ "AllNewEventsResult",
+ [
+ "new_forward_events",
+ "new_backfill_events",
+ "forward_ex_outliers",
+ "backward_ex_outliers",
+ ],
+)
diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py
new file mode 100644
index 00000000..45a17601
--- /dev/null
+++ b/synapse/storage/databases/main/filtering.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015, 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from canonicaljson import encode_canonical_json
+
+from synapse.api.errors import Codes, SynapseError
+from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.util.caches.descriptors import cached
+
+
+class FilteringStore(SQLBaseStore):
+ @cached(num_args=2)
+ async def get_user_filter(self, user_localpart, filter_id):
+ # filter_id is BIGINT UNSIGNED, so if it isn't a number, fail
+ # with a coherent error message rather than 500 M_UNKNOWN.
+ try:
+ int(filter_id)
+ except ValueError:
+ raise SynapseError(400, "Invalid filter ID", Codes.INVALID_PARAM)
+
+ def_json = await self.db_pool.simple_select_one_onecol(
+ table="user_filters",
+ keyvalues={"user_id": user_localpart, "filter_id": filter_id},
+ retcol="filter_json",
+ allow_none=False,
+ desc="get_user_filter",
+ )
+
+ return db_to_json(def_json)
+
+ def add_user_filter(self, user_localpart, user_filter):
+ def_json = encode_canonical_json(user_filter)
+
+ # Need an atomic transaction to SELECT the maximal ID so far then
+ # INSERT a new one
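+ # If an identical filter already exists for this user, its ID is reused
+ # rather than inserting a duplicate row.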
+ def _do_txn(txn):
+ sql = (
+ "SELECT filter_id FROM user_filters "
+ "WHERE user_id = ? AND filter_json = ?"
+ )
+ txn.execute(sql, (user_localpart, bytearray(def_json)))
+ filter_id_response = txn.fetchone()
+ if filter_id_response is not None:
+ return filter_id_response[0]
+
+ sql = "SELECT MAX(filter_id) FROM user_filters WHERE user_id = ?"
+ txn.execute(sql, (user_localpart,))
+ max_id = txn.fetchone()[0]
+ if max_id is None:
+ filter_id = 0
+ else:
+ filter_id = max_id + 1
+
+ sql = (
+ "INSERT INTO user_filters (user_id, filter_id, filter_json)"
+ "VALUES(?, ?, ?)"
+ )
+ txn.execute(sql, (user_localpart, filter_id, bytearray(def_json)))
+
+ return filter_id
+
+ return self.db_pool.runInteraction("add_user_filter", _do_txn)
diff --git a/synapse/storage/databases/main/group_server.py b/synapse/storage/databases/main/group_server.py
new file mode 100644
index 00000000..380db3a3
--- /dev/null
+++ b/synapse/storage/databases/main/group_server.py
@@ -0,0 +1,1288 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 Vector Creations Ltd
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Optional, Tuple
+
+from synapse.api.errors import SynapseError
+from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.types import JsonDict
+from synapse.util import json_encoder
+
+# The category ID for the "default" category. We don't store as null in the
+# database to avoid the fun of null != null
+_DEFAULT_CATEGORY_ID = ""
+_DEFAULT_ROLE_ID = ""
+
+
+class GroupServerWorkerStore(SQLBaseStore):
+ def get_group(self, group_id):
+ return self.db_pool.simple_select_one(
+ table="groups",
+ keyvalues={"group_id": group_id},
+ retcols=(
+ "name",
+ "short_description",
+ "long_description",
+ "avatar_url",
+ "is_public",
+ "join_policy",
+ ),
+ allow_none=True,
+ desc="get_group",
+ )
+
+ def get_users_in_group(self, group_id, include_private=False):
+ # TODO: Pagination
+
+ keyvalues = {"group_id": group_id}
+ if not include_private:
+ keyvalues["is_public"] = True
+
+ return self.db_pool.simple_select_list(
+ table="group_users",
+ keyvalues=keyvalues,
+ retcols=("user_id", "is_public", "is_admin"),
+ desc="get_users_in_group",
+ )
+
+ def get_invited_users_in_group(self, group_id):
+ # TODO: Pagination
+
+ return self.db_pool.simple_select_onecol(
+ table="group_invites",
+ keyvalues={"group_id": group_id},
+ retcol="user_id",
+ desc="get_invited_users_in_group",
+ )
+
+ def get_rooms_in_group(self, group_id: str, include_private: bool = False):
+ """Retrieve the rooms that belong to a given group. Does not return rooms that
+ lack members.
+
+ Args:
+ group_id: The ID of the group to query for rooms
+ include_private: Whether to return private rooms in results
+
+ Returns:
+ Deferred[List[Dict[str, str|bool]]]: A list of dictionaries, each in the
+ form of:
+
+ {
+ "room_id": "!a_room_id:example.com", # The ID of the room
+ "is_public": False # Whether this is a public room or not
+ }
+ """
+ # TODO: Pagination
+
+ def _get_rooms_in_group_txn(txn):
+ sql = """
+ SELECT room_id, is_public FROM group_rooms
+ WHERE group_id = ?
+ AND room_id IN (
+ SELECT group_rooms.room_id FROM group_rooms
+ LEFT JOIN room_stats_current ON
+ group_rooms.room_id = room_stats_current.room_id
+ AND joined_members > 0
+ AND local_users_in_room > 0
+ LEFT JOIN rooms ON
+ group_rooms.room_id = rooms.room_id
+ AND (room_version <> '') = ?
+ )
+ """
+ args = [group_id, False]
+
+ if not include_private:
+ sql += " AND is_public = ?"
+ args += [True]
+
+ txn.execute(sql, args)
+
+ return [
+ {"room_id": room_id, "is_public": is_public}
+ for room_id, is_public in txn
+ ]
+
+ return self.db_pool.runInteraction(
+ "get_rooms_in_group", _get_rooms_in_group_txn
+ )
+
+ def get_rooms_for_summary_by_category(
+ self, group_id: str, include_private: bool = False,
+ ):
+ """Get the rooms and categories that should be included in a summary request
+
+ Args:
+ group_id: The ID of the group to query the summary for
+ include_private: Whether to return private rooms in results
+
+ Returns:
+ Deferred[Tuple[List, Dict]]: A tuple containing:
+
+ * A list of dictionaries with the keys:
+ * "room_id": str, the room ID
+ * "is_public": bool, whether the room is public
+ * "category_id": str|None, the category ID if set, else None
+ * "order": int, the sort order of rooms
+
+ * A dictionary with the key:
+ * category_id (str): a dictionary with the keys:
+ * "is_public": bool, whether the category is public
+ * "profile": str, the category profile
+ * "order": int, the sort order of rooms in this category
+ """
+
+ def _get_rooms_for_summary_txn(txn):
+ keyvalues = {"group_id": group_id}
+ if not include_private:
+ keyvalues["is_public"] = True
+
+ sql = """
+ SELECT room_id, is_public, category_id, room_order
+ FROM group_summary_rooms
+ WHERE group_id = ?
+ AND room_id IN (
+ SELECT group_rooms.room_id FROM group_rooms
+ LEFT JOIN room_stats_current ON
+ group_rooms.room_id = room_stats_current.room_id
+ AND joined_members > 0
+ AND local_users_in_room > 0
+ LEFT JOIN rooms ON
+ group_rooms.room_id = rooms.room_id
+ AND (room_version <> '') = ?
+ )
+ """
+
+ if not include_private:
+ sql += " AND is_public = ?"
+ txn.execute(sql, (group_id, False, True))
+ else:
+ txn.execute(sql, (group_id, False))
+
+ rooms = [
+ {
+ "room_id": row[0],
+ "is_public": row[1],
+ "category_id": row[2] if row[2] != _DEFAULT_CATEGORY_ID else None,
+ "order": row[3],
+ }
+ for row in txn
+ ]
+
+ sql = """
+ SELECT category_id, is_public, profile, cat_order
+ FROM group_summary_room_categories
+ INNER JOIN group_room_categories USING (group_id, category_id)
+ WHERE group_id = ?
+ """
+
+ if not include_private:
+ sql += " AND is_public = ?"
+ txn.execute(sql, (group_id, True))
+ else:
+ txn.execute(sql, (group_id,))
+
+ categories = {
+ row[0]: {
+ "is_public": row[1],
+ "profile": db_to_json(row[2]),
+ "order": row[3],
+ }
+ for row in txn
+ }
+
+ return rooms, categories
+
+ return self.db_pool.runInteraction(
+ "get_rooms_for_summary", _get_rooms_for_summary_txn
+ )
+
+ async def get_group_categories(self, group_id):
+ rows = await self.db_pool.simple_select_list(
+ table="group_room_categories",
+ keyvalues={"group_id": group_id},
+ retcols=("category_id", "is_public", "profile"),
+ desc="get_group_categories",
+ )
+
+ return {
+ row["category_id"]: {
+ "is_public": row["is_public"],
+ "profile": db_to_json(row["profile"]),
+ }
+ for row in rows
+ }
+
+ async def get_group_category(self, group_id, category_id):
+ category = await self.db_pool.simple_select_one(
+ table="group_room_categories",
+ keyvalues={"group_id": group_id, "category_id": category_id},
+ retcols=("is_public", "profile"),
+ desc="get_group_category",
+ )
+
+ category["profile"] = db_to_json(category["profile"])
+
+ return category
+
+ async def get_group_roles(self, group_id):
+ rows = await self.db_pool.simple_select_list(
+ table="group_roles",
+ keyvalues={"group_id": group_id},
+ retcols=("role_id", "is_public", "profile"),
+ desc="get_group_roles",
+ )
+
+ return {
+ row["role_id"]: {
+ "is_public": row["is_public"],
+ "profile": db_to_json(row["profile"]),
+ }
+ for row in rows
+ }
+
+ async def get_group_role(self, group_id, role_id):
+ role = await self.db_pool.simple_select_one(
+ table="group_roles",
+ keyvalues={"group_id": group_id, "role_id": role_id},
+ retcols=("is_public", "profile"),
+ desc="get_group_role",
+ )
+
+ role["profile"] = db_to_json(role["profile"])
+
+ return role
+
+ def get_local_groups_for_room(self, room_id):
+ """Get all of the local group that contain a given room
+ Args:
+ room_id (str): The ID of a room
+ Returns:
+ Deferred[list[str]]: A twisted.Deferred containing a list of group ids
+ containing this room
+ """
+ return self.db_pool.simple_select_onecol(
+ table="group_rooms",
+ keyvalues={"room_id": room_id},
+ retcol="group_id",
+ desc="get_local_groups_for_room",
+ )
+
+ def get_users_for_summary_by_role(self, group_id, include_private=False):
+ """Get the users and roles that should be included in a summary request
+
+ Returns ([users], [roles])
+ """
+
+ def _get_users_for_summary_txn(txn):
+ keyvalues = {"group_id": group_id}
+ if not include_private:
+ keyvalues["is_public"] = True
+
+ sql = """
+ SELECT user_id, is_public, role_id, user_order
+ FROM group_summary_users
+ WHERE group_id = ?
+ """
+
+ if not include_private:
+ sql += " AND is_public = ?"
+ txn.execute(sql, (group_id, True))
+ else:
+ txn.execute(sql, (group_id,))
+
+ users = [
+ {
+ "user_id": row[0],
+ "is_public": row[1],
+ "role_id": row[2] if row[2] != _DEFAULT_ROLE_ID else None,
+ "order": row[3],
+ }
+ for row in txn
+ ]
+
+ sql = """
+ SELECT role_id, is_public, profile, role_order
+ FROM group_summary_roles
+ INNER JOIN group_roles USING (group_id, role_id)
+ WHERE group_id = ?
+ """
+
+ if not include_private:
+ sql += " AND is_public = ?"
+ txn.execute(sql, (group_id, True))
+ else:
+ txn.execute(sql, (group_id,))
+
+ roles = {
+ row[0]: {
+ "is_public": row[1],
+ "profile": db_to_json(row[2]),
+ "order": row[3],
+ }
+ for row in txn
+ }
+
+ return users, roles
+
+ return self.db_pool.runInteraction(
+ "get_users_for_summary_by_role", _get_users_for_summary_txn
+ )
+
+ def is_user_in_group(self, user_id, group_id):
+ return self.db_pool.simple_select_one_onecol(
+ table="group_users",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ retcol="user_id",
+ allow_none=True,
+ desc="is_user_in_group",
+ ).addCallback(lambda r: bool(r))
+
+ def is_user_admin_in_group(self, group_id, user_id):
+ return self.db_pool.simple_select_one_onecol(
+ table="group_users",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ retcol="is_admin",
+ allow_none=True,
+ desc="is_user_admin_in_group",
+ )
+
+ def is_user_invited_to_local_group(self, group_id, user_id):
+ """Has the group server invited a user?
+ """
+ return self.db_pool.simple_select_one_onecol(
+ table="group_invites",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ retcol="user_id",
+ desc="is_user_invited_to_local_group",
+ allow_none=True,
+ )
+
+ def get_users_membership_info_in_group(self, group_id, user_id):
+ """Get a dict describing the membership of a user in a group.
+
+ Example if joined:
+
+ {
+ "membership": "join",
+ "is_public": True,
+ "is_privileged": False,
+ }
+
+ Returns an empty dict if the user has no membership in the group (i.e. is
+ neither joined nor invited).
+ """
+
+ def _get_users_membership_in_group_txn(txn):
+ row = self.db_pool.simple_select_one_txn(
+ txn,
+ table="group_users",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ retcols=("is_admin", "is_public"),
+ allow_none=True,
+ )
+
+ if row:
+ return {
+ "membership": "join",
+ "is_public": row["is_public"],
+ "is_privileged": row["is_admin"],
+ }
+
+ row = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="group_invites",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ retcol="user_id",
+ allow_none=True,
+ )
+
+ if row:
+ return {"membership": "invite"}
+
+ return {}
+
+ return self.db_pool.runInteraction(
+ "get_users_membership_info_in_group", _get_users_membership_in_group_txn
+ )
+
+ def get_publicised_groups_for_user(self, user_id):
+ """Get all groups a user is publicising
+ """
+ return self.db_pool.simple_select_onecol(
+ table="local_group_membership",
+ keyvalues={"user_id": user_id, "membership": "join", "is_publicised": True},
+ retcol="group_id",
+ desc="get_publicised_groups_for_user",
+ )
+
+ def get_attestations_need_renewals(self, valid_until_ms):
+ """Get all attestations that need to be renewed until givent time
+ """
+
+ def _get_attestations_need_renewals_txn(txn):
+ sql = """
+ SELECT group_id, user_id FROM group_attestations_renewals
+ WHERE valid_until_ms <= ?
+ """
+ txn.execute(sql, (valid_until_ms,))
+ return self.db_pool.cursor_to_dict(txn)
+
+ return self.db_pool.runInteraction(
+ "get_attestations_need_renewals", _get_attestations_need_renewals_txn
+ )
+
+ async def get_remote_attestation(self, group_id, user_id):
+ """Get the attestation that proves the remote agrees that the user is
+ in the group.
+ """
+ row = await self.db_pool.simple_select_one(
+ table="group_attestations_remote",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ retcols=("valid_until_ms", "attestation_json"),
+ desc="get_remote_attestation",
+ allow_none=True,
+ )
+
+ now = int(self._clock.time_msec())
+ if row and now < row["valid_until_ms"]:
+ return db_to_json(row["attestation_json"])
+
+ return None
+
+ def get_joined_groups(self, user_id):
+ return self.db_pool.simple_select_onecol(
+ table="local_group_membership",
+ keyvalues={"user_id": user_id, "membership": "join"},
+ retcol="group_id",
+ desc="get_joined_groups",
+ )
+
+ def get_all_groups_for_user(self, user_id, now_token):
+ def _get_all_groups_for_user_txn(txn):
+ sql = """
+ SELECT group_id, type, membership, u.content
+ FROM local_group_updates AS u
+ INNER JOIN local_group_membership USING (group_id, user_id)
+ WHERE user_id = ? AND membership != 'leave'
+ AND stream_id <= ?
+ """
+ txn.execute(sql, (user_id, now_token))
+ return [
+ {
+ "group_id": row[0],
+ "type": row[1],
+ "membership": row[2],
+ "content": db_to_json(row[3]),
+ }
+ for row in txn
+ ]
+
+ return self.db_pool.runInteraction(
+ "get_all_groups_for_user", _get_all_groups_for_user_txn
+ )
+
+ async def get_groups_changes_for_user(self, user_id, from_token, to_token):
+ from_token = int(from_token)
+ has_changed = self._group_updates_stream_cache.has_entity_changed(
+ user_id, from_token
+ )
+ if not has_changed:
+ return []
+
+ def _get_groups_changes_for_user_txn(txn):
+ sql = """
+ SELECT group_id, membership, type, u.content
+ FROM local_group_updates AS u
+ INNER JOIN local_group_membership USING (group_id, user_id)
+ WHERE user_id = ? AND ? < stream_id AND stream_id <= ?
+ """
+ txn.execute(sql, (user_id, from_token, to_token))
+ return [
+ {
+ "group_id": group_id,
+ "membership": membership,
+ "type": gtype,
+ "content": db_to_json(content_json),
+ }
+ for group_id, membership, gtype, content_json in txn
+ ]
+
+ return await self.db_pool.runInteraction(
+ "get_groups_changes_for_user", _get_groups_changes_for_user_txn
+ )
+
+ async def get_all_groups_changes(
+ self, instance_name: str, last_id: int, current_id: int, limit: int
+ ) -> Tuple[List[Tuple[int, tuple]], int, bool]:
+ """Get updates for groups replication stream.
+
+ Args:
+ instance_name: The writer we want to fetch updates from. Unused
+ here since there is only ever one writer.
+ last_id: The token to fetch updates from. Exclusive.
+ current_id: The token to fetch updates up to. Inclusive.
+ limit: The requested limit for the number of rows to return. The
+ function may return more or fewer rows.
+
+ Returns:
+ A tuple consisting of: the updates, a token to use to fetch
+ subsequent updates, and whether we returned fewer rows than exist
+ between the requested tokens due to the limit.
+
+ The token returned can be used in a subsequent call to this
+ function to get further updates.
+
+ The updates are a list of 2-tuples of stream ID and the row data
+ """
+
+ last_id = int(last_id)
+ has_changed = self._group_updates_stream_cache.has_any_entity_changed(last_id)
+
+ if not has_changed:
+ return [], current_id, False
+
+ def _get_all_groups_changes_txn(txn):
+ sql = """
+ SELECT stream_id, group_id, user_id, type, content
+ FROM local_group_updates
+ WHERE ? < stream_id AND stream_id <= ?
+ LIMIT ?
+ """
+ txn.execute(sql, (last_id, current_id, limit))
+ updates = [
+ (stream_id, (group_id, user_id, gtype, db_to_json(content_json)))
+ for stream_id, group_id, user_id, gtype, content_json in txn
+ ]
+
+ limited = False
+ upto_token = current_id
+ if len(updates) >= limit:
+ upto_token = updates[-1][0]
+ limited = True
+
+ return updates, upto_token, limited
+
+ return await self.db_pool.runInteraction(
+ "get_all_groups_changes", _get_all_groups_changes_txn
+ )
+
+
+class GroupServerStore(GroupServerWorkerStore):
+ def set_group_join_policy(self, group_id, join_policy):
+ """Set the join policy of a group.
+
+ join_policy can be one of:
+ * "invite"
+ * "open"
+ """
+ return self.db_pool.simple_update_one(
+ table="groups",
+ keyvalues={"group_id": group_id},
+ updatevalues={"join_policy": join_policy},
+ desc="set_group_join_policy",
+ )
+
+ def add_room_to_summary(self, group_id, room_id, category_id, order, is_public):
+ return self.db_pool.runInteraction(
+ "add_room_to_summary",
+ self._add_room_to_summary_txn,
+ group_id,
+ room_id,
+ category_id,
+ order,
+ is_public,
+ )
+
+ def _add_room_to_summary_txn(
+ self, txn, group_id, room_id, category_id, order, is_public
+ ):
+ """Add (or update) room's entry in summary.
+
+ Args:
+ group_id (str)
+ room_id (str)
+ category_id (str): If not None then adds the category to the end of
+ the summary if it's not already there. [Optional]
+ order (int): If not None inserts the room at that position, e.g.
+ an order of 1 will put the room first. Otherwise, the room gets
+ added to the end.
+ """
+ room_in_group = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="group_rooms",
+ keyvalues={"group_id": group_id, "room_id": room_id},
+ retcol="room_id",
+ allow_none=True,
+ )
+ if not room_in_group:
+ raise SynapseError(400, "room not in group")
+
+ if category_id is None:
+ category_id = _DEFAULT_CATEGORY_ID
+ else:
+ cat_exists = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="group_room_categories",
+ keyvalues={"group_id": group_id, "category_id": category_id},
+ retcol="group_id",
+ allow_none=True,
+ )
+ if not cat_exists:
+ raise SynapseError(400, "Category doesn't exist")
+
+ # TODO: Check category is part of summary already
+ cat_exists = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="group_summary_room_categories",
+ keyvalues={"group_id": group_id, "category_id": category_id},
+ retcol="group_id",
+ allow_none=True,
+ )
+ if not cat_exists:
+ # If not, add it with an order larger than all others
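+ # (COALESCE turns a NULL MAX into 0, so the order defaults to 1 when
+ # there are no matching rows yet)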
+ txn.execute(
+ """
+ INSERT INTO group_summary_room_categories
+ (group_id, category_id, cat_order)
+ SELECT ?, ?, COALESCE(MAX(cat_order), 0) + 1
+ FROM group_summary_room_categories
+ WHERE group_id = ? AND category_id = ?
+ """,
+ (group_id, category_id, group_id, category_id),
+ )
+
+ existing = self.db_pool.simple_select_one_txn(
+ txn,
+ table="group_summary_rooms",
+ keyvalues={
+ "group_id": group_id,
+ "room_id": room_id,
+ "category_id": category_id,
+ },
+ retcols=("room_order", "is_public"),
+ allow_none=True,
+ )
+
+ if order is not None:
+ # Shuffle other room orders that come after the given order
+ sql = """
+ UPDATE group_summary_rooms SET room_order = room_order + 1
+ WHERE group_id = ? AND category_id = ? AND room_order >= ?
+ """
+ txn.execute(sql, (group_id, category_id, order))
+ elif not existing:
+ sql = """
+ SELECT COALESCE(MAX(room_order), 0) + 1 FROM group_summary_rooms
+ WHERE group_id = ? AND category_id = ?
+ """
+ txn.execute(sql, (group_id, category_id))
+ (order,) = txn.fetchone()
+
+ if existing:
+ to_update = {}
+ if order is not None:
+ to_update["room_order"] = order
+ if is_public is not None:
+ to_update["is_public"] = is_public
+ self.db_pool.simple_update_txn(
+ txn,
+ table="group_summary_rooms",
+ keyvalues={
+ "group_id": group_id,
+ "category_id": category_id,
+ "room_id": room_id,
+ },
+ values=to_update,
+ )
+ else:
+ if is_public is None:
+ is_public = True
+
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="group_summary_rooms",
+ values={
+ "group_id": group_id,
+ "category_id": category_id,
+ "room_id": room_id,
+ "room_order": order,
+ "is_public": is_public,
+ },
+ )
+
+ def remove_room_from_summary(self, group_id, room_id, category_id):
+ if category_id is None:
+ category_id = _DEFAULT_CATEGORY_ID
+
+ return self.db_pool.simple_delete(
+ table="group_summary_rooms",
+ keyvalues={
+ "group_id": group_id,
+ "category_id": category_id,
+ "room_id": room_id,
+ },
+ desc="remove_room_from_summary",
+ )
+
+ def upsert_group_category(self, group_id, category_id, profile, is_public):
+ """Add/update room category for group
+ """
+ insertion_values = {}
+ update_values = {"category_id": category_id} # This cannot be empty
+
+ if profile is None:
+ insertion_values["profile"] = "{}"
+ else:
+ update_values["profile"] = json_encoder.encode(profile)
+
+ if is_public is None:
+ insertion_values["is_public"] = True
+ else:
+ update_values["is_public"] = is_public
+
+ return self.db_pool.simple_upsert(
+ table="group_room_categories",
+ keyvalues={"group_id": group_id, "category_id": category_id},
+ values=update_values,
+ insertion_values=insertion_values,
+ desc="upsert_group_category",
+ )
+
+ def remove_group_category(self, group_id, category_id):
+ return self.db_pool.simple_delete(
+ table="group_room_categories",
+ keyvalues={"group_id": group_id, "category_id": category_id},
+ desc="remove_group_category",
+ )
+
+ def upsert_group_role(self, group_id, role_id, profile, is_public):
+ """Add/remove user role
+ """
+ insertion_values = {}
+ update_values = {"role_id": role_id} # This cannot be empty
+
+ if profile is None:
+ insertion_values["profile"] = "{}"
+ else:
+ update_values["profile"] = json_encoder.encode(profile)
+
+ if is_public is None:
+ insertion_values["is_public"] = True
+ else:
+ update_values["is_public"] = is_public
+
+ return self.db_pool.simple_upsert(
+ table="group_roles",
+ keyvalues={"group_id": group_id, "role_id": role_id},
+ values=update_values,
+ insertion_values=insertion_values,
+ desc="upsert_group_role",
+ )
+
+ def remove_group_role(self, group_id, role_id):
+ return self.db_pool.simple_delete(
+ table="group_roles",
+ keyvalues={"group_id": group_id, "role_id": role_id},
+ desc="remove_group_role",
+ )
+
+ def add_user_to_summary(self, group_id, user_id, role_id, order, is_public):
+ return self.db_pool.runInteraction(
+ "add_user_to_summary",
+ self._add_user_to_summary_txn,
+ group_id,
+ user_id,
+ role_id,
+ order,
+ is_public,
+ )
+
+ def _add_user_to_summary_txn(
+ self, txn, group_id, user_id, role_id, order, is_public
+ ):
+ """Add (or update) user's entry in summary.
+
+ Args:
+ group_id (str)
+ user_id (str)
+ role_id (str): If not None then adds the role to the end of
+ the summary if it's not already there. [Optional]
+ order (int): If not None inserts the user at that position, e.g.
+ an order of 1 will put the user first. Otherwise, the user gets
+ added to the end.
+ """
+ user_in_group = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="group_users",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ retcol="user_id",
+ allow_none=True,
+ )
+ if not user_in_group:
+ raise SynapseError(400, "user not in group")
+
+ if role_id is None:
+ role_id = _DEFAULT_ROLE_ID
+ else:
+ role_exists = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="group_roles",
+ keyvalues={"group_id": group_id, "role_id": role_id},
+ retcol="group_id",
+ allow_none=True,
+ )
+ if not role_exists:
+ raise SynapseError(400, "Role doesn't exist")
+
+ # TODO: Check role is part of the summary already
+ role_exists = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="group_summary_roles",
+ keyvalues={"group_id": group_id, "role_id": role_id},
+ retcol="group_id",
+ allow_none=True,
+ )
+ if not role_exists:
+ # If not, add it with an order larger than all others
+ txn.execute(
+ """
+ INSERT INTO group_summary_roles
+ (group_id, role_id, role_order)
+ SELECT ?, ?, COALESCE(MAX(role_order), 0) + 1
+ FROM group_summary_roles
+ WHERE group_id = ? AND role_id = ?
+ """,
+ (group_id, role_id, group_id, role_id),
+ )
+
+ existing = self.db_pool.simple_select_one_txn(
+ txn,
+ table="group_summary_users",
+ keyvalues={"group_id": group_id, "user_id": user_id, "role_id": role_id},
+ retcols=("user_order", "is_public"),
+ allow_none=True,
+ )
+
+ if order is not None:
+ # Shuffle other users orders that come after the given order
+ sql = """
+ UPDATE group_summary_users SET user_order = user_order + 1
+ WHERE group_id = ? AND role_id = ? AND user_order >= ?
+ """
+ txn.execute(sql, (group_id, role_id, order))
+ elif not existing:
+ sql = """
+ SELECT COALESCE(MAX(user_order), 0) + 1 FROM group_summary_users
+ WHERE group_id = ? AND role_id = ?
+ """
+ txn.execute(sql, (group_id, role_id))
+ (order,) = txn.fetchone()
+
+ if existing:
+ to_update = {}
+ if order is not None:
+ to_update["user_order"] = order
+ if is_public is not None:
+ to_update["is_public"] = is_public
+ self.db_pool.simple_update_txn(
+ txn,
+ table="group_summary_users",
+ keyvalues={
+ "group_id": group_id,
+ "role_id": role_id,
+ "user_id": user_id,
+ },
+ values=to_update,
+ )
+ else:
+ if is_public is None:
+ is_public = True
+
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="group_summary_users",
+ values={
+ "group_id": group_id,
+ "role_id": role_id,
+ "user_id": user_id,
+ "user_order": order,
+ "is_public": is_public,
+ },
+ )
+
+ def remove_user_from_summary(self, group_id, user_id, role_id):
+ if role_id is None:
+ role_id = _DEFAULT_ROLE_ID
+
+ return self.db_pool.simple_delete(
+ table="group_summary_users",
+ keyvalues={"group_id": group_id, "role_id": role_id, "user_id": user_id},
+ desc="remove_user_from_summary",
+ )
+
+ def add_group_invite(self, group_id, user_id):
+ """Record that the group server has invited a user
+ """
+ return self.db_pool.simple_insert(
+ table="group_invites",
+ values={"group_id": group_id, "user_id": user_id},
+ desc="add_group_invite",
+ )
+
+ def add_user_to_group(
+ self,
+ group_id,
+ user_id,
+ is_admin=False,
+ is_public=True,
+ local_attestation=None,
+ remote_attestation=None,
+ ):
+ """Add a user to the group server.
+
+ Args:
+ group_id (str)
+ user_id (str)
+ is_admin (bool)
+ is_public (bool)
+ local_attestation (dict): The attestation the GS created to give
+ to the remote server. Optional if the user and group are on the
+ same server
+ remote_attestation (dict): The attestation given to GS by remote
+ server. Optional if the user and group are on the same server
+ """
+
+ def _add_user_to_group_txn(txn):
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="group_users",
+ values={
+ "group_id": group_id,
+ "user_id": user_id,
+ "is_admin": is_admin,
+ "is_public": is_public,
+ },
+ )
+
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="group_invites",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ )
+
+ if local_attestation:
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="group_attestations_renewals",
+ values={
+ "group_id": group_id,
+ "user_id": user_id,
+ "valid_until_ms": local_attestation["valid_until_ms"],
+ },
+ )
+ if remote_attestation:
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="group_attestations_remote",
+ values={
+ "group_id": group_id,
+ "user_id": user_id,
+ "valid_until_ms": remote_attestation["valid_until_ms"],
+ "attestation_json": json_encoder.encode(remote_attestation),
+ },
+ )
+
+ return self.db_pool.runInteraction("add_user_to_group", _add_user_to_group_txn)
+
+ def remove_user_from_group(self, group_id, user_id):
+ def _remove_user_from_group_txn(txn):
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="group_users",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="group_invites",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="group_attestations_renewals",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="group_attestations_remote",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="group_summary_users",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ )
+
+ return self.db_pool.runInteraction(
+ "remove_user_from_group", _remove_user_from_group_txn
+ )
+
+ def add_room_to_group(self, group_id, room_id, is_public):
+ return self.db_pool.simple_insert(
+ table="group_rooms",
+ values={"group_id": group_id, "room_id": room_id, "is_public": is_public},
+ desc="add_room_to_group",
+ )
+
+ def update_room_in_group_visibility(self, group_id, room_id, is_public):
+ return self.db_pool.simple_update(
+ table="group_rooms",
+ keyvalues={"group_id": group_id, "room_id": room_id},
+ updatevalues={"is_public": is_public},
+ desc="update_room_in_group_visibility",
+ )
+
+ def remove_room_from_group(self, group_id, room_id):
+ def _remove_room_from_group_txn(txn):
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="group_rooms",
+ keyvalues={"group_id": group_id, "room_id": room_id},
+ )
+
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="group_summary_rooms",
+ keyvalues={"group_id": group_id, "room_id": room_id},
+ )
+
+ return self.db_pool.runInteraction(
+ "remove_room_from_group", _remove_room_from_group_txn
+ )
+
+ def update_group_publicity(self, group_id, user_id, publicise):
+ """Update whether the user is publicising their membership of the group
+ """
+ return self.db_pool.simple_update_one(
+ table="local_group_membership",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ updatevalues={"is_publicised": publicise},
+ desc="update_group_publicity",
+ )
+
+ async def register_user_group_membership(
+ self,
+ group_id: str,
+ user_id: str,
+ membership: str,
+ is_admin: bool = False,
+ content: JsonDict = {},
+ local_attestation: Optional[dict] = None,
+ remote_attestation: Optional[dict] = None,
+ is_publicised: bool = False,
+ ) -> int:
+ """Registers that a local user is a member of a (local or remote) group.
+
+ Args:
+ group_id: The group the member is being added to.
+ user_id: The user ID to add to the group.
+ membership: The type of group membership.
+ is_admin: Whether the user should be added as a group admin.
+ content: Content of the membership, e.g. includes the inviter
+ if the user has been invited.
+ local_attestation: If remote group then store the fact that we
+ have given out an attestation, else None.
+ remote_attestation: If remote group then store the remote
+ attestation from the group, else None.
+ is_publicised: Whether this should be publicised.
+ """
+
+ def _register_user_group_membership_txn(txn, next_id):
+ # TODO: Upsert?
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="local_group_membership",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ )
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="local_group_membership",
+ values={
+ "group_id": group_id,
+ "user_id": user_id,
+ "is_admin": is_admin,
+ "membership": membership,
+ "is_publicised": is_publicised,
+ "content": json_encoder.encode(content),
+ },
+ )
+
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="local_group_updates",
+ values={
+ "stream_id": next_id,
+ "group_id": group_id,
+ "user_id": user_id,
+ "type": "membership",
+ "content": json_encoder.encode(
+ {"membership": membership, "content": content}
+ ),
+ },
+ )
+ self._group_updates_stream_cache.entity_has_changed(user_id, next_id)
+
+ # TODO: Insert profile to ensure it comes down stream if its a join.
+
+ if membership == "join":
+ if local_attestation:
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="group_attestations_renewals",
+ values={
+ "group_id": group_id,
+ "user_id": user_id,
+ "valid_until_ms": local_attestation["valid_until_ms"],
+ },
+ )
+ if remote_attestation:
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="group_attestations_remote",
+ values={
+ "group_id": group_id,
+ "user_id": user_id,
+ "valid_until_ms": remote_attestation["valid_until_ms"],
+ "attestation_json": json_encoder.encode(remote_attestation),
+ },
+ )
+ else:
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="group_attestations_renewals",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="group_attestations_remote",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ )
+
+ return next_id
+
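+ # Allocate the next stream ID for the group-updates stream; the write runs
+ # inside the allocation context so the ID is only published to readers once
+ # the transaction has completed.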
+ with self._group_updates_id_gen.get_next() as next_id:
+ res = await self.db_pool.runInteraction(
+ "register_user_group_membership",
+ _register_user_group_membership_txn,
+ next_id,
+ )
+ return res
+
+ async def create_group(
+ self, group_id, user_id, name, avatar_url, short_description, long_description
+ ) -> None:
+ await self.db_pool.simple_insert(
+ table="groups",
+ values={
+ "group_id": group_id,
+ "name": name,
+ "avatar_url": avatar_url,
+ "short_description": short_description,
+ "long_description": long_description,
+ "is_public": True,
+ },
+ desc="create_group",
+ )
+
+ async def update_group_profile(self, group_id, profile):
+ await self.db_pool.simple_update_one(
+ table="groups",
+ keyvalues={"group_id": group_id},
+ updatevalues=profile,
+ desc="update_group_profile",
+ )
+
+ def update_attestation_renewal(self, group_id, user_id, attestation):
+ """Update an attestation that we have renewed
+ """
+ return self.db_pool.simple_update_one(
+ table="group_attestations_renewals",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ updatevalues={"valid_until_ms": attestation["valid_until_ms"]},
+ desc="update_attestation_renewal",
+ )
+
+ def update_remote_attestion(self, group_id, user_id, attestation):
+ """Update an attestation that a remote has renewed
+ """
+ return self.db_pool.simple_update_one(
+ table="group_attestations_remote",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ updatevalues={
+ "valid_until_ms": attestation["valid_until_ms"],
+ "attestation_json": json_encoder.encode(attestation),
+ },
+ desc="update_remote_attestion",
+ )
+
+ def remove_attestation_renewal(self, group_id, user_id):
+ """Remove an attestation that we thought we should renew, but actually
+ shouldn't. Ideally this would never get called as we would never
+ incorrectly try to do attestations for local users on local groups.
+
+ Args:
+ group_id (str)
+ user_id (str)
+ """
+ return self.db_pool.simple_delete(
+ table="group_attestations_renewals",
+ keyvalues={"group_id": group_id, "user_id": user_id},
+ desc="remove_attestation_renewal",
+ )
+
+ def get_group_stream_token(self):
+ return self._group_updates_id_gen.get_current_token()
+
+ def delete_group(self, group_id):
+ """Deletes a group fully from the database.
+
+ Args:
+ group_id (str)
+
+ Returns:
+ Deferred
+ """
+
+ def _delete_group_txn(txn):
+ tables = [
+ "groups",
+ "group_users",
+ "group_invites",
+ "group_rooms",
+ "group_summary_rooms",
+ "group_summary_room_categories",
+ "group_room_categories",
+ "group_summary_users",
+ "group_summary_roles",
+ "group_roles",
+ "group_attestations_renewals",
+ "group_attestations_remote",
+ ]
+
+ for table in tables:
+ self.db_pool.simple_delete_txn(
+ txn, table=table, keyvalues={"group_id": group_id}
+ )
+
+ return self.db_pool.runInteraction("delete_group", _delete_group_txn)
diff --git a/synapse/storage/databases/main/keys.py b/synapse/storage/databases/main/keys.py
new file mode 100644
index 00000000..384e9c5e
--- /dev/null
+++ b/synapse/storage/databases/main/keys.py
@@ -0,0 +1,210 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2019 New Vector Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import itertools
+import logging
+
+from signedjson.key import decode_verify_key_bytes
+
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.keys import FetchKeyResult
+from synapse.util.caches.descriptors import cached, cachedList
+from synapse.util.iterutils import batch_iter
+
+logger = logging.getLogger(__name__)
+
+
+db_binary_type = memoryview
+
+
+class KeyStore(SQLBaseStore):
+ """Persistence for signature verification keys
+ """
+
+ @cached()
+ def _get_server_verify_key(self, server_name_and_key_id):
+ raise NotImplementedError()
+
+ @cachedList(
+ cached_method_name="_get_server_verify_key", list_name="server_name_and_key_ids"
+ )
+ def get_server_verify_keys(self, server_name_and_key_ids):
+ """
+ Args:
+ server_name_and_key_ids (iterable[Tuple[str, str]]):
+ iterable of (server_name, key-id) tuples to fetch keys for
+
+ Returns:
+ Deferred: resolves to dict[Tuple[str, str], FetchKeyResult|None]:
+ map from (server_name, key_id) -> FetchKeyResult, or None if the key is
+ unknown
+ """
+ keys = {}
+
+ def _get_keys(txn, batch):
+ """Processes a batch of keys to fetch, and adds the result to `keys`."""
+
+ # batch_iter always returns tuples so it's safe to do len(batch)
+ sql = (
+ "SELECT server_name, key_id, verify_key, ts_valid_until_ms "
+ "FROM server_signature_keys WHERE 1=0"
+ ) + " OR (server_name=? AND key_id=?)" * len(batch)
+
+ txn.execute(sql, tuple(itertools.chain.from_iterable(batch)))
+
+ for row in txn:
+ server_name, key_id, key_bytes, ts_valid_until_ms = row
+
+ if ts_valid_until_ms is None:
+ # Old keys may be stored with a ts_valid_until_ms of null,
+ # in which case we treat this as if it was set to `0`, i.e.
+ # it won't match key requests that define a minimum
+ # `ts_valid_until_ms`.
+ ts_valid_until_ms = 0
+
+ res = FetchKeyResult(
+ verify_key=decode_verify_key_bytes(key_id, bytes(key_bytes)),
+ valid_until_ts=ts_valid_until_ms,
+ )
+ keys[(server_name, key_id)] = res
+
+ def _txn(txn):
+ for batch in batch_iter(server_name_and_key_ids, 50):
+ _get_keys(txn, batch)
+ return keys
+
+ return self.db_pool.runInteraction("get_server_verify_keys", _txn)
+
+ def store_server_verify_keys(self, from_server, ts_added_ms, verify_keys):
+ """Stores NACL verification keys for remote servers.
+ Args:
+ from_server (str): Where the verification keys were looked up
+ ts_added_ms (int): The time to record that the key was added
+ verify_keys (iterable[tuple[str, str, FetchKeyResult]]):
+ keys to be stored. Each entry is a triplet of
+ (server_name, key_id, key).
+ """
+ key_values = []
+ value_values = []
+ invalidations = []
+ for server_name, key_id, fetch_result in verify_keys:
+ key_values.append((server_name, key_id))
+ value_values.append(
+ (
+ from_server,
+ ts_added_ms,
+ fetch_result.valid_until_ts,
+ db_binary_type(fetch_result.verify_key.encode()),
+ )
+ )
+ # invalidate takes a tuple corresponding to the params of
+ # _get_server_verify_key. _get_server_verify_key only takes one
+ # param, which is itself the 2-tuple (server_name, key_id).
+ invalidations.append((server_name, key_id))
+
+ def _invalidate(res):
+ f = self._get_server_verify_key.invalidate
+ for i in invalidations:
+ f((i,))
+ return res
+
+ return self.db_pool.runInteraction(
+ "store_server_verify_keys",
+ self.db_pool.simple_upsert_many_txn,
+ table="server_signature_keys",
+ key_names=("server_name", "key_id"),
+ key_values=key_values,
+ value_names=(
+ "from_server",
+ "ts_added_ms",
+ "ts_valid_until_ms",
+ "verify_key",
+ ),
+ value_values=value_values,
+ ).addCallback(_invalidate)
+
+ def store_server_keys_json(
+ self, server_name, key_id, from_server, ts_now_ms, ts_expires_ms, key_json_bytes
+ ):
+ """Stores the JSON bytes for a set of keys from a server
+ The JSON should be signed by the originating server, the intermediate
+ server, and by this server. Updates the value for the
+ (server_name, key_id, from_server) triplet if one already existed.
+ Args:
+ server_name (str): The name of the server.
+ key_id (str): The identifier of the key this JSON is for.
+ from_server (str): The server this JSON was fetched from.
+ ts_now_ms (int): The time now in milliseconds.
+ ts_expires_ms (int): The time when this JSON stops being valid.
+ key_json_bytes (bytes): The encoded JSON.
+ """
+ return self.db_pool.simple_upsert(
+ table="server_keys_json",
+ keyvalues={
+ "server_name": server_name,
+ "key_id": key_id,
+ "from_server": from_server,
+ },
+ values={
+ "server_name": server_name,
+ "key_id": key_id,
+ "from_server": from_server,
+ "ts_added_ms": ts_now_ms,
+ "ts_valid_until_ms": ts_expires_ms,
+ "key_json": db_binary_type(key_json_bytes),
+ },
+ desc="store_server_keys_json",
+ )
+
+ def get_server_keys_json(self, server_keys):
+ """Retrive the key json for a list of server_keys and key ids.
+ If no keys are found for a given server, key_id and source then
+ that server, key_id, and source triplet entry will be an empty list.
+ The JSON is returned as a byte array so that it can be efficiently
+ used in an HTTP response.
+ Args:
+ server_keys (list): List of (server_name, key_id, source) triplets.
+ Returns:
+ Deferred[dict[Tuple[str, str, str|None], list[dict]]]:
+ Dict mapping (server_name, key_id, source) triplets to lists of dicts
+ """
+
+ def _get_server_keys_json_txn(txn):
+ results = {}
+ for server_name, key_id, from_server in server_keys:
+ keyvalues = {"server_name": server_name}
+ if key_id is not None:
+ keyvalues["key_id"] = key_id
+ if from_server is not None:
+ keyvalues["from_server"] = from_server
+ rows = self.db_pool.simple_select_list_txn(
+ txn,
+ "server_keys_json",
+ keyvalues=keyvalues,
+ retcols=(
+ "key_id",
+ "from_server",
+ "ts_added_ms",
+ "ts_valid_until_ms",
+ "key_json",
+ ),
+ )
+ results[(server_name, key_id, from_server)] = rows
+ return results
+
+ return self.db_pool.runInteraction(
+ "get_server_keys_json", _get_server_keys_json_txn
+ )
diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py
new file mode 100644
index 00000000..80fc1cd0
--- /dev/null
+++ b/synapse/storage/databases/main/media_repository.py
@@ -0,0 +1,398 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.database import DatabasePool
+
+
+class MediaRepositoryBackgroundUpdateStore(SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(MediaRepositoryBackgroundUpdateStore, self).__init__(
+ database, db_conn, hs
+ )
+
+ self.db_pool.updates.register_background_index_update(
+ update_name="local_media_repository_url_idx",
+ index_name="local_media_repository_url_idx",
+ table="local_media_repository",
+ columns=["created_ts"],
+ where_clause="url_cache IS NOT NULL",
+ )
+
+
+class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
+ """Persistence for attachments and avatars"""
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(MediaRepositoryStore, self).__init__(database, db_conn, hs)
+
+ def get_local_media(self, media_id):
+ """Get the metadata for a local piece of media
+ Returns:
+ None if the media_id doesn't exist.
+ """
+ return self.db_pool.simple_select_one(
+ "local_media_repository",
+ {"media_id": media_id},
+ (
+ "media_type",
+ "media_length",
+ "upload_name",
+ "created_ts",
+ "quarantined_by",
+ "url_cache",
+ ),
+ allow_none=True,
+ desc="get_local_media",
+ )
+
+ def store_local_media(
+ self,
+ media_id,
+ media_type,
+ time_now_ms,
+ upload_name,
+ media_length,
+ user_id,
+ url_cache=None,
+ ):
+ return self.db_pool.simple_insert(
+ "local_media_repository",
+ {
+ "media_id": media_id,
+ "media_type": media_type,
+ "created_ts": time_now_ms,
+ "upload_name": upload_name,
+ "media_length": media_length,
+ "user_id": user_id.to_string(),
+ "url_cache": url_cache,
+ },
+ desc="store_local_media",
+ )
+
+ def mark_local_media_as_safe(self, media_id: str):
+ """Mark a local media as safe from quarantining."""
+ return self.db_pool.simple_update_one(
+ table="local_media_repository",
+ keyvalues={"media_id": media_id},
+ updatevalues={"safe_from_quarantine": True},
+ desc="mark_local_media_as_safe",
+ )
+
+ def get_url_cache(self, url, ts):
+ """Get the media_id and ts for a cached URL as of the given timestamp
+ Returns:
+ None if the URL isn't cached.
+ """
+
+ def get_url_cache_txn(txn):
+ # get the most recently cached result (relative to the given ts)
+ sql = (
+ "SELECT response_code, etag, expires_ts, og, media_id, download_ts"
+ " FROM local_media_repository_url_cache"
+ " WHERE url = ? AND download_ts <= ?"
+ " ORDER BY download_ts DESC LIMIT 1"
+ )
+ txn.execute(sql, (url, ts))
+ row = txn.fetchone()
+
+ if not row:
+ # ...or if we've requested a timestamp older than the oldest
+ # copy in the cache, return the oldest copy (if any)
+ sql = (
+ "SELECT response_code, etag, expires_ts, og, media_id, download_ts"
+ " FROM local_media_repository_url_cache"
+ " WHERE url = ? AND download_ts > ?"
+ " ORDER BY download_ts ASC LIMIT 1"
+ )
+ txn.execute(sql, (url, ts))
+ row = txn.fetchone()
+
+ if not row:
+ return None
+
+ return dict(
+ zip(
+ (
+ "response_code",
+ "etag",
+ "expires_ts",
+ "og",
+ "media_id",
+ "download_ts",
+ ),
+ row,
+ )
+ )
+
+ return self.db_pool.runInteraction("get_url_cache", get_url_cache_txn)
+
+ def store_url_cache(
+ self, url, response_code, etag, expires_ts, og, media_id, download_ts
+ ):
+ return self.db_pool.simple_insert(
+ "local_media_repository_url_cache",
+ {
+ "url": url,
+ "response_code": response_code,
+ "etag": etag,
+ "expires_ts": expires_ts,
+ "og": og,
+ "media_id": media_id,
+ "download_ts": download_ts,
+ },
+ desc="store_url_cache",
+ )
+
+ def get_local_media_thumbnails(self, media_id):
+ return self.db_pool.simple_select_list(
+ "local_media_repository_thumbnails",
+ {"media_id": media_id},
+ (
+ "thumbnail_width",
+ "thumbnail_height",
+ "thumbnail_method",
+ "thumbnail_type",
+ "thumbnail_length",
+ ),
+ desc="get_local_media_thumbnails",
+ )
+
+ def store_local_thumbnail(
+ self,
+ media_id,
+ thumbnail_width,
+ thumbnail_height,
+ thumbnail_type,
+ thumbnail_method,
+ thumbnail_length,
+ ):
+ return self.db_pool.simple_insert(
+ "local_media_repository_thumbnails",
+ {
+ "media_id": media_id,
+ "thumbnail_width": thumbnail_width,
+ "thumbnail_height": thumbnail_height,
+ "thumbnail_method": thumbnail_method,
+ "thumbnail_type": thumbnail_type,
+ "thumbnail_length": thumbnail_length,
+ },
+ desc="store_local_thumbnail",
+ )
+
+ def get_cached_remote_media(self, origin, media_id):
+ return self.db_pool.simple_select_one(
+ "remote_media_cache",
+ {"media_origin": origin, "media_id": media_id},
+ (
+ "media_type",
+ "media_length",
+ "upload_name",
+ "created_ts",
+ "filesystem_id",
+ "quarantined_by",
+ ),
+ allow_none=True,
+ desc="get_cached_remote_media",
+ )
+
+ def store_cached_remote_media(
+ self,
+ origin,
+ media_id,
+ media_type,
+ media_length,
+ time_now_ms,
+ upload_name,
+ filesystem_id,
+ ):
+ return self.db_pool.simple_insert(
+ "remote_media_cache",
+ {
+ "media_origin": origin,
+ "media_id": media_id,
+ "media_type": media_type,
+ "media_length": media_length,
+ "created_ts": time_now_ms,
+ "upload_name": upload_name,
+ "filesystem_id": filesystem_id,
+ "last_access_ts": time_now_ms,
+ },
+ desc="store_cached_remote_media",
+ )
+
+ def update_cached_last_access_time(self, local_media, remote_media, time_ms):
+ """Updates the last access time of the given media
+
+ Args:
+ local_media (iterable[str]): Set of media_ids
+ remote_media (iterable[(str, str)]): Set of (server_name, media_id)
+ time_ms: Current time in milliseconds
+ """
+
+ def update_cache_txn(txn):
+ sql = (
+ "UPDATE remote_media_cache SET last_access_ts = ?"
+ " WHERE media_origin = ? AND media_id = ?"
+ )
+
+ txn.executemany(
+ sql,
+ (
+ (time_ms, media_origin, media_id)
+ for media_origin, media_id in remote_media
+ ),
+ )
+
+ sql = (
+ "UPDATE local_media_repository SET last_access_ts = ?"
+ " WHERE media_id = ?"
+ )
+
+ txn.executemany(sql, ((time_ms, media_id) for media_id in local_media))
+
+ return self.db_pool.runInteraction(
+ "update_cached_last_access_time", update_cache_txn
+ )
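+
+ # A hypothetical call (the media ids and server name are illustrative):
+ #
+ # await store.update_cached_last_access_time(
+ # local_media={"abc123"},
+ # remote_media={("remote.example.com", "def456")},
+ # time_ms=clock.time_msec(),
+ # )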
+
+ def get_remote_media_thumbnails(self, origin, media_id):
+ return self.db_pool.simple_select_list(
+ "remote_media_cache_thumbnails",
+ {"media_origin": origin, "media_id": media_id},
+ (
+ "thumbnail_width",
+ "thumbnail_height",
+ "thumbnail_method",
+ "thumbnail_type",
+ "thumbnail_length",
+ "filesystem_id",
+ ),
+ desc="get_remote_media_thumbnails",
+ )
+
+ def store_remote_media_thumbnail(
+ self,
+ origin,
+ media_id,
+ filesystem_id,
+ thumbnail_width,
+ thumbnail_height,
+ thumbnail_type,
+ thumbnail_method,
+ thumbnail_length,
+ ):
+ return self.db_pool.simple_insert(
+ "remote_media_cache_thumbnails",
+ {
+ "media_origin": origin,
+ "media_id": media_id,
+ "thumbnail_width": thumbnail_width,
+ "thumbnail_height": thumbnail_height,
+ "thumbnail_method": thumbnail_method,
+ "thumbnail_type": thumbnail_type,
+ "thumbnail_length": thumbnail_length,
+ "filesystem_id": filesystem_id,
+ },
+ desc="store_remote_media_thumbnail",
+ )
+
+ def get_remote_media_before(self, before_ts):
+ sql = (
+ "SELECT media_origin, media_id, filesystem_id"
+ " FROM remote_media_cache"
+ " WHERE last_access_ts < ?"
+ )
+
+ return self.db_pool.execute(
+ "get_remote_media_before", self.db_pool.cursor_to_dict, sql, before_ts
+ )
+
+ def delete_remote_media(self, media_origin, media_id):
+ def delete_remote_media_txn(txn):
+ self.db_pool.simple_delete_txn(
+ txn,
+ "remote_media_cache",
+ keyvalues={"media_origin": media_origin, "media_id": media_id},
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ "remote_media_cache_thumbnails",
+ keyvalues={"media_origin": media_origin, "media_id": media_id},
+ )
+
+ return self.db_pool.runInteraction(
+ "delete_remote_media", delete_remote_media_txn
+ )
+
+ def get_expired_url_cache(self, now_ts):
+ sql = (
+ "SELECT media_id FROM local_media_repository_url_cache"
+ " WHERE expires_ts < ?"
+ " ORDER BY expires_ts ASC"
+ " LIMIT 500"
+ )
+
+ def _get_expired_url_cache_txn(txn):
+ txn.execute(sql, (now_ts,))
+ return [row[0] for row in txn]
+
+ return self.db_pool.runInteraction(
+ "get_expired_url_cache", _get_expired_url_cache_txn
+ )
+
+ async def delete_url_cache(self, media_ids):
+ if len(media_ids) == 0:
+ return
+
+ sql = "DELETE FROM local_media_repository_url_cache WHERE media_id = ?"
+
+ def _delete_url_cache_txn(txn):
+ txn.executemany(sql, [(media_id,) for media_id in media_ids])
+
+ return await self.db_pool.runInteraction(
+ "delete_url_cache", _delete_url_cache_txn
+ )
+
+ def get_url_cache_media_before(self, before_ts):
+ sql = (
+ "SELECT media_id FROM local_media_repository"
+ " WHERE created_ts < ? AND url_cache IS NOT NULL"
+ " ORDER BY created_ts ASC"
+ " LIMIT 500"
+ )
+
+ def _get_url_cache_media_before_txn(txn):
+ txn.execute(sql, (before_ts,))
+ return [row[0] for row in txn]
+
+ return self.db_pool.runInteraction(
+ "get_url_cache_media_before", _get_url_cache_media_before_txn
+ )
+
+ async def delete_url_cache_media(self, media_ids):
+ if len(media_ids) == 0:
+ return
+
+ def _delete_url_cache_media_txn(txn):
+ sql = "DELETE FROM local_media_repository WHERE media_id = ?"
+
+ txn.executemany(sql, [(media_id,) for media_id in media_ids])
+
+ sql = "DELETE FROM local_media_repository_thumbnails WHERE media_id = ?"
+
+ txn.executemany(sql, [(media_id,) for media_id in media_ids])
+
+ return await self.db_pool.runInteraction(
+ "delete_url_cache_media", _delete_url_cache_media_txn
+ )
diff --git a/synapse/storage/databases/main/metrics.py b/synapse/storage/databases/main/metrics.py
new file mode 100644
index 00000000..686052bd
--- /dev/null
+++ b/synapse/storage/databases/main/metrics.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import typing
+from collections import Counter
+
+from synapse.metrics import BucketCollector
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.database import DatabasePool
+from synapse.storage.databases.main.event_push_actions import (
+ EventPushActionsWorkerStore,
+)
+
+
+class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore):
+ """Functions to pull various metrics from the DB, for e.g. phone home
+ stats and prometheus metrics.
+ """
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super().__init__(database, db_conn, hs)
+
+ # Collect metrics on the number of forward extremities that exist.
+ # Counter mapping "number of forward extremities in a room" to the
+ # number of rooms with that many
+ self._current_forward_extremities_amount = (
+ Counter()
+ ) # type: typing.Counter[int]
+
+ BucketCollector(
+ "synapse_forward_extremities",
+ lambda: self._current_forward_extremities_amount,
+ buckets=[1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"],
+ )
+
+ # Read the extremities every 60 minutes
+ def read_forward_extremities():
+ # run as a background process to make sure that the database transactions
+ # have a logcontext to report to
+ return run_as_background_process(
+ "read_forward_extremities", self._read_forward_extremities
+ )
+
+ hs.get_clock().looping_call(read_forward_extremities, 60 * 60 * 1000)
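+
+ # Sketch of the metric above: if three rooms currently have 1, 2 and 7
+ # forward extremities, the fetch below produces
+ # Counter({1: 1, 2: 1, 7: 1}), which the BucketCollector exports
+ # against the configured buckets.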
+
+ async def _read_forward_extremities(self):
+ def fetch(txn):
+ txn.execute(
+ """
+ select count(*) c from event_forward_extremities
+ group by room_id
+ """
+ )
+ return txn.fetchall()
+
+ res = await self.db_pool.runInteraction("read_forward_extremities", fetch)
+ self._current_forward_extremities_amount = Counter([x[0] for x in res])
+
+ async def count_daily_messages(self):
+ """
+ Returns an estimate of the number of messages sent in the last day.
+
+ The count covers events newer than `stream_ordering_day_ago`, so it
+ will be inaccurate if significantly less or more than one day has
+ passed since that mark was last updated.
+ """
+
+ def _count_messages(txn):
+ sql = """
+ SELECT COALESCE(COUNT(*), 0) FROM events
+ WHERE type = 'm.room.message'
+ AND stream_ordering > ?
+ """
+ txn.execute(sql, (self.stream_ordering_day_ago,))
+ (count,) = txn.fetchone()
+ return count
+
+ return await self.db_pool.runInteraction("count_messages", _count_messages)
+
+ async def count_daily_sent_messages(self):
+ def _count_messages(txn):
+ # This is good enough: if you have silly characters in your own
+ # hostname then that's your own fault.
+ like_clause = "%:" + self.hs.hostname
+
+ sql = """
+ SELECT COALESCE(COUNT(*), 0) FROM events
+ WHERE type = 'm.room.message'
+ AND sender LIKE ?
+ AND stream_ordering > ?
+ """
+
+ txn.execute(sql, (like_clause, self.stream_ordering_day_ago))
+ (count,) = txn.fetchone()
+ return count
+
+ return await self.db_pool.runInteraction(
+ "count_daily_sent_messages", _count_messages
+ )
+
+ async def count_daily_active_rooms(self):
+ def _count(txn):
+ sql = """
+ SELECT COALESCE(COUNT(DISTINCT room_id), 0) FROM events
+ WHERE type = 'm.room.message'
+ AND stream_ordering > ?
+ """
+ txn.execute(sql, (self.stream_ordering_day_ago,))
+ (count,) = txn.fetchone()
+ return count
+
+ return await self.db_pool.runInteraction("count_daily_active_rooms", _count)
diff --git a/synapse/storage/databases/main/monthly_active_users.py b/synapse/storage/databases/main/monthly_active_users.py
new file mode 100644
index 00000000..e71cdd2c
--- /dev/null
+++ b/synapse/storage/databases/main/monthly_active_users.py
@@ -0,0 +1,354 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import List
+
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.database import DatabasePool, make_in_list_sql_clause
+from synapse.util.caches.descriptors import cached
+
+logger = logging.getLogger(__name__)
+
+# Number of msec of granularity to store the monthly_active_user timestamp
+# This means it is not necessary to update the table on every request
+LAST_SEEN_GRANULARITY = 60 * 60 * 1000
+
+
+class MonthlyActiveUsersWorkerStore(SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(MonthlyActiveUsersWorkerStore, self).__init__(database, db_conn, hs)
+ self._clock = hs.get_clock()
+ self.hs = hs
+
+ @cached(num_args=0)
+ def get_monthly_active_count(self):
+ """Generates current count of monthly active users
+
+ Returns:
+ Deferred[int]: Number of current monthly active users
+ """
+
+ def _count_users(txn):
+ sql = "SELECT COALESCE(count(*), 0) FROM monthly_active_users"
+ txn.execute(sql)
+ (count,) = txn.fetchone()
+ return count
+
+ return self.db_pool.runInteraction("count_users", _count_users)
+
+ @cached(num_args=0)
+ def get_monthly_active_count_by_service(self):
+ """Generates current count of monthly active users broken down by service.
+ A service is typically an appservice but also includes native matrix users.
+ Since the `monthly_active_users` table is populated from the `user_ips`
+ table, `config.track_appservice_user_ips` must be set to `true` for this
+ method to return anything other than native matrix users.
+
+ Returns:
+ Deferred[dict]: dict that includes a mapping between app_service_id
+ and the number of occurrences.
+
+ """
+
+ def _count_users_by_service(txn):
+ sql = """
+ SELECT COALESCE(appservice_id, 'native'), COALESCE(count(*), 0)
+ FROM monthly_active_users
+ LEFT JOIN users ON monthly_active_users.user_id=users.name
+ GROUP BY appservice_id;
+ """
+
+ txn.execute(sql)
+ result = txn.fetchall()
+ return dict(result)
+
+ return self.db_pool.runInteraction(
+ "count_users_by_service", _count_users_by_service
+ )
+
+ async def get_registered_reserved_users(self) -> List[str]:
+ """Of the reserved threepids defined in config, retrieve those that are associated
+ with registered users
+
+ Returns:
+ User IDs of actual users that are reserved
+ """
+ users = []
+
+ for tp in self.hs.config.mau_limits_reserved_threepids[
+ : self.hs.config.max_mau_value
+ ]:
+ user_id = await self.hs.get_datastore().get_user_id_by_threepid(
+ tp["medium"], tp["address"]
+ )
+ if user_id:
+ users.append(user_id)
+
+ return users
+
+ @cached(num_args=1)
+ def user_last_seen_monthly_active(self, user_id):
+ """
+ Checks if a given user is part of the monthly active user group
+ Arguments:
+ user_id (str): user to check
+ Returns:
+ Deferred[int]: timestamp the user was last seen, or None if never seen
+
+ """
+
+ return self.db_pool.simple_select_one_onecol(
+ table="monthly_active_users",
+ keyvalues={"user_id": user_id},
+ retcol="timestamp",
+ allow_none=True,
+ desc="user_last_seen_monthly_active",
+ )
+
+
+class MonthlyActiveUsersStore(MonthlyActiveUsersWorkerStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(MonthlyActiveUsersStore, self).__init__(database, db_conn, hs)
+
+ self._limit_usage_by_mau = hs.config.limit_usage_by_mau
+ self._mau_stats_only = hs.config.mau_stats_only
+ self._max_mau_value = hs.config.max_mau_value
+
+ # Do not add more reserved users than the total allowable number
+ self.db_pool.new_transaction(
+ db_conn,
+ "initialise_mau_threepids",
+ [],
+ [],
+ self._initialise_reserved_users,
+ hs.config.mau_limits_reserved_threepids[: self._max_mau_value],
+ )
+
+ def _initialise_reserved_users(self, txn, threepids):
+ """Ensures that reserved threepids are accounted for in the MAU table, should
+ be called on start up.
+
+ Args:
+ txn (cursor):
+ threepids (list[dict]): List of threepid dicts to reserve
+ """
+
+ # XXX what is this function trying to achieve? It upserts into
+ # monthly_active_users for each *registered* reserved mau user, but why?
+ #
+ # - shouldn't there already be an entry for each reserved user (at least
+ # if they have been active recently)?
+ #
+ # - if it's important that the timestamp is kept up to date, why do we only
+ # run this at startup?
+
+ for tp in threepids:
+ user_id = self.get_user_id_by_threepid_txn(txn, tp["medium"], tp["address"])
+
+ if user_id:
+ is_support = self.is_support_user_txn(txn, user_id)
+ if not is_support:
+ # We do this manually here to avoid hitting #6791
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="monthly_active_users",
+ keyvalues={"user_id": user_id},
+ values={"timestamp": int(self._clock.time_msec())},
+ )
+ else:
+ logger.warning("mau limit reserved threepid %s not found in db", tp)
+
+ async def reap_monthly_active_users(self):
+ """Cleans out monthly active user table to ensure that no stale
+ entries exist.
+ """
+
+ def _reap_users(txn, reserved_users):
+ """
+ Args:
+ reserved_users (tuple): reserved users to preserve
+ """
+
+ thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
+
+ in_clause, in_clause_args = make_in_list_sql_clause(
+ self.database_engine, "user_id", reserved_users
+ )
+
+ txn.execute(
+ "DELETE FROM monthly_active_users WHERE timestamp < ? AND NOT %s"
+ % (in_clause,),
+ [thirty_days_ago] + in_clause_args,
+ )
+
+ if self._limit_usage_by_mau:
+ # If MAU user count still exceeds the MAU threshold, then delete on
+ # a least recently active basis.
+ # Note it is not possible to write this query using OFFSET due to
+ # incompatibilities in how sqlite and postgres support the feature.
+ # Sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present,
+ # while Postgres does not require 'LIMIT', but also does not support
+ # negative LIMIT values. So there is no way to write it that both can
+ # support
+
+ # Limit must be >= 0 for postgres
+ num_of_non_reserved_users_to_remove = max(
+ self._max_mau_value - len(reserved_users), 0
+ )
+
+ # It is important to filter reserved users twice to guard
+ # against the case where the reserved user is present in the
+ # SELECT, meaning that a legitimate mau is deleted.
+ sql = """
+ DELETE FROM monthly_active_users
+ WHERE user_id NOT IN (
+ SELECT user_id FROM monthly_active_users
+ WHERE NOT %s
+ ORDER BY timestamp DESC
+ LIMIT ?
+ )
+ AND NOT %s
+ """ % (
+ in_clause,
+ in_clause,
+ )
+
+ query_args = (
+ in_clause_args
+ + [num_of_non_reserved_users_to_remove]
+ + in_clause_args
+ )
+ txn.execute(sql, query_args)
+
+ # It seems poor to invalidate the whole cache. Postgres supports
+ # RETURNING, which would allow us to invalidate only the specific
+ # users, but sqlite has no equivalent, and a separate SELECT
+ # followed by a DELETE is racy without locking.
+ # We have resolved to invalidate the whole cache for now, and to
+ # revisit if and when the perf impact becomes significant.
+ self._invalidate_all_cache_and_stream(
+ txn, self.user_last_seen_monthly_active
+ )
+ self._invalidate_cache_and_stream(txn, self.get_monthly_active_count, ())
+
+ reserved_users = await self.get_registered_reserved_users()
+ await self.db_pool.runInteraction(
+ "reap_monthly_active_users", _reap_users, reserved_users
+ )
+
+ async def upsert_monthly_active_user(self, user_id: str) -> None:
+ """Updates or inserts the user into the monthly active user table, which
+ is used to track the current MAU usage of the server
+
+ Args:
+ user_id: user to add/update
+ """
+ # Support users are never included in MAU stats. Note we can't easily
+ # call this check from upsert_monthly_active_user_txn, because that
+ # would need a _txn form of is_support_user, which is complicated by
+ # the desire to cache the result. We therefore check it here, and
+ # accept that upsert_monthly_active_user_txn may be called directly
+ # from _initialise_reserved_users, reasoning that it would be very
+ # strange to include a support user in that context.
+
+ is_support = await self.is_support_user(user_id)
+ if is_support:
+ return
+
+ await self.db_pool.runInteraction(
+ "upsert_monthly_active_user", self.upsert_monthly_active_user_txn, user_id
+ )
+
+ def upsert_monthly_active_user_txn(self, txn, user_id):
+ """Updates or inserts monthly active user member
+
+ We consciously do not call is_support_txn from this method because it
+ is not possible to cache the response. is_support_txn will be false in
+ almost all cases, so it seems reasonable to call it only for
+ upsert_monthly_active_user and to call is_support_txn manually
+ for cases where upsert_monthly_active_user_txn is called directly,
+ such as _initialise_reserved_users.
+
+ In short, don't call this method with support users. (Support users
+ should not appear in the MAU stats).
+
+ Args:
+ txn (cursor):
+ user_id (str): user to add/update
+
+ Returns:
+ bool: True if a new entry was created, False if an
+ existing one was updated.
+ """
+
+ # We consciously lock the table on the basis that it ought never to be
+ # a big table, and alternative approaches (batching multiple upserts
+ # into a single txn) introduced a lot of extra complexity.
+ # See https://github.com/matrix-org/synapse/issues/3854 for more
+ is_insert = self.db_pool.simple_upsert_txn(
+ txn,
+ table="monthly_active_users",
+ keyvalues={"user_id": user_id},
+ values={"timestamp": int(self._clock.time_msec())},
+ )
+
+ self._invalidate_cache_and_stream(txn, self.get_monthly_active_count, ())
+ self._invalidate_cache_and_stream(
+ txn, self.get_monthly_active_count_by_service, ()
+ )
+ self._invalidate_cache_and_stream(
+ txn, self.user_last_seen_monthly_active, (user_id,)
+ )
+
+ return is_insert
+
+ async def populate_monthly_active_users(self, user_id):
+ """Checks on the state of monthly active user limits and optionally
+ adds the user to the monthly active tables
+
+ Args:
+ user_id (str): the user_id to query
+ """
+ if self._limit_usage_by_mau or self._mau_stats_only:
+ # Trial users and guests should not be included as part of MAU group
+ is_guest = await self.is_guest(user_id)
+ if is_guest:
+ return
+ is_trial = await self.is_trial_user(user_id)
+ if is_trial:
+ return
+
+ last_seen_timestamp = await self.user_last_seen_monthly_active(user_id)
+ now = self.hs.get_clock().time_msec()
+
+ # We want to reduce the total number of db writes, and are happy
+ # to trade timestamp accuracy in order to lighten load. This means
+ # we always insert new users (where the MAU threshold has not been
+ # reached), but only update if we have not seen the user for
+ # LAST_SEEN_GRANULARITY ms.
+ if last_seen_timestamp is None:
+ # In the case where mau_stats_only is True and limit_usage_by_mau is
+ # False, there is no point in checking get_monthly_active_count - it
+ # adds no value and will break the logic if max_mau_value is exceeded.
+ if not self._limit_usage_by_mau:
+ await self.upsert_monthly_active_user(user_id)
+ else:
+ count = await self.get_monthly_active_count()
+ if count < self._max_mau_value:
+ await self.upsert_monthly_active_user(user_id)
+ elif now - last_seen_timestamp > LAST_SEEN_GRANULARITY:
+ await self.upsert_monthly_active_user(user_id)
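+
+ # Worked example of the write-reduction policy above, assuming the
+ # default LAST_SEEN_GRANULARITY of 3,600,000 ms (one hour):
+ #
+ # last_seen_timestamp is None -> insert (subject to the MAU cap)
+ # now - last_seen_timestamp = 900,000 -> skip (15 minutes < 1 hour)
+ # now - last_seen_timestamp = 7,200,000 -> update (2 hours > 1 hour)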
diff --git a/synapse/storage/databases/main/openid.py b/synapse/storage/databases/main/openid.py
new file mode 100644
index 00000000..dcd1ff91
--- /dev/null
+++ b/synapse/storage/databases/main/openid.py
@@ -0,0 +1,33 @@
+from synapse.storage._base import SQLBaseStore
+
+
+class OpenIdStore(SQLBaseStore):
+ def insert_open_id_token(self, token, ts_valid_until_ms, user_id):
+ return self.db_pool.simple_insert(
+ table="open_id_tokens",
+ values={
+ "token": token,
+ "ts_valid_until_ms": ts_valid_until_ms,
+ "user_id": user_id,
+ },
+ desc="insert_open_id_token",
+ )
+
+ def get_user_id_for_open_id_token(self, token, ts_now_ms):
+ def get_user_id_for_token_txn(txn):
+ sql = (
+ "SELECT user_id FROM open_id_tokens"
+ " WHERE token = ? AND ? <= ts_valid_until_ms"
+ )
+
+ txn.execute(sql, (token, ts_now_ms))
+
+ rows = txn.fetchall()
+ if not rows:
+ return None
+ else:
+ return rows[0][0]
+
+ return self.db_pool.runInteraction(
+ "get_user_id_for_token", get_user_id_for_token_txn
+ )
diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py
new file mode 100644
index 00000000..59ba1282
--- /dev/null
+++ b/synapse/storage/databases/main/presence.py
@@ -0,0 +1,183 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Tuple
+
+from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause
+from synapse.storage.presence import UserPresenceState
+from synapse.util.caches.descriptors import cached, cachedList
+from synapse.util.iterutils import batch_iter
+
+
+class PresenceStore(SQLBaseStore):
+ async def update_presence(self, presence_states):
+ stream_ordering_manager = self._presence_id_gen.get_next_mult(
+ len(presence_states)
+ )
+
+ with stream_ordering_manager as stream_orderings:
+ await self.db_pool.runInteraction(
+ "update_presence",
+ self._update_presence_txn,
+ stream_orderings,
+ presence_states,
+ )
+
+ return stream_orderings[-1], self._presence_id_gen.get_current_token()
+
+ def _update_presence_txn(self, txn, stream_orderings, presence_states):
+ for stream_id, state in zip(stream_orderings, presence_states):
+ txn.call_after(
+ self.presence_stream_cache.entity_has_changed, state.user_id, stream_id
+ )
+ txn.call_after(self._get_presence_for_user.invalidate, (state.user_id,))
+
+ # Actually insert new rows
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="presence_stream",
+ values=[
+ {
+ "stream_id": stream_id,
+ "user_id": state.user_id,
+ "state": state.state,
+ "last_active_ts": state.last_active_ts,
+ "last_federation_update_ts": state.last_federation_update_ts,
+ "last_user_sync_ts": state.last_user_sync_ts,
+ "status_msg": state.status_msg,
+ "currently_active": state.currently_active,
+ }
+ for stream_id, state in zip(stream_orderings, presence_states)
+ ],
+ )
+
+ # Delete old rows to stop database from getting really big
+ sql = "DELETE FROM presence_stream WHERE stream_id < ? AND "
+
+ for states in batch_iter(presence_states, 50):
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "user_id", [s.user_id for s in states]
+ )
+ txn.execute(sql + clause, [stream_id] + list(args))
+
+ async def get_all_presence_updates(
+ self, instance_name: str, last_id: int, current_id: int, limit: int
+ ) -> Tuple[List[Tuple[int, list]], int, bool]:
+ """Get updates for presence replication stream.
+
+ Args:
+ instance_name: The writer we want to fetch updates from. Unused
+ here since there is only ever one writer.
+ last_id: The token to fetch updates from. Exclusive.
+ current_id: The token to fetch updates up to. Inclusive.
+ limit: The requested limit for the number of rows to return. The
+ function may return more or fewer rows.
+
+ Returns:
+ A tuple consisting of: the updates, a token to use to fetch
+ subsequent updates, and whether we returned fewer rows than exists
+ between the requested tokens due to the limit.
+
+ The token returned can be used in a subsequent call to this
+ function to get further updates.
+
+ The updates are a list of 2-tuples of stream ID and the row data
+ """
+
+ if last_id == current_id:
+ return [], current_id, False
+
+ def get_all_presence_updates_txn(txn):
+ sql = """
+ SELECT stream_id, user_id, state, last_active_ts,
+ last_federation_update_ts, last_user_sync_ts,
+ status_msg,
+ currently_active
+ FROM presence_stream
+ WHERE ? < stream_id AND stream_id <= ?
+ ORDER BY stream_id ASC
+ LIMIT ?
+ """
+ txn.execute(sql, (last_id, current_id, limit))
+ updates = [(row[0], row[1:]) for row in txn]
+
+ upper_bound = current_id
+ limited = False
+ if len(updates) >= limit:
+ upper_bound = updates[-1][0]
+ limited = True
+
+ return updates, upper_bound, limited
+
+ return await self.db_pool.runInteraction(
+ "get_all_presence_updates", get_all_presence_updates_txn
+ )
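+
+ # A hypothetical consumer loop for this stream (variable names are
+ # illustrative; only the return contract documented above is assumed):
+ #
+ # token = last_processed_token
+ # while True:
+ # updates, token, limited = await store.get_all_presence_updates(
+ # "master", token, store.get_current_presence_token(), 100
+ # )
+ # handle(updates)
+ # if not limited:
+ # break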
+
+ @cached()
+ def _get_presence_for_user(self, user_id):
+ raise NotImplementedError()
+
+ @cachedList(
+ cached_method_name="_get_presence_for_user",
+ list_name="user_ids",
+ num_args=1,
+ inlineCallbacks=True,
+ )
+ def get_presence_for_users(self, user_ids):
+ rows = yield self.db_pool.simple_select_many_batch(
+ table="presence_stream",
+ column="user_id",
+ iterable=user_ids,
+ keyvalues={},
+ retcols=(
+ "user_id",
+ "state",
+ "last_active_ts",
+ "last_federation_update_ts",
+ "last_user_sync_ts",
+ "status_msg",
+ "currently_active",
+ ),
+ desc="get_presence_for_users",
+ )
+
+ for row in rows:
+ row["currently_active"] = bool(row["currently_active"])
+
+ return {row["user_id"]: UserPresenceState(**row) for row in rows}
+
+ def get_current_presence_token(self):
+ return self._presence_id_gen.get_current_token()
+
+ def allow_presence_visible(self, observed_localpart, observer_userid):
+ return self.db_pool.simple_insert(
+ table="presence_allow_inbound",
+ values={
+ "observed_user_id": observed_localpart,
+ "observer_user_id": observer_userid,
+ },
+ desc="allow_presence_visible",
+ or_ignore=True,
+ )
+
+ def disallow_presence_visible(self, observed_localpart, observer_userid):
+ return self.db_pool.simple_delete_one(
+ table="presence_allow_inbound",
+ keyvalues={
+ "observed_user_id": observed_localpart,
+ "observer_user_id": observer_userid,
+ },
+ desc="disallow_presence_visible",
+ )
diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py
new file mode 100644
index 00000000..b8261357
--- /dev/null
+++ b/synapse/storage/databases/main/profile.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.api.errors import StoreError
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.databases.main.roommember import ProfileInfo
+
+
+class ProfileWorkerStore(SQLBaseStore):
+ async def get_profileinfo(self, user_localpart):
+ try:
+ profile = await self.db_pool.simple_select_one(
+ table="profiles",
+ keyvalues={"user_id": user_localpart},
+ retcols=("displayname", "avatar_url"),
+ desc="get_profileinfo",
+ )
+ except StoreError as e:
+ if e.code == 404:
+ # no match
+ return ProfileInfo(None, None)
+ else:
+ raise
+
+ return ProfileInfo(
+ avatar_url=profile["avatar_url"], display_name=profile["displayname"]
+ )
+
+ def get_profile_displayname(self, user_localpart):
+ return self.db_pool.simple_select_one_onecol(
+ table="profiles",
+ keyvalues={"user_id": user_localpart},
+ retcol="displayname",
+ desc="get_profile_displayname",
+ )
+
+ def get_profile_avatar_url(self, user_localpart):
+ return self.db_pool.simple_select_one_onecol(
+ table="profiles",
+ keyvalues={"user_id": user_localpart},
+ retcol="avatar_url",
+ desc="get_profile_avatar_url",
+ )
+
+ def get_from_remote_profile_cache(self, user_id):
+ return self.db_pool.simple_select_one(
+ table="remote_profile_cache",
+ keyvalues={"user_id": user_id},
+ retcols=("displayname", "avatar_url"),
+ allow_none=True,
+ desc="get_from_remote_profile_cache",
+ )
+
+ def create_profile(self, user_localpart):
+ return self.db_pool.simple_insert(
+ table="profiles", values={"user_id": user_localpart}, desc="create_profile"
+ )
+
+ def set_profile_displayname(self, user_localpart, new_displayname):
+ return self.db_pool.simple_update_one(
+ table="profiles",
+ keyvalues={"user_id": user_localpart},
+ updatevalues={"displayname": new_displayname},
+ desc="set_profile_displayname",
+ )
+
+ def set_profile_avatar_url(self, user_localpart, new_avatar_url):
+ return self.db_pool.simple_update_one(
+ table="profiles",
+ keyvalues={"user_id": user_localpart},
+ updatevalues={"avatar_url": new_avatar_url},
+ desc="set_profile_avatar_url",
+ )
+
+
+class ProfileStore(ProfileWorkerStore):
+ def add_remote_profile_cache(self, user_id, displayname, avatar_url):
+ """Ensure we are caching the remote user's profiles.
+
+ This should only be called when `is_subscribed_remote_profile_for_user`
+ would return true for the user.
+ """
+ return self.db_pool.simple_upsert(
+ table="remote_profile_cache",
+ keyvalues={"user_id": user_id},
+ values={
+ "displayname": displayname,
+ "avatar_url": avatar_url,
+ "last_check": self._clock.time_msec(),
+ },
+ desc="add_remote_profile_cache",
+ )
+
+ def update_remote_profile_cache(self, user_id, displayname, avatar_url):
+ return self.db_pool.simple_update(
+ table="remote_profile_cache",
+ keyvalues={"user_id": user_id},
+ updatevalues={
+ "displayname": displayname,
+ "avatar_url": avatar_url,
+ "last_check": self._clock.time_msec(),
+ },
+ desc="update_remote_profile_cache",
+ )
+
+ async def maybe_delete_remote_profile_cache(self, user_id):
+ """Check if we still care about the remote user's profile, and if we
+ don't then remove their profile from the cache
+ """
+ subscribed = await self.is_subscribed_remote_profile_for_user(user_id)
+ if not subscribed:
+ await self.db_pool.simple_delete(
+ table="remote_profile_cache",
+ keyvalues={"user_id": user_id},
+ desc="delete_remote_profile_cache",
+ )
+
+ def get_remote_profile_cache_entries_that_expire(self, last_checked):
+ """Get all users who haven't been checked since `last_checked`
+ """
+
+ def _get_remote_profile_cache_entries_that_expire_txn(txn):
+ sql = """
+ SELECT user_id, displayname, avatar_url
+ FROM remote_profile_cache
+ WHERE last_check < ?
+ """
+
+ txn.execute(sql, (last_checked,))
+
+ return self.db_pool.cursor_to_dict(txn)
+
+ return self.db_pool.runInteraction(
+ "get_remote_profile_cache_entries_that_expire",
+ _get_remote_profile_cache_entries_that_expire_txn,
+ )
+
+ async def is_subscribed_remote_profile_for_user(self, user_id):
+ """Check whether we are interested in a remote user's profile.
+ """
+ res = await self.db_pool.simple_select_one_onecol(
+ table="group_users",
+ keyvalues={"user_id": user_id},
+ retcol="user_id",
+ allow_none=True,
+ desc="should_update_remote_profile_cache_for_user",
+ )
+
+ if res:
+ return True
+
+ res = await self.db_pool.simple_select_one_onecol(
+ table="group_invites",
+ keyvalues={"user_id": user_id},
+ retcol="user_id",
+ allow_none=True,
+ desc="should_update_remote_profile_cache_for_user",
+ )
+
+ if res:
+ return True
+
+ return False
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
new file mode 100644
index 00000000..3526b6fd
--- /dev/null
+++ b/synapse/storage/databases/main/purge_events.py
@@ -0,0 +1,400 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Any, Tuple
+
+from synapse.api.errors import SynapseError
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.databases.main.state import StateGroupWorkerStore
+from synapse.types import RoomStreamToken
+
+logger = logging.getLogger(__name__)
+
+
+class PurgeEventsStore(StateGroupWorkerStore, SQLBaseStore):
+ def purge_history(self, room_id, token, delete_local_events):
+ """Deletes room history before a certain point
+
+ Args:
+ room_id (str):
+
+ token (str): A topological token to delete events before
+
+ delete_local_events (bool):
+ if True, we will delete local events as well as remote ones
+ (instead of just marking them as outliers and deleting their
+ state groups).
+
+ Returns:
+ Deferred[set[int]]: The set of state groups that are referenced by
+ deleted events.
+ """
+
+ return self.db_pool.runInteraction(
+ "purge_history",
+ self._purge_history_txn,
+ room_id,
+ token,
+ delete_local_events,
+ )
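+
+ # A hypothetical invocation (the room id and token are illustrative):
+ #
+ # state_groups = await store.purge_history(
+ # "!room:example.com", "t123-456", delete_local_events=False
+ # )
+ # # state_groups now holds the groups referenced by the deleted
+ # # events; the caller is expected to clean those up separately.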
+
+ def _purge_history_txn(self, txn, room_id, token_str, delete_local_events):
+ token = RoomStreamToken.parse(token_str)
+
+ # Tables that should be pruned:
+ # event_auth
+ # event_backward_extremities
+ # event_edges
+ # event_forward_extremities
+ # event_json
+ # event_push_actions
+ # event_reference_hashes
+ # event_relations
+ # event_search
+ # event_to_state_groups
+ # events
+ # rejections
+ # room_depth
+ # state_groups
+ # state_groups_state
+
+ # we will build a temporary table listing the events so that we don't
+ # have to keep shovelling the list back and forth across the
+ # connection. Annoyingly the python sqlite driver commits the
+ # transaction on CREATE, so let's do this first.
+ #
+ # furthermore, we might already have the table from a previous (failed)
+ # purge attempt, so let's drop the table first.
+
+ txn.execute("DROP TABLE IF EXISTS events_to_purge")
+
+ txn.execute(
+ "CREATE TEMPORARY TABLE events_to_purge ("
+ " event_id TEXT NOT NULL,"
+ " should_delete BOOLEAN NOT NULL"
+ ")"
+ )
+
+ # First ensure that we're not about to delete all the forward extremities
+ txn.execute(
+ "SELECT e.event_id, e.depth FROM events as e "
+ "INNER JOIN event_forward_extremities as f "
+ "ON e.event_id = f.event_id "
+ "AND e.room_id = f.room_id "
+ "WHERE f.room_id = ?",
+ (room_id,),
+ )
+ rows = txn.fetchall()
+ max_depth = max(row[1] for row in rows)
+
+ if max_depth < token.topological:
+ # We need to ensure we don't delete all the events from the database
+ # otherwise we wouldn't be able to send any events (due to not
+ # having any backwards extremities)
+ raise SynapseError(
+ 400, "topological_ordering is greater than forward extremities"
+ )
+
+ logger.info("[purge] looking for events to delete")
+
+ should_delete_expr = "state_key IS NULL"
+ should_delete_params = () # type: Tuple[Any, ...]
+ if not delete_local_events:
+ should_delete_expr += " AND event_id NOT LIKE ?"
+
+ # We include the parameter twice since we use the expression twice
+ should_delete_params += ("%:" + self.hs.hostname, "%:" + self.hs.hostname)
+
+ should_delete_params += (room_id, token.topological)
+
+ # Note that we insert events that are outliers and aren't going to be
+ # deleted, as nothing will happen to them.
+ txn.execute(
+ "INSERT INTO events_to_purge"
+ " SELECT event_id, %s"
+ " FROM events AS e LEFT JOIN state_events USING (event_id)"
+ " WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?"
+ % (should_delete_expr, should_delete_expr),
+ should_delete_params,
+ )
+
+ # We create the indices *after* insertion as that's a lot faster.
+
+ # create an index on should_delete because later we'll be looking for
+ # the should_delete / shouldn't_delete subsets
+ txn.execute(
+ "CREATE INDEX events_to_purge_should_delete"
+ " ON events_to_purge(should_delete)"
+ )
+
+ # We do joins against events_to_purge for e.g. calculating state
+ # groups to purge, etc., so let's make an index.
+ txn.execute("CREATE INDEX events_to_purge_id ON events_to_purge(event_id)")
+
+ txn.execute("SELECT event_id, should_delete FROM events_to_purge")
+ event_rows = txn.fetchall()
+ logger.info(
+ "[purge] found %i events before cutoff, of which %i can be deleted",
+ len(event_rows),
+ sum(1 for e in event_rows if e[1]),
+ )
+
+ logger.info("[purge] Finding new backward extremities")
+
+ # We calculate the new entries for the backward extremities by finding
+ # events to be purged that are pointed to by events we're not going to
+ # purge.
+ txn.execute(
+ "SELECT DISTINCT e.event_id FROM events_to_purge AS e"
+ " INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id"
+ " LEFT JOIN events_to_purge AS ep2 ON ed.event_id = ep2.event_id"
+ " WHERE ep2.event_id IS NULL"
+ )
+ new_backwards_extrems = txn.fetchall()
+
+ logger.info("[purge] replacing backward extremities: %r", new_backwards_extrems)
+
+ txn.execute(
+ "DELETE FROM event_backward_extremities WHERE room_id = ?", (room_id,)
+ )
+
+ # Update backward extremities
+ txn.executemany(
+ "INSERT INTO event_backward_extremities (room_id, event_id)"
+ " VALUES (?, ?)",
+ [(room_id, event_id) for event_id, in new_backwards_extrems],
+ )
+
+ logger.info("[purge] finding state groups referenced by deleted events")
+
+ # Get all state groups that are referenced by events that are to be
+ # deleted.
+ txn.execute(
+ """
+ SELECT DISTINCT state_group FROM events_to_purge
+ INNER JOIN event_to_state_groups USING (event_id)
+ """
+ )
+
+ referenced_state_groups = {sg for sg, in txn}
+ logger.info(
+ "[purge] found %i referenced state groups", len(referenced_state_groups)
+ )
+
+ logger.info("[purge] removing events from event_to_state_groups")
+ txn.execute(
+ "DELETE FROM event_to_state_groups "
+ "WHERE event_id IN (SELECT event_id from events_to_purge)"
+ )
+ for event_id, _ in event_rows:
+ txn.call_after(self._get_state_group_for_event.invalidate, (event_id,))
+
+ # Delete all remote non-state events
+ for table in (
+ "events",
+ "event_json",
+ "event_auth",
+ "event_edges",
+ "event_forward_extremities",
+ "event_reference_hashes",
+ "event_relations",
+ "event_search",
+ "rejections",
+ ):
+ logger.info("[purge] removing events from %s", table)
+
+ txn.execute(
+ "DELETE FROM %s WHERE event_id IN ("
+ " SELECT event_id FROM events_to_purge WHERE should_delete"
+ ")" % (table,)
+ )
+
+ # event_push_actions lacks an index on event_id, and has one on
+ # (room_id, event_id) instead.
+ for table in ("event_push_actions",):
+ logger.info("[purge] removing events from %s", table)
+
+ txn.execute(
+ "DELETE FROM %s WHERE room_id = ? AND event_id IN ("
+ " SELECT event_id FROM events_to_purge WHERE should_delete"
+ ")" % (table,),
+ (room_id,),
+ )
+
+ # Mark the remaining events (state events and, unless we are deleting
+ # local events, our own local events) as outliers
+ logger.info("[purge] marking remaining events as outliers")
+ txn.execute(
+ "UPDATE events SET outlier = ?"
+ " WHERE event_id IN ("
+ " SELECT event_id FROM events_to_purge "
+ " WHERE NOT should_delete"
+ ")",
+ (True,),
+ )
+
+ # synapse tries to take out an exclusive lock on room_depth whenever it
+ # persists events (because of the upsert), and once we run this update, we
+ # will block that for the rest of our transaction.
+ #
+ # So, let's stick it at the end so that we don't block event
+ # persistence.
+ #
+ # We do this by calculating the minimum depth of the backwards
+ # extremities. However, the events in event_backward_extremities
+ # are ones we don't have yet so we need to look at the events that
+ # point to it via event_edges table.
+ txn.execute(
+ """
+ SELECT COALESCE(MIN(depth), 0)
+ FROM event_backward_extremities AS eb
+ INNER JOIN event_edges AS eg ON eg.prev_event_id = eb.event_id
+ INNER JOIN events AS e ON e.event_id = eg.event_id
+ WHERE eb.room_id = ?
+ """,
+ (room_id,),
+ )
+ (min_depth,) = txn.fetchone()
+
+ logger.info("[purge] updating room_depth to %d", min_depth)
+
+ txn.execute(
+ "UPDATE room_depth SET min_depth = ? WHERE room_id = ?",
+ (min_depth, room_id),
+ )
+
+ # finally, drop the temp table. this will commit the txn in sqlite,
+ # so make sure to keep this actually last.
+ txn.execute("DROP TABLE events_to_purge")
+
+ logger.info("[purge] done")
+
+ return referenced_state_groups
+
+ def purge_room(self, room_id):
+ """Deletes all record of a room
+
+ Args:
+ room_id (str)
+
+ Returns:
+ Deferred[List[int]]: The list of state groups to delete.
+ """
+
+ return self.db_pool.runInteraction("purge_room", self._purge_room_txn, room_id)
+
+ def _purge_room_txn(self, txn, room_id):
+ # First we fetch all the state groups that should be deleted, before
+ # we delete that information.
+ txn.execute(
+ """
+ SELECT DISTINCT state_group FROM events
+ INNER JOIN event_to_state_groups USING(event_id)
+ WHERE events.room_id = ?
+ """,
+ (room_id,),
+ )
+
+ state_groups = [row[0] for row in txn]
+
+ # Now we delete tables which lack an index on room_id but have one on event_id
+ for table in (
+ "event_auth",
+ "event_edges",
+ "event_push_actions_staging",
+ "event_reference_hashes",
+ "event_relations",
+ "event_to_state_groups",
+ "redactions",
+ "rejections",
+ "state_events",
+ ):
+ logger.info("[purge] removing %s from %s", room_id, table)
+
+ txn.execute(
+ """
+ DELETE FROM %s WHERE event_id IN (
+ SELECT event_id FROM events WHERE room_id=?
+ )
+ """
+ % (table,),
+ (room_id,),
+ )
+
+ # and finally, the tables with an index on room_id (or no useful index)
+ for table in (
+ "current_state_events",
+ "event_backward_extremities",
+ "event_forward_extremities",
+ "event_json",
+ "event_push_actions",
+ "event_search",
+ "events",
+ "group_rooms",
+ "public_room_list_stream",
+ "receipts_graph",
+ "receipts_linearized",
+ "room_aliases",
+ "room_depth",
+ "room_memberships",
+ "room_stats_state",
+ "room_stats_current",
+ "room_stats_historical",
+ "room_stats_earliest_token",
+ "rooms",
+ "stream_ordering_to_exterm",
+ "users_in_public_rooms",
+ "users_who_share_private_rooms",
+ # no useful index, but let's clear them anyway
+ "appservice_room_list",
+ "e2e_room_keys",
+ "event_push_summary",
+ "pusher_throttle",
+ "group_summary_rooms",
+ "room_account_data",
+ "room_tags",
+ "local_current_membership",
+ ):
+ logger.info("[purge] removing %s from %s", room_id, table)
+ txn.execute("DELETE FROM %s WHERE room_id=?" % (table,), (room_id,))
+
+ # Other tables we do NOT need to clear out:
+ #
+ # - blocked_rooms
+ # This is important, to make sure that we don't accidentally rejoin a blocked
+ # room after it was purged
+ #
+ # - user_directory
+ # This has a room_id column, but it is unused
+ #
+
+ # Other tables that we might want to consider clearing out include:
+ #
+ # - event_reports
+ # Given that these are intended for abuse management my initial
+ # inclination is to leave them in place.
+ #
+ # - current_state_delta_stream
+ # - ex_outlier_stream
+ # - room_tags_revisions
+ # The problem with these is that they are largeish and there is no room_id
+ # index on them. In any case we should be clearing out 'stream' tables
+ # periodically anyway (#5888)
+
+ # TODO: we could probably usefully do a bunch of cache invalidation here
+
+ logger.info("[purge] done")
+
+ return state_groups
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py
new file mode 100644
index 00000000..6562db5c
--- /dev/null
+++ b/synapse/storage/databases/main/push_rule.py
@@ -0,0 +1,691 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+import logging
+from typing import List, Tuple, Union
+
+from twisted.internet import defer
+
+from synapse.push.baserules import list_with_base_rules
+from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker
+from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.storage.database import DatabasePool
+from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore
+from synapse.storage.databases.main.events_worker import EventsWorkerStore
+from synapse.storage.databases.main.pusher import PusherWorkerStore
+from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
+from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
+from synapse.storage.push_rule import InconsistentRuleException, RuleNotFoundException
+from synapse.storage.util.id_generators import ChainedIdGenerator
+from synapse.util import json_encoder
+from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList
+from synapse.util.caches.stream_change_cache import StreamChangeCache
+
+logger = logging.getLogger(__name__)
+
+
+def _load_rules(rawrules, enabled_map, use_new_defaults=False):
+ ruleslist = []
+ for rawrule in rawrules:
+ rule = dict(rawrule)
+ rule["conditions"] = db_to_json(rawrule["conditions"])
+ rule["actions"] = db_to_json(rawrule["actions"])
+ rule["default"] = False
+ ruleslist.append(rule)
+
+ # We're going to be mutating this a lot, so copy the list. (This is a
+ # shallow copy; individual rules are dict-copied below before mutation.)
+ rules = list(list_with_base_rules(ruleslist, use_new_defaults))
+
+ for i, rule in enumerate(rules):
+ rule_id = rule["rule_id"]
+ if rule_id in enabled_map:
+ if rule.get("enabled", True) != bool(enabled_map[rule_id]):
+ # Rules are cached across users.
+ rule = dict(rule)
+ rule["enabled"] = bool(enabled_map[rule_id])
+ rules[i] = rule
+
+ return rules
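+
+# Sketch of the enabled_map override above (the rule id is illustrative):
+# given a cached rule {"rule_id": "global/override/.m.rule.suppress_notices",
+# "enabled": True} and enabled_map = {"global/override/.m.rule.suppress_notices": 0},
+# the rule dict is copied before "enabled" is set to False, so the shared
+# cached copy is never mutated.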
+
+
+class PushRulesWorkerStore(
+ ApplicationServiceWorkerStore,
+ ReceiptsWorkerStore,
+ PusherWorkerStore,
+ RoomMemberWorkerStore,
+ EventsWorkerStore,
+ SQLBaseStore,
+):
+ """This is an abstract base class where subclasses must implement
+ `get_max_push_rules_stream_id` which can be called in the initializer.
+ """
+
+ # This ABCMeta metaclass ensures that we cannot be instantiated without
+ # the abstract methods being implemented.
+ __metaclass__ = abc.ABCMeta
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(PushRulesWorkerStore, self).__init__(database, db_conn, hs)
+
+ if hs.config.worker.worker_app is None:
+ self._push_rules_stream_id_gen = ChainedIdGenerator(
+ self._stream_id_gen, db_conn, "push_rules_stream", "stream_id"
+ ) # type: Union[ChainedIdGenerator, SlavedIdTracker]
+ else:
+ self._push_rules_stream_id_gen = SlavedIdTracker(
+ db_conn, "push_rules_stream", "stream_id"
+ )
+
+ push_rules_prefill, push_rules_id = self.db_pool.get_cache_dict(
+ db_conn,
+ "push_rules_stream",
+ entity_column="user_id",
+ stream_column="stream_id",
+ max_value=self.get_max_push_rules_stream_id(),
+ )
+
+ self.push_rules_stream_cache = StreamChangeCache(
+ "PushRulesStreamChangeCache",
+ push_rules_id,
+ prefilled_cache=push_rules_prefill,
+ )
+
+ self._users_new_default_push_rules = hs.config.users_new_default_push_rules
+
+ @abc.abstractmethod
+ def get_max_push_rules_stream_id(self):
+ """Get the position of the push rules stream.
+
+ Returns:
+ int
+ """
+ raise NotImplementedError()
+
+ @cachedInlineCallbacks(max_entries=5000)
+ def get_push_rules_for_user(self, user_id):
+ rows = yield self.db_pool.simple_select_list(
+ table="push_rules",
+ keyvalues={"user_name": user_id},
+ retcols=(
+ "user_name",
+ "rule_id",
+ "priority_class",
+ "priority",
+ "conditions",
+ "actions",
+ ),
+ desc="get_push_rules_enabled_for_user",
+ )
+
+ rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"])))
+
+ enabled_map = yield self.get_push_rules_enabled_for_user(user_id)
+
+ use_new_defaults = user_id in self._users_new_default_push_rules
+
+ rules = _load_rules(rows, enabled_map, use_new_defaults)
+
+ return rules
+
+ @cachedInlineCallbacks(max_entries=5000)
+ def get_push_rules_enabled_for_user(self, user_id):
+ results = yield self.db_pool.simple_select_list(
+ table="push_rules_enable",
+ keyvalues={"user_name": user_id},
+ retcols=("user_name", "rule_id", "enabled"),
+ desc="get_push_rules_enabled_for_user",
+ )
+ return {r["rule_id"]: False if r["enabled"] == 0 else True for r in results}
+
+ def have_push_rules_changed_for_user(self, user_id, last_id):
+ if not self.push_rules_stream_cache.has_entity_changed(user_id, last_id):
+ return defer.succeed(False)
+ else:
+
+ def have_push_rules_changed_txn(txn):
+ sql = (
+ "SELECT COUNT(stream_id) FROM push_rules_stream"
+ " WHERE user_id = ? AND ? < stream_id"
+ )
+ txn.execute(sql, (user_id, last_id))
+ (count,) = txn.fetchone()
+ return bool(count)
+
+ return self.db_pool.runInteraction(
+ "have_push_rules_changed", have_push_rules_changed_txn
+ )
+
+ @cachedList(
+ cached_method_name="get_push_rules_for_user",
+ list_name="user_ids",
+ num_args=1,
+ inlineCallbacks=True,
+ )
+ def bulk_get_push_rules(self, user_ids):
+ if not user_ids:
+ return {}
+
+ results = {user_id: [] for user_id in user_ids}
+
+ rows = yield self.db_pool.simple_select_many_batch(
+ table="push_rules",
+ column="user_name",
+ iterable=user_ids,
+ retcols=("*",),
+ desc="bulk_get_push_rules",
+ )
+
+ rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"])))
+
+ for row in rows:
+ results.setdefault(row["user_name"], []).append(row)
+
+ enabled_map_by_user = yield self.bulk_get_push_rules_enabled(user_ids)
+
+ for user_id, rules in results.items():
+ use_new_defaults = user_id in self._users_new_default_push_rules
+
+ results[user_id] = _load_rules(
+ rules, enabled_map_by_user.get(user_id, {}), use_new_defaults,
+ )
+
+ return results
+
+ @defer.inlineCallbacks
+ def copy_push_rule_from_room_to_room(self, new_room_id, user_id, rule):
+ """Copy a single push rule from one room to another for a specific user.
+
+ Args:
+ new_room_id (str): ID of the new room.
+ user_id (str): ID of user the push rule belongs to.
+ rule (Dict): A push rule.
+ """
+ # Create new rule id
+ rule_id_scope = "/".join(rule["rule_id"].split("/")[:-1])
+ new_rule_id = rule_id_scope + "/" + new_room_id
+
+ # Change room id in each condition
+ for condition in rule.get("conditions", []):
+ if condition.get("key") == "room_id":
+ condition["pattern"] = new_room_id
+
+ # Add the rule for the new room
+ yield self.add_push_rule(
+ user_id=user_id,
+ rule_id=new_rule_id,
+ priority_class=rule["priority_class"],
+ conditions=rule["conditions"],
+ actions=rule["actions"],
+ )
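+
+ # Sketch of the rule id rewrite above (ids are illustrative): a rule id
+ # of "global/room/!old:example.com" copied to room "!new:example.com"
+ # becomes "global/room/!new:example.com"; the scope prefix is kept and
+ # only the final path segment (the room id) is replaced.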
+
+ @defer.inlineCallbacks
+ def copy_push_rules_from_room_to_room_for_user(
+ self, old_room_id, new_room_id, user_id
+ ):
+ """Copy all of the push rules from one room to another for a specific
+ user.
+
+ Args:
+ old_room_id (str): ID of the old room.
+ new_room_id (str): ID of the new room.
+ user_id (str): ID of user to copy push rules for.
+ """
+ # Retrieve push rules for this user
+ user_push_rules = yield self.get_push_rules_for_user(user_id)
+
+ # Get rules relating to the old room and copy them to the new room
+ for rule in user_push_rules:
+ conditions = rule.get("conditions", [])
+ if any(
+ (c.get("key") == "room_id" and c.get("pattern") == old_room_id)
+ for c in conditions
+ ):
+ yield self.copy_push_rule_from_room_to_room(new_room_id, user_id, rule)
+
+ @cachedList(
+ cached_method_name="get_push_rules_enabled_for_user",
+ list_name="user_ids",
+ num_args=1,
+ inlineCallbacks=True,
+ )
+ def bulk_get_push_rules_enabled(self, user_ids):
+ if not user_ids:
+ return {}
+
+ results = {user_id: {} for user_id in user_ids}
+
+ rows = yield self.db_pool.simple_select_many_batch(
+ table="push_rules_enable",
+ column="user_name",
+ iterable=user_ids,
+ retcols=("user_name", "rule_id", "enabled"),
+ desc="bulk_get_push_rules_enabled",
+ )
+ for row in rows:
+ enabled = bool(row["enabled"])
+ results.setdefault(row["user_name"], {})[row["rule_id"]] = enabled
+ return results
+
+ async def get_all_push_rule_updates(
+ self, instance_name: str, last_id: int, current_id: int, limit: int
+ ) -> Tuple[List[Tuple[int, tuple]], int, bool]:
+ """Get updates for push_rules replication stream.
+
+ Args:
+ instance_name: The writer we want to fetch updates from. Unused
+ here since there is only ever one writer.
+ last_id: The token to fetch updates from. Exclusive.
+ current_id: The token to fetch updates up to. Inclusive.
+ limit: The requested limit for the number of rows to return. The
+ function may return more or fewer rows.
+
+ Returns:
+ A tuple consisting of: the updates, a token to use to fetch
+ subsequent updates, and whether we returned fewer rows than exist
+ between the requested tokens due to the limit.
+
+ The token returned can be used in a subsequent call to this
+ function to get further updates.
+
+ The updates are a list of 2-tuples of stream ID and the row data
+ """
+
+ if last_id == current_id:
+ return [], current_id, False
+
+ def get_all_push_rule_updates_txn(txn):
+ sql = """
+ SELECT stream_id, user_id
+ FROM push_rules_stream
+ WHERE ? < stream_id AND stream_id <= ?
+ ORDER BY stream_id ASC
+ LIMIT ?
+ """
+ txn.execute(sql, (last_id, current_id, limit))
+ updates = [(stream_id, (user_id,)) for stream_id, user_id in txn]
+
+ limited = False
+ upper_bound = current_id
+ if len(updates) == limit:
+ limited = True
+ upper_bound = updates[-1][0]
+
+ return updates, upper_bound, limited
+
+ return await self.db_pool.runInteraction(
+ "get_all_push_rule_updates", get_all_push_rule_updates_txn
+ )
+
+
+class PushRuleStore(PushRulesWorkerStore):
+ @defer.inlineCallbacks
+ def add_push_rule(
+ self,
+ user_id,
+ rule_id,
+ priority_class,
+ conditions,
+ actions,
+ before=None,
+ after=None,
+ ):
+ conditions_json = json_encoder.encode(conditions)
+ actions_json = json_encoder.encode(actions)
+ with self._push_rules_stream_id_gen.get_next() as ids:
+ stream_id, event_stream_ordering = ids
+ if before or after:
+ yield self.db_pool.runInteraction(
+ "_add_push_rule_relative_txn",
+ self._add_push_rule_relative_txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ conditions_json,
+ actions_json,
+ before,
+ after,
+ )
+ else:
+ yield self.db_pool.runInteraction(
+ "_add_push_rule_highest_priority_txn",
+ self._add_push_rule_highest_priority_txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ conditions_json,
+ actions_json,
+ )
+
+ def _add_push_rule_relative_txn(
+ self,
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ conditions_json,
+ actions_json,
+ before,
+ after,
+ ):
+ # Lock the table since otherwise we'll have annoying races between the
+ # SELECT here and the UPSERT below.
+ self.database_engine.lock_table(txn, "push_rules")
+
+ relative_to_rule = before or after
+
+ res = self.db_pool.simple_select_one_txn(
+ txn,
+ table="push_rules",
+ keyvalues={"user_name": user_id, "rule_id": relative_to_rule},
+ retcols=["priority_class", "priority"],
+ allow_none=True,
+ )
+
+ if not res:
+ raise RuleNotFoundException(
+ "before/after rule not found: %s" % (relative_to_rule,)
+ )
+
+ base_priority_class = res["priority_class"]
+ base_rule_priority = res["priority"]
+
+ if base_priority_class != priority_class:
+ raise InconsistentRuleException(
+ "Given priority class does not match class of relative rule"
+ )
+
+ if before:
+ # Higher priority rules are executed first, so adding a rule before
+ # a rule means giving it a higher priority than that rule.
+ new_rule_priority = base_rule_priority + 1
+ else:
+ # We increment the priority of the existing rules to make space for
+ # the new rule. Therefore if we want this rule to appear after
+ # an existing rule we give it the priority of the existing rule,
+ # and then increment the priority of the existing rule.
+ new_rule_priority = base_rule_priority
+
+ sql = (
+ "UPDATE push_rules SET priority = priority + 1"
+ " WHERE user_name = ? AND priority_class = ? AND priority >= ?"
+ )
+
+ txn.execute(sql, (user_id, priority_class, new_rule_priority))
+
+ self._upsert_push_rule_txn(
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ new_rule_priority,
+ conditions_json,
+ actions_json,
+ )
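+
+ # Worked example (hypothetical priorities): with existing rules at
+ # priorities [3, 2, 1] in this class, inserting a rule before the
+ # priority-2 rule computes new_rule_priority = 3; the UPDATE above bumps
+ # the old priority-3 rule to 4, and the new rule is then upserted at 3,
+ # so it executes just before the rule it was anchored to.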
+
+ def _add_push_rule_highest_priority_txn(
+ self,
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ conditions_json,
+ actions_json,
+ ):
+ # Lock the table since otherwise we'll have annoying races between the
+ # SELECT here and the UPSERT below.
+ self.database_engine.lock_table(txn, "push_rules")
+
+ # find the highest priority rule in that class
+ sql = (
+ "SELECT COUNT(*), MAX(priority) FROM push_rules"
+ " WHERE user_name = ? and priority_class = ?"
+ )
+ txn.execute(sql, (user_id, priority_class))
+ res = txn.fetchall()
+ (how_many, highest_prio) = res[0]
+
+ new_prio = 0
+ if how_many > 0:
+ new_prio = highest_prio + 1
+
+ self._upsert_push_rule_txn(
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ new_prio,
+ conditions_json,
+ actions_json,
+ )
+
+ def _upsert_push_rule_txn(
+ self,
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ priority,
+ conditions_json,
+ actions_json,
+ update_stream=True,
+ ):
+ """Specialised version of simple_upsert_txn that picks a push_rule_id
+ using the _push_rule_id_gen if it needs to insert the rule. It assumes
+ that the "push_rules" table is locked"""
+
+ sql = (
+ "UPDATE push_rules"
+ " SET priority_class = ?, priority = ?, conditions = ?, actions = ?"
+ " WHERE user_name = ? AND rule_id = ?"
+ )
+
+ txn.execute(
+ sql,
+ (priority_class, priority, conditions_json, actions_json, user_id, rule_id),
+ )
+
+ if txn.rowcount == 0:
+ # We didn't update a row with the given rule_id so insert one
+ push_rule_id = self._push_rule_id_gen.get_next()
+
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="push_rules",
+ values={
+ "id": push_rule_id,
+ "user_name": user_id,
+ "rule_id": rule_id,
+ "priority_class": priority_class,
+ "priority": priority,
+ "conditions": conditions_json,
+ "actions": actions_json,
+ },
+ )
+
+ if update_stream:
+ self._insert_push_rules_update_txn(
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ op="ADD",
+ data={
+ "priority_class": priority_class,
+ "priority": priority,
+ "conditions": conditions_json,
+ "actions": actions_json,
+ },
+ )
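+
+ # The UPDATE-then-INSERT above is a manual upsert: rowcount == 0 means
+ # no existing row matched (user_name, rule_id), so a fresh row is
+ # inserted. Without the table lock taken by the callers, two concurrent
+ # writers could both see rowcount == 0 and insert duplicate rules.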
+
+ @defer.inlineCallbacks
+ def delete_push_rule(self, user_id, rule_id):
+ """
+ Delete a push rule. Args specify the row to be deleted and can be
+ any of the columns in the push_rules table, but below are the
+ standard ones.
+
+ Args:
+ user_id (str): The matrix ID of the push rule owner
+ rule_id (str): The rule_id of the rule to be deleted
+ """
+
+ def delete_push_rule_txn(txn, stream_id, event_stream_ordering):
+ self.db_pool.simple_delete_one_txn(
+ txn, "push_rules", {"user_name": user_id, "rule_id": rule_id}
+ )
+
+ self._insert_push_rules_update_txn(
+ txn, stream_id, event_stream_ordering, user_id, rule_id, op="DELETE"
+ )
+
+ with self._push_rules_stream_id_gen.get_next() as ids:
+ stream_id, event_stream_ordering = ids
+ yield self.db_pool.runInteraction(
+ "delete_push_rule",
+ delete_push_rule_txn,
+ stream_id,
+ event_stream_ordering,
+ )
+
+ @defer.inlineCallbacks
+ def set_push_rule_enabled(self, user_id, rule_id, enabled):
+ with self._push_rules_stream_id_gen.get_next() as ids:
+ stream_id, event_stream_ordering = ids
+ yield self.db_pool.runInteraction(
+ "_set_push_rule_enabled_txn",
+ self._set_push_rule_enabled_txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ enabled,
+ )
+
+ def _set_push_rule_enabled_txn(
+ self, txn, stream_id, event_stream_ordering, user_id, rule_id, enabled
+ ):
+ new_id = self._push_rules_enable_id_gen.get_next()
+ self.db_pool.simple_upsert_txn(
+ txn,
+ "push_rules_enable",
+ {"user_name": user_id, "rule_id": rule_id},
+ {"enabled": 1 if enabled else 0},
+ {"id": new_id},
+ )
+
+ self._insert_push_rules_update_txn(
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ op="ENABLE" if enabled else "DISABLE",
+ )
+
+ @defer.inlineCallbacks
+ def set_push_rule_actions(self, user_id, rule_id, actions, is_default_rule):
+ actions_json = json_encoder.encode(actions)
+
+ def set_push_rule_actions_txn(txn, stream_id, event_stream_ordering):
+ if is_default_rule:
+ # Add a dummy rule to the rules table with the user-specified
+ # actions.
+ priority_class = -1
+ priority = 1
+ self._upsert_push_rule_txn(
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ priority_class,
+ priority,
+ "[]",
+ actions_json,
+ update_stream=False,
+ )
+ else:
+ self.db_pool.simple_update_one_txn(
+ txn,
+ "push_rules",
+ {"user_name": user_id, "rule_id": rule_id},
+ {"actions": actions_json},
+ )
+
+ self._insert_push_rules_update_txn(
+ txn,
+ stream_id,
+ event_stream_ordering,
+ user_id,
+ rule_id,
+ op="ACTIONS",
+ data={"actions": actions_json},
+ )
+
+ with self._push_rules_stream_id_gen.get_next() as ids:
+ stream_id, event_stream_ordering = ids
+ yield self.db_pool.runInteraction(
+ "set_push_rule_actions",
+ set_push_rule_actions_txn,
+ stream_id,
+ event_stream_ordering,
+ )
+
+ def _insert_push_rules_update_txn(
+ self, txn, stream_id, event_stream_ordering, user_id, rule_id, op, data=None
+ ):
+ values = {
+ "stream_id": stream_id,
+ "event_stream_ordering": event_stream_ordering,
+ "user_id": user_id,
+ "rule_id": rule_id,
+ "op": op,
+ }
+ if data is not None:
+ values.update(data)
+
+ self.db_pool.simple_insert_txn(txn, "push_rules_stream", values=values)
+
+ txn.call_after(self.get_push_rules_for_user.invalidate, (user_id,))
+ txn.call_after(self.get_push_rules_enabled_for_user.invalidate, (user_id,))
+ txn.call_after(
+ self.push_rules_stream_cache.entity_has_changed, user_id, stream_id
+ )
+
+ def get_push_rules_stream_token(self):
+ """Get the position of the push rules stream.
+ Returns a pair of a stream id for the push_rules stream and the
+ room stream ordering it corresponds to."""
+ return self._push_rules_stream_id_gen.get_current_token()
+
+ def get_max_push_rules_stream_id(self):
+ return self.get_push_rules_stream_token()[0]
diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py
new file mode 100644
index 00000000..b5200fbe
--- /dev/null
+++ b/synapse/storage/databases/main/pusher.py
@@ -0,0 +1,356 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Iterable, Iterator, List, Tuple
+
+from canonicaljson import encode_canonical_json
+
+from twisted.internet import defer
+
+from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList
+
+logger = logging.getLogger(__name__)
+
+
+class PusherWorkerStore(SQLBaseStore):
+ def _decode_pushers_rows(self, rows: Iterable[dict]) -> Iterator[dict]:
+ """JSON-decode the data in the rows returned from the `pushers` table
+
+ Drops any rows whose data cannot be decoded
+ """
+ for r in rows:
+ dataJson = r["data"]
+ try:
+ r["data"] = db_to_json(dataJson)
+ except Exception as e:
+ logger.warning(
+ "Invalid JSON in data for pusher %d: %s, %s",
+ r["id"],
+ dataJson,
+ e.args[0],
+ )
+ continue
+
+ yield r
+
+ @defer.inlineCallbacks
+ def user_has_pusher(self, user_id):
+ ret = yield self.db_pool.simple_select_one_onecol(
+ "pushers", {"user_name": user_id}, "id", allow_none=True
+ )
+ return ret is not None
+
+ def get_pushers_by_app_id_and_pushkey(self, app_id, pushkey):
+ return self.get_pushers_by({"app_id": app_id, "pushkey": pushkey})
+
+ def get_pushers_by_user_id(self, user_id):
+ return self.get_pushers_by({"user_name": user_id})
+
+ @defer.inlineCallbacks
+ def get_pushers_by(self, keyvalues):
+ ret = yield self.db_pool.simple_select_list(
+ "pushers",
+ keyvalues,
+ [
+ "id",
+ "user_name",
+ "access_token",
+ "profile_tag",
+ "kind",
+ "app_id",
+ "app_display_name",
+ "device_display_name",
+ "pushkey",
+ "ts",
+ "lang",
+ "data",
+ "last_stream_ordering",
+ "last_success",
+ "failing_since",
+ ],
+ desc="get_pushers_by",
+ )
+ return self._decode_pushers_rows(ret)
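+
+ # Note: _decode_pushers_rows is a generator, so this resolves to a lazy
+ # iterator; callers that need to traverse the pushers more than once
+ # should materialise it first, e.g. list(pushers).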
+
+ @defer.inlineCallbacks
+ def get_all_pushers(self):
+ def get_pushers(txn):
+ txn.execute("SELECT * FROM pushers")
+ rows = self.db_pool.cursor_to_dict(txn)
+
+ return self._decode_pushers_rows(rows)
+
+ rows = yield self.db_pool.runInteraction("get_all_pushers", get_pushers)
+ return rows
+
+ async def get_all_updated_pushers_rows(
+ self, instance_name: str, last_id: int, current_id: int, limit: int
+ ) -> Tuple[List[Tuple[int, tuple]], int, bool]:
+ """Get updates for pushers replication stream.
+
+ Args:
+ instance_name: The writer we want to fetch updates from. Unused
+ here since there is only ever one writer.
+ last_id: The token to fetch updates from. Exclusive.
+ current_id: The token to fetch updates up to. Inclusive.
+ limit: The requested limit for the number of rows to return. The
+ function may return more or fewer rows.
+
+ Returns:
+ A tuple consisting of: the updates, a token to use to fetch
+ subsequent updates, and whether we returned fewer rows than exist
+ between the requested tokens due to the limit.
+
+ The token returned can be used in a subsequent call to this
+ function to get further updates.
+
+ The updates are a list of 2-tuples of stream ID and the row data
+ """
+
+ if last_id == current_id:
+ return [], current_id, False
+
+ def get_all_updated_pushers_rows_txn(txn):
+ sql = """
+ SELECT id, user_name, app_id, pushkey
+ FROM pushers
+ WHERE ? < id AND id <= ?
+ ORDER BY id ASC LIMIT ?
+ """
+ txn.execute(sql, (last_id, current_id, limit))
+ updates = [
+ (stream_id, (user_name, app_id, pushkey, False))
+ for stream_id, user_name, app_id, pushkey in txn
+ ]
+
+ sql = """
+ SELECT stream_id, user_id, app_id, pushkey
+ FROM deleted_pushers
+ WHERE ? < stream_id AND stream_id <= ?
+ ORDER BY stream_id ASC LIMIT ?
+ """
+ txn.execute(sql, (last_id, current_id, limit))
+ updates.extend(
+ (stream_id, (user_name, app_id, pushkey, True))
+ for stream_id, user_name, app_id, pushkey in txn
+ )
+
+ updates.sort() # Sort so that they're ordered by stream id
+
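+ # Each of the two queries above is capped at `limit` rows, so the
+ # combined list can hold up to twice that; hence the >= check below
+ # rather than ==.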
+ limited = False
+ upper_bound = current_id
+ if len(updates) >= limit:
+ limited = True
+ upper_bound = updates[-1][0]
+
+ return updates, upper_bound, limited
+
+ return await self.db_pool.runInteraction(
+ "get_all_updated_pushers_rows", get_all_updated_pushers_rows_txn
+ )
+
+ @cachedInlineCallbacks(num_args=1, max_entries=15000)
+ def get_if_user_has_pusher(self, user_id):
+ # This only exists for the cachedList decorator
+ raise NotImplementedError()
+
+ @cachedList(
+ cached_method_name="get_if_user_has_pusher",
+ list_name="user_ids",
+ num_args=1,
+ inlineCallbacks=True,
+ )
+ def get_if_users_have_pushers(self, user_ids):
+ rows = yield self.db_pool.simple_select_many_batch(
+ table="pushers",
+ column="user_name",
+ iterable=user_ids,
+ retcols=["user_name"],
+ desc="get_if_users_have_pushers",
+ )
+
+ result = {user_id: False for user_id in user_ids}
+ result.update({r["user_name"]: True for r in rows})
+
+ return result
+
+ @defer.inlineCallbacks
+ def update_pusher_last_stream_ordering(
+ self, app_id, pushkey, user_id, last_stream_ordering
+ ):
+ yield self.db_pool.simple_update_one(
+ "pushers",
+ {"app_id": app_id, "pushkey": pushkey, "user_name": user_id},
+ {"last_stream_ordering": last_stream_ordering},
+ desc="update_pusher_last_stream_ordering",
+ )
+
+ @defer.inlineCallbacks
+ def update_pusher_last_stream_ordering_and_success(
+ self, app_id, pushkey, user_id, last_stream_ordering, last_success
+ ):
+ """Update the last stream ordering position we've processed up to for
+ the given pusher.
+
+ Args:
+ app_id (str)
+ pushkey (str)
+ user_id (str)
+ last_stream_ordering (int)
+ last_success (int)
+
+ Returns:
+ Deferred[bool]: True if the pusher still exists; False if it has been deleted.
+ """
+ updated = yield self.db_pool.simple_update(
+ table="pushers",
+ keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id},
+ updatevalues={
+ "last_stream_ordering": last_stream_ordering,
+ "last_success": last_success,
+ },
+ desc="update_pusher_last_stream_ordering_and_success",
+ )
+
+ return bool(updated)
+
+ @defer.inlineCallbacks
+ def update_pusher_failing_since(self, app_id, pushkey, user_id, failing_since):
+ yield self.db_pool.simple_update(
+ table="pushers",
+ keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id},
+ updatevalues={"failing_since": failing_since},
+ desc="update_pusher_failing_since",
+ )
+
+ @defer.inlineCallbacks
+ def get_throttle_params_by_room(self, pusher_id):
+ res = yield self.db_pool.simple_select_list(
+ "pusher_throttle",
+ {"pusher": pusher_id},
+ ["room_id", "last_sent_ts", "throttle_ms"],
+ desc="get_throttle_params_by_room",
+ )
+
+ params_by_room = {}
+ for row in res:
+ params_by_room[row["room_id"]] = {
+ "last_sent_ts": row["last_sent_ts"],
+ "throttle_ms": row["throttle_ms"],
+ }
+
+ return params_by_room
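+
+ # Sketch of the return value with a hypothetical room ID:
+ #
+ #     {"!room:example.com": {"last_sent_ts": 1600000000000,
+ #                            "throttle_ms": 60000}}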
+
+ @defer.inlineCallbacks
+ def set_throttle_params(self, pusher_id, room_id, params):
+ # no need to lock because `pusher_throttle` has a primary key on
+ # (pusher, room_id) so simple_upsert will retry
+ yield self.db_pool.simple_upsert(
+ "pusher_throttle",
+ {"pusher": pusher_id, "room_id": room_id},
+ params,
+ desc="set_throttle_params",
+ lock=False,
+ )
+
+
+class PusherStore(PusherWorkerStore):
+ def get_pushers_stream_token(self):
+ return self._pushers_id_gen.get_current_token()
+
+ @defer.inlineCallbacks
+ def add_pusher(
+ self,
+ user_id,
+ access_token,
+ kind,
+ app_id,
+ app_display_name,
+ device_display_name,
+ pushkey,
+ pushkey_ts,
+ lang,
+ data,
+ last_stream_ordering,
+ profile_tag="",
+ ):
+ with self._pushers_id_gen.get_next() as stream_id:
+ # no need to lock because `pushers` has a unique key on
+ # (app_id, pushkey, user_name) so simple_upsert will retry
+ yield self.db_pool.simple_upsert(
+ table="pushers",
+ keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id},
+ values={
+ "access_token": access_token,
+ "kind": kind,
+ "app_display_name": app_display_name,
+ "device_display_name": device_display_name,
+ "ts": pushkey_ts,
+ "lang": lang,
+ "data": bytearray(encode_canonical_json(data)),
+ "last_stream_ordering": last_stream_ordering,
+ "profile_tag": profile_tag,
+ "id": stream_id,
+ },
+ desc="add_pusher",
+ lock=False,
+ )
+
+ user_has_pusher = self.get_if_user_has_pusher.cache.get(
+ (user_id,), None, update_metrics=False
+ )
+
+ if user_has_pusher is not True:
+ # invalidate, since the user might not have had a pusher before
+ yield self.db_pool.runInteraction(
+ "add_pusher",
+ self._invalidate_cache_and_stream,
+ self.get_if_user_has_pusher,
+ (user_id,),
+ )
+
+ @defer.inlineCallbacks
+ def delete_pusher_by_app_id_pushkey_user_id(self, app_id, pushkey, user_id):
+ def delete_pusher_txn(txn, stream_id):
+ self._invalidate_cache_and_stream(
+ txn, self.get_if_user_has_pusher, (user_id,)
+ )
+
+ self.db_pool.simple_delete_one_txn(
+ txn,
+ "pushers",
+ {"app_id": app_id, "pushkey": pushkey, "user_name": user_id},
+ )
+
+ # it's possible for us to end up with duplicate rows for
+ # (app_id, pushkey, user_id) at different stream_ids, but that
+ # doesn't really matter.
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="deleted_pushers",
+ values={
+ "stream_id": stream_id,
+ "app_id": app_id,
+ "pushkey": pushkey,
+ "user_id": user_id,
+ },
+ )
+
+ with self._pushers_id_gen.get_next() as stream_id:
+ yield self.db_pool.runInteraction(
+ "delete_pusher", delete_pusher_txn, stream_id
+ )
diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
new file mode 100644
index 00000000..1920a8a1
--- /dev/null
+++ b/synapse/storage/databases/main/receipts.py
@@ -0,0 +1,590 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+import logging
+from typing import List, Tuple
+
+from twisted.internet import defer
+
+from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
+from synapse.storage.database import DatabasePool
+from synapse.storage.util.id_generators import StreamIdGenerator
+from synapse.util import json_encoder
+from synapse.util.async_helpers import ObservableDeferred
+from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList
+from synapse.util.caches.stream_change_cache import StreamChangeCache
+
+logger = logging.getLogger(__name__)
+
+
+class ReceiptsWorkerStore(SQLBaseStore, metaclass=abc.ABCMeta):
+ """This is an abstract base class where subclasses must implement
+ `get_max_receipt_stream_id` which can be called in the initializer.
+ """
+
+ # The ABCMeta metaclass ensures that we cannot be instantiated without
+ # the abstract methods being implemented.
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(ReceiptsWorkerStore, self).__init__(database, db_conn, hs)
+
+ self._receipts_stream_cache = StreamChangeCache(
+ "ReceiptsRoomChangeCache", self.get_max_receipt_stream_id()
+ )
+
+ @abc.abstractmethod
+ def get_max_receipt_stream_id(self):
+ """Get the current max stream ID for receipts stream
+
+ Returns:
+ int
+ """
+ raise NotImplementedError()
+
+ @cachedInlineCallbacks()
+ def get_users_with_read_receipts_in_room(self, room_id):
+ receipts = yield self.get_receipts_for_room(room_id, "m.read")
+ return {r["user_id"] for r in receipts}
+
+ @cached(num_args=2)
+ def get_receipts_for_room(self, room_id, receipt_type):
+ return self.db_pool.simple_select_list(
+ table="receipts_linearized",
+ keyvalues={"room_id": room_id, "receipt_type": receipt_type},
+ retcols=("user_id", "event_id"),
+ desc="get_receipts_for_room",
+ )
+
+ @cached(num_args=3)
+ def get_last_receipt_event_id_for_user(self, user_id, room_id, receipt_type):
+ return self.db_pool.simple_select_one_onecol(
+ table="receipts_linearized",
+ keyvalues={
+ "room_id": room_id,
+ "receipt_type": receipt_type,
+ "user_id": user_id,
+ },
+ retcol="event_id",
+ desc="get_own_receipt_for_user",
+ allow_none=True,
+ )
+
+ @cachedInlineCallbacks(num_args=2)
+ def get_receipts_for_user(self, user_id, receipt_type):
+ rows = yield self.db_pool.simple_select_list(
+ table="receipts_linearized",
+ keyvalues={"user_id": user_id, "receipt_type": receipt_type},
+ retcols=("room_id", "event_id"),
+ desc="get_receipts_for_user",
+ )
+
+ return {row["room_id"]: row["event_id"] for row in rows}
+
+ @defer.inlineCallbacks
+ def get_receipts_for_user_with_orderings(self, user_id, receipt_type):
+ def f(txn):
+ sql = (
+ "SELECT rl.room_id, rl.event_id,"
+ " e.topological_ordering, e.stream_ordering"
+ " FROM receipts_linearized AS rl"
+ " INNER JOIN events AS e USING (room_id, event_id)"
+ " WHERE rl.room_id = e.room_id"
+ " AND rl.event_id = e.event_id"
+ " AND user_id = ?"
+ )
+ txn.execute(sql, (user_id,))
+ return txn.fetchall()
+
+ rows = yield self.db_pool.runInteraction(
+ "get_receipts_for_user_with_orderings", f
+ )
+ return {
+ row[0]: {
+ "event_id": row[1],
+ "topological_ordering": row[2],
+ "stream_ordering": row[3],
+ }
+ for row in rows
+ }
+
+ @defer.inlineCallbacks
+ def get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None):
+ """Get receipts for multiple rooms for sending to clients.
+
+ Args:
+ room_ids (list): List of room_ids.
+ to_key (int): Max stream id to fetch receipts up to.
+ from_key (int): Min stream id to fetch receipts from. None fetches
+ from the start.
+
+ Returns:
+ list: A list of receipts.
+ """
+ room_ids = set(room_ids)
+
+ if from_key is not None:
+ # Only ask the database about rooms where there have been new
+ # receipts added since `from_key`
+ room_ids = yield self._receipts_stream_cache.get_entities_changed(
+ room_ids, from_key
+ )
+
+ results = yield self._get_linearized_receipts_for_rooms(
+ room_ids, to_key, from_key=from_key
+ )
+
+ return [ev for res in results.values() for ev in res]
+
+ def get_linearized_receipts_for_room(self, room_id, to_key, from_key=None):
+ """Get receipts for a single room for sending to clients.
+
+ Args:
+ room_id (str): The room id.
+ to_key (int): Max stream id to fetch receipts up to.
+ from_key (int): Min stream id to fetch receipts from. None fetches
+ from the start.
+
+ Returns:
+ Deferred[list]: A list of receipts.
+ """
+ if from_key is not None:
+ # Check the cache first to see if any new receipts have been added
+ # since `from_key`. If not we can no-op.
+ if not self._receipts_stream_cache.has_entity_changed(room_id, from_key):
+ return defer.succeed([])
+
+ return self._get_linearized_receipts_for_room(room_id, to_key, from_key)
+
+ @cachedInlineCallbacks(num_args=3, tree=True)
+ def _get_linearized_receipts_for_room(self, room_id, to_key, from_key=None):
+ """See get_linearized_receipts_for_room
+ """
+
+ def f(txn):
+ if from_key:
+ sql = (
+ "SELECT * FROM receipts_linearized WHERE"
+ " room_id = ? AND stream_id > ? AND stream_id <= ?"
+ )
+
+ txn.execute(sql, (room_id, from_key, to_key))
+ else:
+ sql = (
+ "SELECT * FROM receipts_linearized WHERE"
+ " room_id = ? AND stream_id <= ?"
+ )
+
+ txn.execute(sql, (room_id, to_key))
+
+ rows = self.db_pool.cursor_to_dict(txn)
+
+ return rows
+
+ rows = yield self.db_pool.runInteraction("get_linearized_receipts_for_room", f)
+
+ if not rows:
+ return []
+
+ content = {}
+ for row in rows:
+ content.setdefault(row["event_id"], {}).setdefault(row["receipt_type"], {})[
+ row["user_id"]
+ ] = db_to_json(row["data"])
+
+ return [{"type": "m.receipt", "room_id": room_id, "content": content}]
+
+ @cachedList(
+ cached_method_name="_get_linearized_receipts_for_room",
+ list_name="room_ids",
+ num_args=3,
+ inlineCallbacks=True,
+ )
+ def _get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None):
+ if not room_ids:
+ return {}
+
+ def f(txn):
+ if from_key:
+ sql = """
+ SELECT * FROM receipts_linearized WHERE
+ stream_id > ? AND stream_id <= ? AND
+ """
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "room_id", room_ids
+ )
+
+ txn.execute(sql + clause, [from_key, to_key] + list(args))
+ else:
+ sql = """
+ SELECT * FROM receipts_linearized WHERE
+ stream_id <= ? AND
+ """
+
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "room_id", room_ids
+ )
+
+ txn.execute(sql + clause, [to_key] + list(args))
+
+ return self.db_pool.cursor_to_dict(txn)
+
+ txn_results = yield self.db_pool.runInteraction(
+ "_get_linearized_receipts_for_rooms", f
+ )
+
+ results = {}
+ for row in txn_results:
+ # We want a single event per room, since we want to batch the
+ # receipts by room, event and type.
+ room_event = results.setdefault(
+ row["room_id"],
+ {"type": "m.receipt", "room_id": row["room_id"], "content": {}},
+ )
+
+ # The content is of the form:
+ # {"$foo:bar": { "read": { "@user:host": <receipt> }, .. }, .. }
+ event_entry = room_event["content"].setdefault(row["event_id"], {})
+ receipt_type = event_entry.setdefault(row["receipt_type"], {})
+
+ receipt_type[row["user_id"]] = db_to_json(row["data"])
+
+ results = {
+ room_id: [results[room_id]] if room_id in results else []
+ for room_id in room_ids
+ }
+ return results
+
+ def get_users_sent_receipts_between(self, last_id: int, current_id: int):
+ """Get all users who sent receipts between `last_id` exclusive and
+ `current_id` inclusive.
+
+ Returns:
+ Deferred[List[str]]
+ """
+
+ if last_id == current_id:
+ return defer.succeed([])
+
+ def _get_users_sent_receipts_between_txn(txn):
+ sql = """
+ SELECT DISTINCT user_id FROM receipts_linearized
+ WHERE ? < stream_id AND stream_id <= ?
+ """
+ txn.execute(sql, (last_id, current_id))
+
+ return [r[0] for r in txn]
+
+ return self.db_pool.runInteraction(
+ "get_users_sent_receipts_between", _get_users_sent_receipts_between_txn
+ )
+
+ async def get_all_updated_receipts(
+ self, instance_name: str, last_id: int, current_id: int, limit: int
+ ) -> Tuple[List[Tuple[int, list]], int, bool]:
+ """Get updates for receipts replication stream.
+
+ Args:
+ instance_name: The writer we want to fetch updates from. Unused
+ here since there is only ever one writer.
+ last_id: The token to fetch updates from. Exclusive.
+ current_id: The token to fetch updates up to. Inclusive.
+ limit: The requested limit for the number of rows to return. The
+ function may return more or fewer rows.
+
+ Returns:
+ A tuple consisting of: the updates, a token to use to fetch
+ subsequent updates, and whether we returned fewer rows than exist
+ between the requested tokens due to the limit.
+
+ The token returned can be used in a subsequent call to this
+ function to get further updates.
+
+ The updates are a list of 2-tuples of stream ID and the row data
+ """
+
+ if last_id == current_id:
+ return [], current_id, False
+
+ def get_all_updated_receipts_txn(txn):
+ sql = """
+ SELECT stream_id, room_id, receipt_type, user_id, event_id, data
+ FROM receipts_linearized
+ WHERE ? < stream_id AND stream_id <= ?
+ ORDER BY stream_id ASC
+ LIMIT ?
+ """
+ txn.execute(sql, (last_id, current_id, limit))
+
+ updates = [(r[0], r[1:5] + (db_to_json(r[5]),)) for r in txn]
+
+ limited = False
+ upper_bound = current_id
+
+ if len(updates) == limit:
+ limited = True
+ upper_bound = updates[-1][0]
+
+ return updates, upper_bound, limited
+
+ return await self.db_pool.runInteraction(
+ "get_all_updated_receipts", get_all_updated_receipts_txn
+ )
+
+ def _invalidate_get_users_with_receipts_in_room(
+ self, room_id, receipt_type, user_id
+ ):
+ if receipt_type != "m.read":
+ return
+
+ # Returns either an ObservableDeferred or the raw result
+ res = self.get_users_with_read_receipts_in_room.cache.get(
+ room_id, None, update_metrics=False
+ )
+
+ # first handle the ObservableDeferred case
+ if isinstance(res, ObservableDeferred):
+ if res.has_called():
+ res = res.get_result()
+ else:
+ res = None
+
+ if res and user_id in res:
+ # We'd only be adding to the set, so no point invalidating if the
+ # user is already there
+ return
+
+ self.get_users_with_read_receipts_in_room.invalidate((room_id,))
+
+
+class ReceiptsStore(ReceiptsWorkerStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ # We instantiate this first as the ReceiptsWorkerStore constructor
+ # needs to be able to call get_max_receipt_stream_id
+ self._receipts_id_gen = StreamIdGenerator(
+ db_conn, "receipts_linearized", "stream_id"
+ )
+
+ super(ReceiptsStore, self).__init__(database, db_conn, hs)
+
+ def get_max_receipt_stream_id(self):
+ return self._receipts_id_gen.get_current_token()
+
+ def insert_linearized_receipt_txn(
+ self, txn, room_id, receipt_type, user_id, event_id, data, stream_id
+ ):
+ """Inserts a read-receipt into the database if it's newer than the current RR
+
+ Returns: int|None
+ None if the RR is older than the current RR
+ otherwise, the rx timestamp of the event that the RR corresponds to
+ (or 0 if the event is unknown)
+ """
+ res = self.db_pool.simple_select_one_txn(
+ txn,
+ table="events",
+ retcols=["stream_ordering", "received_ts"],
+ keyvalues={"event_id": event_id},
+ allow_none=True,
+ )
+
+ stream_ordering = int(res["stream_ordering"]) if res else None
+ rx_ts = res["received_ts"] if res else 0
+
+ # We don't want to clobber receipts for more recent events, so we
+ # have to compare orderings of existing receipts
+ if stream_ordering is not None:
+ sql = (
+ "SELECT stream_ordering, event_id FROM events"
+ " INNER JOIN receipts_linearized as r USING (event_id, room_id)"
+ " WHERE r.room_id = ? AND r.receipt_type = ? AND r.user_id = ?"
+ )
+ txn.execute(sql, (room_id, receipt_type, user_id))
+
+ for so, eid in txn:
+ if int(so) >= stream_ordering:
+ logger.debug(
+ "Ignoring new receipt for %s in favour of existing "
+ "one for later event %s",
+ event_id,
+ eid,
+ )
+ return None
+
+ txn.call_after(self.get_receipts_for_room.invalidate, (room_id, receipt_type))
+ txn.call_after(
+ self._invalidate_get_users_with_receipts_in_room,
+ room_id,
+ receipt_type,
+ user_id,
+ )
+ txn.call_after(self.get_receipts_for_user.invalidate, (user_id, receipt_type))
+ # FIXME: This shouldn't invalidate the whole cache
+ txn.call_after(
+ self._get_linearized_receipts_for_room.invalidate_many, (room_id,)
+ )
+
+ txn.call_after(
+ self._receipts_stream_cache.entity_has_changed, room_id, stream_id
+ )
+
+ txn.call_after(
+ self.get_last_receipt_event_id_for_user.invalidate,
+ (user_id, room_id, receipt_type),
+ )
+
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="receipts_linearized",
+ keyvalues={
+ "room_id": room_id,
+ "receipt_type": receipt_type,
+ "user_id": user_id,
+ },
+ values={
+ "stream_id": stream_id,
+ "event_id": event_id,
+ "data": json_encoder.encode(data),
+ },
+ # receipts_linearized has a unique constraint on
+ # (user_id, room_id, receipt_type), so no need to lock
+ lock=False,
+ )
+
+ if receipt_type == "m.read" and stream_ordering is not None:
+ self._remove_old_push_actions_before_txn(
+ txn, room_id=room_id, user_id=user_id, stream_ordering=stream_ordering
+ )
+
+ return rx_ts
+
+ @defer.inlineCallbacks
+ def insert_receipt(self, room_id, receipt_type, user_id, event_ids, data):
+ """Insert a receipt, either from local client or remote server.
+
+ Automatically does conversion between linearized and graph
+ representations.
+ """
+ if not event_ids:
+ return
+
+ if len(event_ids) == 1:
+ linearized_event_id = event_ids[0]
+ else:
+ # we need to map the points in the graph to a linearized form.
+ # TODO: Make this better.
+ def graph_to_linear(txn):
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "event_id", event_ids
+ )
+
+ sql = """
+ SELECT event_id FROM events WHERE room_id = ? AND stream_ordering IN (
+ SELECT max(stream_ordering) FROM events WHERE %s
+ )
+ """ % (
+ clause,
+ )
+
+ txn.execute(sql, [room_id] + list(args))
+ rows = txn.fetchall()
+ if rows:
+ return rows[0][0]
+ else:
+ raise RuntimeError("Unrecognized event_ids: %r" % (event_ids,))
+
+ linearized_event_id = yield self.db_pool.runInteraction(
+ "insert_receipt_conv", graph_to_linear
+ )
+
+ stream_id_manager = self._receipts_id_gen.get_next()
+ with stream_id_manager as stream_id:
+ event_ts = yield self.db_pool.runInteraction(
+ "insert_linearized_receipt",
+ self.insert_linearized_receipt_txn,
+ room_id,
+ receipt_type,
+ user_id,
+ linearized_event_id,
+ data,
+ stream_id=stream_id,
+ )
+
+ if event_ts is None:
+ return None
+
+ now = self._clock.time_msec()
+ logger.debug(
+ "RR for event %s in %s (%i ms old)",
+ linearized_event_id,
+ room_id,
+ now - event_ts,
+ )
+
+ yield self.insert_graph_receipt(room_id, receipt_type, user_id, event_ids, data)
+
+ max_persisted_id = self._receipts_id_gen.get_current_token()
+
+ return stream_id, max_persisted_id
+
+ def insert_graph_receipt(self, room_id, receipt_type, user_id, event_ids, data):
+ return self.db_pool.runInteraction(
+ "insert_graph_receipt",
+ self.insert_graph_receipt_txn,
+ room_id,
+ receipt_type,
+ user_id,
+ event_ids,
+ data,
+ )
+
+ def insert_graph_receipt_txn(
+ self, txn, room_id, receipt_type, user_id, event_ids, data
+ ):
+ txn.call_after(self.get_receipts_for_room.invalidate, (room_id, receipt_type))
+ txn.call_after(
+ self._invalidate_get_users_with_receipts_in_room,
+ room_id,
+ receipt_type,
+ user_id,
+ )
+ txn.call_after(self.get_receipts_for_user.invalidate, (user_id, receipt_type))
+ # FIXME: This shouldn't invalidate the whole cache
+ txn.call_after(
+ self._get_linearized_receipts_for_room.invalidate_many, (room_id,)
+ )
+
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="receipts_graph",
+ keyvalues={
+ "room_id": room_id,
+ "receipt_type": receipt_type,
+ "user_id": user_id,
+ },
+ )
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="receipts_graph",
+ values={
+ "room_id": room_id,
+ "receipt_type": receipt_type,
+ "user_id": user_id,
+ "event_ids": json_encoder.encode(event_ids),
+ "data": json_encoder.encode(data),
+ },
+ )
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
new file mode 100644
index 00000000..402ae255
--- /dev/null
+++ b/synapse/storage/databases/main/registration.py
@@ -0,0 +1,1565 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2017-2018 New Vector Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import re
+from typing import Dict, List, Optional
+
+from twisted.internet.defer import Deferred
+
+from synapse.api.constants import UserTypes
+from synapse.api.errors import Codes, StoreError, SynapseError, ThreepidValidationError
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.database import DatabasePool
+from synapse.storage.types import Cursor
+from synapse.storage.util.sequence import build_sequence_generator
+from synapse.types import UserID
+from synapse.util.caches.descriptors import cached
+
+THIRTY_MINUTES_IN_MS = 30 * 60 * 1000
+
+logger = logging.getLogger(__name__)
+
+
+class RegistrationWorkerStore(SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(RegistrationWorkerStore, self).__init__(database, db_conn, hs)
+
+ self.config = hs.config
+ self.clock = hs.get_clock()
+
+ self._user_id_seq = build_sequence_generator(
+ database.engine, find_max_generated_user_id_localpart, "user_id_seq",
+ )
+
+ @cached()
+ def get_user_by_id(self, user_id):
+ return self.db_pool.simple_select_one(
+ table="users",
+ keyvalues={"name": user_id},
+ retcols=[
+ "name",
+ "password_hash",
+ "is_guest",
+ "admin",
+ "consent_version",
+ "consent_server_notice_sent",
+ "appservice_id",
+ "creation_ts",
+ "user_type",
+ "deactivated",
+ ],
+ allow_none=True,
+ desc="get_user_by_id",
+ )
+
+ async def is_trial_user(self, user_id: str) -> bool:
+ """Checks if user is in the "trial" period, i.e. within the first
+ N days of registration defined by `mau_trial_days` config
+
+ Args:
+ user_id: The user to check for trial status.
+ """
+
+ info = await self.get_user_by_id(user_id)
+ if not info:
+ return False
+
+ now = self.clock.time_msec()
+ trial_duration_ms = self.config.mau_trial_days * 24 * 60 * 60 * 1000
+ is_trial = (now - info["creation_ts"] * 1000) < trial_duration_ms
+ return is_trial
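+
+ # e.g. with mau_trial_days = 3 the window is 3 * 24 * 60 * 60 * 1000 =
+ # 259200000 ms; creation_ts is stored in seconds, hence the * 1000 above.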
+
+ @cached()
+ def get_user_by_access_token(self, token):
+ """Get a user from the given access token.
+
+ Args:
+ token (str): The access token of a user.
+ Returns:
+ defer.Deferred: None, if the token did not match, otherwise dict
+ including the keys `name`, `is_guest`, `device_id`, `token_id`,
+ `valid_until_ms`.
+ """
+ return self.db_pool.runInteraction(
+ "get_user_by_access_token", self._query_for_auth, token
+ )
+
+ @cached()
+ async def get_expiration_ts_for_user(self, user_id: str) -> Optional[int]:
+ """Get the expiration timestamp for the account bearing a given user ID.
+
+ Args:
+ user_id: The ID of the user.
+ Returns:
+ None, if the account has no expiration timestamp, otherwise int
+ representation of the timestamp (as a number of milliseconds since epoch).
+ """
+ return await self.db_pool.simple_select_one_onecol(
+ table="account_validity",
+ keyvalues={"user_id": user_id},
+ retcol="expiration_ts_ms",
+ allow_none=True,
+ desc="get_expiration_ts_for_user",
+ )
+
+ async def set_account_validity_for_user(
+ self,
+ user_id: str,
+ expiration_ts: int,
+ email_sent: bool,
+ renewal_token: Optional[str] = None,
+ ) -> None:
+ """Updates the account validity properties of the given account, with the
+ given values.
+
+ Args:
+ user_id: ID of the account to update properties for.
+ expiration_ts: New expiration date, as a timestamp in milliseconds
+ since epoch.
+ email_sent: True means a renewal email has been sent for this account
+ and there's no need to send another one for the current validity
+ period.
+ renewal_token: Renewal token the user can use to extend the validity
+ of their account. Defaults to no token.
+ """
+
+ def set_account_validity_for_user_txn(txn):
+ self.db_pool.simple_update_txn(
+ txn=txn,
+ table="account_validity",
+ keyvalues={"user_id": user_id},
+ updatevalues={
+ "expiration_ts_ms": expiration_ts,
+ "email_sent": email_sent,
+ "renewal_token": renewal_token,
+ },
+ )
+ self._invalidate_cache_and_stream(
+ txn, self.get_expiration_ts_for_user, (user_id,)
+ )
+
+ await self.db_pool.runInteraction(
+ "set_account_validity_for_user", set_account_validity_for_user_txn
+ )
+
+ async def set_renewal_token_for_user(
+ self, user_id: str, renewal_token: str
+ ) -> None:
+ """Defines a renewal token for a given user.
+
+ Args:
+ user_id: ID of the user to set the renewal token for.
+ renewal_token: Random unique string that will be used to renew the
+ user's account.
+
+ Raises:
+ StoreError: The provided token is already set for another user.
+ """
+ await self.db_pool.simple_update_one(
+ table="account_validity",
+ keyvalues={"user_id": user_id},
+ updatevalues={"renewal_token": renewal_token},
+ desc="set_renewal_token_for_user",
+ )
+
+ async def get_user_from_renewal_token(self, renewal_token: str) -> str:
+ """Get a user ID from a renewal token.
+
+ Args:
+ renewal_token: The renewal token to perform the lookup with.
+
+ Returns:
+ The ID of the user to which the token belongs.
+ """
+ return await self.db_pool.simple_select_one_onecol(
+ table="account_validity",
+ keyvalues={"renewal_token": renewal_token},
+ retcol="user_id",
+ desc="get_user_from_renewal_token",
+ )
+
+ async def get_renewal_token_for_user(self, user_id: str) -> str:
+ """Get the renewal token associated with a given user ID.
+
+ Args:
+ user_id: The user ID to lookup a token for.
+
+ Returns:
+ The renewal token associated with this user ID.
+ """
+ return await self.db_pool.simple_select_one_onecol(
+ table="account_validity",
+ keyvalues={"user_id": user_id},
+ retcol="renewal_token",
+ desc="get_renewal_token_for_user",
+ )
+
+ async def get_users_expiring_soon(self) -> List[Dict[str, int]]:
+ """Selects users whose account will expire in the [now, now + renew_at] time
+ window (see configuration for account_validity for information on what renew_at
+ refers to).
+
+ Returns:
+ A list of dictionaries, each containing a user ID and an expiration time (in milliseconds).
+ """
+
+ def select_users_txn(txn, now_ms, renew_at):
+ sql = (
+ "SELECT user_id, expiration_ts_ms FROM account_validity"
+ " WHERE email_sent = ? AND (expiration_ts_ms - ?) <= ?"
+ )
+ values = [False, now_ms, renew_at]
+ txn.execute(sql, values)
+ return self.db_pool.cursor_to_dict(txn)
+
+ return await self.db_pool.runInteraction(
+ "get_users_expiring_soon",
+ select_users_txn,
+ self.clock.time_msec(),
+ self.config.account_validity.renew_at,
+ )
+
+ async def set_renewal_mail_status(self, user_id: str, email_sent: bool) -> None:
+ """Sets or unsets the flag that indicates whether a renewal email has been sent
+ to the user (and the user hasn't renewed their account yet).
+
+ Args:
+ user_id: ID of the user to set/unset the flag for.
+ email_sent: Flag which indicates whether a renewal email has been sent
+ to this user.
+ """
+ await self.db_pool.simple_update_one(
+ table="account_validity",
+ keyvalues={"user_id": user_id},
+ updatevalues={"email_sent": email_sent},
+ desc="set_renewal_mail_status",
+ )
+
+ async def delete_account_validity_for_user(self, user_id: str) -> None:
+ """Deletes the entry for the given user in the account validity table, removing
+ their expiration date and renewal token.
+
+ Args:
+ user_id: ID of the user to remove from the account validity table.
+ """
+ await self.db_pool.simple_delete_one(
+ table="account_validity",
+ keyvalues={"user_id": user_id},
+ desc="delete_account_validity_for_user",
+ )
+
+ async def is_server_admin(self, user: UserID) -> bool:
+ """Determines if a user is an admin of this homeserver.
+
+ Args:
+ user: user ID of the user to test
+
+ Returns:
+ true iff the user is a server admin, false otherwise.
+ """
+ res = await self.db_pool.simple_select_one_onecol(
+ table="users",
+ keyvalues={"name": user.to_string()},
+ retcol="admin",
+ allow_none=True,
+ desc="is_server_admin",
+ )
+
+ return bool(res) if res else False
+
+ def set_server_admin(self, user, admin):
+ """Sets whether a user is an admin of this homeserver.
+
+ Args:
+ user (UserID): user ID of the user to test
+ admin (bool): true iff the user is to be a server admin,
+ false otherwise.
+ """
+
+ def set_server_admin_txn(txn):
+ self.db_pool.simple_update_one_txn(
+ txn, "users", {"name": user.to_string()}, {"admin": 1 if admin else 0}
+ )
+ self._invalidate_cache_and_stream(
+ txn, self.get_user_by_id, (user.to_string(),)
+ )
+
+ return self.db_pool.runInteraction("set_server_admin", set_server_admin_txn)
+
+ def _query_for_auth(self, txn, token):
+ sql = (
+ "SELECT users.name, users.is_guest, access_tokens.id as token_id,"
+ " access_tokens.device_id, access_tokens.valid_until_ms"
+ " FROM users"
+ " INNER JOIN access_tokens on users.name = access_tokens.user_id"
+ " WHERE token = ?"
+ )
+
+ txn.execute(sql, (token,))
+ rows = self.db_pool.cursor_to_dict(txn)
+ if rows:
+ return rows[0]
+
+ return None
+
+ @cached()
+ async def is_real_user(self, user_id: str) -> bool:
+ """Determines if the user is a real user, ie does not have a 'user_type'.
+
+ Args:
+ user_id: user id to test
+
+ Returns:
+ True if user 'user_type' is null or empty string
+ """
+ return await self.db_pool.runInteraction(
+ "is_real_user", self.is_real_user_txn, user_id
+ )
+
+ @cached()
+ async def is_support_user(self, user_id: str) -> bool:
+ """Determines if the user is of type UserTypes.SUPPORT
+
+ Args:
+ user_id: user id to test
+
+ Returns:
+ True if user is of type UserTypes.SUPPORT
+ """
+ return await self.db_pool.runInteraction(
+ "is_support_user", self.is_support_user_txn, user_id
+ )
+
+ def is_real_user_txn(self, txn, user_id):
+ res = self.db_pool.simple_select_one_onecol_txn(
+ txn=txn,
+ table="users",
+ keyvalues={"name": user_id},
+ retcol="user_type",
+ allow_none=True,
+ )
+ return res is None
+
+ def is_support_user_txn(self, txn, user_id):
+ res = self.db_pool.simple_select_one_onecol_txn(
+ txn=txn,
+ table="users",
+ keyvalues={"name": user_id},
+ retcol="user_type",
+ allow_none=True,
+ )
+ return res == UserTypes.SUPPORT
+
+ def get_users_by_id_case_insensitive(self, user_id):
+ """Gets users that match user_id case insensitively.
+ Returns a mapping of user_id -> password_hash.
+ """
+
+ def f(txn):
+ sql = "SELECT name, password_hash FROM users WHERE lower(name) = lower(?)"
+ txn.execute(sql, (user_id,))
+ return dict(txn)
+
+ return self.db_pool.runInteraction("get_users_by_id_case_insensitive", f)
+
+ async def get_user_by_external_id(
+ self, auth_provider: str, external_id: str
+ ) -> Optional[str]:
+ """Look up a user by their external auth id
+
+ Args:
+ auth_provider: identifier for the remote auth provider
+ external_id: id on that system
+
+ Returns:
+ str|None: the mxid of the user, or None if they are not known
+ """
+ return await self.db_pool.simple_select_one_onecol(
+ table="user_external_ids",
+ keyvalues={"auth_provider": auth_provider, "external_id": external_id},
+ retcol="user_id",
+ allow_none=True,
+ desc="get_user_by_external_id",
+ )
+
+ async def count_all_users(self):
+ """Counts all users registered on the homeserver."""
+
+ def _count_users(txn):
+ txn.execute("SELECT COUNT(*) AS users FROM users")
+ rows = self.db_pool.cursor_to_dict(txn)
+ if rows:
+ return rows[0]["users"]
+ return 0
+
+ return await self.db_pool.runInteraction("count_users", _count_users)
+
+ def count_daily_user_type(self):
+ """
+ Counts 1) native non-guest users
+ 2) native guest users
+ 3) bridged users
+ who registered on the homeserver in the past 24 hours
+ """
+
+ def _count_daily_user_type(txn):
+ yesterday = int(self._clock.time()) - (60 * 60 * 24)
+
+ sql = """
+ SELECT user_type, COALESCE(count(*), 0) AS count FROM (
+ SELECT
+ CASE
+ WHEN is_guest=0 AND appservice_id IS NULL THEN 'native'
+ WHEN is_guest=1 AND appservice_id IS NULL THEN 'guest'
+ WHEN is_guest=0 AND appservice_id IS NOT NULL THEN 'bridged'
+ END AS user_type
+ FROM users
+ WHERE creation_ts > ?
+ ) AS t GROUP BY user_type
+ """
+ results = {"native": 0, "guest": 0, "bridged": 0}
+ txn.execute(sql, (yesterday,))
+ for row in txn:
+ results[row[0]] = row[1]
+ return results
+
+ return self.db_pool.runInteraction(
+ "count_daily_user_type", _count_daily_user_type
+ )
+
+ async def count_nonbridged_users(self):
+ def _count_users(txn):
+ txn.execute(
+ """
+ SELECT COALESCE(COUNT(*), 0) FROM users
+ WHERE appservice_id IS NULL
+ """
+ )
+ (count,) = txn.fetchone()
+ return count
+
+ return await self.db_pool.runInteraction("count_users", _count_users)
+
+ async def count_real_users(self):
+ """Counts all users without a special user_type registered on the homeserver."""
+
+ def _count_users(txn):
+ txn.execute("SELECT COUNT(*) AS users FROM users where user_type is null")
+ rows = self.db_pool.cursor_to_dict(txn)
+ if rows:
+ return rows[0]["users"]
+ return 0
+
+ return await self.db_pool.runInteraction("count_real_users", _count_users)
+
+ async def generate_user_id(self) -> str:
+ """Generate a suitable localpart for a guest user
+
+ Returns: a (hopefully) free localpart
+ """
+ next_id = await self.db_pool.runInteraction(
+ "generate_user_id", self._user_id_seq.get_next_id_txn
+ )
+
+ return str(next_id)
+
+ async def get_user_id_by_threepid(self, medium: str, address: str) -> Optional[str]:
+ """Returns user id from threepid
+
+ Args:
+ medium: threepid medium e.g. email
+ address: threepid address e.g. me@example.com
+
+ Returns:
+ The user ID or None if no user id/threepid mapping exists
+ """
+ user_id = await self.db_pool.runInteraction(
+ "get_user_id_by_threepid", self.get_user_id_by_threepid_txn, medium, address
+ )
+ return user_id
+
+ def get_user_id_by_threepid_txn(self, txn, medium, address):
+ """Returns user id from threepid
+
+ Args:
+ txn (cursor):
+ medium (str): threepid medium e.g. email
+ address (str): threepid address e.g. me@example.com
+
+ Returns:
+ str|None: user id or None if no user id/threepid mapping exists
+ """
+ ret = self.db_pool.simple_select_one_txn(
+ txn,
+ "user_threepids",
+ {"medium": medium, "address": address},
+ ["user_id"],
+ True,
+ )
+ if ret:
+ return ret["user_id"]
+ return None
+
+ async def user_add_threepid(self, user_id, medium, address, validated_at, added_at):
+ await self.db_pool.simple_upsert(
+ "user_threepids",
+ {"medium": medium, "address": address},
+ {"user_id": user_id, "validated_at": validated_at, "added_at": added_at},
+ )
+
+ async def user_get_threepids(self, user_id):
+ return await self.db_pool.simple_select_list(
+ "user_threepids",
+ {"user_id": user_id},
+ ["medium", "address", "validated_at", "added_at"],
+ "user_get_threepids",
+ )
+
+ def user_delete_threepid(self, user_id, medium, address):
+ return self.db_pool.simple_delete(
+ "user_threepids",
+ keyvalues={"user_id": user_id, "medium": medium, "address": address},
+ desc="user_delete_threepid",
+ )
+
+ def user_delete_threepids(self, user_id: str):
+ """Delete all threepid this user has bound
+
+ Args:
+ user_id: The user id to delete all threepids of
+
+ """
+ return self.db_pool.simple_delete(
+ "user_threepids",
+ keyvalues={"user_id": user_id},
+ desc="user_delete_threepids",
+ )
+
+ def add_user_bound_threepid(self, user_id, medium, address, id_server):
+ """The server proxied a bind request to the given identity server on
+ behalf of the given user. We need to remember this in case the user
+ asks us to unbind the threepid.
+
+ Args:
+ user_id (str)
+ medium (str)
+ address (str)
+ id_server (str)
+
+ Returns:
+ Deferred
+ """
+ # We need to use an upsert, in case the user had already bound the
+ # threepid
+ return self.db_pool.simple_upsert(
+ table="user_threepid_id_server",
+ keyvalues={
+ "user_id": user_id,
+ "medium": medium,
+ "address": address,
+ "id_server": id_server,
+ },
+ values={},
+ insertion_values={},
+ desc="add_user_bound_threepid",
+ )
+
+ def user_get_bound_threepids(self, user_id):
+ """Get the threepids that a user has bound to an identity server through the homeserver
+ The homeserver remembers where binds to an identity server occurred. Using this
+ method can retrieve those threepids.
+
+ Args:
+ user_id (str): The ID of the user to retrieve threepids for
+
+ Returns:
+ Deferred[list[dict]]: List of dictionaries containing the following:
+ medium (str): The medium of the threepid (e.g "email")
+ address (str): The address of the threepid (e.g "bob@example.com")
+ """
+ return self.db_pool.simple_select_list(
+ table="user_threepid_id_server",
+ keyvalues={"user_id": user_id},
+ retcols=["medium", "address"],
+ desc="user_get_bound_threepids",
+ )
+
+ def remove_user_bound_threepid(self, user_id, medium, address, id_server):
+ """The server proxied an unbind request to the given identity server on
+ behalf of the given user, so we remove the mapping of threepid to
+ identity server.
+
+ Args:
+ user_id (str)
+ medium (str)
+ address (str)
+ id_server (str)
+
+ Returns:
+ Deferred
+ """
+ return self.db_pool.simple_delete(
+ table="user_threepid_id_server",
+ keyvalues={
+ "user_id": user_id,
+ "medium": medium,
+ "address": address,
+ "id_server": id_server,
+ },
+ desc="remove_user_bound_threepid",
+ )
+
+ def get_id_servers_user_bound(self, user_id, medium, address):
+ """Get the list of identity servers that the server proxied bind
+ requests to for given user and threepid
+
+ Args:
+ user_id (str)
+ medium (str)
+ address (str)
+
+ Returns:
+ Deferred[list[str]]: Resolves to a list of identity servers
+ """
+ return self.db_pool.simple_select_onecol(
+ table="user_threepid_id_server",
+ keyvalues={"user_id": user_id, "medium": medium, "address": address},
+ retcol="id_server",
+ desc="get_id_servers_user_bound",
+ )
+
+ @cached()
+ async def get_user_deactivated_status(self, user_id: str) -> bool:
+ """Retrieve the value for the `deactivated` property for the provided user.
+
+ Args:
+ user_id: The ID of the user to retrieve the status for.
+
+ Returns:
+ True if the user was deactivated, false if the user is still active.
+ """
+
+ res = await self.db_pool.simple_select_one_onecol(
+ table="users",
+ keyvalues={"name": user_id},
+ retcol="deactivated",
+ desc="get_user_deactivated_status",
+ )
+
+ # Convert the integer into a boolean.
+ return res == 1
+
+ def get_threepid_validation_session(
+ self, medium, client_secret, address=None, sid=None, validated=True
+ ):
+ """Gets a session_id and last_send_attempt (if available) for a
+ combination of validation metadata
+
+ Args:
+            medium (str|None): The medium of the 3PID
+            client_secret (str): A unique string provided by the client to help
+                identify this validation attempt
+            address (str|None): The address of the 3PID
+            sid (str|None): The ID of the validation session
+ validated (bool|None): Whether sessions should be filtered by
+ whether they have been validated already or not. None to
+ perform no filtering
+
+ Returns:
+ Deferred[dict|None]: A dict containing the following:
+ * address - address of the 3pid
+ * medium - medium of the 3pid
+ * client_secret - a secret provided by the client for this validation session
+ * session_id - ID of the validation session
+                * last_send_attempt - a number serving to dedupe send attempts for this session
+ * validated_at - timestamp of when this session was validated if so
+
+ Otherwise None if a validation session is not found
+ """
+ if not client_secret:
+ raise SynapseError(
+ 400, "Missing parameter: client_secret", errcode=Codes.MISSING_PARAM
+ )
+
+ keyvalues = {"client_secret": client_secret}
+ if medium:
+ keyvalues["medium"] = medium
+ if address:
+ keyvalues["address"] = address
+ if sid:
+ keyvalues["session_id"] = sid
+
+ assert address or sid
+
+ def get_threepid_validation_session_txn(txn):
+ sql = """
+ SELECT address, session_id, medium, client_secret,
+ last_send_attempt, validated_at
+ FROM threepid_validation_session WHERE %s
+ """ % (
+ " AND ".join("%s = ?" % k for k in keyvalues.keys()),
+ )
+
+ if validated is not None:
+ sql += " AND validated_at IS " + ("NOT NULL" if validated else "NULL")
+
+ sql += " LIMIT 1"
+
+ txn.execute(sql, list(keyvalues.values()))
+ rows = self.db_pool.cursor_to_dict(txn)
+ if not rows:
+ return None
+
+ return rows[0]
+
+ return self.db_pool.runInteraction(
+ "get_threepid_validation_session", get_threepid_validation_session_txn
+ )
+
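+    # For example, calling get_threepid_validation_session with
+    # client_secret="abc", medium="email", address="bob@example.com",
+    # sid=None and validated=True (values here are hypothetical) builds
+    # keyvalues from client_secret, medium and address, and so runs:
+    #
+    #   SELECT address, session_id, medium, client_secret,
+    #          last_send_attempt, validated_at
+    #   FROM threepid_validation_session
+    #   WHERE client_secret = ? AND medium = ? AND address = ?
+    #     AND validated_at IS NOT NULL
+    #   LIMIT 1
+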
+ def delete_threepid_session(self, session_id):
+ """Removes a threepid validation session from the database. This can
+ be done after validation has been performed and whatever action was
+ waiting on it has been carried out
+
+ Args:
+ session_id (str): The ID of the session to delete
+ """
+
+ def delete_threepid_session_txn(txn):
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="threepid_validation_token",
+ keyvalues={"session_id": session_id},
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="threepid_validation_session",
+ keyvalues={"session_id": session_id},
+ )
+
+ return self.db_pool.runInteraction(
+ "delete_threepid_session", delete_threepid_session_txn
+ )
+
+
+class RegistrationBackgroundUpdateStore(RegistrationWorkerStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(RegistrationBackgroundUpdateStore, self).__init__(database, db_conn, hs)
+
+ self.clock = hs.get_clock()
+ self.config = hs.config
+
+ self.db_pool.updates.register_background_index_update(
+ "access_tokens_device_index",
+ index_name="access_tokens_device_id",
+ table="access_tokens",
+ columns=["user_id", "device_id"],
+ )
+
+ self.db_pool.updates.register_background_index_update(
+ "users_creation_ts",
+ index_name="users_creation_ts",
+ table="users",
+ columns=["creation_ts"],
+ )
+
+ # we no longer use refresh tokens, but it's possible that some people
+ # might have a background update queued to build this index. Just
+ # clear the background update.
+ self.db_pool.updates.register_noop_background_update(
+ "refresh_tokens_device_index"
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ "user_threepids_grandfather", self._bg_user_threepids_grandfather
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ "users_set_deactivated_flag", self._background_update_set_deactivated_flag
+ )
+
+ async def _background_update_set_deactivated_flag(self, progress, batch_size):
+ """Retrieves a list of all deactivated users and sets the 'deactivated' flag to 1
+ for each of them.
+ """
+
+ last_user = progress.get("user_id", "")
+
+ def _background_update_set_deactivated_flag_txn(txn):
+ txn.execute(
+ """
+ SELECT
+ users.name,
+ COUNT(access_tokens.token) AS count_tokens,
+ COUNT(user_threepids.address) AS count_threepids
+ FROM users
+ LEFT JOIN access_tokens ON (access_tokens.user_id = users.name)
+ LEFT JOIN user_threepids ON (user_threepids.user_id = users.name)
+ WHERE (users.password_hash IS NULL OR users.password_hash = '')
+ AND (users.appservice_id IS NULL OR users.appservice_id = '')
+ AND users.is_guest = 0
+ AND users.name > ?
+ GROUP BY users.name
+ ORDER BY users.name ASC
+ LIMIT ?;
+ """,
+ (last_user, batch_size),
+ )
+
+ rows = self.db_pool.cursor_to_dict(txn)
+
+ if not rows:
+ return True, 0
+
+ rows_processed_nb = 0
+
+ for user in rows:
+ if not user["count_tokens"] and not user["count_threepids"]:
+ self.set_user_deactivated_status_txn(txn, user["name"], True)
+ rows_processed_nb += 1
+
+ logger.info("Marked %d rows as deactivated", rows_processed_nb)
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, "users_set_deactivated_flag", {"user_id": rows[-1]["name"]}
+ )
+
+ if batch_size > len(rows):
+ return True, len(rows)
+ else:
+ return False, len(rows)
+
+ end, nb_processed = await self.db_pool.runInteraction(
+ "users_set_deactivated_flag", _background_update_set_deactivated_flag_txn
+ )
+
+ if end:
+ await self.db_pool.updates._end_background_update(
+ "users_set_deactivated_flag"
+ )
+
+ return nb_processed
+
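+    # The handler above follows the usual background-update contract: the
+    # transaction returns (finished, rows_handled), persists the last-seen
+    # user_id as progress so the next batch resumes after it, and the update
+    # is ended once a batch comes back short. A minimal sketch of the same
+    # shape (names hypothetical):
+    #
+    #   async def _bg_example(self, progress, batch_size):
+    #       done, handled = await self.db_pool.runInteraction("example", txn_fn)
+    #       if done:
+    #           await self.db_pool.updates._end_background_update("example")
+    #       return handled
+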
+ async def _bg_user_threepids_grandfather(self, progress, batch_size):
+ """We now track which identity servers a user binds their 3PID to, so
+ we need to handle the case of existing bindings where we didn't track
+ this.
+
+ We do this by grandfathering in existing user threepids assuming that
+ they used one of the server configured trusted identity servers.
+ """
+ id_servers = set(self.config.trusted_third_party_id_servers)
+
+ def _bg_user_threepids_grandfather_txn(txn):
+ sql = """
+ INSERT INTO user_threepid_id_server
+ (user_id, medium, address, id_server)
+ SELECT user_id, medium, address, ?
+ FROM user_threepids
+ """
+
+ txn.executemany(sql, [(id_server,) for id_server in id_servers])
+
+ if id_servers:
+ await self.db_pool.runInteraction(
+ "_bg_user_threepids_grandfather", _bg_user_threepids_grandfather_txn
+ )
+
+ await self.db_pool.updates._end_background_update("user_threepids_grandfather")
+
+ return 1
+
+
+class RegistrationStore(RegistrationBackgroundUpdateStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(RegistrationStore, self).__init__(database, db_conn, hs)
+
+ self._account_validity = hs.config.account_validity
+
+ if self._account_validity.enabled:
+ self._clock.call_later(
+ 0.0,
+ run_as_background_process,
+ "account_validity_set_expiration_dates",
+ self._set_expiration_date_when_missing,
+ )
+
+ # Create a background job for culling expired 3PID validity tokens
+ def start_cull():
+ # run as a background process to make sure that the database transactions
+ # have a logcontext to report to
+ return run_as_background_process(
+ "cull_expired_threepid_validation_tokens",
+ self.cull_expired_threepid_validation_tokens,
+ )
+
+ hs.get_clock().looping_call(start_cull, THIRTY_MINUTES_IN_MS)
+
+ async def add_access_token_to_user(
+ self,
+ user_id: str,
+ token: str,
+ device_id: Optional[str],
+ valid_until_ms: Optional[int],
+ ) -> None:
+ """Adds an access token for the given user.
+
+ Args:
+ user_id: The user ID.
+ token: The new access token to add.
+ device_id: ID of the device to associate with the access token
+            valid_until_ms: the time (in ms) until which the token is valid. None for no expiry.
+ Raises:
+ StoreError if there was a problem adding this.
+ """
+ next_id = self._access_tokens_id_gen.get_next()
+
+ await self.db_pool.simple_insert(
+ "access_tokens",
+ {
+ "id": next_id,
+ "user_id": user_id,
+ "token": token,
+ "device_id": device_id,
+ "valid_until_ms": valid_until_ms,
+ },
+ desc="add_access_token_to_user",
+ )
+
+ def register_user(
+ self,
+ user_id,
+ password_hash=None,
+ was_guest=False,
+ make_guest=False,
+ appservice_id=None,
+ create_profile_with_displayname=None,
+ admin=False,
+ user_type=None,
+ ):
+ """Attempts to register an account.
+
+ Args:
+ user_id (str): The desired user ID to register.
+ password_hash (str|None): Optional. The password hash for this user.
+ was_guest (bool): Optional. Whether this is a guest account being
+ upgraded to a non-guest account.
+            make_guest (boolean): True if the new user should be a guest,
+                false to add a regular user account.
+ appservice_id (str): The ID of the appservice registering the user.
+ create_profile_with_displayname (unicode): Optionally create a profile for
+ the user, setting their displayname to the given value
+            admin (boolean): Whether the user should be created as an admin.
+ user_type (str|None): type of user. One of the values from
+ api.constants.UserTypes, or None for a normal user.
+
+ Raises:
+ StoreError if the user_id could not be registered.
+
+ Returns:
+ Deferred
+ """
+ return self.db_pool.runInteraction(
+ "register_user",
+ self._register_user,
+ user_id,
+ password_hash,
+ was_guest,
+ make_guest,
+ appservice_id,
+ create_profile_with_displayname,
+ admin,
+ user_type,
+ )
+
+ def _register_user(
+ self,
+ txn,
+ user_id,
+ password_hash,
+ was_guest,
+ make_guest,
+ appservice_id,
+ create_profile_with_displayname,
+ admin,
+ user_type,
+ ):
+ user_id_obj = UserID.from_string(user_id)
+
+ now = int(self.clock.time())
+
+ try:
+ if was_guest:
+ # Ensure that the guest user actually exists
+ # ``allow_none=False`` makes this raise an exception
+ # if the row isn't in the database.
+ self.db_pool.simple_select_one_txn(
+ txn,
+ "users",
+ keyvalues={"name": user_id, "is_guest": 1},
+ retcols=("name",),
+ allow_none=False,
+ )
+
+ self.db_pool.simple_update_one_txn(
+ txn,
+ "users",
+ keyvalues={"name": user_id, "is_guest": 1},
+ updatevalues={
+ "password_hash": password_hash,
+ "upgrade_ts": now,
+ "is_guest": 1 if make_guest else 0,
+ "appservice_id": appservice_id,
+ "admin": 1 if admin else 0,
+ "user_type": user_type,
+ },
+ )
+ else:
+ self.db_pool.simple_insert_txn(
+ txn,
+ "users",
+ values={
+ "name": user_id,
+ "password_hash": password_hash,
+ "creation_ts": now,
+ "is_guest": 1 if make_guest else 0,
+ "appservice_id": appservice_id,
+ "admin": 1 if admin else 0,
+ "user_type": user_type,
+ },
+ )
+
+ except self.database_engine.module.IntegrityError:
+ raise StoreError(400, "User ID already taken.", errcode=Codes.USER_IN_USE)
+
+ if self._account_validity.enabled:
+ self.set_expiration_date_for_user_txn(txn, user_id)
+
+ if create_profile_with_displayname:
+ # set a default displayname serverside to avoid ugly race
+ # between auto-joins and clients trying to set displaynames
+ #
+ # *obviously* the 'profiles' table uses localpart for user_id
+ # while everything else uses the full mxid.
+ txn.execute(
+ "INSERT INTO profiles(user_id, displayname) VALUES (?,?)",
+ (user_id_obj.localpart, create_profile_with_displayname),
+ )
+
+ if self.hs.config.stats_enabled:
+ # we create a new completed user statistics row
+
+ # we don't strictly need current_token since this user really can't
+ # have any state deltas before now (as it is a new user), but still,
+ # we include it for completeness.
+ current_token = self._get_max_stream_id_in_current_state_deltas_txn(txn)
+ self._update_stats_delta_txn(
+ txn, now, "user", user_id, {}, complete_with_stream_id=current_token
+ )
+
+ self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,))
+
+ def record_user_external_id(
+ self, auth_provider: str, external_id: str, user_id: str
+ ) -> Deferred:
+ """Record a mapping from an external user id to a mxid
+
+ Args:
+ auth_provider: identifier for the remote auth provider
+ external_id: id on that system
+ user_id: complete mxid that it is mapped to
+ """
+ return self.db_pool.simple_insert(
+ table="user_external_ids",
+ values={
+ "auth_provider": auth_provider,
+ "external_id": external_id,
+ "user_id": user_id,
+ },
+ desc="record_user_external_id",
+ )
+
+ def user_set_password_hash(self, user_id, password_hash):
+ """
+        NB. This does *not* evict any cache because the one caller of this
+        subsequently removes most of the entries anyway, so doing so would be
+        pointless. Use flush_user separately.
+ """
+
+ def user_set_password_hash_txn(txn):
+ self.db_pool.simple_update_one_txn(
+ txn, "users", {"name": user_id}, {"password_hash": password_hash}
+ )
+ self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,))
+
+ return self.db_pool.runInteraction(
+ "user_set_password_hash", user_set_password_hash_txn
+ )
+
+ def user_set_consent_version(self, user_id, consent_version):
+ """Updates the user table to record privacy policy consent
+
+ Args:
+ user_id (str): full mxid of the user to update
+ consent_version (str): version of the policy the user has consented
+ to
+
+ Raises:
+ StoreError(404) if user not found
+ """
+
+ def f(txn):
+ self.db_pool.simple_update_one_txn(
+ txn,
+ table="users",
+ keyvalues={"name": user_id},
+ updatevalues={"consent_version": consent_version},
+ )
+ self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,))
+
+ return self.db_pool.runInteraction("user_set_consent_version", f)
+
+ def user_set_consent_server_notice_sent(self, user_id, consent_version):
+ """Updates the user table to record that we have sent the user a server
+ notice about privacy policy consent
+
+ Args:
+ user_id (str): full mxid of the user to update
+ consent_version (str): version of the policy we have notified the
+ user about
+
+ Raises:
+ StoreError(404) if user not found
+ """
+
+ def f(txn):
+ self.db_pool.simple_update_one_txn(
+ txn,
+ table="users",
+ keyvalues={"name": user_id},
+ updatevalues={"consent_server_notice_sent": consent_version},
+ )
+ self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,))
+
+ return self.db_pool.runInteraction("user_set_consent_server_notice_sent", f)
+
+ def user_delete_access_tokens(self, user_id, except_token_id=None, device_id=None):
+ """
+ Invalidate access tokens belonging to a user
+
+ Args:
+ user_id (str): ID of user the tokens belong to
+            except_token_id (str): ID of an access token which should
+                *not* be deleted
+ device_id (str|None): ID of device the tokens are associated with.
+ If None, tokens associated with any device (or no device) will
+ be deleted
+ Returns:
+            defer.Deferred[list[tuple[str, int, str|None]]]: a list of
+                (token, token id, device id) for each of the deleted tokens
+ """
+
+ def f(txn):
+ keyvalues = {"user_id": user_id}
+ if device_id is not None:
+ keyvalues["device_id"] = device_id
+
+ items = keyvalues.items()
+ where_clause = " AND ".join(k + " = ?" for k, _ in items)
+ values = [v for _, v in items]
+ if except_token_id:
+ where_clause += " AND id != ?"
+ values.append(except_token_id)
+
+ txn.execute(
+ "SELECT token, id, device_id FROM access_tokens WHERE %s"
+ % where_clause,
+ values,
+ )
+ tokens_and_devices = [(r[0], r[1], r[2]) for r in txn]
+
+ for token, _, _ in tokens_and_devices:
+ self._invalidate_cache_and_stream(
+ txn, self.get_user_by_access_token, (token,)
+ )
+
+ txn.execute("DELETE FROM access_tokens WHERE %s" % where_clause, values)
+
+ return tokens_and_devices
+
+ return self.db_pool.runInteraction("user_delete_access_tokens", f)
+
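+    # e.g. user_delete_access_tokens("@bob:hs", except_token_id=7,
+    # device_id="DEV") (arguments here are hypothetical) produces:
+    #
+    #   SELECT token, id, device_id FROM access_tokens
+    #   WHERE user_id = ? AND device_id = ? AND id != ?
+    #
+    # with values ["@bob:hs", "DEV", 7]. The matching rows are read first so
+    # get_user_by_access_token can be invalidated per token before the DELETE
+    # reuses the same WHERE clause.
+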
+ def delete_access_token(self, access_token):
+ def f(txn):
+ self.db_pool.simple_delete_one_txn(
+ txn, table="access_tokens", keyvalues={"token": access_token}
+ )
+
+ self._invalidate_cache_and_stream(
+ txn, self.get_user_by_access_token, (access_token,)
+ )
+
+ return self.db_pool.runInteraction("delete_access_token", f)
+
+ @cached()
+ async def is_guest(self, user_id: str) -> bool:
+ res = await self.db_pool.simple_select_one_onecol(
+ table="users",
+ keyvalues={"name": user_id},
+ retcol="is_guest",
+ allow_none=True,
+ desc="is_guest",
+ )
+
+ return res if res else False
+
+ def add_user_pending_deactivation(self, user_id):
+ """
+ Adds a user to the table of users who need to be parted from all the rooms they're
+ in
+ """
+ return self.db_pool.simple_insert(
+ "users_pending_deactivation",
+ values={"user_id": user_id},
+ desc="add_user_pending_deactivation",
+ )
+
+ def del_user_pending_deactivation(self, user_id):
+ """
+        Removes the given user from the table of users who need to be parted from all the
+ rooms they're in, effectively marking that user as fully deactivated.
+ """
+ # XXX: This should be simple_delete_one but we failed to put a unique index on
+ # the table, so somehow duplicate entries have ended up in it.
+ return self.db_pool.simple_delete(
+ "users_pending_deactivation",
+ keyvalues={"user_id": user_id},
+ desc="del_user_pending_deactivation",
+ )
+
+ def get_user_pending_deactivation(self):
+ """
+ Gets one user from the table of users waiting to be parted from all the rooms
+ they're in.
+ """
+ return self.db_pool.simple_select_one_onecol(
+ "users_pending_deactivation",
+ keyvalues={},
+ retcol="user_id",
+ allow_none=True,
+ desc="get_users_pending_deactivation",
+ )
+
+ def validate_threepid_session(self, session_id, client_secret, token, current_ts):
+ """Attempt to validate a threepid session using a token
+
+ Args:
+ session_id (str): The id of a validation session
+ client_secret (str): A unique string provided by the client to
+ help identify this validation attempt
+ token (str): A validation token
+ current_ts (int): The current unix time in milliseconds. Used for
+ checking token expiry status
+
+ Raises:
+ ThreepidValidationError: if a matching validation token was not found or has
+ expired
+
+ Returns:
+            Deferred[str|None]: A str representing a link to redirect the user
+                to if there is one.
+ """
+
+        # Run everything in a transaction so it executes atomically
+ def validate_threepid_session_txn(txn):
+ row = self.db_pool.simple_select_one_txn(
+ txn,
+ table="threepid_validation_session",
+ keyvalues={"session_id": session_id},
+ retcols=["client_secret", "validated_at"],
+ allow_none=True,
+ )
+
+ if not row:
+ raise ThreepidValidationError(400, "Unknown session_id")
+ retrieved_client_secret = row["client_secret"]
+ validated_at = row["validated_at"]
+
+ if retrieved_client_secret != client_secret:
+ raise ThreepidValidationError(
+ 400, "This client_secret does not match the provided session_id"
+ )
+
+ row = self.db_pool.simple_select_one_txn(
+ txn,
+ table="threepid_validation_token",
+ keyvalues={"session_id": session_id, "token": token},
+ retcols=["expires", "next_link"],
+ allow_none=True,
+ )
+
+ if not row:
+ raise ThreepidValidationError(
+ 400, "Validation token not found or has expired"
+ )
+ expires = row["expires"]
+ next_link = row["next_link"]
+
+ # If the session is already validated, no need to revalidate
+ if validated_at:
+ return next_link
+
+ if expires <= current_ts:
+ raise ThreepidValidationError(
+ 400, "This token has expired. Please request a new one"
+ )
+
+ # Looks good. Validate the session
+ self.db_pool.simple_update_txn(
+ txn,
+ table="threepid_validation_session",
+ keyvalues={"session_id": session_id},
+ updatevalues={"validated_at": self.clock.time_msec()},
+ )
+
+ return next_link
+
+ # Return next_link if it exists
+ return self.db_pool.runInteraction(
+ "validate_threepid_session_txn", validate_threepid_session_txn
+ )
+
+ def upsert_threepid_validation_session(
+ self,
+ medium,
+ address,
+ client_secret,
+ send_attempt,
+ session_id,
+ validated_at=None,
+ ):
+ """Upsert a threepid validation session
+ Args:
+ medium (str): The medium of the 3PID
+ address (str): The address of the 3PID
+ client_secret (str): A unique string provided by the client to
+ help identify this validation attempt
+ send_attempt (int): The latest send_attempt on this session
+ session_id (str): The id of this validation session
+ validated_at (int|None): The unix timestamp in milliseconds of
+ when the session was marked as valid
+ """
+ insertion_values = {
+ "medium": medium,
+ "address": address,
+ "client_secret": client_secret,
+ }
+
+ if validated_at:
+ insertion_values["validated_at"] = validated_at
+
+ return self.db_pool.simple_upsert(
+ table="threepid_validation_session",
+ keyvalues={"session_id": session_id},
+ values={"last_send_attempt": send_attempt},
+ insertion_values=insertion_values,
+ desc="upsert_threepid_validation_session",
+ )
+
+ def start_or_continue_validation_session(
+ self,
+ medium,
+ address,
+ session_id,
+ client_secret,
+ send_attempt,
+ next_link,
+ token,
+ token_expires,
+ ):
+ """Creates a new threepid validation session if it does not already
+ exist and associates a new validation token with it
+
+ Args:
+ medium (str): The medium of the 3PID
+ address (str): The address of the 3PID
+ session_id (str): The id of this validation session
+ client_secret (str): A unique string provided by the client to
+ help identify this validation attempt
+ send_attempt (int): The latest send_attempt on this session
+ next_link (str|None): The link to redirect the user to upon
+ successful validation
+ token (str): The validation token
+            token_expires (int): The timestamp after which the token
+                will no longer be valid
+ """
+
+ def start_or_continue_validation_session_txn(txn):
+ # Create or update a validation session
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="threepid_validation_session",
+ keyvalues={"session_id": session_id},
+ values={"last_send_attempt": send_attempt},
+ insertion_values={
+ "medium": medium,
+ "address": address,
+ "client_secret": client_secret,
+ },
+ )
+
+ # Create a new validation token with this session ID
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="threepid_validation_token",
+ values={
+ "session_id": session_id,
+ "token": token,
+ "next_link": next_link,
+ "expires": token_expires,
+ },
+ )
+
+ return self.db_pool.runInteraction(
+ "start_or_continue_validation_session",
+ start_or_continue_validation_session_txn,
+ )
+
+ def cull_expired_threepid_validation_tokens(self):
+ """Remove threepid validation tokens with expiry dates that have passed"""
+
+ def cull_expired_threepid_validation_tokens_txn(txn, ts):
+ sql = """
+ DELETE FROM threepid_validation_token WHERE
+ expires < ?
+ """
+ return txn.execute(sql, (ts,))
+
+ return self.db_pool.runInteraction(
+ "cull_expired_threepid_validation_tokens",
+ cull_expired_threepid_validation_tokens_txn,
+ self.clock.time_msec(),
+ )
+
+ async def set_user_deactivated_status(
+ self, user_id: str, deactivated: bool
+ ) -> None:
+ """Set the `deactivated` property for the provided user to the provided value.
+
+ Args:
+ user_id: The ID of the user to set the status for.
+ deactivated: The value to set for `deactivated`.
+ """
+
+ await self.db_pool.runInteraction(
+ "set_user_deactivated_status",
+ self.set_user_deactivated_status_txn,
+ user_id,
+ deactivated,
+ )
+
+ def set_user_deactivated_status_txn(self, txn, user_id, deactivated):
+ self.db_pool.simple_update_one_txn(
+ txn=txn,
+ table="users",
+ keyvalues={"name": user_id},
+ updatevalues={"deactivated": 1 if deactivated else 0},
+ )
+ self._invalidate_cache_and_stream(
+ txn, self.get_user_deactivated_status, (user_id,)
+ )
+ txn.call_after(self.is_guest.invalidate, (user_id,))
+
+ async def _set_expiration_date_when_missing(self):
+ """
+ Retrieves the list of registered users that don't have an expiration date, and
+ adds an expiration date for each of them.
+ """
+
+ def select_users_with_no_expiration_date_txn(txn):
+ """Retrieves the list of registered users with no expiration date from the
+ database, filtering out deactivated users.
+ """
+ sql = (
+ "SELECT users.name FROM users"
+ " LEFT JOIN account_validity ON (users.name = account_validity.user_id)"
+ " WHERE account_validity.user_id is NULL AND users.deactivated = 0;"
+ )
+ txn.execute(sql, [])
+
+ res = self.db_pool.cursor_to_dict(txn)
+ if res:
+ for user in res:
+ self.set_expiration_date_for_user_txn(
+ txn, user["name"], use_delta=True
+ )
+
+ await self.db_pool.runInteraction(
+ "get_users_with_no_expiration_date",
+ select_users_with_no_expiration_date_txn,
+ )
+
+ def set_expiration_date_for_user_txn(self, txn, user_id, use_delta=False):
+ """Sets an expiration date to the account with the given user ID.
+
+ Args:
+ user_id (str): User ID to set an expiration date for.
+ use_delta (bool): If set to False, the expiration date for the user will be
+ now + validity period. If set to True, this expiration date will be a
+ random value in the [now + period - d ; now + period] range, d being a
+ delta equal to 10% of the validity period.
+ """
+ now_ms = self._clock.time_msec()
+ expiration_ts = now_ms + self._account_validity.period
+
+ if use_delta:
+ expiration_ts = self.rand.randrange(
+ expiration_ts - self._account_validity.startup_job_max_delta,
+ expiration_ts,
+ )
+
+ self.db_pool.simple_upsert_txn(
+ txn,
+ "account_validity",
+ keyvalues={"user_id": user_id},
+ values={"expiration_ts_ms": expiration_ts, "email_sent": False},
+ )
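+
+        # Worked example (numbers hypothetical): with a validity period of 30
+        # days and startup_job_max_delta of 3 days (10% of the period),
+        # use_delta=True picks expiration_ts uniformly in [now + 27d,
+        # now + 30d), so existing accounts do not all expire at the same
+        # instant.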
+
+
+def find_max_generated_user_id_localpart(cur: Cursor) -> int:
+ """
+ Gets the localpart of the max current generated user ID.
+
+ Generated user IDs are integers, so we find the largest integer user ID
+ already taken and return that.
+ """
+
+ # We bound between '@0' and '@a' to avoid pulling the entire table
+ # out.
+ cur.execute("SELECT name FROM users WHERE '@0' <= name AND name < '@a'")
+
+ regex = re.compile(r"^@(\d+):")
+
+ max_found = 0
+
+ for (user_id,) in cur:
+ match = regex.search(user_id)
+ if match:
+ max_found = max(int(match.group(1)), max_found)
+ return max_found
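+
+# Illustration of the bounds above: generated IDs look like "@1234:server",
+# and every such name sorts lexicographically between '@0' and '@a' (ASCII
+# digits precede lowercase letters), so the range predicate narrows the scan
+# to a slice of the users table; the regex then keeps only purely numeric
+# localparts. E.g. rows "@17:hs" and "@9:hs" yield 17 and 9, and the
+# function returns 17.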
diff --git a/synapse/storage/databases/main/rejections.py b/synapse/storage/databases/main/rejections.py
new file mode 100644
index 00000000..cf9ba512
--- /dev/null
+++ b/synapse/storage/databases/main/rejections.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from synapse.storage._base import SQLBaseStore
+
+logger = logging.getLogger(__name__)
+
+
+class RejectionsStore(SQLBaseStore):
+ def get_rejection_reason(self, event_id):
+ return self.db_pool.simple_select_one_onecol(
+ table="rejections",
+ retcol="reason",
+ keyvalues={"event_id": event_id},
+ allow_none=True,
+ desc="get_rejection_reason",
+ )
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
new file mode 100644
index 00000000..a9ceffc2
--- /dev/null
+++ b/synapse/storage/databases/main/relations.py
@@ -0,0 +1,328 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Optional
+
+import attr
+
+from synapse.api.constants import RelationTypes
+from synapse.events import EventBase
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.databases.main.stream import generate_pagination_where_clause
+from synapse.storage.relations import (
+ AggregationPaginationToken,
+ PaginationChunk,
+ RelationPaginationToken,
+)
+from synapse.util.caches.descriptors import cached
+
+logger = logging.getLogger(__name__)
+
+
+class RelationsWorkerStore(SQLBaseStore):
+ @cached(tree=True)
+ def get_relations_for_event(
+ self,
+ event_id,
+ relation_type=None,
+ event_type=None,
+ aggregation_key=None,
+ limit=5,
+ direction="b",
+ from_token=None,
+ to_token=None,
+ ):
+ """Get a list of relations for an event, ordered by topological ordering.
+
+ Args:
+ event_id (str): Fetch events that relate to this event ID.
+ relation_type (str|None): Only fetch events with this relation
+ type, if given.
+ event_type (str|None): Only fetch events with this event type, if
+ given.
+ aggregation_key (str|None): Only fetch events with this aggregation
+ key, if given.
+ limit (int): Only fetch the most recent `limit` events.
+ direction (str): Whether to fetch the most recent first (`"b"`) or
+ the oldest first (`"f"`).
+ from_token (RelationPaginationToken|None): Fetch rows from the given
+ token, or from the start if None.
+ to_token (RelationPaginationToken|None): Fetch rows up to the given
+ token, or up to the end if None.
+
+ Returns:
+ Deferred[PaginationChunk]: List of event IDs that match relations
+ requested. The rows are of the form `{"event_id": "..."}`.
+ """
+
+ where_clause = ["relates_to_id = ?"]
+ where_args = [event_id]
+
+ if relation_type is not None:
+ where_clause.append("relation_type = ?")
+ where_args.append(relation_type)
+
+ if event_type is not None:
+ where_clause.append("type = ?")
+ where_args.append(event_type)
+
+ if aggregation_key:
+ where_clause.append("aggregation_key = ?")
+ where_args.append(aggregation_key)
+
+ pagination_clause = generate_pagination_where_clause(
+ direction=direction,
+ column_names=("topological_ordering", "stream_ordering"),
+ from_token=attr.astuple(from_token) if from_token else None,
+ to_token=attr.astuple(to_token) if to_token else None,
+ engine=self.database_engine,
+ )
+
+ if pagination_clause:
+ where_clause.append(pagination_clause)
+
+ if direction == "b":
+ order = "DESC"
+ else:
+ order = "ASC"
+
+ sql = """
+ SELECT event_id, topological_ordering, stream_ordering
+ FROM event_relations
+ INNER JOIN events USING (event_id)
+ WHERE %s
+ ORDER BY topological_ordering %s, stream_ordering %s
+ LIMIT ?
+ """ % (
+ " AND ".join(where_clause),
+ order,
+ order,
+ )
+
+ def _get_recent_references_for_event_txn(txn):
+ txn.execute(sql, where_args + [limit + 1])
+
+ last_topo_id = None
+ last_stream_id = None
+ events = []
+ for row in txn:
+ events.append({"event_id": row[0]})
+ last_topo_id = row[1]
+ last_stream_id = row[2]
+
+ next_batch = None
+ if len(events) > limit and last_topo_id and last_stream_id:
+ next_batch = RelationPaginationToken(last_topo_id, last_stream_id)
+
+ return PaginationChunk(
+ chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token
+ )
+
+ return self.db_pool.runInteraction(
+ "get_recent_references_for_event", _get_recent_references_for_event_txn
+ )
+
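+    # Pagination sketch for the method above (the `store` name is
+    # hypothetical): tokens are (topological, stream) ordering pairs, and the
+    # query fetches limit + 1 rows so next_batch is only set when more rows
+    # remain.
+    #
+    #   chunk = await store.get_relations_for_event(
+    #       "$parent", relation_type=RelationTypes.ANNOTATION, limit=5,
+    #   )
+    #   if chunk.next_batch:
+    #       more = await store.get_relations_for_event(
+    #           "$parent", relation_type=RelationTypes.ANNOTATION, limit=5,
+    #           from_token=chunk.next_batch,
+    #       )
+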
+ @cached(tree=True)
+ def get_aggregation_groups_for_event(
+ self,
+ event_id,
+ event_type=None,
+ limit=5,
+ direction="b",
+ from_token=None,
+ to_token=None,
+ ):
+ """Get a list of annotations on the event, grouped by event type and
+ aggregation key, sorted by count.
+
+        This is used e.g. to get which reactions have happened on an event and
+        how many there are of each.
+
+ Args:
+ event_id (str): Fetch events that relate to this event ID.
+ event_type (str|None): Only fetch events with this event type, if
+ given.
+ limit (int): Only fetch the `limit` groups.
+ direction (str): Whether to fetch the highest count first (`"b"`) or
+ the lowest count first (`"f"`).
+ from_token (AggregationPaginationToken|None): Fetch rows from the
+ given token, or from the start if None.
+ to_token (AggregationPaginationToken|None): Fetch rows up to the
+ given token, or up to the end if None.
+
+
+ Returns:
+ Deferred[PaginationChunk]: List of groups of annotations that
+ match. Each row is a dict with `type`, `key` and `count` fields.
+ """
+
+ where_clause = ["relates_to_id = ?", "relation_type = ?"]
+ where_args = [event_id, RelationTypes.ANNOTATION]
+
+ if event_type:
+ where_clause.append("type = ?")
+ where_args.append(event_type)
+
+ having_clause = generate_pagination_where_clause(
+ direction=direction,
+ column_names=("COUNT(*)", "MAX(stream_ordering)"),
+ from_token=attr.astuple(from_token) if from_token else None,
+ to_token=attr.astuple(to_token) if to_token else None,
+ engine=self.database_engine,
+ )
+
+ if direction == "b":
+ order = "DESC"
+ else:
+ order = "ASC"
+
+ if having_clause:
+ having_clause = "HAVING " + having_clause
+ else:
+ having_clause = ""
+
+ sql = """
+ SELECT type, aggregation_key, COUNT(DISTINCT sender), MAX(stream_ordering)
+ FROM event_relations
+ INNER JOIN events USING (event_id)
+ WHERE {where_clause}
+ GROUP BY relation_type, type, aggregation_key
+ {having_clause}
+ ORDER BY COUNT(*) {order}, MAX(stream_ordering) {order}
+ LIMIT ?
+ """.format(
+ where_clause=" AND ".join(where_clause),
+ order=order,
+ having_clause=having_clause,
+ )
+
+ def _get_aggregation_groups_for_event_txn(txn):
+ txn.execute(sql, where_args + [limit + 1])
+
+ next_batch = None
+ events = []
+ for row in txn:
+ events.append({"type": row[0], "key": row[1], "count": row[2]})
+ next_batch = AggregationPaginationToken(row[2], row[3])
+
+ if len(events) <= limit:
+ next_batch = None
+
+ return PaginationChunk(
+ chunk=list(events[:limit]), next_batch=next_batch, prev_batch=from_token
+ )
+
+ return self.db_pool.runInteraction(
+ "get_aggregation_groups_for_event", _get_aggregation_groups_for_event_txn
+ )
+
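+    # e.g. for an event with three 👍 reactions and one 👎, the chunk rows
+    # produced above would look like (values illustrative):
+    #
+    #   [{"type": "m.reaction", "key": "👍", "count": 3},
+    #    {"type": "m.reaction", "key": "👎", "count": 1}]
+    #
+    # with COUNT(DISTINCT sender) ensuring a user who annotates twice with
+    # the same key is only counted once.
+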
+ @cached()
+ async def get_applicable_edit(self, event_id: str) -> Optional[EventBase]:
+ """Get the most recent edit (if any) that has happened for the given
+ event.
+
+ Correctly handles checking whether edits were allowed to happen.
+
+ Args:
+ event_id: The original event ID
+
+ Returns:
+ The most recent edit, if any.
+ """
+
+ # We only allow edits for `m.room.message` events that have the same sender
+ # and event type. We can't assert these things during regular event auth so
+ # we have to do the checks post hoc.
+
+ # Fetches latest edit that has the same type and sender as the
+ # original, and is an `m.room.message`.
+ sql = """
+ SELECT edit.event_id FROM events AS edit
+ INNER JOIN event_relations USING (event_id)
+ INNER JOIN events AS original ON
+ original.event_id = relates_to_id
+ AND edit.type = original.type
+ AND edit.sender = original.sender
+ WHERE
+ relates_to_id = ?
+ AND relation_type = ?
+ AND edit.type = 'm.room.message'
+            ORDER BY edit.origin_server_ts DESC, edit.event_id DESC
+ LIMIT 1
+ """
+
+ def _get_applicable_edit_txn(txn):
+ txn.execute(sql, (event_id, RelationTypes.REPLACE))
+ row = txn.fetchone()
+ if row:
+ return row[0]
+
+ edit_id = await self.db_pool.runInteraction(
+ "get_applicable_edit", _get_applicable_edit_txn
+ )
+
+ if not edit_id:
+ return None
+
+ return await self.get_event(edit_id, allow_none=True)
+
+ def has_user_annotated_event(self, parent_id, event_type, aggregation_key, sender):
+ """Check if a user has already annotated an event with the same key
+ (e.g. already liked an event).
+
+ Args:
+ parent_id (str): The event being annotated
+ event_type (str): The event type of the annotation
+ aggregation_key (str): The aggregation key of the annotation
+ sender (str): The sender of the annotation
+
+ Returns:
+ Deferred[bool]
+ """
+
+ sql = """
+ SELECT 1 FROM event_relations
+ INNER JOIN events USING (event_id)
+ WHERE
+ relates_to_id = ?
+ AND relation_type = ?
+ AND type = ?
+ AND sender = ?
+ AND aggregation_key = ?
+ LIMIT 1;
+ """
+
+ def _get_if_user_has_annotated_event(txn):
+ txn.execute(
+ sql,
+ (
+ parent_id,
+ RelationTypes.ANNOTATION,
+ event_type,
+ sender,
+ aggregation_key,
+ ),
+ )
+
+ return bool(txn.fetchone())
+
+ return self.db_pool.runInteraction(
+ "get_if_user_has_annotated_event", _get_if_user_has_annotated_event
+ )
+
+
+class RelationsStore(RelationsWorkerStore):
+ pass
diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py
new file mode 100644
index 00000000..f4008e62
--- /dev/null
+++ b/synapse/storage/databases/main/room.py
@@ -0,0 +1,1429 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import logging
+import re
+from abc import abstractmethod
+from enum import Enum
+from typing import Any, Dict, List, Optional, Tuple
+
+from canonicaljson import json
+
+from synapse.api.constants import EventTypes
+from synapse.api.errors import StoreError
+from synapse.api.room_versions import RoomVersion, RoomVersions
+from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.storage.database import DatabasePool, LoggingTransaction
+from synapse.storage.databases.main.search import SearchStore
+from synapse.types import ThirdPartyInstanceID
+from synapse.util.caches.descriptors import cached
+
+logger = logging.getLogger(__name__)
+
+
+OpsLevel = collections.namedtuple(
+ "OpsLevel", ("ban_level", "kick_level", "redact_level")
+)
+
+RatelimitOverride = collections.namedtuple(
+ "RatelimitOverride", ("messages_per_second", "burst_count")
+)
+
+
+class RoomSortOrder(Enum):
+ """
+ Enum to define the sorting method used when returning rooms with get_rooms_paginate
+
+ NAME = sort rooms alphabetically by name
+ JOINED_MEMBERS = sort rooms by membership size, highest to lowest
+ """
+
+ # ALPHABETICAL and SIZE are deprecated.
+ # ALPHABETICAL is the same as NAME.
+ ALPHABETICAL = "alphabetical"
+ # SIZE is the same as JOINED_MEMBERS.
+ SIZE = "size"
+ NAME = "name"
+ CANONICAL_ALIAS = "canonical_alias"
+ JOINED_MEMBERS = "joined_members"
+ JOINED_LOCAL_MEMBERS = "joined_local_members"
+ VERSION = "version"
+ CREATOR = "creator"
+ ENCRYPTION = "encryption"
+ FEDERATABLE = "federatable"
+ PUBLIC = "public"
+ JOIN_RULES = "join_rules"
+ GUEST_ACCESS = "guest_access"
+ HISTORY_VISIBILITY = "history_visibility"
+ STATE_EVENTS = "state_events"
+
+
+class RoomWorkerStore(SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(RoomWorkerStore, self).__init__(database, db_conn, hs)
+
+ self.config = hs.config
+
+ def get_room(self, room_id):
+ """Retrieve a room.
+
+ Args:
+ room_id (str): The ID of the room to retrieve.
+ Returns:
+ A dict containing the room information, or None if the room is unknown.
+ """
+ return self.db_pool.simple_select_one(
+ table="rooms",
+ keyvalues={"room_id": room_id},
+ retcols=("room_id", "is_public", "creator"),
+ desc="get_room",
+ allow_none=True,
+ )
+
+ def get_room_with_stats(self, room_id: str):
+ """Retrieve room with statistics.
+
+ Args:
+ room_id: The ID of the room to retrieve.
+ Returns:
+ A dict containing the room information, or None if the room is unknown.
+ """
+
+ def get_room_with_stats_txn(txn, room_id):
+ sql = """
+ SELECT room_id, state.name, state.canonical_alias, curr.joined_members,
+ curr.local_users_in_room AS joined_local_members, rooms.room_version AS version,
+ rooms.creator, state.encryption, state.is_federatable AS federatable,
+ rooms.is_public AS public, state.join_rules, state.guest_access,
+ state.history_visibility, curr.current_state_events AS state_events
+ FROM rooms
+ LEFT JOIN room_stats_state state USING (room_id)
+ LEFT JOIN room_stats_current curr USING (room_id)
+ WHERE room_id = ?
+ """
+ txn.execute(sql, [room_id])
+            # Catch the IndexError if the query returns an empty result, so we
+            # return None instead of raising an error
+ try:
+ res = self.db_pool.cursor_to_dict(txn)[0]
+ except IndexError:
+ return None
+
+ res["federatable"] = bool(res["federatable"])
+ res["public"] = bool(res["public"])
+ return res
+
+ return self.db_pool.runInteraction(
+ "get_room_with_stats", get_room_with_stats_txn, room_id
+ )
+
+ def get_public_room_ids(self):
+ return self.db_pool.simple_select_onecol(
+ table="rooms",
+ keyvalues={"is_public": True},
+ retcol="room_id",
+ desc="get_public_room_ids",
+ )
+
+ def count_public_rooms(self, network_tuple, ignore_non_federatable):
+ """Counts the number of public rooms as tracked in the room_stats_current
+ and room_stats_state table.
+
+ Args:
+ network_tuple (ThirdPartyInstanceID|None)
+ ignore_non_federatable (bool): If true filters out non-federatable rooms
+ """
+
+ def _count_public_rooms_txn(txn):
+ query_args = []
+
+ if network_tuple:
+ if network_tuple.appservice_id:
+ published_sql = """
+ SELECT room_id from appservice_room_list
+ WHERE appservice_id = ? AND network_id = ?
+ """
+ query_args.append(network_tuple.appservice_id)
+ query_args.append(network_tuple.network_id)
+ else:
+ published_sql = """
+ SELECT room_id FROM rooms WHERE is_public
+ """
+ else:
+ published_sql = """
+ SELECT room_id FROM rooms WHERE is_public
+ UNION SELECT room_id from appservice_room_list
+ """
+
+ sql = """
+ SELECT
+ COALESCE(COUNT(*), 0)
+ FROM (
+ %(published_sql)s
+ ) published
+ INNER JOIN room_stats_state USING (room_id)
+ INNER JOIN room_stats_current USING (room_id)
+ WHERE
+ (
+ join_rules = 'public' OR history_visibility = 'world_readable'
+ )
+ AND joined_members > 0
+ """ % {
+ "published_sql": published_sql
+ }
+
+ txn.execute(sql, query_args)
+ return txn.fetchone()[0]
+
+ return self.db_pool.runInteraction(
+ "count_public_rooms", _count_public_rooms_txn
+ )
+
+ async def get_largest_public_rooms(
+ self,
+ network_tuple: Optional[ThirdPartyInstanceID],
+ search_filter: Optional[dict],
+ limit: Optional[int],
+ bounds: Optional[Tuple[int, str]],
+ forwards: bool,
+ ignore_non_federatable: bool = False,
+ ):
+ """Gets the largest public rooms (where largest is in terms of joined
+ members, as tracked in the statistics table).
+
+ Args:
+ network_tuple
+ search_filter
+            limit: Maximum number of rows to return, unlimited otherwise.
+            bounds: An upper or lower bound to apply to the result set if given,
+                consisting of a joined member count and a room_id (these are
+                excluded from the result set).
+            forwards: true iff paginating forwards, otherwise backwards
+ ignore_non_federatable: If true filters out non-federatable rooms.
+
+ Returns:
+ Rooms in order: biggest number of joined users first.
+ We then arbitrarily use the room_id as a tie breaker.
+
+ """
+
+ where_clauses = []
+ query_args = []
+
+ if network_tuple:
+ if network_tuple.appservice_id:
+ published_sql = """
+ SELECT room_id from appservice_room_list
+ WHERE appservice_id = ? AND network_id = ?
+ """
+ query_args.append(network_tuple.appservice_id)
+ query_args.append(network_tuple.network_id)
+ else:
+ published_sql = """
+ SELECT room_id FROM rooms WHERE is_public
+ """
+ else:
+ published_sql = """
+ SELECT room_id FROM rooms WHERE is_public
+ UNION SELECT room_id from appservice_room_list
+ """
+
+ # Work out the bounds if we're given them, these bounds look slightly
+ # odd, but are designed to help query planner use indices by pulling
+ # out a common bound.
+ if bounds:
+ last_joined_members, last_room_id = bounds
+ if forwards:
+ where_clauses.append(
+ """
+ joined_members <= ? AND (
+ joined_members < ? OR room_id < ?
+ )
+ """
+ )
+ else:
+ where_clauses.append(
+ """
+ joined_members >= ? AND (
+ joined_members > ? OR room_id > ?
+ )
+ """
+ )
+
+ query_args += [last_joined_members, last_joined_members, last_room_id]
+
+ if ignore_non_federatable:
+ where_clauses.append("is_federatable")
+
+ if search_filter and search_filter.get("generic_search_term", None):
+ search_term = "%" + search_filter["generic_search_term"] + "%"
+
+ where_clauses.append(
+ """
+ (
+ LOWER(name) LIKE ?
+ OR LOWER(topic) LIKE ?
+ OR LOWER(canonical_alias) LIKE ?
+ )
+ """
+ )
+ query_args += [
+ search_term.lower(),
+ search_term.lower(),
+ search_term.lower(),
+ ]
+
+ where_clause = ""
+ if where_clauses:
+ where_clause = " AND " + " AND ".join(where_clauses)
+
+ sql = """
+ SELECT
+ room_id, name, topic, canonical_alias, joined_members,
+ avatar, history_visibility, joined_members, guest_access
+ FROM (
+ %(published_sql)s
+ ) published
+ INNER JOIN room_stats_state USING (room_id)
+ INNER JOIN room_stats_current USING (room_id)
+ WHERE
+ (
+ join_rules = 'public' OR history_visibility = 'world_readable'
+ )
+ AND joined_members > 0
+ %(where_clause)s
+ ORDER BY joined_members %(dir)s, room_id %(dir)s
+ """ % {
+ "published_sql": published_sql,
+ "where_clause": where_clause,
+ "dir": "DESC" if forwards else "ASC",
+ }
+
+ if limit is not None:
+ query_args.append(limit)
+
+ sql += """
+ LIMIT ?
+ """
+
+ def _get_largest_public_rooms_txn(txn):
+ txn.execute(sql, query_args)
+
+ results = self.db_pool.cursor_to_dict(txn)
+
+ if not forwards:
+ results.reverse()
+
+ return results
+
+ ret_val = await self.db_pool.runInteraction(
+ "get_largest_public_rooms", _get_largest_public_rooms_txn
+ )
+ return ret_val
+
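+    # The bounds trick above is keyset pagination: instead of the simpler row
+    # comparison "(joined_members, room_id) < (?, ?)", the predicate
+    #
+    #   joined_members <= ? AND (joined_members < ? OR room_id < ?)
+    #
+    # repeats the common bound so the planner can use a plain index on
+    # joined_members. E.g. continuing forwards past (42, "!abc:hs") (values
+    # hypothetical) returns rooms with fewer than 42 joined members, plus
+    # 42-member rooms whose room_id sorts before "!abc:hs".
+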
+ @cached(max_entries=10000)
+ def is_room_blocked(self, room_id):
+ return self.db_pool.simple_select_one_onecol(
+ table="blocked_rooms",
+ keyvalues={"room_id": room_id},
+ retcol="1",
+ allow_none=True,
+ desc="is_room_blocked",
+ )
+
+ async def get_rooms_paginate(
+ self,
+ start: int,
+ limit: int,
+ order_by: RoomSortOrder,
+ reverse_order: bool,
+ search_term: Optional[str],
+ ) -> Tuple[List[Dict[str, Any]], int]:
+ """Function to retrieve a paginated list of rooms as json.
+
+ Args:
+ start: offset in the list
+ limit: maximum amount of rooms to retrieve
+ order_by: the sort order of the returned list
+ reverse_order: whether to reverse the room list
+ search_term: a string to filter room names by
+ Returns:
+ A list of room dicts and an integer representing the total number of
+ rooms that exist given this query
+ """
+ # Filter room names by a string
+ where_statement = ""
+ if search_term:
+ where_statement = "WHERE state.name LIKE ?"
+
+ # Our postgres db driver converts ? -> %s in SQL strings as that's the
+ # placeholder for postgres.
+ # HOWEVER, if you put a % into your SQL then everything goes wibbly.
+ # To get around this, we're going to surround search_term with %'s
+ # before giving it to the database in python instead
+ search_term = "%" + search_term + "%"
+
+ # Set ordering
+ if RoomSortOrder(order_by) == RoomSortOrder.SIZE:
+ # Deprecated in favour of RoomSortOrder.JOINED_MEMBERS
+ order_by_column = "curr.joined_members"
+ order_by_asc = False
+ elif RoomSortOrder(order_by) == RoomSortOrder.ALPHABETICAL:
+ # Deprecated in favour of RoomSortOrder.NAME
+ order_by_column = "state.name"
+ order_by_asc = True
+ elif RoomSortOrder(order_by) == RoomSortOrder.NAME:
+ order_by_column = "state.name"
+ order_by_asc = True
+ elif RoomSortOrder(order_by) == RoomSortOrder.CANONICAL_ALIAS:
+ order_by_column = "state.canonical_alias"
+ order_by_asc = True
+ elif RoomSortOrder(order_by) == RoomSortOrder.JOINED_MEMBERS:
+ order_by_column = "curr.joined_members"
+ order_by_asc = False
+ elif RoomSortOrder(order_by) == RoomSortOrder.JOINED_LOCAL_MEMBERS:
+ order_by_column = "curr.local_users_in_room"
+ order_by_asc = False
+ elif RoomSortOrder(order_by) == RoomSortOrder.VERSION:
+ order_by_column = "rooms.room_version"
+ order_by_asc = False
+ elif RoomSortOrder(order_by) == RoomSortOrder.CREATOR:
+ order_by_column = "rooms.creator"
+ order_by_asc = True
+ elif RoomSortOrder(order_by) == RoomSortOrder.ENCRYPTION:
+ order_by_column = "state.encryption"
+ order_by_asc = True
+ elif RoomSortOrder(order_by) == RoomSortOrder.FEDERATABLE:
+ order_by_column = "state.is_federatable"
+ order_by_asc = True
+ elif RoomSortOrder(order_by) == RoomSortOrder.PUBLIC:
+ order_by_column = "rooms.is_public"
+ order_by_asc = True
+ elif RoomSortOrder(order_by) == RoomSortOrder.JOIN_RULES:
+ order_by_column = "state.join_rules"
+ order_by_asc = True
+ elif RoomSortOrder(order_by) == RoomSortOrder.GUEST_ACCESS:
+ order_by_column = "state.guest_access"
+ order_by_asc = True
+ elif RoomSortOrder(order_by) == RoomSortOrder.HISTORY_VISIBILITY:
+ order_by_column = "state.history_visibility"
+ order_by_asc = True
+ elif RoomSortOrder(order_by) == RoomSortOrder.STATE_EVENTS:
+ order_by_column = "curr.current_state_events"
+ order_by_asc = False
+ else:
+ raise StoreError(
+ 500, "Incorrect value for order_by provided: %s" % order_by
+ )
+
+ # Whether to return the list in reverse order
+ if reverse_order:
+ # Flip the boolean
+ order_by_asc = not order_by_asc
+
+ # Create one query for getting the limited number of events that the user asked
+ # for, and another query for getting the total number of events that could be
+ # returned. Thus allowing us to see if there are more events to paginate through
+ info_sql = """
+ SELECT state.room_id, state.name, state.canonical_alias, curr.joined_members,
+ curr.local_users_in_room, rooms.room_version, rooms.creator,
+ state.encryption, state.is_federatable, rooms.is_public, state.join_rules,
+ state.guest_access, state.history_visibility, curr.current_state_events
+ FROM room_stats_state state
+ INNER JOIN room_stats_current curr USING (room_id)
+ INNER JOIN rooms USING (room_id)
+ %s
+ ORDER BY %s %s
+ LIMIT ?
+ OFFSET ?
+ """ % (
+ where_statement,
+ order_by_column,
+ "ASC" if order_by_asc else "DESC",
+ )
+
+ # Use a nested SELECT statement as SQL can't count(*) with an OFFSET
+ count_sql = """
+ SELECT count(*) FROM (
+ SELECT room_id FROM room_stats_state state
+ %s
+ ) AS get_room_ids
+ """ % (
+ where_statement,
+ )
+
+ def _get_rooms_paginate_txn(txn):
+ # Execute the data query
+ sql_values = (limit, start)
+ if search_term:
+ # Add the search term into the WHERE clause
+ sql_values = (search_term,) + sql_values
+ txn.execute(info_sql, sql_values)
+
+ # Refactor room query data into a structured dictionary
+ rooms = []
+ for room in txn:
+ rooms.append(
+ {
+ "room_id": room[0],
+ "name": room[1],
+ "canonical_alias": room[2],
+ "joined_members": room[3],
+ "joined_local_members": room[4],
+ "version": room[5],
+ "creator": room[6],
+ "encryption": room[7],
+ "federatable": room[8],
+ "public": room[9],
+ "join_rules": room[10],
+ "guest_access": room[11],
+ "history_visibility": room[12],
+ "state_events": room[13],
+ }
+ )
+
+ # Execute the count query
+
+ # Add the search term into the WHERE clause if present
+ sql_values = (search_term,) if search_term else ()
+ txn.execute(count_sql, sql_values)
+
+ room_count = txn.fetchone()
+ return rooms, room_count[0]
+
+ return await self.db_pool.runInteraction(
+ "get_rooms_paginate", _get_rooms_paginate_txn,
+ )
+
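+    # e.g. a search_term of "matrix" (hypothetical) is passed to the database
+    # as the bound parameter "%matrix%", so "WHERE state.name LIKE ?" matches
+    # any room name containing "matrix"; wrapping in Python rather than
+    # splicing '%' into the SQL string keeps it clear of the driver's
+    # ?-to-%s placeholder rewriting.
+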
+ @cached(max_entries=10000)
+ async def get_ratelimit_for_user(self, user_id):
+ """Check if there are any overrides for ratelimiting for the given
+ user
+
+ Args:
+ user_id (str)
+
+ Returns:
+ RatelimitOverride if there is an override, else None. If the contents
+            of RatelimitOverride are None or 0 then ratelimiting has been
+ disabled for that user entirely.
+ """
+ row = await self.db_pool.simple_select_one(
+ table="ratelimit_override",
+ keyvalues={"user_id": user_id},
+ retcols=("messages_per_second", "burst_count"),
+ allow_none=True,
+ desc="get_ratelimit_for_user",
+ )
+
+ if row:
+ return RatelimitOverride(
+ messages_per_second=row["messages_per_second"],
+ burst_count=row["burst_count"],
+ )
+ else:
+ return None
+
+ @cached()
+ async def get_retention_policy_for_room(self, room_id):
+ """Get the retention policy for a given room.
+
+ If no retention policy has been found for this room, returns a policy defined
+ by the configured default policy (which has None as both the 'min_lifetime' and
+ the 'max_lifetime' if no default policy has been defined in the server's
+ configuration).
+
+ Args:
+ room_id (str): The ID of the room to get the retention policy of.
+
+ Returns:
+            dict[str, int|None]: "min_lifetime" and "max_lifetime" for this room.
+ """
+
+ def get_retention_policy_for_room_txn(txn):
+ txn.execute(
+ """
+ SELECT min_lifetime, max_lifetime FROM room_retention
+ INNER JOIN current_state_events USING (event_id, room_id)
+ WHERE room_id = ?;
+ """,
+ (room_id,),
+ )
+
+ return self.db_pool.cursor_to_dict(txn)
+
+ ret = await self.db_pool.runInteraction(
+ "get_retention_policy_for_room", get_retention_policy_for_room_txn,
+ )
+
+        # If we don't know this room ID, ret will be an empty list, in which
+        # case return the default policy.
+ if not ret:
+ return {
+ "min_lifetime": self.config.retention_default_min_lifetime,
+ "max_lifetime": self.config.retention_default_max_lifetime,
+ }
+
+ row = ret[0]
+
+ # If one of the room's policy's attributes isn't defined, use the matching
+ # attribute from the default policy.
+ # The default values will be None if no default policy has been defined, or if one
+ # of the attributes is missing from the default policy.
+ if row["min_lifetime"] is None:
+ row["min_lifetime"] = self.config.retention_default_min_lifetime
+
+ if row["max_lifetime"] is None:
+ row["max_lifetime"] = self.config.retention_default_max_lifetime
+
+ return row
+
+ def get_media_mxcs_in_room(self, room_id):
+ """Retrieves all the local and remote media MXC URIs in a given room
+
+ Args:
+ room_id (str)
+
+ Returns:
+            A tuple of two lists: the MXC URIs of the local media and of the
+            remote media in the room.
+ """
+
+ def _get_media_mxcs_in_room_txn(txn):
+ local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id)
+ local_media_mxcs = []
+ remote_media_mxcs = []
+
+ # Convert the IDs to MXC URIs
+ for media_id in local_mxcs:
+ local_media_mxcs.append("mxc://%s/%s" % (self.hs.hostname, media_id))
+ for hostname, media_id in remote_mxcs:
+ remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id))
+
+ return local_media_mxcs, remote_media_mxcs
+
+ return self.db_pool.runInteraction(
+ "get_media_ids_in_room", _get_media_mxcs_in_room_txn
+ )
+
+ def quarantine_media_ids_in_room(self, room_id, quarantined_by):
+ """For a room loops through all events with media and quarantines
+ the associated media
+ """
+
+ logger.info("Quarantining media in room: %s", room_id)
+
+ def _quarantine_media_in_room_txn(txn):
+ local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id)
+ return self._quarantine_media_txn(
+ txn, local_mxcs, remote_mxcs, quarantined_by
+ )
+
+ return self.db_pool.runInteraction(
+ "quarantine_media_in_room", _quarantine_media_in_room_txn
+ )
+
+ def _get_media_mxcs_in_room_txn(self, txn, room_id):
+ """Retrieves all the local and remote media MXC URIs in a given room
+
+ Args:
+ txn (cursor)
+ room_id (str)
+
+ Returns:
+            A list of local media IDs and a list of (hostname, media ID)
+            tuples for the remote media.
+ """
+ mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)")
+
+ sql = """
+ SELECT stream_ordering, json FROM events
+ JOIN event_json USING (room_id, event_id)
+ WHERE room_id = ?
+ %(where_clause)s
+ AND contains_url = ? AND outlier = ?
+ ORDER BY stream_ordering DESC
+ LIMIT ?
+ """
+ txn.execute(sql % {"where_clause": ""}, (room_id, True, False, 100))
+
+ local_media_mxcs = []
+ remote_media_mxcs = []
+
+ while True:
+ next_token = None
+ for stream_ordering, content_json in txn:
+ next_token = stream_ordering
+ event_json = db_to_json(content_json)
+ content = event_json["content"]
+ content_url = content.get("url")
+ thumbnail_url = content.get("info", {}).get("thumbnail_url")
+
+ for url in (content_url, thumbnail_url):
+ if not url:
+ continue
+ matches = mxc_re.match(url)
+ if matches:
+ hostname = matches.group(1)
+ media_id = matches.group(2)
+ if hostname == self.hs.hostname:
+ local_media_mxcs.append(media_id)
+ else:
+ remote_media_mxcs.append((hostname, media_id))
+
+ if next_token is None:
+ # We've gone through the whole room, so we're finished.
+ break
+
+ txn.execute(
+ sql % {"where_clause": "AND stream_ordering < ?"},
+ (room_id, next_token, True, False, 100),
+ )
+
+ return local_media_mxcs, remote_media_mxcs
+
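+    # e.g. mxc_re splits "mxc://media.example.com/abc123" (hypothetical URI)
+    # into hostname "media.example.com" and media ID "abc123"; the loop above
+    # pages backwards 100 events at a time using stream_ordering as the
+    # cursor, stopping once a batch yields no rows.
+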
+ def quarantine_media_by_id(
+ self, server_name: str, media_id: str, quarantined_by: str,
+ ):
+ """quarantines a single local or remote media id
+
+ Args:
+ server_name: The name of the server that holds this media
+ media_id: The ID of the media to be quarantined
+ quarantined_by: The user ID that initiated the quarantine request
+ """
+ logger.info("Quarantining media: %s/%s", server_name, media_id)
+ is_local = server_name == self.config.server_name
+
+ def _quarantine_media_by_id_txn(txn):
+ local_mxcs = [media_id] if is_local else []
+ remote_mxcs = [(server_name, media_id)] if not is_local else []
+
+ return self._quarantine_media_txn(
+ txn, local_mxcs, remote_mxcs, quarantined_by
+ )
+
+ return self.db_pool.runInteraction(
+ "quarantine_media_by_user", _quarantine_media_by_id_txn
+ )
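+    # Usage sketch (illustrative; the server name, media ID and admin user ID are
+    # hypothetical), e.g. from an async caller such as an admin API handler:
+    #
+    #     await store.quarantine_media_by_id(
+    #         "example.org", "abcdefghijkl", quarantined_by="@admin:example.org"
+    #     )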
+
+ def quarantine_media_ids_by_user(self, user_id: str, quarantined_by: str):
+ """quarantines all local media associated with a single user
+
+ Args:
+ user_id: The ID of the user to quarantine media of
+ quarantined_by: The ID of the user who made the quarantine request
+ """
+
+ def _quarantine_media_by_user_txn(txn):
+ local_media_ids = self._get_media_ids_by_user_txn(txn, user_id)
+ return self._quarantine_media_txn(txn, local_media_ids, [], quarantined_by)
+
+ return self.db_pool.runInteraction(
+ "quarantine_media_by_user", _quarantine_media_by_user_txn
+ )
+
+ def _get_media_ids_by_user_txn(self, txn, user_id: str, filter_quarantined=True):
+ """Retrieves local media IDs by a given user
+
+ Args:
+ txn (cursor)
+ user_id: The ID of the user to retrieve media IDs of
+
+ Returns:
+ The local and remote media as a lists of tuples where the key is
+ the hostname and the value is the media ID.
+ """
+ # Local media
+ sql = """
+ SELECT media_id
+ FROM local_media_repository
+ WHERE user_id = ?
+ """
+ if filter_quarantined:
+ sql += "AND quarantined_by IS NULL"
+ txn.execute(sql, (user_id,))
+
+ local_media_ids = [row[0] for row in txn]
+
+ # TODO: Figure out all remote media a user has referenced in a message
+
+ return local_media_ids
+
+ def _quarantine_media_txn(
+ self,
+ txn,
+ local_mxcs: List[str],
+ remote_mxcs: List[Tuple[str, str]],
+ quarantined_by: str,
+ ) -> int:
+ """Quarantine local and remote media items
+
+ Args:
+ txn (cursor)
+ local_mxcs: A list of local mxc URLs
+ remote_mxcs: A list of (remote server, media id) tuples representing
+ remote mxc URLs
+ quarantined_by: The ID of the user who initiated the quarantine request
+ Returns:
+ The total number of media items quarantined
+ """
+ # Update all the tables to set the quarantined_by flag
+ txn.executemany(
+ """
+ UPDATE local_media_repository
+ SET quarantined_by = ?
+ WHERE media_id = ? AND safe_from_quarantine = ?
+ """,
+ ((quarantined_by, media_id, False) for media_id in local_mxcs),
+ )
+ # Note that a rowcount of -1 can be used to indicate no rows were affected.
+ total_media_quarantined = txn.rowcount if txn.rowcount > 0 else 0
+
+ txn.executemany(
+ """
+ UPDATE remote_media_cache
+ SET quarantined_by = ?
+ WHERE media_origin = ? AND media_id = ?
+ """,
+ ((quarantined_by, origin, media_id) for origin, media_id in remote_mxcs),
+ )
+ total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0
+
+ return total_media_quarantined
+
+ async def get_all_new_public_rooms(
+ self, instance_name: str, last_id: int, current_id: int, limit: int
+ ) -> Tuple[List[Tuple[int, tuple]], int, bool]:
+ """Get updates for public rooms replication stream.
+
+ Args:
+ instance_name: The writer we want to fetch updates from. Unused
+ here since there is only ever one writer.
+ last_id: The token to fetch updates from. Exclusive.
+ current_id: The token to fetch updates up to. Inclusive.
+ limit: The requested limit for the number of rows to return. The
+ function may return more or fewer rows.
+
+ Returns:
+ A tuple consisting of: the updates, a token to use to fetch
+            subsequent updates, and whether we returned fewer rows than exist
+            between the requested tokens due to the limit.
+
+            The token returned can be used in a subsequent call to this
+            function to get further updates.
+
+ The updates are a list of 2-tuples of stream ID and the row data
+ """
+ if last_id == current_id:
+ return [], current_id, False
+
+ def get_all_new_public_rooms(txn):
+ sql = """
+ SELECT stream_id, room_id, visibility, appservice_id, network_id
+ FROM public_room_list_stream
+ WHERE stream_id > ? AND stream_id <= ?
+ ORDER BY stream_id ASC
+ LIMIT ?
+ """
+
+ txn.execute(sql, (last_id, current_id, limit))
+ updates = [(row[0], row[1:]) for row in txn]
+ limited = False
+ upto_token = current_id
+ if len(updates) >= limit:
+ upto_token = updates[-1][0]
+ limited = True
+
+ return updates, upto_token, limited
+
+ return await self.db_pool.runInteraction(
+ "get_all_new_public_rooms", get_all_new_public_rooms
+ )
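+    # Usage sketch (illustrative token values; "master" is an assumed writer name):
+    # callers typically page through the stream, feeding the returned token back in
+    # until `limited` is False:
+    #
+    #     updates, upto_token, limited = await store.get_all_new_public_rooms(
+    #         instance_name="master", last_id=100, current_id=200, limit=50
+    #     )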
+
+
+class RoomBackgroundUpdateStore(SQLBaseStore):
+ REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory"
+ ADD_ROOMS_ROOM_VERSION_COLUMN = "add_rooms_room_version_column"
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(RoomBackgroundUpdateStore, self).__init__(database, db_conn, hs)
+
+ self.config = hs.config
+
+ self.db_pool.updates.register_background_update_handler(
+ "insert_room_retention", self._background_insert_retention,
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ self.REMOVE_TOMESTONED_ROOMS_BG_UPDATE,
+ self._remove_tombstoned_rooms_from_directory,
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ self.ADD_ROOMS_ROOM_VERSION_COLUMN,
+ self._background_add_rooms_room_version_column,
+ )
+
+ async def _background_insert_retention(self, progress, batch_size):
+ """Retrieves a list of all rooms within a range and inserts an entry for each of
+ them into the room_retention table.
+        Sets a policy's columns to NULL if the corresponding attribute is missing from
+        the retention event in the room's state (or all of them if the room's state has
+        no retention event at all), so that we fall back to the server's retention policy.
+ """
+
+ last_room = progress.get("room_id", "")
+
+ def _background_insert_retention_txn(txn):
+ txn.execute(
+ """
+ SELECT state.room_id, state.event_id, events.json
+ FROM current_state_events as state
+ LEFT JOIN event_json AS events ON (state.event_id = events.event_id)
+ WHERE state.room_id > ? AND state.type = '%s'
+ ORDER BY state.room_id ASC
+ LIMIT ?;
+ """
+ % EventTypes.Retention,
+ (last_room, batch_size),
+ )
+
+ rows = self.db_pool.cursor_to_dict(txn)
+
+ if not rows:
+ return True
+
+ for row in rows:
+ if not row["json"]:
+ retention_policy = {}
+ else:
+ ev = db_to_json(row["json"])
+ retention_policy = ev["content"]
+
+ self.db_pool.simple_insert_txn(
+ txn=txn,
+ table="room_retention",
+ values={
+ "room_id": row["room_id"],
+ "event_id": row["event_id"],
+ "min_lifetime": retention_policy.get("min_lifetime"),
+ "max_lifetime": retention_policy.get("max_lifetime"),
+ },
+ )
+
+ logger.info("Inserted %d rows into room_retention", len(rows))
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, "insert_room_retention", {"room_id": rows[-1]["room_id"]}
+ )
+
+ if batch_size > len(rows):
+ return True
+ else:
+ return False
+
+ end = await self.db_pool.runInteraction(
+ "insert_room_retention", _background_insert_retention_txn,
+ )
+
+ if end:
+ await self.db_pool.updates._end_background_update("insert_room_retention")
+
+ return batch_size
+
+ async def _background_add_rooms_room_version_column(
+ self, progress: dict, batch_size: int
+ ):
+ """Background update to go and add room version inforamtion to `rooms`
+ table from `current_state_events` table.
+ """
+
+ last_room_id = progress.get("room_id", "")
+
+ def _background_add_rooms_room_version_column_txn(txn: LoggingTransaction):
+ sql = """
+ SELECT room_id, json FROM current_state_events
+ INNER JOIN event_json USING (room_id, event_id)
+ WHERE room_id > ? AND type = 'm.room.create' AND state_key = ''
+ ORDER BY room_id
+ LIMIT ?
+ """
+
+ txn.execute(sql, (last_room_id, batch_size))
+
+ updates = []
+ for room_id, event_json in txn:
+ event_dict = db_to_json(event_json)
+ room_version_id = event_dict.get("content", {}).get(
+ "room_version", RoomVersions.V1.identifier
+ )
+
+                creator = event_dict.get("content", {}).get("creator")
+
+ updates.append((room_id, creator, room_version_id))
+
+ if not updates:
+ return True
+
+ new_last_room_id = ""
+ for room_id, creator, room_version_id in updates:
+ # We upsert here just in case we don't already have a row,
+ # mainly for paranoia as much badness would happen if we don't
+ # insert the row and then try and get the room version for the
+ # room.
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="rooms",
+ keyvalues={"room_id": room_id},
+ values={"room_version": room_version_id},
+ insertion_values={"is_public": False, "creator": creator},
+ )
+ new_last_room_id = room_id
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, self.ADD_ROOMS_ROOM_VERSION_COLUMN, {"room_id": new_last_room_id}
+ )
+
+ return False
+
+ end = await self.db_pool.runInteraction(
+ "_background_add_rooms_room_version_column",
+ _background_add_rooms_room_version_column_txn,
+ )
+
+ if end:
+ await self.db_pool.updates._end_background_update(
+ self.ADD_ROOMS_ROOM_VERSION_COLUMN
+ )
+
+ return batch_size
+
+ async def _remove_tombstoned_rooms_from_directory(
+ self, progress, batch_size
+ ) -> int:
+ """Removes any rooms with tombstone events from the room directory
+
+ Nowadays this is handled by the room upgrade handler, but we may have some
+        that got left behind.
+ """
+
+ last_room = progress.get("room_id", "")
+
+ def _get_rooms(txn):
+ txn.execute(
+ """
+ SELECT room_id
+ FROM rooms r
+ INNER JOIN current_state_events cse USING (room_id)
+ WHERE room_id > ? AND r.is_public
+ AND cse.type = '%s' AND cse.state_key = ''
+ ORDER BY room_id ASC
+ LIMIT ?;
+ """
+ % EventTypes.Tombstone,
+ (last_room, batch_size),
+ )
+
+ return [row[0] for row in txn]
+
+ rooms = await self.db_pool.runInteraction(
+ "get_tombstoned_directory_rooms", _get_rooms
+ )
+
+ if not rooms:
+ await self.db_pool.updates._end_background_update(
+ self.REMOVE_TOMESTONED_ROOMS_BG_UPDATE
+ )
+ return 0
+
+ for room_id in rooms:
+ logger.info("Removing tombstoned room %s from the directory", room_id)
+ await self.set_room_is_public(room_id, False)
+
+ await self.db_pool.updates._background_update_progress(
+ self.REMOVE_TOMESTONED_ROOMS_BG_UPDATE, {"room_id": rooms[-1]}
+ )
+
+ return len(rooms)
+
+ @abstractmethod
+ def set_room_is_public(self, room_id, is_public):
+ # this will need to be implemented if a background update is performed with
+ # existing (tombstoned, public) rooms in the database.
+ #
+ # It's overridden by RoomStore for the synapse master.
+ raise NotImplementedError()
+
+
+class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(RoomStore, self).__init__(database, db_conn, hs)
+
+ self.config = hs.config
+
+ async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion):
+ """Ensure that the room is stored in the table
+
+ Called when we join a room over federation, and overwrites any room version
+ currently in the table.
+ """
+ await self.db_pool.simple_upsert(
+ desc="upsert_room_on_join",
+ table="rooms",
+ keyvalues={"room_id": room_id},
+ values={"room_version": room_version.identifier},
+ insertion_values={"is_public": False, "creator": ""},
+ # rooms has a unique constraint on room_id, so no need to lock when doing an
+ # emulated upsert.
+ lock=False,
+ )
+
+ async def store_room(
+ self,
+ room_id: str,
+ room_creator_user_id: str,
+ is_public: bool,
+ room_version: RoomVersion,
+ ):
+ """Stores a room.
+
+ Args:
+            room_id: The desired room ID.
+ room_creator_user_id: The user ID of the room creator.
+ is_public: True to indicate that this room should appear in
+ public room lists.
+ room_version: The version of the room
+ Raises:
+ StoreError if the room could not be stored.
+ """
+ try:
+
+ def store_room_txn(txn, next_id):
+ self.db_pool.simple_insert_txn(
+ txn,
+ "rooms",
+ {
+ "room_id": room_id,
+ "creator": room_creator_user_id,
+ "is_public": is_public,
+ "room_version": room_version.identifier,
+ },
+ )
+ if is_public:
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="public_room_list_stream",
+ values={
+ "stream_id": next_id,
+ "room_id": room_id,
+ "visibility": is_public,
+ },
+ )
+
+ with self._public_room_id_gen.get_next() as next_id:
+ await self.db_pool.runInteraction(
+ "store_room_txn", store_room_txn, next_id
+ )
+ except Exception as e:
+ logger.error("store_room with room_id=%s failed: %s", room_id, e)
+ raise StoreError(500, "Problem creating room.")
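+    # Usage sketch (illustrative; the room and user IDs are hypothetical):
+    #
+    #     await store.store_room(
+    #         room_id="!abcdefghijkl:example.org",
+    #         room_creator_user_id="@creator:example.org",
+    #         is_public=False,
+    #         room_version=RoomVersions.V1,
+    #     )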
+
+ async def maybe_store_room_on_invite(self, room_id: str, room_version: RoomVersion):
+ """
+ When we receive an invite over federation, store the version of the room if we
+ don't already know the room version.
+ """
+ await self.db_pool.simple_upsert(
+ desc="maybe_store_room_on_invite",
+ table="rooms",
+ keyvalues={"room_id": room_id},
+ values={},
+ insertion_values={
+ "room_version": room_version.identifier,
+ "is_public": False,
+ "creator": "",
+ },
+ # rooms has a unique constraint on room_id, so no need to lock when doing an
+ # emulated upsert.
+ lock=False,
+ )
+
+ async def set_room_is_public(self, room_id, is_public):
+ def set_room_is_public_txn(txn, next_id):
+ self.db_pool.simple_update_one_txn(
+ txn,
+ table="rooms",
+ keyvalues={"room_id": room_id},
+ updatevalues={"is_public": is_public},
+ )
+
+ entries = self.db_pool.simple_select_list_txn(
+ txn,
+ table="public_room_list_stream",
+ keyvalues={
+ "room_id": room_id,
+ "appservice_id": None,
+ "network_id": None,
+ },
+ retcols=("stream_id", "visibility"),
+ )
+
+ entries.sort(key=lambda r: r["stream_id"])
+
+ add_to_stream = True
+ if entries:
+ add_to_stream = bool(entries[-1]["visibility"]) != is_public
+
+ if add_to_stream:
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="public_room_list_stream",
+ values={
+ "stream_id": next_id,
+ "room_id": room_id,
+ "visibility": is_public,
+ "appservice_id": None,
+ "network_id": None,
+ },
+ )
+
+ with self._public_room_id_gen.get_next() as next_id:
+ await self.db_pool.runInteraction(
+ "set_room_is_public", set_room_is_public_txn, next_id
+ )
+ self.hs.get_notifier().on_new_replication_data()
+
+ async def set_room_is_public_appservice(
+ self, room_id, appservice_id, network_id, is_public
+ ):
+ """Edit the appservice/network specific public room list.
+
+ Each appservice can have a number of published room lists associated
+ with them, keyed off of an appservice defined `network_id`, which
+ basically represents a single instance of a bridge to a third party
+ network.
+
+ Args:
+ room_id (str)
+ appservice_id (str)
+ network_id (str)
+            is_public (bool): Whether to publish or unpublish the room in the
+                list.
+ """
+
+ def set_room_is_public_appservice_txn(txn, next_id):
+ if is_public:
+ try:
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="appservice_room_list",
+ values={
+ "appservice_id": appservice_id,
+ "network_id": network_id,
+ "room_id": room_id,
+ },
+ )
+ except self.database_engine.module.IntegrityError:
+ # We've already inserted, nothing to do.
+ return
+ else:
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="appservice_room_list",
+ keyvalues={
+ "appservice_id": appservice_id,
+ "network_id": network_id,
+ "room_id": room_id,
+ },
+ )
+
+ entries = self.db_pool.simple_select_list_txn(
+ txn,
+ table="public_room_list_stream",
+ keyvalues={
+ "room_id": room_id,
+ "appservice_id": appservice_id,
+ "network_id": network_id,
+ },
+ retcols=("stream_id", "visibility"),
+ )
+
+ entries.sort(key=lambda r: r["stream_id"])
+
+ add_to_stream = True
+ if entries:
+ add_to_stream = bool(entries[-1]["visibility"]) != is_public
+
+ if add_to_stream:
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="public_room_list_stream",
+ values={
+ "stream_id": next_id,
+ "room_id": room_id,
+ "visibility": is_public,
+ "appservice_id": appservice_id,
+ "network_id": network_id,
+ },
+ )
+
+ with self._public_room_id_gen.get_next() as next_id:
+ await self.db_pool.runInteraction(
+ "set_room_is_public_appservice",
+ set_room_is_public_appservice_txn,
+ next_id,
+ )
+ self.hs.get_notifier().on_new_replication_data()
+
+ def get_room_count(self):
+ """Retrieve a list of all rooms
+ """
+
+ def f(txn):
+ sql = "SELECT count(*) FROM rooms"
+ txn.execute(sql)
+ row = txn.fetchone()
+ return row[0] or 0
+
+ return self.db_pool.runInteraction("get_rooms", f)
+
+ def add_event_report(
+ self, room_id, event_id, user_id, reason, content, received_ts
+ ):
+ next_id = self._event_reports_id_gen.get_next()
+ return self.db_pool.simple_insert(
+ table="event_reports",
+ values={
+ "id": next_id,
+ "received_ts": received_ts,
+ "room_id": room_id,
+ "event_id": event_id,
+ "user_id": user_id,
+ "reason": reason,
+ "content": json.dumps(content),
+ },
+ desc="add_event_report",
+ )
+
+ def get_current_public_room_stream_id(self):
+ return self._public_room_id_gen.get_current_token()
+
+ async def block_room(self, room_id: str, user_id: str) -> None:
+ """Marks the room as blocked. Can be called multiple times.
+
+ Args:
+ room_id: Room to block
+ user_id: Who blocked it
+ """
+ await self.db_pool.simple_upsert(
+ table="blocked_rooms",
+ keyvalues={"room_id": room_id},
+ values={},
+ insertion_values={"user_id": user_id},
+ desc="block_room",
+ )
+ await self.db_pool.runInteraction(
+ "block_room_invalidation",
+ self._invalidate_cache_and_stream,
+ self.is_room_blocked,
+ (room_id,),
+ )
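+    # Usage sketch (illustrative; the room and user IDs are hypothetical):
+    #
+    #     await store.block_room("!abcdefghijkl:example.org", "@admin:example.org")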
+
+ async def get_rooms_for_retention_period_in_range(
+ self, min_ms: Optional[int], max_ms: Optional[int], include_null: bool = False
+ ) -> Dict[str, dict]:
+ """Retrieves all of the rooms within the given retention range.
+
+ Optionally includes the rooms which don't have a retention policy.
+
+ Args:
+            min_ms: Duration in milliseconds that defines the lower limit of
+                the range to handle (exclusive). If None, doesn't set a lower limit.
+            max_ms: Duration in milliseconds that defines the upper limit of
+                the range to handle (inclusive). If None, doesn't set an upper limit.
+            include_null: Whether to include rooms whose retention policy is NULL
+                in the returned set.
+
+ Returns:
+ The rooms within this range, along with their retention
+ policy. The key is "room_id", and maps to a dict describing the retention
+ policy associated with this room ID. The keys for this nested dict are
+ "min_lifetime" (int|None), and "max_lifetime" (int|None).
+ """
+
+ def get_rooms_for_retention_period_in_range_txn(txn):
+ range_conditions = []
+ args = []
+
+ if min_ms is not None:
+ range_conditions.append("max_lifetime > ?")
+ args.append(min_ms)
+
+ if max_ms is not None:
+ range_conditions.append("max_lifetime <= ?")
+ args.append(max_ms)
+
+ # Do a first query which will retrieve the rooms that have a retention policy
+ # in their current state.
+ sql = """
+ SELECT room_id, min_lifetime, max_lifetime FROM room_retention
+ INNER JOIN current_state_events USING (event_id, room_id)
+ """
+
+ if len(range_conditions):
+ sql += " WHERE (" + " AND ".join(range_conditions) + ")"
+
+ if include_null:
+ sql += " OR max_lifetime IS NULL"
+
+ txn.execute(sql, args)
+
+ rows = self.db_pool.cursor_to_dict(txn)
+ rooms_dict = {}
+
+ for row in rows:
+ rooms_dict[row["room_id"]] = {
+ "min_lifetime": row["min_lifetime"],
+ "max_lifetime": row["max_lifetime"],
+ }
+
+ if include_null:
+ # If required, do a second query that retrieves all of the rooms we know
+ # of so we can handle rooms with no retention policy.
+ sql = "SELECT DISTINCT room_id FROM current_state_events"
+
+ txn.execute(sql)
+
+ rows = self.db_pool.cursor_to_dict(txn)
+
+ # If a room isn't already in the dict (i.e. it doesn't have a retention
+ # policy in its state), add it with a null policy.
+ for row in rows:
+ if row["room_id"] not in rooms_dict:
+ rooms_dict[row["room_id"]] = {
+ "min_lifetime": None,
+ "max_lifetime": None,
+ }
+
+ return rooms_dict
+
+ rooms = await self.db_pool.runInteraction(
+ "get_rooms_for_retention_period_in_range",
+ get_rooms_for_retention_period_in_range_txn,
+ )
+
+ return rooms
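+    # Usage sketch (illustrative durations): fetch rooms whose policy caps message
+    # lifetime at more than one day and at most one week, also including rooms with
+    # no retention policy at all:
+    #
+    #     rooms = await store.get_rooms_for_retention_period_in_range(
+    #         min_ms=24 * 60 * 60 * 1000, max_ms=7 * 24 * 60 * 60 * 1000,
+    #         include_null=True,
+    #     )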
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
new file mode 100644
index 00000000..b2fcfc9b
--- /dev/null
+++ b/synapse/storage/databases/main/roommember.py
@@ -0,0 +1,1072 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import TYPE_CHECKING, Awaitable, Iterable, List, Optional, Set
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventTypes, Membership
+from synapse.events import EventBase
+from synapse.events.snapshot import EventContext
+from synapse.metrics import LaterGauge
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage._base import (
+ LoggingTransaction,
+ SQLBaseStore,
+ db_to_json,
+ make_in_list_sql_clause,
+)
+from synapse.storage.database import DatabasePool
+from synapse.storage.databases.main.events_worker import EventsWorkerStore
+from synapse.storage.engines import Sqlite3Engine
+from synapse.storage.roommember import (
+ GetRoomsForUserWithStreamOrdering,
+ MemberSummary,
+ ProfileInfo,
+ RoomsForUser,
+)
+from synapse.types import Collection, get_domain_from_id
+from synapse.util.async_helpers import Linearizer
+from synapse.util.caches import intern_string
+from synapse.util.caches.descriptors import _CacheContext, cached, cachedList
+from synapse.util.metrics import Measure
+
+if TYPE_CHECKING:
+ from synapse.state import _StateCacheEntry
+
+logger = logging.getLogger(__name__)
+
+
+_MEMBERSHIP_PROFILE_UPDATE_NAME = "room_membership_profile_update"
+_CURRENT_STATE_MEMBERSHIP_UPDATE_NAME = "current_state_events_membership"
+
+
+class RoomMemberWorkerStore(EventsWorkerStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(RoomMemberWorkerStore, self).__init__(database, db_conn, hs)
+
+ # Is the current_state_events.membership up to date? Or is the
+ # background update still running?
+ self._current_state_events_membership_up_to_date = False
+
+ txn = LoggingTransaction(
+ db_conn.cursor(),
+ name="_check_safe_current_state_events_membership_updated",
+ database_engine=self.database_engine,
+ )
+ self._check_safe_current_state_events_membership_updated_txn(txn)
+ txn.close()
+
+ if self.hs.config.metrics_flags.known_servers:
+ self._known_servers_count = 1
+ self.hs.get_clock().looping_call(
+ run_as_background_process,
+ 60 * 1000,
+ "_count_known_servers",
+ self._count_known_servers,
+ )
+ self.hs.get_clock().call_later(
+ 1000,
+ run_as_background_process,
+ "_count_known_servers",
+ self._count_known_servers,
+ )
+ LaterGauge(
+ "synapse_federation_known_servers",
+ "",
+ [],
+ lambda: self._known_servers_count,
+ )
+
+ @defer.inlineCallbacks
+ def _count_known_servers(self):
+ """
+ Count the servers that this server knows about.
+
+ The statistic is stored on the class for the
+ `synapse_federation_known_servers` LaterGauge to collect.
+ """
+
+ def _transact(txn):
+ if isinstance(self.database_engine, Sqlite3Engine):
+ query = """
+ SELECT COUNT(DISTINCT substr(out.user_id, pos+1))
+ FROM (
+ SELECT rm.user_id as user_id, instr(rm.user_id, ':')
+ AS pos FROM room_memberships as rm
+ INNER JOIN current_state_events as c ON rm.event_id = c.event_id
+ WHERE c.type = 'm.room.member'
+ ) as out
+ """
+ else:
+ query = """
+ SELECT COUNT(DISTINCT split_part(state_key, ':', 2))
+ FROM current_state_events
+ WHERE type = 'm.room.member' AND membership = 'join';
+ """
+ txn.execute(query)
+ return list(txn)[0][0]
+
+ count = yield self.db_pool.runInteraction("get_known_servers", _transact)
+
+ # We always know about ourselves, even if we have nothing in
+ # room_memberships (for example, the server is new).
+ self._known_servers_count = max([count, 1])
+ return self._known_servers_count
+
+ def _check_safe_current_state_events_membership_updated_txn(self, txn):
+ """Checks if it is safe to assume the new current_state_events
+ membership column is up to date
+ """
+
+ pending_update = self.db_pool.simple_select_one_txn(
+ txn,
+ table="background_updates",
+ keyvalues={"update_name": _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME},
+ retcols=["update_name"],
+ allow_none=True,
+ )
+
+ self._current_state_events_membership_up_to_date = not pending_update
+
+ # If the update is still running, reschedule to run.
+ if pending_update:
+ self._clock.call_later(
+ 15.0,
+ run_as_background_process,
+ "_check_safe_current_state_events_membership_updated",
+ self.db_pool.runInteraction,
+ "_check_safe_current_state_events_membership_updated",
+ self._check_safe_current_state_events_membership_updated_txn,
+ )
+
+ @cached(max_entries=100000, iterable=True)
+ def get_users_in_room(self, room_id: str):
+ return self.db_pool.runInteraction(
+ "get_users_in_room", self.get_users_in_room_txn, room_id
+ )
+
+ def get_users_in_room_txn(self, txn, room_id: str) -> List[str]:
+ # If we can assume current_state_events.membership is up to date
+ # then we can avoid a join, which is a Very Good Thing given how
+ # frequently this function gets called.
+ if self._current_state_events_membership_up_to_date:
+ sql = """
+ SELECT state_key FROM current_state_events
+ WHERE type = 'm.room.member' AND room_id = ? AND membership = ?
+ """
+ else:
+ sql = """
+ SELECT state_key FROM room_memberships as m
+ INNER JOIN current_state_events as c
+ ON m.event_id = c.event_id
+ AND m.room_id = c.room_id
+ AND m.user_id = c.state_key
+ WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ?
+ """
+
+ txn.execute(sql, (room_id, Membership.JOIN))
+ return [r[0] for r in txn]
+
+ @cached(max_entries=100000)
+ def get_room_summary(self, room_id: str):
+ """ Get the details of a room roughly suitable for use by the room
+ summary extension to /sync. Useful when lazy loading room members.
+ Args:
+ room_id: The room ID to query
+ Returns:
+            Deferred[dict[str, MemberSummary]]:
+ dict of membership states, pointing to a MemberSummary named tuple.
+ """
+
+ def _get_room_summary_txn(txn):
+ # first get counts.
+ # We do this all in one transaction to keep the cache small.
+ # FIXME: get rid of this when we have room_stats
+
+ # If we can assume current_state_events.membership is up to date
+ # then we can avoid a join, which is a Very Good Thing given how
+ # frequently this function gets called.
+ if self._current_state_events_membership_up_to_date:
+ # Note, rejected events will have a null membership field, so
+                # we manually filter them out.
+ sql = """
+ SELECT count(*), membership FROM current_state_events
+ WHERE type = 'm.room.member' AND room_id = ?
+ AND membership IS NOT NULL
+ GROUP BY membership
+ """
+ else:
+ sql = """
+ SELECT count(*), m.membership FROM room_memberships as m
+ INNER JOIN current_state_events as c
+ ON m.event_id = c.event_id
+ AND m.room_id = c.room_id
+ AND m.user_id = c.state_key
+ WHERE c.type = 'm.room.member' AND c.room_id = ?
+ GROUP BY m.membership
+ """
+
+ txn.execute(sql, (room_id,))
+ res = {}
+ for count, membership in txn:
+ summary = res.setdefault(membership, MemberSummary([], count))
+
+ # we order by membership and then fairly arbitrarily by event_id so
+ # heroes are consistent
+ if self._current_state_events_membership_up_to_date:
+ # Note, rejected events will have a null membership field, so
+                # we manually filter them out.
+ sql = """
+ SELECT state_key, membership, event_id
+ FROM current_state_events
+ WHERE type = 'm.room.member' AND room_id = ?
+ AND membership IS NOT NULL
+ ORDER BY
+ CASE membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
+ event_id ASC
+ LIMIT ?
+ """
+ else:
+ sql = """
+ SELECT c.state_key, m.membership, c.event_id
+ FROM room_memberships as m
+ INNER JOIN current_state_events as c USING (room_id, event_id)
+ WHERE c.type = 'm.room.member' AND c.room_id = ?
+ ORDER BY
+ CASE m.membership WHEN ? THEN 1 WHEN ? THEN 2 ELSE 3 END ASC,
+ c.event_id ASC
+ LIMIT ?
+ """
+
+ # 6 is 5 (number of heroes) plus 1, in case one of them is the calling user.
+ txn.execute(sql, (room_id, Membership.JOIN, Membership.INVITE, 6))
+ for user_id, membership, event_id in txn:
+ summary = res[membership]
+ # we will always have a summary for this membership type at this
+ # point given the summary currently contains the counts.
+ members = summary.members
+ members.append((user_id, event_id))
+
+ return res
+
+ return self.db_pool.runInteraction("get_room_summary", _get_room_summary_txn)
+
+ @cached()
+ def get_invited_rooms_for_local_user(self, user_id: str) -> Awaitable[RoomsForUser]:
+ """Get all the rooms the *local* user is invited to.
+
+ Args:
+ user_id: The user ID.
+
+ Returns:
+            An awaitable list of RoomsForUser.
+ """
+
+ return self.get_rooms_for_local_user_where_membership_is(
+ user_id, [Membership.INVITE]
+ )
+
+ async def get_invite_for_local_user_in_room(
+ self, user_id: str, room_id: str
+ ) -> Optional[RoomsForUser]:
+ """Gets the invite for the given *local* user and room.
+
+ Args:
+ user_id: The user ID to find the invite of.
+            room_id: The room the user was invited to.
+
+ Returns:
+ Either a RoomsForUser or None if no invite was found.
+ """
+ invites = await self.get_invited_rooms_for_local_user(user_id)
+ for invite in invites:
+ if invite.room_id == room_id:
+ return invite
+ return None
+
+ async def get_rooms_for_local_user_where_membership_is(
+ self, user_id: str, membership_list: List[str]
+ ) -> Optional[List[RoomsForUser]]:
+ """Get all the rooms for this *local* user where the membership for this user
+ matches one in the membership list.
+
+ Filters out forgotten rooms.
+
+ Args:
+ user_id: The user ID.
+ membership_list: A list of synapse.api.constants.Membership
+ values which the user must be in.
+
+ Returns:
+            A list of RoomsForUser for the rooms where the user's membership matches
+            one of the given types, or None if membership_list is empty.
+ """
+ if not membership_list:
+ return None
+
+ rooms = await self.db_pool.runInteraction(
+ "get_rooms_for_local_user_where_membership_is",
+ self._get_rooms_for_local_user_where_membership_is_txn,
+ user_id,
+ membership_list,
+ )
+
+ # Now we filter out forgotten rooms
+ forgotten_rooms = await self.get_forgotten_rooms_for_user(user_id)
+ return [room for room in rooms if room.room_id not in forgotten_rooms]
+
+ def _get_rooms_for_local_user_where_membership_is_txn(
+ self, txn, user_id: str, membership_list: List[str]
+ ) -> List[RoomsForUser]:
+ # Paranoia check.
+ if not self.hs.is_mine_id(user_id):
+ raise Exception(
+ "Cannot call 'get_rooms_for_local_user_where_membership_is' on non-local user %r"
+ % (user_id,),
+ )
+
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "c.membership", membership_list
+ )
+
+ sql = """
+ SELECT room_id, e.sender, c.membership, event_id, e.stream_ordering
+ FROM local_current_membership AS c
+ INNER JOIN events AS e USING (room_id, event_id)
+ WHERE
+ user_id = ?
+ AND %s
+ """ % (
+ clause,
+ )
+
+ txn.execute(sql, (user_id, *args))
+ results = [RoomsForUser(**r) for r in self.db_pool.cursor_to_dict(txn)]
+
+ return results
+
+ @cached(max_entries=500000, iterable=True)
+ def get_rooms_for_user_with_stream_ordering(self, user_id: str):
+ """Returns a set of room_ids the user is currently joined to.
+
+        For a remote user, this only returns rooms this server is currently
+        participating in.
+
+ Args:
+ user_id
+
+ Returns:
+ Deferred[frozenset[GetRoomsForUserWithStreamOrdering]]: Returns
+ the rooms the user is in currently, along with the stream ordering
+ of the most recent join for that user and room.
+ """
+ return self.db_pool.runInteraction(
+ "get_rooms_for_user_with_stream_ordering",
+ self._get_rooms_for_user_with_stream_ordering_txn,
+ user_id,
+ )
+
+ def _get_rooms_for_user_with_stream_ordering_txn(self, txn, user_id: str):
+ # We use `current_state_events` here and not `local_current_membership`
+ # as a) this gets called with remote users and b) this only gets called
+ # for rooms the server is participating in.
+ if self._current_state_events_membership_up_to_date:
+ sql = """
+ SELECT room_id, e.stream_ordering
+ FROM current_state_events AS c
+ INNER JOIN events AS e USING (room_id, event_id)
+ WHERE
+ c.type = 'm.room.member'
+ AND state_key = ?
+ AND c.membership = ?
+ """
+ else:
+ sql = """
+ SELECT room_id, e.stream_ordering
+ FROM current_state_events AS c
+ INNER JOIN room_memberships AS m USING (room_id, event_id)
+ INNER JOIN events AS e USING (room_id, event_id)
+ WHERE
+ c.type = 'm.room.member'
+ AND state_key = ?
+ AND m.membership = ?
+ """
+
+ txn.execute(sql, (user_id, Membership.JOIN))
+ results = frozenset(GetRoomsForUserWithStreamOrdering(*row) for row in txn)
+
+ return results
+
+ async def get_users_server_still_shares_room_with(
+ self, user_ids: Collection[str]
+ ) -> Set[str]:
+ """Given a list of users return the set that the server still share a
+ room with.
+ """
+
+ if not user_ids:
+ return set()
+
+ def _get_users_server_still_shares_room_with_txn(txn):
+ sql = """
+ SELECT state_key FROM current_state_events
+ WHERE
+ type = 'm.room.member'
+ AND membership = 'join'
+ AND %s
+ GROUP BY state_key
+ """
+
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "state_key", user_ids
+ )
+
+ txn.execute(sql % (clause,), args)
+
+ return {row[0] for row in txn}
+
+ return await self.db_pool.runInteraction(
+ "get_users_server_still_shares_room_with",
+ _get_users_server_still_shares_room_with_txn,
+ )
+
+ async def get_rooms_for_user(self, user_id: str, on_invalidate=None):
+ """Returns a set of room_ids the user is currently joined to.
+
+        For a remote user, this only returns rooms this server is currently
+        participating in.
+ """
+ rooms = await self.get_rooms_for_user_with_stream_ordering(
+ user_id, on_invalidate=on_invalidate
+ )
+ return frozenset(r.room_id for r in rooms)
+
+ @cached(max_entries=500000, cache_context=True, iterable=True)
+ async def get_users_who_share_room_with_user(
+ self, user_id: str, cache_context: _CacheContext
+ ) -> Set[str]:
+ """Returns the set of users who share a room with `user_id`
+ """
+ room_ids = await self.get_rooms_for_user(
+ user_id, on_invalidate=cache_context.invalidate
+ )
+
+ user_who_share_room = set()
+ for room_id in room_ids:
+ user_ids = await self.get_users_in_room(
+ room_id, on_invalidate=cache_context.invalidate
+ )
+ user_who_share_room.update(user_ids)
+
+ return user_who_share_room
+
+ async def get_joined_users_from_context(
+ self, event: EventBase, context: EventContext
+ ):
+ state_group = context.state_group
+ if not state_group:
+ # If state_group is None it means it has yet to be assigned a
+ # state group, i.e. we need to make sure that calls with a state_group
+ # of None don't hit previous cached calls with a None state_group.
+ # To do this we set the state_group to a new object as object() != object()
+ state_group = object()
+
+ current_state_ids = await context.get_current_state_ids()
+ return await self._get_joined_users_from_context(
+ event.room_id, state_group, current_state_ids, event=event, context=context
+ )
+
+ async def get_joined_users_from_state(self, room_id, state_entry):
+ state_group = state_entry.state_group
+ if not state_group:
+ # If state_group is None it means it has yet to be assigned a
+ # state group, i.e. we need to make sure that calls with a state_group
+ # of None don't hit previous cached calls with a None state_group.
+ # To do this we set the state_group to a new object as object() != object()
+ state_group = object()
+
+ with Measure(self._clock, "get_joined_users_from_state"):
+ return await self._get_joined_users_from_context(
+ room_id, state_group, state_entry.state, context=state_entry
+ )
+
+ @cached(num_args=2, cache_context=True, iterable=True, max_entries=100000)
+ async def _get_joined_users_from_context(
+ self,
+ room_id,
+ state_group,
+ current_state_ids,
+ cache_context,
+ event=None,
+ context=None,
+ ):
+ # We don't use `state_group`, it's there so that we can cache based
+        # on it. However, it's important that it's never None, since two current
+        # states with a state_group of None are likely to be different.
+ assert state_group is not None
+
+ users_in_room = {}
+ member_event_ids = [
+ e_id
+ for key, e_id in current_state_ids.items()
+ if key[0] == EventTypes.Member
+ ]
+
+ if context is not None:
+ # If we have a context with a delta from a previous state group,
+ # check if we also have the result from the previous group in cache.
+ # If we do then we can reuse that result and simply update it with
+ # any membership changes in `delta_ids`
+ if context.prev_group and context.delta_ids:
+ prev_res = self._get_joined_users_from_context.cache.get(
+ (room_id, context.prev_group), None
+ )
+ if prev_res and isinstance(prev_res, dict):
+ users_in_room = dict(prev_res)
+ member_event_ids = [
+ e_id
+ for key, e_id in context.delta_ids.items()
+ if key[0] == EventTypes.Member
+ ]
+ for etype, state_key in context.delta_ids:
+ if etype == EventTypes.Member:
+ users_in_room.pop(state_key, None)
+
+ # We check if we have any of the member event ids in the event cache
+ # before we ask the DB
+
+ # We don't update the event cache hit ratio as it completely throws off
+ # the hit ratio counts. After all, we don't populate the cache if we
+ # miss it here
+ event_map = self._get_events_from_cache(
+ member_event_ids, allow_rejected=False, update_metrics=False
+ )
+
+ missing_member_event_ids = []
+ for event_id in member_event_ids:
+ ev_entry = event_map.get(event_id)
+ if ev_entry:
+ if ev_entry.event.membership == Membership.JOIN:
+ users_in_room[ev_entry.event.state_key] = ProfileInfo(
+ display_name=ev_entry.event.content.get("displayname", None),
+ avatar_url=ev_entry.event.content.get("avatar_url", None),
+ )
+ else:
+ missing_member_event_ids.append(event_id)
+
+ if missing_member_event_ids:
+ event_to_memberships = await self._get_joined_profiles_from_event_ids(
+ missing_member_event_ids
+ )
+ users_in_room.update((row for row in event_to_memberships.values() if row))
+
+ if event is not None and event.type == EventTypes.Member:
+ if event.membership == Membership.JOIN:
+ if event.event_id in member_event_ids:
+ users_in_room[event.state_key] = ProfileInfo(
+ display_name=event.content.get("displayname", None),
+ avatar_url=event.content.get("avatar_url", None),
+ )
+
+ return users_in_room
+
+ @cached(max_entries=10000)
+ def _get_joined_profile_from_event_id(self, event_id):
+ raise NotImplementedError()
+
+ @cachedList(
+ cached_method_name="_get_joined_profile_from_event_id",
+ list_name="event_ids",
+ inlineCallbacks=True,
+ )
+ def _get_joined_profiles_from_event_ids(self, event_ids: Iterable[str]):
+ """For given set of member event_ids check if they point to a join
+ event and if so return the associated user and profile info.
+
+ Args:
+ event_ids: The member event IDs to lookup
+
+ Returns:
+ Deferred[dict[str, Tuple[str, ProfileInfo]|None]]: Map from event ID
+ to `user_id` and ProfileInfo (or None if not join event).
+ """
+
+ rows = yield self.db_pool.simple_select_many_batch(
+ table="room_memberships",
+ column="event_id",
+ iterable=event_ids,
+ retcols=("user_id", "display_name", "avatar_url", "event_id"),
+ keyvalues={"membership": Membership.JOIN},
+ batch_size=500,
+ desc="_get_membership_from_event_ids",
+ )
+
+ return {
+ row["event_id"]: (
+ row["user_id"],
+ ProfileInfo(
+ avatar_url=row["avatar_url"], display_name=row["display_name"]
+ ),
+ )
+ for row in rows
+ }
+
+ @cached(max_entries=10000)
+ async def is_host_joined(self, room_id: str, host: str) -> bool:
+ if "%" in host or "_" in host:
+ raise Exception("Invalid host name")
+
+ sql = """
+ SELECT state_key FROM current_state_events AS c
+ INNER JOIN room_memberships AS m USING (event_id)
+ WHERE m.membership = 'join'
+ AND type = 'm.room.member'
+ AND c.room_id = ?
+ AND state_key LIKE ?
+ LIMIT 1
+ """
+
+ # We do need to be careful to ensure that host doesn't have any wild cards
+ # in it, but we checked above for known ones and we'll check below that
+ # the returned user actually has the correct domain.
+ like_clause = "%:" + host
+
+ rows = await self.db_pool.execute(
+ "is_host_joined", None, sql, room_id, like_clause
+ )
+
+ if not rows:
+ return False
+
+ user_id = rows[0][0]
+ if get_domain_from_id(user_id) != host:
+ # This can only happen if the host name has something funky in it
+ raise Exception("Invalid host name")
+
+ return True
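+    # Usage sketch (illustrative; the room ID and remote host are hypothetical):
+    #
+    #     if await store.is_host_joined("!abcdefghijkl:example.org", "example.com"):
+    #         # at least one user on example.com is joined to the room
+    #         ...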
+
+ async def get_joined_hosts(self, room_id: str, state_entry):
+ state_group = state_entry.state_group
+ if not state_group:
+ # If state_group is None it means it has yet to be assigned a
+ # state group, i.e. we need to make sure that calls with a state_group
+ # of None don't hit previous cached calls with a None state_group.
+ # To do this we set the state_group to a new object as object() != object()
+ state_group = object()
+
+ with Measure(self._clock, "get_joined_hosts"):
+ return await self._get_joined_hosts(
+ room_id, state_group, state_entry.state, state_entry=state_entry
+ )
+
+ @cached(num_args=2, max_entries=10000, iterable=True)
+ async def _get_joined_hosts(
+ self, room_id, state_group, current_state_ids, state_entry
+ ):
+        # We don't use `state_group`, it's there so that we can cache based
+        # on it. However, it's important that it's never None, since two current
+        # states with a state_group of None are likely to be different.
+ assert state_group is not None
+
+ cache = await self._get_joined_hosts_cache(room_id)
+ return await cache.get_destinations(state_entry)
+
+ @cached(max_entries=10000)
+ def _get_joined_hosts_cache(self, room_id: str) -> "_JoinedHostsCache":
+ return _JoinedHostsCache(self, room_id)
+
+ @cached(num_args=2)
+ async def did_forget(self, user_id: str, room_id: str) -> bool:
+ """Returns whether user_id has elected to discard history for room_id.
+
+ Returns False if they have since re-joined."""
+
+ def f(txn):
+ sql = (
+ "SELECT"
+ " COUNT(*)"
+ " FROM"
+ " room_memberships"
+ " WHERE"
+ " user_id = ?"
+ " AND"
+ " room_id = ?"
+ " AND"
+ " forgotten = 0"
+ )
+ txn.execute(sql, (user_id, room_id))
+ rows = txn.fetchall()
+ return rows[0][0]
+
+ count = await self.db_pool.runInteraction("did_forget_membership", f)
+ return count == 0
+
+ @cached()
+ def get_forgotten_rooms_for_user(self, user_id: str):
+ """Gets all rooms the user has forgotten.
+
+ Args:
+ user_id
+
+ Returns:
+ Deferred[set[str]]
+ """
+
+ def _get_forgotten_rooms_for_user_txn(txn):
+ # This is a slightly convoluted query that first looks up all rooms
+ # that the user has forgotten in the past, then rechecks that list
+ # to see if any have subsequently been updated. This is done so that
+ # we can use a partial index on `forgotten = 1` on the assumption
+ # that few users will actually forget many rooms.
+ #
+ # Note that a room is considered "forgotten" if *all* membership
+ # events for that user and room have the forgotten field set (as
+ # when a user forgets a room we update all rows for that user and
+ # room, not just the current one).
+ sql = """
+ SELECT room_id, (
+ SELECT count(*) FROM room_memberships
+ WHERE room_id = m.room_id AND user_id = m.user_id AND forgotten = 0
+ ) AS count
+ FROM room_memberships AS m
+ WHERE user_id = ? AND forgotten = 1
+ GROUP BY room_id, user_id;
+ """
+ txn.execute(sql, (user_id,))
+ return {row[0] for row in txn if row[1] == 0}
+
+ return self.db_pool.runInteraction(
+ "get_forgotten_rooms_for_user", _get_forgotten_rooms_for_user_txn
+ )
+
+ async def get_rooms_user_has_been_in(self, user_id: str) -> Set[str]:
+ """Get all rooms that the user has ever been in.
+
+ Args:
+ user_id: The user ID to get the rooms of.
+
+ Returns:
+ Set of room IDs.
+ """
+
+ room_ids = await self.db_pool.simple_select_onecol(
+ table="room_memberships",
+ keyvalues={"membership": Membership.JOIN, "user_id": user_id},
+ retcol="room_id",
+ desc="get_rooms_user_has_been_in",
+ )
+
+ return set(room_ids)
+
+ def get_membership_from_event_ids(
+ self, member_event_ids: Iterable[str]
+ ) -> List[dict]:
+ """Get user_id and membership of a set of event IDs.
+ """
+
+ return self.db_pool.simple_select_many_batch(
+ table="room_memberships",
+ column="event_id",
+ iterable=member_event_ids,
+ retcols=("user_id", "membership", "event_id"),
+ keyvalues={},
+ batch_size=500,
+ desc="get_membership_from_event_ids",
+ )
+
+ async def is_local_host_in_room_ignoring_users(
+ self, room_id: str, ignore_users: Collection[str]
+ ) -> bool:
+ """Check if there are any local users, excluding those in the given
+ list, in the room.
+ """
+
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "user_id", ignore_users
+ )
+
+ sql = """
+ SELECT 1 FROM local_current_membership
+ WHERE
+ room_id = ? AND membership = ?
+ AND NOT (%s)
+ LIMIT 1
+ """ % (
+ clause,
+ )
+
+ def _is_local_host_in_room_ignoring_users_txn(txn):
+ txn.execute(sql, (room_id, Membership.JOIN, *args))
+
+ return bool(txn.fetchone())
+
+ return await self.db_pool.runInteraction(
+ "is_local_host_in_room_ignoring_users",
+ _is_local_host_in_room_ignoring_users_txn,
+ )
+
+
+class RoomMemberBackgroundUpdateStore(SQLBaseStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(RoomMemberBackgroundUpdateStore, self).__init__(database, db_conn, hs)
+ self.db_pool.updates.register_background_update_handler(
+ _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile
+ )
+ self.db_pool.updates.register_background_update_handler(
+ _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME,
+ self._background_current_state_membership,
+ )
+ self.db_pool.updates.register_background_index_update(
+ "room_membership_forgotten_idx",
+ index_name="room_memberships_user_room_forgotten",
+ table="room_memberships",
+ columns=["user_id", "room_id"],
+ where_clause="forgotten = 1",
+ )
+
+ async def _background_add_membership_profile(self, progress, batch_size):
+ target_min_stream_id = progress.get(
+ "target_min_stream_id_inclusive", self._min_stream_order_on_start
+ )
+ max_stream_id = progress.get(
+ "max_stream_id_exclusive", self._stream_order_on_start + 1
+ )
+
+ INSERT_CLUMP_SIZE = 1000
+
+ def add_membership_profile_txn(txn):
+ sql = """
+ SELECT stream_ordering, event_id, events.room_id, event_json.json
+ FROM events
+ INNER JOIN event_json USING (event_id)
+ INNER JOIN room_memberships USING (event_id)
+ WHERE ? <= stream_ordering AND stream_ordering < ?
+ AND type = 'm.room.member'
+ ORDER BY stream_ordering DESC
+ LIMIT ?
+ """
+
+ txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
+
+ rows = self.db_pool.cursor_to_dict(txn)
+ if not rows:
+ return 0
+
+ min_stream_id = rows[-1]["stream_ordering"]
+
+ to_update = []
+ for row in rows:
+ event_id = row["event_id"]
+ room_id = row["room_id"]
+ try:
+ event_json = db_to_json(row["json"])
+ content = event_json["content"]
+ except Exception:
+ continue
+
+ display_name = content.get("displayname", None)
+ avatar_url = content.get("avatar_url", None)
+
+ if display_name or avatar_url:
+ to_update.append((display_name, avatar_url, event_id, room_id))
+
+ to_update_sql = """
+ UPDATE room_memberships SET display_name = ?, avatar_url = ?
+ WHERE event_id = ? AND room_id = ?
+ """
+ for index in range(0, len(to_update), INSERT_CLUMP_SIZE):
+ clump = to_update[index : index + INSERT_CLUMP_SIZE]
+ txn.executemany(to_update_sql, clump)
+
+ progress = {
+ "target_min_stream_id_inclusive": target_min_stream_id,
+ "max_stream_id_exclusive": min_stream_id,
+ }
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, _MEMBERSHIP_PROFILE_UPDATE_NAME, progress
+ )
+
+ return len(rows)
+
+ result = await self.db_pool.runInteraction(
+ _MEMBERSHIP_PROFILE_UPDATE_NAME, add_membership_profile_txn
+ )
+
+ if not result:
+ await self.db_pool.updates._end_background_update(
+ _MEMBERSHIP_PROFILE_UPDATE_NAME
+ )
+
+ return result
+
+ async def _background_current_state_membership(self, progress, batch_size):
+ """Update the new membership column on current_state_events.
+
+        This works by iterating over all rooms in alphabetical order.
+ """
+
+ def _background_current_state_membership_txn(txn, last_processed_room):
+ processed = 0
+ while processed < batch_size:
+ txn.execute(
+ """
+ SELECT MIN(room_id) FROM current_state_events WHERE room_id > ?
+ """,
+ (last_processed_room,),
+ )
+ row = txn.fetchone()
+ if not row or not row[0]:
+ return processed, True
+
+ (next_room,) = row
+
+ sql = """
+ UPDATE current_state_events
+ SET membership = (
+ SELECT membership FROM room_memberships
+ WHERE event_id = current_state_events.event_id
+ )
+ WHERE room_id = ?
+ """
+ txn.execute(sql, (next_room,))
+ processed += txn.rowcount
+
+ last_processed_room = next_room
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn,
+ _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME,
+ {"last_processed_room": last_processed_room},
+ )
+
+ return processed, False
+
+ # If we haven't got a last processed room then just use the empty
+ # string, which will compare before all room IDs correctly.
+ last_processed_room = progress.get("last_processed_room", "")
+
+ row_count, finished = await self.db_pool.runInteraction(
+ "_background_current_state_membership_update",
+ _background_current_state_membership_txn,
+ last_processed_room,
+ )
+
+ if finished:
+ await self.db_pool.updates._end_background_update(
+ _CURRENT_STATE_MEMBERSHIP_UPDATE_NAME
+ )
+
+ return row_count
+
+
+class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(RoomMemberStore, self).__init__(database, db_conn, hs)
+
+ def forget(self, user_id: str, room_id: str):
+ """Indicate that user_id wishes to discard history for room_id."""
+
+ def f(txn):
+ sql = (
+ "UPDATE"
+ " room_memberships"
+ " SET"
+ " forgotten = 1"
+ " WHERE"
+ " user_id = ?"
+ " AND"
+ " room_id = ?"
+ )
+ txn.execute(sql, (user_id, room_id))
+
+ self._invalidate_cache_and_stream(txn, self.did_forget, (user_id, room_id))
+ self._invalidate_cache_and_stream(
+ txn, self.get_forgotten_rooms_for_user, (user_id,)
+ )
+
+ return self.db_pool.runInteraction("forget_membership", f)
+
+
+class _JoinedHostsCache(object):
+ """Cache for joined hosts in a room that is optimised to handle updates
+ via state deltas.
+ """
+
+ def __init__(self, store, room_id):
+ self.store = store
+ self.room_id = room_id
+
+ self.hosts_to_joined_users = {}
+
+ self.state_group = object()
+
+ self.linearizer = Linearizer("_JoinedHostsCache")
+
+ self._len = 0
+
+ async def get_destinations(self, state_entry: "_StateCacheEntry") -> Set[str]:
+ """Get set of destinations for a state entry
+
+ Args:
+ state_entry
+
+ Returns:
+ The destinations as a set.
+ """
+ if state_entry.state_group == self.state_group:
+ return frozenset(self.hosts_to_joined_users)
+
+ with (await self.linearizer.queue(())):
+ if state_entry.state_group == self.state_group:
+ pass
+ elif state_entry.prev_group == self.state_group:
+ for (typ, state_key), event_id in state_entry.delta_ids.items():
+ if typ != EventTypes.Member:
+ continue
+
+ host = intern_string(get_domain_from_id(state_key))
+ user_id = state_key
+ known_joins = self.hosts_to_joined_users.setdefault(host, set())
+
+ event = await self.store.get_event(event_id)
+ if event.membership == Membership.JOIN:
+ known_joins.add(user_id)
+ else:
+ known_joins.discard(user_id)
+
+ if not known_joins:
+ self.hosts_to_joined_users.pop(host, None)
+ else:
+ joined_users = await self.store.get_joined_users_from_state(
+ self.room_id, state_entry
+ )
+
+ self.hosts_to_joined_users = {}
+ for user_id in joined_users:
+ host = intern_string(get_domain_from_id(user_id))
+ self.hosts_to_joined_users.setdefault(host, set()).add(user_id)
+
+ if state_entry.state_group:
+ self.state_group = state_entry.state_group
+ else:
+ self.state_group = object()
+ self._len = sum(len(v) for v in self.hosts_to_joined_users.values())
+ return frozenset(self.hosts_to_joined_users)
+
+ def __len__(self):
+ return self._len
diff --git a/synapse/storage/databases/main/schema/delta/12/v12.sql b/synapse/storage/databases/main/schema/delta/12/v12.sql
new file mode 100644
index 00000000..5964c5aa
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/12/v12.sql
@@ -0,0 +1,63 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS rejections(
+ event_id TEXT NOT NULL,
+ reason TEXT NOT NULL,
+ last_check TEXT NOT NULL,
+ UNIQUE (event_id)
+);
+
+-- Push notification endpoints that users have configured
+CREATE TABLE IF NOT EXISTS pushers (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ user_name TEXT NOT NULL,
+ profile_tag VARCHAR(32) NOT NULL,
+ kind VARCHAR(8) NOT NULL,
+ app_id VARCHAR(64) NOT NULL,
+ app_display_name VARCHAR(64) NOT NULL,
+ device_display_name VARCHAR(128) NOT NULL,
+ pushkey VARBINARY(512) NOT NULL,
+ ts BIGINT UNSIGNED NOT NULL,
+ lang VARCHAR(8),
+ data LONGBLOB,
+ last_token TEXT,
+ last_success BIGINT UNSIGNED,
+ failing_since BIGINT UNSIGNED,
+ UNIQUE (app_id, pushkey)
+);
+
+CREATE TABLE IF NOT EXISTS push_rules (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ user_name TEXT NOT NULL,
+ rule_id TEXT NOT NULL,
+ priority_class TINYINT NOT NULL,
+ priority INTEGER NOT NULL DEFAULT 0,
+ conditions TEXT NOT NULL,
+ actions TEXT NOT NULL,
+ UNIQUE(user_name, rule_id)
+);
+
+CREATE INDEX IF NOT EXISTS push_rules_user_name on push_rules (user_name);
+
+CREATE TABLE IF NOT EXISTS user_filters(
+ user_id TEXT,
+ filter_id BIGINT UNSIGNED,
+ filter_json LONGBLOB
+);
+
+CREATE INDEX IF NOT EXISTS user_filters_by_user_id_filter_id ON user_filters(
+ user_id, filter_id
+);
diff --git a/synapse/storage/databases/main/schema/delta/13/v13.sql b/synapse/storage/databases/main/schema/delta/13/v13.sql
new file mode 100644
index 00000000..f8649e5d
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/13/v13.sql
@@ -0,0 +1,19 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* We used to create a tables called application_services and
+ * application_services_regex, but these are no longer used and are removed in
+ * delta 54.
+ */
diff --git a/synapse/storage/databases/main/schema/delta/14/v14.sql b/synapse/storage/databases/main/schema/delta/14/v14.sql
new file mode 100644
index 00000000..a831920d
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/14/v14.sql
@@ -0,0 +1,23 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+CREATE TABLE IF NOT EXISTS push_rules_enable (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ user_name TEXT NOT NULL,
+ rule_id TEXT NOT NULL,
+ enabled TINYINT,
+ UNIQUE(user_name, rule_id)
+);
+
+CREATE INDEX IF NOT EXISTS push_rules_enable_user_name on push_rules_enable (user_name);
diff --git a/synapse/storage/databases/main/schema/delta/15/appservice_txns.sql b/synapse/storage/databases/main/schema/delta/15/appservice_txns.sql
new file mode 100644
index 00000000..e4f5e76a
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/15/appservice_txns.sql
@@ -0,0 +1,31 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS application_services_state(
+ as_id TEXT PRIMARY KEY,
+ state VARCHAR(5),
+ last_txn INTEGER
+);
+
+CREATE TABLE IF NOT EXISTS application_services_txns(
+ as_id TEXT NOT NULL,
+ txn_id INTEGER NOT NULL,
+ event_ids TEXT NOT NULL,
+ UNIQUE(as_id, txn_id)
+);
+
+CREATE INDEX IF NOT EXISTS application_services_txns_id ON application_services_txns (
+ as_id
+);
diff --git a/synapse/storage/databases/main/schema/delta/15/presence_indices.sql b/synapse/storage/databases/main/schema/delta/15/presence_indices.sql
new file mode 100644
index 00000000..6b8d0f1c
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/15/presence_indices.sql
@@ -0,0 +1,2 @@
+
+CREATE INDEX IF NOT EXISTS presence_list_user_id ON presence_list (user_id);
diff --git a/synapse/storage/databases/main/schema/delta/15/v15.sql b/synapse/storage/databases/main/schema/delta/15/v15.sql
new file mode 100644
index 00000000..9523d2bc
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/15/v15.sql
@@ -0,0 +1,24 @@
+-- Drop, copy & recreate pushers table to change unique key
+-- Also add access_token column at the same time
+CREATE TABLE IF NOT EXISTS pushers2 (
+ id BIGINT PRIMARY KEY,
+ user_name TEXT NOT NULL,
+ access_token BIGINT DEFAULT NULL,
+ profile_tag VARCHAR(32) NOT NULL,
+ kind VARCHAR(8) NOT NULL,
+ app_id VARCHAR(64) NOT NULL,
+ app_display_name VARCHAR(64) NOT NULL,
+ device_display_name VARCHAR(128) NOT NULL,
+ pushkey bytea NOT NULL,
+ ts BIGINT NOT NULL,
+ lang VARCHAR(8),
+ data bytea,
+ last_token TEXT,
+ last_success BIGINT,
+ failing_since BIGINT,
+ UNIQUE (app_id, pushkey)
+);
+INSERT INTO pushers2 (id, user_name, profile_tag, kind, app_id, app_display_name, device_display_name, pushkey, ts, lang, data, last_token, last_success, failing_since)
+ SELECT id, user_name, profile_tag, kind, app_id, app_display_name, device_display_name, pushkey, ts, lang, data, last_token, last_success, failing_since FROM pushers;
+DROP TABLE pushers;
+ALTER TABLE pushers2 RENAME TO pushers;
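This delta uses the classic SQLite-friendly pattern for changing a constraint: create a replacement table with the new definition, copy the rows across, drop the original, and rename. A generic sketch of the same pattern (table and column names here are illustrative):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE t (a TEXT, b TEXT, UNIQUE (a))")
    conn.execute("INSERT INTO t VALUES ('x', 'y')")

    with conn:  # run the swap atomically
        # 1. New table with the constraint we actually want.
        conn.execute("CREATE TABLE t2 (a TEXT, b TEXT, UNIQUE (a, b))")
        # 2. Copy the data across, listing columns explicitly so any
        #    newly added columns (like access_token above) default to NULL.
        conn.execute("INSERT INTO t2 (a, b) SELECT a, b FROM t")
        # 3. Swap the tables.
        conn.execute("DROP TABLE t")
        conn.execute("ALTER TABLE t2 RENAME TO t")

    print(conn.execute("SELECT * FROM t").fetchall())  # [('x', 'y')]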
diff --git a/synapse/storage/databases/main/schema/delta/16/events_order_index.sql b/synapse/storage/databases/main/schema/delta/16/events_order_index.sql
new file mode 100644
index 00000000..a48f2151
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/16/events_order_index.sql
@@ -0,0 +1,4 @@
+CREATE INDEX events_order ON events (topological_ordering, stream_ordering);
+CREATE INDEX events_order_room ON events (
+ room_id, topological_ordering, stream_ordering
+);
diff --git a/synapse/storage/databases/main/schema/delta/16/remote_media_cache_index.sql b/synapse/storage/databases/main/schema/delta/16/remote_media_cache_index.sql
new file mode 100644
index 00000000..7a15265c
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/16/remote_media_cache_index.sql
@@ -0,0 +1,2 @@
+CREATE INDEX IF NOT EXISTS remote_media_cache_thumbnails_media_id
+    ON remote_media_cache_thumbnails (media_id);
\ No newline at end of file
diff --git a/synapse/storage/databases/main/schema/delta/16/remove_duplicates.sql b/synapse/storage/databases/main/schema/delta/16/remove_duplicates.sql
new file mode 100644
index 00000000..65c97b5e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/16/remove_duplicates.sql
@@ -0,0 +1,9 @@
+
+
+DELETE FROM event_to_state_groups WHERE state_group not in (
+ SELECT MAX(state_group) FROM event_to_state_groups GROUP BY event_id
+);
+
+DELETE FROM event_to_state_groups WHERE rowid not in (
+ SELECT MIN(rowid) FROM event_to_state_groups GROUP BY event_id
+);
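Both statements rely on SQLite's implicit rowid to pick one canonical row per event_id and delete the rest. Note that the first subquery collects maxima across all events, so the second statement is still needed to mop up remaining duplicates. A small standalone demonstration of the rowid NOT IN (SELECT MIN(rowid) ... GROUP BY ...) idiom (a sketch, not Synapse code):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE event_to_state_groups (event_id TEXT, state_group BIGINT)")
    conn.executemany(
        "INSERT INTO event_to_state_groups VALUES (?, ?)",
        [("$e1", 1), ("$e1", 2), ("$e2", 7)],
    )

    # Keep the row with the highest state_group for each event...
    conn.execute(
        """
        DELETE FROM event_to_state_groups WHERE state_group NOT IN (
            SELECT MAX(state_group) FROM event_to_state_groups GROUP BY event_id
        )
        """
    )
    # ...then collapse any remaining exact duplicates to the first-inserted row.
    conn.execute(
        """
        DELETE FROM event_to_state_groups WHERE rowid NOT IN (
            SELECT MIN(rowid) FROM event_to_state_groups GROUP BY event_id
        )
        """
    )
    print(conn.execute("SELECT * FROM event_to_state_groups ORDER BY event_id").fetchall())
    # [('$e1', 2), ('$e2', 7)]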
diff --git a/synapse/storage/databases/main/schema/delta/16/room_alias_index.sql b/synapse/storage/databases/main/schema/delta/16/room_alias_index.sql
new file mode 100644
index 00000000..f8248613
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/16/room_alias_index.sql
@@ -0,0 +1,3 @@
+
+CREATE INDEX IF NOT EXISTS room_aliases_id ON room_aliases(room_id);
+CREATE INDEX IF NOT EXISTS room_alias_servers_alias ON room_alias_servers(room_alias);
diff --git a/synapse/storage/databases/main/schema/delta/16/unique_constraints.sql b/synapse/storage/databases/main/schema/delta/16/unique_constraints.sql
new file mode 100644
index 00000000..5b8de52c
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/16/unique_constraints.sql
@@ -0,0 +1,72 @@
+
+-- We can use SQLite features here, since other db support was only added in v16
+
+--
+DELETE FROM current_state_events WHERE rowid not in (
+ SELECT MIN(rowid) FROM current_state_events GROUP BY event_id
+);
+
+DROP INDEX IF EXISTS current_state_events_event_id;
+CREATE UNIQUE INDEX current_state_events_event_id ON current_state_events(event_id);
+
+--
+DELETE FROM room_memberships WHERE rowid not in (
+ SELECT MIN(rowid) FROM room_memberships GROUP BY event_id
+);
+
+DROP INDEX IF EXISTS room_memberships_event_id;
+CREATE UNIQUE INDEX room_memberships_event_id ON room_memberships(event_id);
+
+--
+DELETE FROM topics WHERE rowid not in (
+ SELECT MIN(rowid) FROM topics GROUP BY event_id
+);
+
+DROP INDEX IF EXISTS topics_event_id;
+CREATE UNIQUE INDEX topics_event_id ON topics(event_id);
+
+--
+DELETE FROM room_names WHERE rowid not in (
+ SELECT MIN(rowid) FROM room_names GROUP BY event_id
+);
+
+DROP INDEX IF EXISTS room_names_id;
+CREATE UNIQUE INDEX room_names_id ON room_names(event_id);
+
+--
+DELETE FROM presence WHERE rowid not in (
+ SELECT MIN(rowid) FROM presence GROUP BY user_id
+);
+
+DROP INDEX IF EXISTS presence_id;
+CREATE UNIQUE INDEX presence_id ON presence(user_id);
+
+--
+DELETE FROM presence_allow_inbound WHERE rowid not in (
+ SELECT MIN(rowid) FROM presence_allow_inbound
+ GROUP BY observed_user_id, observer_user_id
+);
+
+DROP INDEX IF EXISTS presence_allow_inbound_observers;
+CREATE UNIQUE INDEX presence_allow_inbound_observers ON presence_allow_inbound(
+ observed_user_id, observer_user_id
+);
+
+--
+DELETE FROM presence_list WHERE rowid not in (
+ SELECT MIN(rowid) FROM presence_list
+ GROUP BY user_id, observed_user_id
+);
+
+DROP INDEX IF EXISTS presence_list_observers;
+CREATE UNIQUE INDEX presence_list_observers ON presence_list(
+ user_id, observed_user_id
+);
+
+--
+DELETE FROM room_aliases WHERE rowid not in (
+ SELECT MIN(rowid) FROM room_aliases GROUP BY room_alias
+);
+
+DROP INDEX IF EXISTS room_aliases_id;
+CREATE INDEX room_aliases_id ON room_aliases(room_id);
diff --git a/synapse/storage/databases/main/schema/delta/16/users.sql b/synapse/storage/databases/main/schema/delta/16/users.sql
new file mode 100644
index 00000000..cd070925
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/16/users.sql
@@ -0,0 +1,56 @@
+-- Convert `access_tokens`.user from rowids to user strings.
+-- MUST BE DONE BEFORE REMOVING ID COLUMN FROM USERS TABLE BELOW
+CREATE TABLE IF NOT EXISTS new_access_tokens(
+ id BIGINT UNSIGNED PRIMARY KEY,
+ user_id TEXT NOT NULL,
+ device_id TEXT,
+ token TEXT NOT NULL,
+ last_used BIGINT UNSIGNED,
+ UNIQUE(token)
+);
+
+INSERT INTO new_access_tokens
+ SELECT a.id, u.name, a.device_id, a.token, a.last_used
+ FROM access_tokens as a
+ INNER JOIN users as u ON u.id = a.user_id;
+
+DROP TABLE access_tokens;
+
+ALTER TABLE new_access_tokens RENAME TO access_tokens;
+
+-- Remove ID column from `users` table
+CREATE TABLE IF NOT EXISTS new_users(
+ name TEXT,
+ password_hash TEXT,
+ creation_ts BIGINT UNSIGNED,
+ admin BOOL DEFAULT 0 NOT NULL,
+ UNIQUE(name)
+);
+
+INSERT INTO new_users SELECT name, password_hash, creation_ts, admin FROM users;
+
+DROP TABLE users;
+
+ALTER TABLE new_users RENAME TO users;
+
+
+-- Remove UNIQUE constraint from `user_ips` table
+CREATE TABLE IF NOT EXISTS new_user_ips (
+ user_id TEXT NOT NULL,
+ access_token TEXT NOT NULL,
+ device_id TEXT,
+ ip TEXT NOT NULL,
+ user_agent TEXT NOT NULL,
+ last_seen BIGINT UNSIGNED NOT NULL
+);
+
+INSERT INTO new_user_ips
+ SELECT user, access_token, device_id, ip, user_agent, last_seen FROM user_ips;
+
+DROP TABLE user_ips;
+
+ALTER TABLE new_user_ips RENAME TO user_ips;
+
+CREATE INDEX IF NOT EXISTS user_ips_user ON user_ips(user_id);
+CREATE INDEX IF NOT EXISTS user_ips_user_ip ON user_ips(user_id, access_token, ip);
+
diff --git a/synapse/storage/databases/main/schema/delta/17/drop_indexes.sql b/synapse/storage/databases/main/schema/delta/17/drop_indexes.sql
new file mode 100644
index 00000000..7c9a90e2
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/17/drop_indexes.sql
@@ -0,0 +1,18 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+DROP INDEX IF EXISTS sent_transaction_dest;
+DROP INDEX IF EXISTS sent_transaction_sent;
+DROP INDEX IF EXISTS user_ips_user;
diff --git a/synapse/storage/databases/main/schema/delta/17/server_keys.sql b/synapse/storage/databases/main/schema/delta/17/server_keys.sql
new file mode 100644
index 00000000..70b247a0
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/17/server_keys.sql
@@ -0,0 +1,24 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS server_keys_json (
+ server_name TEXT, -- Server name.
+ key_id TEXT, -- Requested key id.
+ from_server TEXT, -- Which server the keys were fetched from.
+ ts_added_ms INTEGER, -- When the keys were fetched.
+ ts_valid_until_ms INTEGER, -- When this version of the keys expires.
+ key_json bytea, -- JSON certificate for the remote server.
+ CONSTRAINT uniqueness UNIQUE (server_name, key_id, from_server)
+);
diff --git a/synapse/storage/databases/main/schema/delta/17/user_threepids.sql b/synapse/storage/databases/main/schema/delta/17/user_threepids.sql
new file mode 100644
index 00000000..c17715ac
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/17/user_threepids.sql
@@ -0,0 +1,9 @@
+CREATE TABLE user_threepids (
+ user_id TEXT NOT NULL,
+ medium TEXT NOT NULL,
+ address TEXT NOT NULL,
+ validated_at BIGINT NOT NULL,
+ added_at BIGINT NOT NULL,
+ CONSTRAINT user_medium_address UNIQUE (user_id, medium, address)
+);
+CREATE INDEX user_threepids_user_id ON user_threepids(user_id);
diff --git a/synapse/storage/databases/main/schema/delta/18/server_keys_bigger_ints.sql b/synapse/storage/databases/main/schema/delta/18/server_keys_bigger_ints.sql
new file mode 100644
index 00000000..6e0871c9
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/18/server_keys_bigger_ints.sql
@@ -0,0 +1,32 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE IF NOT EXISTS new_server_keys_json (
+ server_name TEXT NOT NULL, -- Server name.
+ key_id TEXT NOT NULL, -- Requested key id.
+ from_server TEXT NOT NULL, -- Which server the keys were fetched from.
+ ts_added_ms BIGINT NOT NULL, -- When the keys were fetched.
+ ts_valid_until_ms BIGINT NOT NULL, -- When this version of the keys expires.
+ key_json bytea NOT NULL, -- JSON certificate for the remote server.
+ CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server)
+);
+
+INSERT INTO new_server_keys_json
+ SELECT server_name, key_id, from_server, ts_added_ms, ts_valid_until_ms, key_json FROM server_keys_json;
+
+DROP TABLE server_keys_json;
+
+ALTER TABLE new_server_keys_json RENAME TO server_keys_json;
diff --git a/synapse/storage/databases/main/schema/delta/19/event_index.sql b/synapse/storage/databases/main/schema/delta/19/event_index.sql
new file mode 100644
index 00000000..18b97b43
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/19/event_index.sql
@@ -0,0 +1,19 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE INDEX events_order_topo_stream_room ON events(
+ topological_ordering, stream_ordering, room_id
+);
diff --git a/synapse/storage/databases/main/schema/delta/20/dummy.sql b/synapse/storage/databases/main/schema/delta/20/dummy.sql
new file mode 100644
index 00000000..e0ac49d1
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/20/dummy.sql
@@ -0,0 +1 @@
+SELECT 1;
diff --git a/synapse/storage/databases/main/schema/delta/20/pushers.py b/synapse/storage/databases/main/schema/delta/20/pushers.py
new file mode 100644
index 00000000..3edfcfd7
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/20/pushers.py
@@ -0,0 +1,89 @@
+# Copyright 2015, 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+The main purpose of this upgrade is to change the unique key on the
+pushers table again (it was missed when the v16 full schema was
+made), but it also changes the pushkey and data columns to text.
+When selecting a bytea column into a text column, Postgres inserts
+the hex-encoded data, and there's no portable way of getting the
+UTF-8 bytes, so we have to do it in Python.
+"""
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+    logger.info("Porting pushers table...")
+    cur.execute(
+        """
+        CREATE TABLE IF NOT EXISTS pushers2 (
+          id BIGINT PRIMARY KEY,
+          user_name TEXT NOT NULL,
+          access_token BIGINT DEFAULT NULL,
+          profile_tag VARCHAR(32) NOT NULL,
+          kind VARCHAR(8) NOT NULL,
+          app_id VARCHAR(64) NOT NULL,
+          app_display_name VARCHAR(64) NOT NULL,
+          device_display_name VARCHAR(128) NOT NULL,
+          pushkey TEXT NOT NULL,
+          ts BIGINT NOT NULL,
+          lang VARCHAR(8),
+          data TEXT,
+          last_token TEXT,
+          last_success BIGINT,
+          failing_since BIGINT,
+          UNIQUE (app_id, pushkey, user_name)
+        )
+        """
+    )
+    cur.execute(
+        """SELECT
+        id, user_name, access_token, profile_tag, kind,
+        app_id, app_display_name, device_display_name,
+        pushkey, ts, lang, data, last_token, last_success,
+        failing_since
+        FROM pushers
+        """
+    )
+    count = 0
+    for row in cur.fetchall():
+        row = list(row)
+        row[8] = bytes(row[8]).decode("utf-8")
+        # `data` is nullable, so guard against None before decoding.
+        row[11] = bytes(row[11]).decode("utf-8") if row[11] is not None else None
+        cur.execute(
+            database_engine.convert_param_style(
+                """
+                INSERT into pushers2 (
+                    id, user_name, access_token, profile_tag, kind,
+                    app_id, app_display_name, device_display_name,
+                    pushkey, ts, lang, data, last_token, last_success,
+                    failing_since
+                ) values (%s)"""
+                % (",".join(["?" for _ in range(len(row))]))
+            ),
+            row,
+        )
+        count += 1
+    cur.execute("DROP TABLE pushers")
+    cur.execute("ALTER TABLE pushers2 RENAME TO pushers")
+    logger.info("Moved %d pushers to new table", count)
+
+
+def run_upgrade(*args, **kwargs):
+    pass
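run_create builds the INSERT with one "?" placeholder per column and passes it through database_engine.convert_param_style so the same script works on both engines. A rough sketch of what that hook amounts to, as an assumption about the engine classes in synapse/storage/engines (they carry more state than this):

    def convert_param_style(sql, paramstyle="qmark"):
        # sqlite3 uses "?" placeholders natively; psycopg2 expects "%s".
        # Assumed simplification of the engine hook used above.
        if paramstyle == "format":
            return sql.replace("?", "%s")
        return sql

    print(convert_param_style("INSERT INTO pushers2 VALUES (?, ?)", "format"))
    # INSERT INTO pushers2 VALUES (%s, %s)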
diff --git a/synapse/storage/databases/main/schema/delta/21/end_to_end_keys.sql b/synapse/storage/databases/main/schema/delta/21/end_to_end_keys.sql
new file mode 100644
index 00000000..4c2fb20b
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/21/end_to_end_keys.sql
@@ -0,0 +1,34 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE IF NOT EXISTS e2e_device_keys_json (
+ user_id TEXT NOT NULL, -- The user these keys are for.
+ device_id TEXT NOT NULL, -- Which of the user's devices these keys are for.
+ ts_added_ms BIGINT NOT NULL, -- When the keys were uploaded.
+ key_json TEXT NOT NULL, -- The keys for the device as a JSON blob.
+ CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id)
+);
+
+
+CREATE TABLE IF NOT EXISTS e2e_one_time_keys_json (
+ user_id TEXT NOT NULL, -- The user this one-time key is for.
+ device_id TEXT NOT NULL, -- The device this one-time key is for.
+ algorithm TEXT NOT NULL, -- Which algorithm this one-time key is for.
+ key_id TEXT NOT NULL, -- An id for suppressing duplicate uploads.
+ ts_added_ms BIGINT NOT NULL, -- When this key was uploaded.
+ key_json TEXT NOT NULL, -- The key as a JSON blob.
+ CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id)
+);
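The one-time-keys table is append-only on upload and delete-on-claim: each key should be handed out at most once. A sketch of a claim operation against this schema, done in a single transaction so two readers cannot take the same key (illustrative only, not Synapse's key-claim code):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute(
        """
        CREATE TABLE e2e_one_time_keys_json (
            user_id TEXT NOT NULL, device_id TEXT NOT NULL,
            algorithm TEXT NOT NULL, key_id TEXT NOT NULL,
            ts_added_ms BIGINT NOT NULL, key_json TEXT NOT NULL,
            CONSTRAINT e2e_one_time_keys_json_uniqueness
                UNIQUE (user_id, device_id, algorithm, key_id)
        )
        """
    )
    conn.execute(
        "INSERT INTO e2e_one_time_keys_json VALUES (?, ?, ?, ?, ?, ?)",
        ("@bob:example.com", "DEV1", "signed_curve25519", "AAAA", 0, "{}"),
    )

    def claim_one_time_key(user_id, device_id, algorithm):
        with conn:  # SELECT + DELETE in one transaction
            row = conn.execute(
                "SELECT key_id, key_json FROM e2e_one_time_keys_json"
                " WHERE user_id = ? AND device_id = ? AND algorithm = ?"
                " LIMIT 1",
                (user_id, device_id, algorithm),
            ).fetchone()
            if row is None:
                return None
            conn.execute(
                "DELETE FROM e2e_one_time_keys_json"
                " WHERE user_id = ? AND device_id = ? AND algorithm = ? AND key_id = ?",
                (user_id, device_id, algorithm, row[0]),
            )
            return row

    print(claim_one_time_key("@bob:example.com", "DEV1", "signed_curve25519"))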
diff --git a/synapse/storage/databases/main/schema/delta/21/receipts.sql b/synapse/storage/databases/main/schema/delta/21/receipts.sql
new file mode 100644
index 00000000..d0708454
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/21/receipts.sql
@@ -0,0 +1,38 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE IF NOT EXISTS receipts_graph(
+ room_id TEXT NOT NULL,
+ receipt_type TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ event_ids TEXT NOT NULL,
+ data TEXT NOT NULL,
+ CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id)
+);
+
+CREATE TABLE IF NOT EXISTS receipts_linearized (
+ stream_id BIGINT NOT NULL,
+ room_id TEXT NOT NULL,
+ receipt_type TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ event_id TEXT NOT NULL,
+ data TEXT NOT NULL,
+ CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id)
+);
+
+CREATE INDEX receipts_linearized_id ON receipts_linearized(
+ stream_id
+);
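receipts_linearized keeps one row per (room, receipt type, user), replaced each time the user's read marker advances, with stream_id ordering the changes for incremental sync. A sketch of the upsert, assuming the caller allocates monotonically increasing stream IDs:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute(
        """
        CREATE TABLE receipts_linearized (
            stream_id BIGINT NOT NULL, room_id TEXT NOT NULL,
            receipt_type TEXT NOT NULL, user_id TEXT NOT NULL,
            event_id TEXT NOT NULL, data TEXT NOT NULL,
            CONSTRAINT receipts_linearized_uniqueness
                UNIQUE (room_id, receipt_type, user_id)
        )
        """
    )

    def set_receipt(stream_id, room_id, user_id, event_id):
        # The UNIQUE constraint makes INSERT OR REPLACE overwrite the
        # previous receipt for this (room, type, user) triple.
        conn.execute(
            "INSERT OR REPLACE INTO receipts_linearized"
            " (stream_id, room_id, receipt_type, user_id, event_id, data)"
            " VALUES (?, ?, 'm.read', ?, ?, '{}')",
            (stream_id, room_id, user_id, event_id),
        )

    set_receipt(1, "!room:example.com", "@alice:example.com", "$event1")
    set_receipt(2, "!room:example.com", "@alice:example.com", "$event2")
    print(conn.execute("SELECT stream_id, event_id FROM receipts_linearized").fetchall())
    # [(2, '$event2')]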
diff --git a/synapse/storage/databases/main/schema/delta/22/receipts_index.sql b/synapse/storage/databases/main/schema/delta/22/receipts_index.sql
new file mode 100644
index 00000000..bfc0b3bc
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/22/receipts_index.sql
@@ -0,0 +1,22 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Using CREATE INDEX directly is deprecated in favour of using background
+ * updates; see synapse/storage/schema/delta/33/access_tokens_device_index.sql
+ * and synapse/storage/registration.py for an example using
+ * "access_tokens_device_index". **/
+CREATE INDEX receipts_linearized_room_stream ON receipts_linearized(
+ room_id, stream_id
+);
diff --git a/synapse/storage/databases/main/schema/delta/22/user_threepids_unique.sql b/synapse/storage/databases/main/schema/delta/22/user_threepids_unique.sql
new file mode 100644
index 00000000..87edfa45
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/22/user_threepids_unique.sql
@@ -0,0 +1,19 @@
+CREATE TABLE IF NOT EXISTS user_threepids2 (
+ user_id TEXT NOT NULL,
+ medium TEXT NOT NULL,
+ address TEXT NOT NULL,
+ validated_at BIGINT NOT NULL,
+ added_at BIGINT NOT NULL,
+ CONSTRAINT medium_address UNIQUE (medium, address)
+);
+
+INSERT INTO user_threepids2
+ SELECT * FROM user_threepids WHERE added_at IN (
+ SELECT max(added_at) FROM user_threepids GROUP BY medium, address
+ )
+;
+
+DROP TABLE user_threepids;
+ALTER TABLE user_threepids2 RENAME TO user_threepids;
+
+CREATE INDEX user_threepids_user_id ON user_threepids(user_id);
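The rebuild above keeps, for each (medium, address) pair, the most recently added binding before imposing the new uniqueness. A correlated form of the same dedupe makes that intent explicit; a standalone sketch, equivalent in spirit to the delta's subquery:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute(
        "CREATE TABLE user_threepids ("
        " user_id TEXT, medium TEXT, address TEXT,"
        " validated_at BIGINT, added_at BIGINT)"
    )
    conn.executemany(
        "INSERT INTO user_threepids VALUES (?, 'email', 'a@b.c', ?, ?)",
        [("@old:hs", 1, 1), ("@new:hs", 2, 2)],
    )

    # Keep, for each (medium, address), only the row with the greatest added_at.
    kept = conn.execute(
        """
        SELECT * FROM user_threepids AS t WHERE added_at = (
            SELECT MAX(added_at) FROM user_threepids
            WHERE medium = t.medium AND address = t.address
        )
        """
    ).fetchall()
    print(kept)  # only the @new:hs row survives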
diff --git a/synapse/storage/databases/main/schema/delta/24/stats_reporting.sql b/synapse/storage/databases/main/schema/delta/24/stats_reporting.sql
new file mode 100644
index 00000000..acea7483
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/24/stats_reporting.sql
@@ -0,0 +1,18 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /* We used to create a table called stats_reporting, but this is no longer
+ * used and is removed in delta 54.
+ */
\ No newline at end of file
diff --git a/synapse/storage/databases/main/schema/delta/25/fts.py b/synapse/storage/databases/main/schema/delta/25/fts.py
new file mode 100644
index 00000000..ee675e71
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/25/fts.py
@@ -0,0 +1,80 @@
+# Copyright 2015, 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import logging
+
+from synapse.storage.engines import PostgresEngine, Sqlite3Engine
+from synapse.storage.prepare_database import get_statements
+
+logger = logging.getLogger(__name__)
+
+
+POSTGRES_TABLE = """
+CREATE TABLE IF NOT EXISTS event_search (
+ event_id TEXT,
+ room_id TEXT,
+ sender TEXT,
+ key TEXT,
+ vector tsvector
+);
+
+CREATE INDEX event_search_fts_idx ON event_search USING gin(vector);
+CREATE INDEX event_search_ev_idx ON event_search(event_id);
+CREATE INDEX event_search_ev_ridx ON event_search(room_id);
+"""
+
+
+SQLITE_TABLE = (
+ "CREATE VIRTUAL TABLE event_search"
+ " USING fts4 ( event_id, room_id, sender, key, value )"
+)
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+    if isinstance(database_engine, PostgresEngine):
+        for statement in get_statements(POSTGRES_TABLE.splitlines()):
+            cur.execute(statement)
+    elif isinstance(database_engine, Sqlite3Engine):
+        cur.execute(SQLITE_TABLE)
+    else:
+        raise Exception("Unrecognized database engine")
+
+    cur.execute("SELECT MIN(stream_ordering) FROM events")
+    rows = cur.fetchall()
+    min_stream_id = rows[0][0]
+
+    cur.execute("SELECT MAX(stream_ordering) FROM events")
+    rows = cur.fetchall()
+    max_stream_id = rows[0][0]
+
+    if min_stream_id is not None and max_stream_id is not None:
+        progress = {
+            "target_min_stream_id_inclusive": min_stream_id,
+            "max_stream_id_exclusive": max_stream_id + 1,
+            "rows_inserted": 0,
+        }
+        progress_json = json.dumps(progress)
+
+        sql = (
+            "INSERT into background_updates (update_name, progress_json)"
+            " VALUES (?, ?)"
+        )
+
+        sql = database_engine.convert_param_style(sql)
+
+        cur.execute(sql, ("event_search", progress_json))
+
+
+def run_upgrade(*args, **kwargs):
+    pass
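On SQLite the search table is an fts4 virtual table queried with MATCH; on Postgres it is a tsvector column under a GIN index. A sketch of the SQLite side (this assumes an sqlite3 build with FTS4 enabled, which is the common default):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute(
        "CREATE VIRTUAL TABLE event_search"
        " USING fts4 ( event_id, room_id, sender, key, value )"
    )
    conn.execute(
        "INSERT INTO event_search VALUES (?, ?, ?, ?, ?)",
        ("$e1", "!r:hs", "@alice:hs", "content.body", "hello federated world"),
    )

    # MATCH runs a full-text query over the indexed column.
    rows = conn.execute(
        "SELECT event_id FROM event_search WHERE value MATCH ?", ("federated",)
    ).fetchall()
    print(rows)  # [('$e1',)]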
diff --git a/synapse/storage/databases/main/schema/delta/25/guest_access.sql b/synapse/storage/databases/main/schema/delta/25/guest_access.sql
new file mode 100644
index 00000000..1ea389b4
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/25/guest_access.sql
@@ -0,0 +1,25 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This is a manual index of guest_access content of state events,
+ * so that we can join on them in SELECT statements.
+ */
+CREATE TABLE IF NOT EXISTS guest_access(
+ event_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ guest_access TEXT NOT NULL,
+ UNIQUE (event_id)
+);
diff --git a/synapse/storage/databases/main/schema/delta/25/history_visibility.sql b/synapse/storage/databases/main/schema/delta/25/history_visibility.sql
new file mode 100644
index 00000000..f468fc18
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/25/history_visibility.sql
@@ -0,0 +1,25 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This is a manual index of history_visibility content of state events,
+ * so that we can join on them in SELECT statements.
+ */
+CREATE TABLE IF NOT EXISTS history_visibility(
+ event_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ history_visibility TEXT NOT NULL,
+ UNIQUE (event_id)
+);
diff --git a/synapse/storage/databases/main/schema/delta/25/tags.sql b/synapse/storage/databases/main/schema/delta/25/tags.sql
new file mode 100644
index 00000000..7a32ce68
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/25/tags.sql
@@ -0,0 +1,38 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE IF NOT EXISTS room_tags(
+ user_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ tag TEXT NOT NULL, -- The name of the tag.
+ content TEXT NOT NULL, -- The JSON content of the tag.
+ CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag)
+);
+
+CREATE TABLE IF NOT EXISTS room_tags_revisions (
+ user_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ stream_id BIGINT NOT NULL, -- The current version of the room tags.
+ CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id)
+);
+
+CREATE TABLE IF NOT EXISTS private_user_data_max_stream_id(
+ Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row.
+ stream_id BIGINT NOT NULL,
+ CHECK (Lock='X')
+);
+
+INSERT INTO private_user_data_max_stream_id (stream_id) VALUES (0);
diff --git a/synapse/storage/databases/main/schema/delta/26/account_data.sql b/synapse/storage/databases/main/schema/delta/26/account_data.sql
new file mode 100644
index 00000000..e395de2b
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/26/account_data.sql
@@ -0,0 +1,17 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ALTER TABLE private_user_data_max_stream_id RENAME TO account_data_max_stream_id;
diff --git a/synapse/storage/databases/main/schema/delta/27/account_data.sql b/synapse/storage/databases/main/schema/delta/27/account_data.sql
new file mode 100644
index 00000000..bf0558b5
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/27/account_data.sql
@@ -0,0 +1,36 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS account_data(
+ user_id TEXT NOT NULL,
+ account_data_type TEXT NOT NULL, -- The type of the account_data.
+ stream_id BIGINT NOT NULL, -- The version of the account_data.
+ content TEXT NOT NULL, -- The JSON content of the account_data
+ CONSTRAINT account_data_uniqueness UNIQUE (user_id, account_data_type)
+);
+
+
+CREATE TABLE IF NOT EXISTS room_account_data(
+ user_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ account_data_type TEXT NOT NULL, -- The type of the account_data.
+ stream_id BIGINT NOT NULL, -- The version of the account_data.
+ content TEXT NOT NULL, -- The JSON content of the account_data
+ CONSTRAINT room_account_data_uniqueness UNIQUE (user_id, room_id, account_data_type)
+);
+
+
+CREATE INDEX account_data_stream_id on account_data(user_id, stream_id);
+CREATE INDEX room_account_data_stream_id on room_account_data(user_id, stream_id);
diff --git a/synapse/storage/databases/main/schema/delta/27/forgotten_memberships.sql b/synapse/storage/databases/main/schema/delta/27/forgotten_memberships.sql
new file mode 100644
index 00000000..e2094f37
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/27/forgotten_memberships.sql
@@ -0,0 +1,26 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Keeps track of what rooms users have left and don't want to be able to
+ * access again.
+ *
+ * If all users on this server have left a room, we can delete the room
+ * entirely.
+ *
+ * This column should always contain either 0 or 1.
+ */
+
+ALTER TABLE room_memberships ADD COLUMN forgotten INTEGER DEFAULT 0;
diff --git a/synapse/storage/databases/main/schema/delta/27/ts.py b/synapse/storage/databases/main/schema/delta/27/ts.py
new file mode 100644
index 00000000..b7972cfa
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/27/ts.py
@@ -0,0 +1,59 @@
+# Copyright 2015, 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import logging
+
+from synapse.storage.prepare_database import get_statements
+
+logger = logging.getLogger(__name__)
+
+
+ALTER_TABLE = (
+ "ALTER TABLE events ADD COLUMN origin_server_ts BIGINT;"
+ "CREATE INDEX events_ts ON events(origin_server_ts, stream_ordering);"
+)
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+    for statement in get_statements(ALTER_TABLE.splitlines()):
+        cur.execute(statement)
+
+    cur.execute("SELECT MIN(stream_ordering) FROM events")
+    rows = cur.fetchall()
+    min_stream_id = rows[0][0]
+
+    cur.execute("SELECT MAX(stream_ordering) FROM events")
+    rows = cur.fetchall()
+    max_stream_id = rows[0][0]
+
+    if min_stream_id is not None and max_stream_id is not None:
+        progress = {
+            "target_min_stream_id_inclusive": min_stream_id,
+            "max_stream_id_exclusive": max_stream_id + 1,
+            "rows_inserted": 0,
+        }
+        progress_json = json.dumps(progress)
+
+        sql = (
+            "INSERT into background_updates (update_name, progress_json)"
+            " VALUES (?, ?)"
+        )
+
+        sql = database_engine.convert_param_style(sql)
+
+        cur.execute(sql, ("event_origin_server_ts", progress_json))
+
+
+def run_upgrade(*args, **kwargs):
+    pass
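Both this delta and the fts one seed a row in background_updates rather than backfilling synchronously: a background job later walks events in chunks, persisting its position in the progress JSON. A sketch of what the consuming loop looks like under those assumptions (the real updater lives in Synapse's background-updates machinery; this is a self-contained toy):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE events (stream_ordering BIGINT, origin_server_ts BIGINT)")
    conn.executemany("INSERT INTO events VALUES (?, NULL)", [(i,) for i in range(1, 6)])

    def run_backfill_step(progress, batch_size=2):
        # Walk downwards from max_stream_id_exclusive, mirroring how the
        # seeded progress JSON is consumed; returns None when caught up.
        rows = conn.execute(
            "SELECT stream_ordering FROM events"
            " WHERE stream_ordering >= ? AND stream_ordering < ?"
            " ORDER BY stream_ordering DESC LIMIT ?",
            (progress["target_min_stream_id_inclusive"],
             progress["max_stream_id_exclusive"], batch_size),
        ).fetchall()
        if not rows:
            return None
        conn.executemany(
            "UPDATE events SET origin_server_ts = 0 WHERE stream_ordering = ?", rows
        )
        progress["max_stream_id_exclusive"] = rows[-1][0]  # resume below this point
        progress["rows_inserted"] += len(rows)
        return progress

    progress = {"target_min_stream_id_inclusive": 1, "max_stream_id_exclusive": 6, "rows_inserted": 0}
    while progress is not None:
        progress = run_backfill_step(progress)
    print(conn.execute("SELECT COUNT(*) FROM events WHERE origin_server_ts IS NULL").fetchone())
    # (0,)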
diff --git a/synapse/storage/databases/main/schema/delta/28/event_push_actions.sql b/synapse/storage/databases/main/schema/delta/28/event_push_actions.sql
new file mode 100644
index 00000000..4d519849
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/28/event_push_actions.sql
@@ -0,0 +1,27 @@
+/* Copyright 2015 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS event_push_actions(
+ room_id TEXT NOT NULL,
+ event_id TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ profile_tag VARCHAR(32),
+ actions TEXT NOT NULL,
+ CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag)
+);
+
+
+CREATE INDEX event_push_actions_room_id_event_id_user_id_profile_tag on event_push_actions(room_id, event_id, user_id, profile_tag);
+CREATE INDEX event_push_actions_room_id_user_id on event_push_actions(room_id, user_id);
diff --git a/synapse/storage/databases/main/schema/delta/28/events_room_stream.sql b/synapse/storage/databases/main/schema/delta/28/events_room_stream.sql
new file mode 100644
index 00000000..36609475
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/28/events_room_stream.sql
@@ -0,0 +1,20 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+/** Using CREATE INDEX directly is deprecated in favour of using background
+ * updates; see synapse/storage/schema/delta/33/access_tokens_device_index.sql
+ * and synapse/storage/registration.py for an example using
+ * "access_tokens_device_index". **/
+CREATE INDEX events_room_stream on events(room_id, stream_ordering);
diff --git a/synapse/storage/databases/main/schema/delta/28/public_roms_index.sql b/synapse/storage/databases/main/schema/delta/28/public_roms_index.sql
new file mode 100644
index 00000000..6c1fd68c
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/28/public_roms_index.sql
@@ -0,0 +1,20 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+/** Using CREATE INDEX directly is deprecated in favour of using background
+ * updates; see synapse/storage/schema/delta/33/access_tokens_device_index.sql
+ * and synapse/storage/registration.py for an example using
+ * "access_tokens_device_index". **/
+CREATE INDEX public_room_index on rooms(is_public);
diff --git a/synapse/storage/databases/main/schema/delta/28/receipts_user_id_index.sql b/synapse/storage/databases/main/schema/delta/28/receipts_user_id_index.sql
new file mode 100644
index 00000000..cb84c69b
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/28/receipts_user_id_index.sql
@@ -0,0 +1,22 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Using CREATE INDEX directly is deprecated in favour of using background
+ * updates; see synapse/storage/schema/delta/33/access_tokens_device_index.sql
+ * and synapse/storage/registration.py for an example using
+ * "access_tokens_device_index". **/
+CREATE INDEX receipts_linearized_user ON receipts_linearized(
+ user_id
+);
diff --git a/synapse/storage/databases/main/schema/delta/28/upgrade_times.sql b/synapse/storage/databases/main/schema/delta/28/upgrade_times.sql
new file mode 100644
index 00000000..3e4a9ab4
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/28/upgrade_times.sql
@@ -0,0 +1,21 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Stores the timestamp when a user upgraded from a guest to a full user, if
+ * that happened.
+ */
+
+ALTER TABLE users ADD COLUMN upgrade_ts BIGINT;
diff --git a/synapse/storage/databases/main/schema/delta/28/users_is_guest.sql b/synapse/storage/databases/main/schema/delta/28/users_is_guest.sql
new file mode 100644
index 00000000..21d2b420
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/28/users_is_guest.sql
@@ -0,0 +1,22 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE users ADD is_guest SMALLINT DEFAULT 0 NOT NULL;
+/*
+ * NB: any guest users created between schema versions 27 and 28 will be
+ * incorrectly marked as not guests. We don't bother to fill these in
+ * correctly because guest access was not really complete in 27 anyway,
+ * so it's very unlikely any guest users were created.
+ */
diff --git a/synapse/storage/databases/main/schema/delta/29/push_actions.sql b/synapse/storage/databases/main/schema/delta/29/push_actions.sql
new file mode 100644
index 00000000..84b21cf8
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/29/push_actions.sql
@@ -0,0 +1,35 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE event_push_actions ADD COLUMN topological_ordering BIGINT;
+ALTER TABLE event_push_actions ADD COLUMN stream_ordering BIGINT;
+ALTER TABLE event_push_actions ADD COLUMN notif SMALLINT;
+ALTER TABLE event_push_actions ADD COLUMN highlight SMALLINT;
+
+UPDATE event_push_actions SET stream_ordering = (
+ SELECT stream_ordering FROM events WHERE event_id = event_push_actions.event_id
+), topological_ordering = (
+ SELECT topological_ordering FROM events WHERE event_id = event_push_actions.event_id
+);
+
+UPDATE event_push_actions SET notif = 1, highlight = 0;
+
+/** Using CREATE INDEX directly is deprecated in favour of using background
+ * updates; see synapse/storage/schema/delta/33/access_tokens_device_index.sql
+ * and synapse/storage/registration.py for an example using
+ * "access_tokens_device_index". **/
+CREATE INDEX event_push_actions_rm_tokens on event_push_actions(
+ user_id, room_id, topological_ordering, stream_ordering
+);
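The two UPDATEs backfill the new ordering columns onto existing push actions via correlated subqueries against events. A compact standalone demonstration of the idiom (a sketch, not Synapse code):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.executescript(
        """
        CREATE TABLE events (event_id TEXT, stream_ordering BIGINT);
        CREATE TABLE event_push_actions (event_id TEXT, stream_ordering BIGINT);
        INSERT INTO events VALUES ('$e1', 10), ('$e2', 11);
        INSERT INTO event_push_actions VALUES ('$e1', NULL), ('$e2', NULL);
        """
    )

    # The subquery is re-evaluated per row of event_push_actions,
    # correlated on event_id -- the same shape as the delta above.
    conn.execute(
        """
        UPDATE event_push_actions SET stream_ordering = (
            SELECT stream_ordering FROM events
            WHERE event_id = event_push_actions.event_id
        )
        """
    )
    print(conn.execute("SELECT * FROM event_push_actions").fetchall())
    # [('$e1', 10), ('$e2', 11)]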
diff --git a/synapse/storage/databases/main/schema/delta/30/alias_creator.sql b/synapse/storage/databases/main/schema/delta/30/alias_creator.sql
new file mode 100644
index 00000000..c9d0dde6
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/30/alias_creator.sql
@@ -0,0 +1,16 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE room_aliases ADD COLUMN creator TEXT;
diff --git a/synapse/storage/databases/main/schema/delta/30/as_users.py b/synapse/storage/databases/main/schema/delta/30/as_users.py
new file mode 100644
index 00000000..b42c0271
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/30/as_users.py
@@ -0,0 +1,75 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+from synapse.config.appservice import load_appservices
+
+logger = logging.getLogger(__name__)
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+    # NULL indicates user was not registered by an appservice.
+    try:
+        cur.execute("ALTER TABLE users ADD COLUMN appservice_id TEXT")
+    except Exception:
+        # Maybe we already added the column? Hope so...
+        pass
+
+
+def run_upgrade(cur, database_engine, config, *args, **kwargs):
+    cur.execute("SELECT name FROM users")
+    rows = cur.fetchall()
+
+    config_files = []
+    try:
+        config_files = config.app_service_config_files
+    except AttributeError:
+        logger.warning("Could not get app_service_config_files from config")
+
+    appservices = load_appservices(config.server_name, config_files)
+
+    # Maps appservice ID -> list of user IDs it exclusively owns.
+    owned = {}
+    # Maps user ID -> the appservice ID that first claimed it. (The check
+    # previously looked the user up in `owned`, which is keyed by appservice
+    # ID, so double ownership was never actually detected.)
+    owner_of = {}
+
+    for row in rows:
+        user_id = row[0]
+        for appservice in appservices:
+            if appservice.is_exclusive_user(user_id):
+                if user_id in owner_of:
+                    logger.error(
+                        "user_id %s was owned by more than one application"
+                        " service (IDs %s and %s); assigning arbitrarily to %s"
+                        % (user_id, owner_of[user_id], appservice.id, owner_of[user_id])
+                    )
+                    continue
+                owner_of[user_id] = appservice.id
+                owned.setdefault(appservice.id, []).append(user_id)
+
+    for as_id, user_ids in owned.items():
+        # Update in chunks of at most n users so the parameter list stays
+        # a manageable size.
+        n = 100
+        user_chunks = (user_ids[i : i + n] for i in range(0, len(user_ids), n))
+        for chunk in user_chunks:
+            cur.execute(
+                database_engine.convert_param_style(
+                    "UPDATE users SET appservice_id = ? WHERE name IN (%s)"
+                    % (",".join("?" for _ in chunk),)
+                ),
+                [as_id] + chunk,
+            )
diff --git a/synapse/storage/databases/main/schema/delta/30/deleted_pushers.sql b/synapse/storage/databases/main/schema/delta/30/deleted_pushers.sql
new file mode 100644
index 00000000..712c454a
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/30/deleted_pushers.sql
@@ -0,0 +1,25 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS deleted_pushers(
+ stream_id BIGINT NOT NULL,
+ app_id TEXT NOT NULL,
+ pushkey TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ /* We only track the most recent delete for each app_id, pushkey and user_id. */
+ UNIQUE (app_id, pushkey, user_id)
+);
+
+CREATE INDEX deleted_pushers_stream_id ON deleted_pushers (stream_id);
diff --git a/synapse/storage/databases/main/schema/delta/30/presence_stream.sql b/synapse/storage/databases/main/schema/delta/30/presence_stream.sql
new file mode 100644
index 00000000..606bbb03
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/30/presence_stream.sql
@@ -0,0 +1,30 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE presence_stream(
+    stream_id BIGINT,
+    user_id TEXT,
+    state TEXT,
+    last_active_ts BIGINT,
+    last_federation_update_ts BIGINT,
+    last_user_sync_ts BIGINT,
+    status_msg TEXT,
+    currently_active BOOLEAN
+);
+
+CREATE INDEX presence_stream_id ON presence_stream(stream_id, user_id);
+CREATE INDEX presence_stream_user_id ON presence_stream(user_id);
+CREATE INDEX presence_stream_state ON presence_stream(state);
diff --git a/synapse/storage/databases/main/schema/delta/30/public_rooms.sql b/synapse/storage/databases/main/schema/delta/30/public_rooms.sql
new file mode 100644
index 00000000..f09db4fa
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/30/public_rooms.sql
@@ -0,0 +1,23 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/* This release removes the restriction that published rooms must have an alias,
+ * so we go back and ensure the only 'public' rooms are ones with an alias.
+ * We use (1 = 0) and (1 = 1) so that it works in both postgres and sqlite
+ */
+UPDATE rooms SET is_public = (1 = 0) WHERE is_public = (1 = 1) AND room_id not in (
+ SELECT room_id FROM room_aliases
+);
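(1 = 0) and (1 = 1) evaluate to FALSE/TRUE on Postgres and 0/1 on SQLite, which is why the delta spells booleans that way instead of using literals. A quick check of the SQLite half (a sketch; the NOT IN subquery stands in for room_aliases):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE rooms (room_id TEXT, is_public BOOLEAN)")
    conn.execute("INSERT INTO rooms VALUES ('!a:hs', (1 = 1)), ('!b:hs', (1 = 1))")
    # Pretend only !a:hs has an alias; the other public room is demoted.
    conn.execute(
        "UPDATE rooms SET is_public = (1 = 0)"
        " WHERE is_public = (1 = 1) AND room_id NOT IN (SELECT '!a:hs')"
    )
    print(conn.execute("SELECT * FROM rooms").fetchall())
    # [('!a:hs', 1), ('!b:hs', 0)]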
diff --git a/synapse/storage/databases/main/schema/delta/30/push_rule_stream.sql b/synapse/storage/databases/main/schema/delta/30/push_rule_stream.sql
new file mode 100644
index 00000000..735aa8d5
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/30/push_rule_stream.sql
@@ -0,0 +1,38 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+
+CREATE TABLE push_rules_stream(
+ stream_id BIGINT NOT NULL,
+ event_stream_ordering BIGINT NOT NULL,
+ user_id TEXT NOT NULL,
+ rule_id TEXT NOT NULL,
+ op TEXT NOT NULL, -- One of "ENABLE", "DISABLE", "ACTIONS", "ADD", "DELETE"
+ priority_class SMALLINT,
+ priority INTEGER,
+ conditions TEXT,
+ actions TEXT
+);
+
+-- The extra data for each operation is:
+-- * ENABLE, DISABLE, DELETE: []
+-- * ACTIONS: ["actions"]
+-- * ADD: ["priority_class", "priority", "actions", "conditions"]
+
+-- Index for replication queries.
+CREATE INDEX push_rules_stream_id ON push_rules_stream(stream_id);
+-- Index for /sync queries.
+CREATE INDEX push_rules_stream_user_stream_id on push_rules_stream(user_id, stream_id);
diff --git a/synapse/storage/databases/main/schema/delta/30/threepid_guest_access_tokens.sql b/synapse/storage/databases/main/schema/delta/30/threepid_guest_access_tokens.sql
new file mode 100644
index 00000000..0dd2f136
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/30/threepid_guest_access_tokens.sql
@@ -0,0 +1,24 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Stores guest account access tokens generated for unbound 3pids.
+CREATE TABLE threepid_guest_access_tokens(
+ medium TEXT, -- The medium of the 3pid. Must be "email".
+ address TEXT, -- The 3pid address.
+ guest_access_token TEXT, -- The access token for a guest user for this 3pid.
+ first_inviter TEXT -- User ID of the first user to invite this 3pid to a room.
+);
+
+CREATE UNIQUE INDEX threepid_guest_access_tokens_index ON threepid_guest_access_tokens(medium, address);
diff --git a/synapse/storage/databases/main/schema/delta/31/invites.sql b/synapse/storage/databases/main/schema/delta/31/invites.sql
new file mode 100644
index 00000000..2c57846d
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/31/invites.sql
@@ -0,0 +1,42 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE local_invites(
+ stream_id BIGINT NOT NULL,
+ inviter TEXT NOT NULL,
+ invitee TEXT NOT NULL,
+ event_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ locally_rejected TEXT,
+ replaced_by TEXT
+);
+
+-- Insert all invites for local users into the new local_invites table
+INSERT INTO local_invites SELECT
+ stream_ordering as stream_id,
+ sender as inviter,
+ state_key as invitee,
+ event_id,
+ room_id,
+ NULL as locally_rejected,
+ NULL as replaced_by
+ FROM events
+ NATURAL JOIN current_state_events
+ NATURAL JOIN room_memberships
+ WHERE membership = 'invite' AND state_key IN (SELECT name FROM users);
+
+CREATE INDEX local_invites_id ON local_invites(stream_id);
+CREATE INDEX local_invites_for_user_idx ON local_invites(invitee, locally_rejected, replaced_by, room_id);
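
The composite index above is shaped for the "pending invites for a user"
lookup. A minimal sketch of that query with illustrative ? placeholders; the
statement Synapse actually issues may differ:

-- Invites for a user that have been neither rejected nor replaced
-- (served by local_invites_for_user_idx).
SELECT room_id, event_id, inviter
  FROM local_invites
 WHERE invitee = ?
   AND locally_rejected IS NULL
   AND replaced_by IS NULL;
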
diff --git a/synapse/storage/databases/main/schema/delta/31/local_media_repository_url_cache.sql b/synapse/storage/databases/main/schema/delta/31/local_media_repository_url_cache.sql
new file mode 100644
index 00000000..9efb4280
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/31/local_media_repository_url_cache.sql
@@ -0,0 +1,27 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE local_media_repository_url_cache(
+ url TEXT, -- the URL being cached
+ response_code INTEGER, -- the HTTP response code of this download attempt
+ etag TEXT, -- the etag header of this response
+ expires INTEGER, -- the number of ms this response was valid for
+ og TEXT, -- cache of the OG metadata of this URL as JSON
+ media_id TEXT, -- the media_id, if any, of the URL's content in the repo
+ download_ts BIGINT -- the timestamp of this download attempt
+);
+
+CREATE INDEX local_media_repository_url_cache_by_url_download_ts
+ ON local_media_repository_url_cache(url, download_ts);
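
The (url, download_ts) index supports finding the freshest still-valid cache
entry for a URL. A minimal sketch, assuming millisecond timestamps as the
column comments state; the exact query is illustrative:

-- Most recent cached preview for a URL that has not yet expired
-- (? = now, in ms).
SELECT og, media_id
  FROM local_media_repository_url_cache
 WHERE url = ?
   AND download_ts + expires > ?
 ORDER BY download_ts DESC
 LIMIT 1;
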
diff --git a/synapse/storage/databases/main/schema/delta/31/pushers.py b/synapse/storage/databases/main/schema/delta/31/pushers.py
new file mode 100644
index 00000000..9bb504aa
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/31/pushers.py
@@ -0,0 +1,87 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Change the last_token to last_stream_ordering now that pushers no longer
+# listen on an event stream but instead select out of the event_push_actions
+# table.
+
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def token_to_stream_ordering(token):
+ return int(token[1:].split("_")[0])
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ logger.info("Porting pushers table, delta 31...")
+ cur.execute(
+ """
+ CREATE TABLE IF NOT EXISTS pushers2 (
+ id BIGINT PRIMARY KEY,
+ user_name TEXT NOT NULL,
+ access_token BIGINT DEFAULT NULL,
+ profile_tag VARCHAR(32) NOT NULL,
+ kind VARCHAR(8) NOT NULL,
+ app_id VARCHAR(64) NOT NULL,
+ app_display_name VARCHAR(64) NOT NULL,
+ device_display_name VARCHAR(128) NOT NULL,
+ pushkey TEXT NOT NULL,
+ ts BIGINT NOT NULL,
+ lang VARCHAR(8),
+ data TEXT,
+ last_stream_ordering INTEGER,
+ last_success BIGINT,
+ failing_since BIGINT,
+ UNIQUE (app_id, pushkey, user_name)
+ )
+ """
+ )
+ cur.execute(
+ """SELECT
+ id, user_name, access_token, profile_tag, kind,
+ app_id, app_display_name, device_display_name,
+ pushkey, ts, lang, data, last_token, last_success,
+ failing_since
+ FROM pushers
+ """
+ )
+ count = 0
+ for row in cur.fetchall():
+ row = list(row)
+ row[12] = token_to_stream_ordering(row[12])
+ cur.execute(
+ database_engine.convert_param_style(
+ """
+ INSERT into pushers2 (
+ id, user_name, access_token, profile_tag, kind,
+ app_id, app_display_name, device_display_name,
+ pushkey, ts, lang, data, last_stream_ordering, last_success,
+ failing_since
+ ) values (%s)"""
+ % (",".join(["?" for _ in range(len(row))]))
+ ),
+ row,
+ )
+ count += 1
+ cur.execute("DROP TABLE pushers")
+ cur.execute("ALTER TABLE pushers2 RENAME TO pushers")
+ logger.info("Moved %d pushers to new table", count)
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+ pass
diff --git a/synapse/storage/databases/main/schema/delta/31/pushers_index.sql b/synapse/storage/databases/main/schema/delta/31/pushers_index.sql
new file mode 100644
index 00000000..a82add88
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/31/pushers_index.sql
@@ -0,0 +1,22 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Using CREATE INDEX directly is deprecated in favour of using a background
+ * update; see synapse/storage/schema/delta/33/access_tokens_device_index.sql
+ * and synapse/storage/registration.py for an example using
+ * "access_tokens_device_index". **/
+CREATE INDEX event_push_actions_stream_ordering on event_push_actions(
+    stream_ordering, user_id
+);
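
The index exists so that pushers, which now select out of event_push_actions
rather than listening on an event stream, can page through actions beyond
their last position. A minimal sketch of that read pattern; the columns
selected and the bounds are illustrative:

-- Push actions past the pusher's last_stream_ordering
-- (served by event_push_actions_stream_ordering).
SELECT event_id, actions, stream_ordering
  FROM event_push_actions
 WHERE stream_ordering > ? AND stream_ordering <= ? AND user_id = ?
 ORDER BY stream_ordering ASC;
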
diff --git a/synapse/storage/databases/main/schema/delta/31/search_update.py b/synapse/storage/databases/main/schema/delta/31/search_update.py
new file mode 100644
index 00000000..63b757ad
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/31/search_update.py
@@ -0,0 +1,64 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import logging
+
+from synapse.storage.engines import PostgresEngine
+from synapse.storage.prepare_database import get_statements
+
+logger = logging.getLogger(__name__)
+
+
+ALTER_TABLE = """
+ALTER TABLE event_search ADD COLUMN origin_server_ts BIGINT;
+ALTER TABLE event_search ADD COLUMN stream_ordering BIGINT;
+"""
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ if not isinstance(database_engine, PostgresEngine):
+ return
+
+ for statement in get_statements(ALTER_TABLE.splitlines()):
+ cur.execute(statement)
+
+ cur.execute("SELECT MIN(stream_ordering) FROM events")
+ rows = cur.fetchall()
+ min_stream_id = rows[0][0]
+
+ cur.execute("SELECT MAX(stream_ordering) FROM events")
+ rows = cur.fetchall()
+ max_stream_id = rows[0][0]
+
+ if min_stream_id is not None and max_stream_id is not None:
+ progress = {
+ "target_min_stream_id_inclusive": min_stream_id,
+ "max_stream_id_exclusive": max_stream_id + 1,
+ "rows_inserted": 0,
+ "have_added_indexes": False,
+ }
+ progress_json = json.dumps(progress)
+
+ sql = (
+ "INSERT into background_updates (update_name, progress_json)"
+ " VALUES (?, ?)"
+ )
+
+ sql = database_engine.convert_param_style(sql)
+
+ cur.execute(sql, ("event_search_order", progress_json))
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+ pass
diff --git a/synapse/storage/databases/main/schema/delta/32/events.sql b/synapse/storage/databases/main/schema/delta/32/events.sql
new file mode 100644
index 00000000..1dd0f9e1
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/32/events.sql
@@ -0,0 +1,16 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE events ADD COLUMN received_ts BIGINT;
diff --git a/synapse/storage/databases/main/schema/delta/32/openid.sql b/synapse/storage/databases/main/schema/delta/32/openid.sql
new file mode 100644
index 00000000..36f37b11
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/32/openid.sql
@@ -0,0 +1,9 @@
+
+CREATE TABLE open_id_tokens (
+ token TEXT NOT NULL PRIMARY KEY,
+ ts_valid_until_ms bigint NOT NULL,
+ user_id TEXT NOT NULL,
+ UNIQUE (token)
+);
+
+CREATE index open_id_tokens_ts_valid_until_ms ON open_id_tokens(ts_valid_until_ms);
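
The primary key serves the token-exchange lookup and the secondary index makes
expiry sweeps cheap. A minimal sketch of both, with illustrative ? placeholders
(? = now, in ms):

-- Look up the owner of a token, rejecting expired tokens.
SELECT user_id
  FROM open_id_tokens
 WHERE token = ? AND ts_valid_until_ms >= ?;

-- Periodic sweep of expired tokens
-- (served by open_id_tokens_ts_valid_until_ms).
DELETE FROM open_id_tokens WHERE ts_valid_until_ms < ?;
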
diff --git a/synapse/storage/databases/main/schema/delta/32/pusher_throttle.sql b/synapse/storage/databases/main/schema/delta/32/pusher_throttle.sql
new file mode 100644
index 00000000..d86d30c1
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/32/pusher_throttle.sql
@@ -0,0 +1,23 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE pusher_throttle(
+ pusher BIGINT NOT NULL,
+ room_id TEXT NOT NULL,
+ last_sent_ts BIGINT,
+ throttle_ms BIGINT,
+ PRIMARY KEY (pusher, room_id)
+);
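
The (pusher, room_id) primary key gives one throttle row per pusher/room pair.
A minimal sketch of the read-then-write pattern such a table supports; the
actual backoff policy lives in the pusher code and is not shown here:

-- Read the current throttle state for this pusher/room pair.
SELECT last_sent_ts, throttle_ms
  FROM pusher_throttle
 WHERE pusher = ? AND room_id = ?;

-- After a send, record the send time and the (possibly backed-off) delay.
UPDATE pusher_throttle
   SET last_sent_ts = ?, throttle_ms = ?
 WHERE pusher = ? AND room_id = ?;
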
diff --git a/synapse/storage/databases/main/schema/delta/32/remove_indices.sql b/synapse/storage/databases/main/schema/delta/32/remove_indices.sql
new file mode 100644
index 00000000..2de50d40
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/32/remove_indices.sql
@@ -0,0 +1,33 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- The following indices are redundant; other indices are equivalent to them
+-- or are supersets of them.
+DROP INDEX IF EXISTS events_room_id; -- Prefix of events_room_stream
+DROP INDEX IF EXISTS events_order; -- Prefix of events_order_topo_stream_room
+DROP INDEX IF EXISTS events_topological_ordering; -- Prefix of events_order_topo_stream_room
+DROP INDEX IF EXISTS events_stream_ordering; -- Duplicate of PRIMARY KEY
+DROP INDEX IF EXISTS event_to_state_groups_id; -- Duplicate of PRIMARY KEY
+DROP INDEX IF EXISTS event_push_actions_room_id_event_id_user_id_profile_tag; -- Duplicate of UNIQUE CONSTRAINT
+
+DROP INDEX IF EXISTS st_extrem_id; -- Prefix of UNIQUE CONSTRAINT
+DROP INDEX IF EXISTS event_signatures_id; -- Prefix of UNIQUE CONSTRAINT
+DROP INDEX IF EXISTS redactions_event_id; -- Duplicate of UNIQUE CONSTRAINT
+
+-- The following indices were unused
+DROP INDEX IF EXISTS remote_media_cache_thumbnails_media_id;
+DROP INDEX IF EXISTS evauth_edges_auth_id;
+DROP INDEX IF EXISTS presence_stream_state;
diff --git a/synapse/storage/databases/main/schema/delta/32/reports.sql b/synapse/storage/databases/main/schema/delta/32/reports.sql
new file mode 100644
index 00000000..d1360977
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/32/reports.sql
@@ -0,0 +1,25 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE event_reports(
+ id BIGINT NOT NULL PRIMARY KEY,
+ received_ts BIGINT NOT NULL,
+ room_id TEXT NOT NULL,
+ event_id TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ reason TEXT,
+ content TEXT
+);
diff --git a/synapse/storage/databases/main/schema/delta/33/access_tokens_device_index.sql b/synapse/storage/databases/main/schema/delta/33/access_tokens_device_index.sql
new file mode 100644
index 00000000..61ad3fe3
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/33/access_tokens_device_index.sql
@@ -0,0 +1,17 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('access_tokens_device_index', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/33/devices.sql b/synapse/storage/databases/main/schema/delta/33/devices.sql
new file mode 100644
index 00000000..eca7268d
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/33/devices.sql
@@ -0,0 +1,21 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE devices (
+ user_id TEXT NOT NULL,
+ device_id TEXT NOT NULL,
+ display_name TEXT,
+ CONSTRAINT device_uniqueness UNIQUE (user_id, device_id)
+);
diff --git a/synapse/storage/databases/main/schema/delta/33/devices_for_e2e_keys.sql b/synapse/storage/databases/main/schema/delta/33/devices_for_e2e_keys.sql
new file mode 100644
index 00000000..aa4a3b9f
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/33/devices_for_e2e_keys.sql
@@ -0,0 +1,19 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- make sure that we have a device record for each set of E2E keys, so that the
+-- user can delete them if they like.
+INSERT INTO devices
+ SELECT user_id, device_id, NULL FROM e2e_device_keys_json;
diff --git a/synapse/storage/databases/main/schema/delta/33/devices_for_e2e_keys_clear_unknown_device.sql b/synapse/storage/databases/main/schema/delta/33/devices_for_e2e_keys_clear_unknown_device.sql
new file mode 100644
index 00000000..66715733
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/33/devices_for_e2e_keys_clear_unknown_device.sql
@@ -0,0 +1,20 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- A previous version of the "devices_for_e2e_keys" delta set all the device
+-- names to "unknown device". This wasn't terribly helpful.
+UPDATE devices
+ SET display_name = NULL
+ WHERE display_name = 'unknown device';
diff --git a/synapse/storage/databases/main/schema/delta/33/event_fields.py b/synapse/storage/databases/main/schema/delta/33/event_fields.py
new file mode 100644
index 00000000..a3e81eea
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/33/event_fields.py
@@ -0,0 +1,59 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import logging
+
+from synapse.storage.prepare_database import get_statements
+
+logger = logging.getLogger(__name__)
+
+
+ALTER_TABLE = """
+ALTER TABLE events ADD COLUMN sender TEXT;
+ALTER TABLE events ADD COLUMN contains_url BOOLEAN;
+"""
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ for statement in get_statements(ALTER_TABLE.splitlines()):
+ cur.execute(statement)
+
+ cur.execute("SELECT MIN(stream_ordering) FROM events")
+ rows = cur.fetchall()
+ min_stream_id = rows[0][0]
+
+ cur.execute("SELECT MAX(stream_ordering) FROM events")
+ rows = cur.fetchall()
+ max_stream_id = rows[0][0]
+
+ if min_stream_id is not None and max_stream_id is not None:
+ progress = {
+ "target_min_stream_id_inclusive": min_stream_id,
+ "max_stream_id_exclusive": max_stream_id + 1,
+ "rows_inserted": 0,
+ }
+ progress_json = json.dumps(progress)
+
+ sql = (
+ "INSERT into background_updates (update_name, progress_json)"
+ " VALUES (?, ?)"
+ )
+
+ sql = database_engine.convert_param_style(sql)
+
+ cur.execute(sql, ("event_fields_sender_url", progress_json))
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+ pass
diff --git a/synapse/storage/databases/main/schema/delta/33/remote_media_ts.py b/synapse/storage/databases/main/schema/delta/33/remote_media_ts.py
new file mode 100644
index 00000000..a26057df
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/33/remote_media_ts.py
@@ -0,0 +1,30 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+
+ALTER_TABLE = "ALTER TABLE remote_media_cache ADD COLUMN last_access_ts BIGINT"
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ cur.execute(ALTER_TABLE)
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+ cur.execute(
+ database_engine.convert_param_style(
+ "UPDATE remote_media_cache SET last_access_ts = ?"
+ ),
+ (int(time.time() * 1000),),
+ )
diff --git a/synapse/storage/databases/main/schema/delta/33/user_ips_index.sql b/synapse/storage/databases/main/schema/delta/33/user_ips_index.sql
new file mode 100644
index 00000000..473f75a7
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/33/user_ips_index.sql
@@ -0,0 +1,17 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('user_ips_device_index', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/34/appservice_stream.sql b/synapse/storage/databases/main/schema/delta/34/appservice_stream.sql
new file mode 100644
index 00000000..69e16eda
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/34/appservice_stream.sql
@@ -0,0 +1,23 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS appservice_stream_position(
+ Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row.
+ stream_ordering BIGINT,
+ CHECK (Lock='X')
+);
+
+INSERT INTO appservice_stream_position (stream_ordering)
+ SELECT COALESCE(MAX(stream_ordering), 0) FROM events;
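
The Lock CHAR(1) ... CHECK (Lock='X') idiom pins the table to a single row,
so readers and writers never need a WHERE clause to find it. A minimal sketch
with an illustrative ? placeholder:

-- Read the single stream-position row.
SELECT stream_ordering FROM appservice_stream_position;

-- Advance it; the UNIQUE and CHECK constraints guarantee exactly one row.
UPDATE appservice_stream_position SET stream_ordering = ?;
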
diff --git a/synapse/storage/databases/main/schema/delta/34/cache_stream.py b/synapse/storage/databases/main/schema/delta/34/cache_stream.py
new file mode 100644
index 00000000..cf09e43e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/34/cache_stream.py
@@ -0,0 +1,46 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from synapse.storage.engines import PostgresEngine
+from synapse.storage.prepare_database import get_statements
+
+logger = logging.getLogger(__name__)
+
+
+# This stream is used to notify replication slaves of cache invalidations
+# that they cannot infer from the other streams.
+CREATE_TABLE = """
+CREATE TABLE cache_invalidation_stream (
+ stream_id BIGINT,
+ cache_func TEXT,
+ keys TEXT[],
+ invalidation_ts BIGINT
+);
+
+CREATE INDEX cache_invalidation_stream_id ON cache_invalidation_stream(stream_id);
+"""
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ if not isinstance(database_engine, PostgresEngine):
+ return
+
+ for statement in get_statements(CREATE_TABLE.splitlines()):
+ cur.execute(statement)
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+ pass
diff --git a/synapse/storage/databases/main/schema/delta/34/device_inbox.sql b/synapse/storage/databases/main/schema/delta/34/device_inbox.sql
new file mode 100644
index 00000000..e68844c7
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/34/device_inbox.sql
@@ -0,0 +1,24 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE device_inbox (
+ user_id TEXT NOT NULL,
+ device_id TEXT NOT NULL,
+ stream_id BIGINT NOT NULL,
+    message_json TEXT NOT NULL -- JSON-encoded message: {"type": ..., "sender": ..., "content": ...}
+);
+
+CREATE INDEX device_inbox_user_stream_id ON device_inbox(user_id, device_id, stream_id);
+CREATE INDEX device_inbox_stream_id ON device_inbox(stream_id);
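
The first index is shaped for delivering a device's pending messages in
stream order; deleting up to an acknowledged position then drains the inbox.
A minimal sketch of that fetch-then-ack pattern, with illustrative bounds:

-- Pending to-device messages for a device, in stream order
-- (served by device_inbox_user_stream_id).
SELECT stream_id, message_json
  FROM device_inbox
 WHERE user_id = ? AND device_id = ? AND stream_id > ? AND stream_id <= ?
 ORDER BY stream_id ASC;

-- Once delivered, drop everything up to the acknowledged position.
DELETE FROM device_inbox
 WHERE user_id = ? AND device_id = ? AND stream_id <= ?;
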
diff --git a/synapse/storage/databases/main/schema/delta/34/push_display_name_rename.sql b/synapse/storage/databases/main/schema/delta/34/push_display_name_rename.sql
new file mode 100644
index 00000000..0d9fe1a9
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/34/push_display_name_rename.sql
@@ -0,0 +1,20 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+DELETE FROM push_rules WHERE rule_id = 'global/override/.m.rule.contains_display_name';
+UPDATE push_rules SET rule_id = 'global/override/.m.rule.contains_display_name' WHERE rule_id = 'global/underride/.m.rule.contains_display_name';
+
+DELETE FROM push_rules_enable WHERE rule_id = 'global/override/.m.rule.contains_display_name';
+UPDATE push_rules_enable SET rule_id = 'global/override/.m.rule.contains_display_name' WHERE rule_id = 'global/underride/.m.rule.contains_display_name';
diff --git a/synapse/storage/databases/main/schema/delta/34/received_txn_purge.py b/synapse/storage/databases/main/schema/delta/34/received_txn_purge.py
new file mode 100644
index 00000000..67d505e6
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/34/received_txn_purge.py
@@ -0,0 +1,32 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from synapse.storage.engines import PostgresEngine
+
+logger = logging.getLogger(__name__)
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ if isinstance(database_engine, PostgresEngine):
+ cur.execute("TRUNCATE received_transactions")
+ else:
+ cur.execute("DELETE FROM received_transactions")
+
+ cur.execute("CREATE INDEX received_transactions_ts ON received_transactions(ts)")
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+ pass
diff --git a/synapse/storage/databases/main/schema/delta/35/contains_url.sql b/synapse/storage/databases/main/schema/delta/35/contains_url.sql
new file mode 100644
index 00000000..6cd12302
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/35/contains_url.sql
@@ -0,0 +1,17 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (update_name, progress_json)
+  VALUES ('event_contains_url_index', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/35/device_outbox.sql b/synapse/storage/databases/main/schema/delta/35/device_outbox.sql
new file mode 100644
index 00000000..17e6c431
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/35/device_outbox.sql
@@ -0,0 +1,39 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+DROP TABLE IF EXISTS device_federation_outbox;
+CREATE TABLE device_federation_outbox (
+ destination TEXT NOT NULL,
+ stream_id BIGINT NOT NULL,
+ queued_ts BIGINT NOT NULL,
+ messages_json TEXT NOT NULL
+);
+
+
+DROP INDEX IF EXISTS device_federation_outbox_destination_id;
+CREATE INDEX device_federation_outbox_destination_id
+ ON device_federation_outbox(destination, stream_id);
+
+
+DROP TABLE IF EXISTS device_federation_inbox;
+CREATE TABLE device_federation_inbox (
+ origin TEXT NOT NULL,
+ message_id TEXT NOT NULL,
+ received_ts BIGINT NOT NULL
+);
+
+DROP INDEX IF EXISTS device_federation_inbox_sender_id;
+CREATE INDEX device_federation_inbox_sender_id
+ ON device_federation_inbox(origin, message_id);
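
The outbox is drained per destination in stream order, while the inbox table
exists to deduplicate messages received over federation. A minimal sketch of
both read paths, with illustrative placeholders:

-- Have we already processed this message from this origin?
-- (served by device_federation_inbox_sender_id)
SELECT received_ts
  FROM device_federation_inbox
 WHERE origin = ? AND message_id = ?;

-- Queued messages for one destination, oldest first
-- (served by device_federation_outbox_destination_id).
SELECT stream_id, messages_json
  FROM device_federation_outbox
 WHERE destination = ? AND stream_id > ? AND stream_id <= ?
 ORDER BY stream_id ASC;
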
diff --git a/synapse/storage/databases/main/schema/delta/35/device_stream_id.sql b/synapse/storage/databases/main/schema/delta/35/device_stream_id.sql
new file mode 100644
index 00000000..7ab7d942
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/35/device_stream_id.sql
@@ -0,0 +1,21 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE device_max_stream_id (
+ stream_id BIGINT NOT NULL
+);
+
+INSERT INTO device_max_stream_id (stream_id)
+ SELECT COALESCE(MAX(stream_id), 0) FROM device_inbox;
diff --git a/synapse/storage/databases/main/schema/delta/35/event_push_actions_index.sql b/synapse/storage/databases/main/schema/delta/35/event_push_actions_index.sql
new file mode 100644
index 00000000..2e836d8e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/35/event_push_actions_index.sql
@@ -0,0 +1,17 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (update_name, progress_json)
+  VALUES ('epa_highlight_index', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/35/public_room_list_change_stream.sql b/synapse/storage/databases/main/schema/delta/35/public_room_list_change_stream.sql
new file mode 100644
index 00000000..dd2bf2e2
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/35/public_room_list_change_stream.sql
@@ -0,0 +1,33 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE public_room_list_stream (
+ stream_id BIGINT NOT NULL,
+ room_id TEXT NOT NULL,
+ visibility BOOLEAN NOT NULL
+);
+
+INSERT INTO public_room_list_stream (stream_id, room_id, visibility)
+ SELECT 1, room_id, is_public FROM rooms
+ WHERE is_public = CAST(1 AS BOOLEAN);
+
+CREATE INDEX public_room_list_stream_idx on public_room_list_stream(
+ stream_id
+);
+
+CREATE INDEX public_room_list_stream_rm_idx on public_room_list_stream(
+ room_id, stream_id
+);
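
The stream records visibility flips, so a room's current listing state is
whatever its highest-stream_id row says. A minimal sketch of that lookup,
with an illustrative placeholder:

-- Current visibility of a room: its most recent stream entry
-- (served by public_room_list_stream_rm_idx).
SELECT visibility
  FROM public_room_list_stream
 WHERE room_id = ?
 ORDER BY stream_id DESC
 LIMIT 1;
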
diff --git a/synapse/storage/databases/main/schema/delta/35/stream_order_to_extrem.sql b/synapse/storage/databases/main/schema/delta/35/stream_order_to_extrem.sql
new file mode 100644
index 00000000..2b945d8a
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/35/stream_order_to_extrem.sql
@@ -0,0 +1,37 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE stream_ordering_to_exterm (
+ stream_ordering BIGINT NOT NULL,
+ room_id TEXT NOT NULL,
+ event_id TEXT NOT NULL
+);
+
+INSERT INTO stream_ordering_to_exterm (stream_ordering, room_id, event_id)
+ SELECT stream_ordering, room_id, event_id FROM event_forward_extremities
+ INNER JOIN (
+ SELECT room_id, max(stream_ordering) as stream_ordering FROM events
+ INNER JOIN event_forward_extremities USING (room_id, event_id)
+ GROUP BY room_id
+ ) AS rms USING (room_id);
+
+CREATE INDEX stream_ordering_to_exterm_idx on stream_ordering_to_exterm(
+ stream_ordering
+);
+
+CREATE INDEX stream_ordering_to_exterm_rm_idx on stream_ordering_to_exterm(
+ room_id, stream_ordering
+);
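
The table answers "what were the forward extremities of this room as of
stream position S". A minimal sketch of that query; the statement Synapse
actually issues is phrased differently but equivalent in spirit:

-- Forward extremities for a room at or before a given stream position
-- (served by stream_ordering_to_exterm_rm_idx).
SELECT event_id
  FROM stream_ordering_to_exterm
 WHERE room_id = ?
   AND stream_ordering = (
       SELECT MAX(stream_ordering)
         FROM stream_ordering_to_exterm
        WHERE room_id = ? AND stream_ordering <= ?
   );
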
diff --git a/synapse/storage/databases/main/schema/delta/36/readd_public_rooms.sql b/synapse/storage/databases/main/schema/delta/36/readd_public_rooms.sql
new file mode 100644
index 00000000..90d8fd18
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/36/readd_public_rooms.sql
@@ -0,0 +1,26 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Re-add some entries to stream_ordering_to_exterm that were incorrectly deleted
+INSERT INTO stream_ordering_to_exterm (stream_ordering, room_id, event_id)
+ SELECT
+ (SELECT stream_ordering FROM events where event_id = e.event_id) AS stream_ordering,
+ room_id,
+ event_id
+ FROM event_forward_extremities AS e
+ WHERE NOT EXISTS (
+ SELECT room_id FROM stream_ordering_to_exterm AS s
+ WHERE s.room_id = e.room_id
+ );
diff --git a/synapse/storage/databases/main/schema/delta/37/remove_auth_idx.py b/synapse/storage/databases/main/schema/delta/37/remove_auth_idx.py
new file mode 100644
index 00000000..a3778841
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/37/remove_auth_idx.py
@@ -0,0 +1,85 @@
+# Copyright 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from synapse.storage.engines import PostgresEngine
+from synapse.storage.prepare_database import get_statements
+
+logger = logging.getLogger(__name__)
+
+DROP_INDICES = """
+-- We only ever query based on event_id
+DROP INDEX IF EXISTS state_events_room_id;
+DROP INDEX IF EXISTS state_events_type;
+DROP INDEX IF EXISTS state_events_state_key;
+
+-- room_id is indexed elsewhere
+DROP INDEX IF EXISTS current_state_events_room_id;
+DROP INDEX IF EXISTS current_state_events_state_key;
+DROP INDEX IF EXISTS current_state_events_type;
+
+DROP INDEX IF EXISTS transactions_have_ref;
+
+-- (topological_ordering, stream_ordering, room_id) seems like a strange index,
+-- and is used incredibly rarely.
+DROP INDEX IF EXISTS events_order_topo_stream_room;
+
+-- an equivalent index to this actually gets re-created in delta 41, because it
+-- turned out that deleting it wasn't a great plan :/. In any case, let's
+-- delete it here, and delta 41 will create a new one with an added UNIQUE
+-- constraint
+DROP INDEX IF EXISTS event_search_ev_idx;
+"""
+
+POSTGRES_DROP_CONSTRAINT = """
+ALTER TABLE event_auth DROP CONSTRAINT IF EXISTS event_auth_event_id_auth_id_room_id_key;
+"""
+
+SQLITE_DROP_CONSTRAINT = """
+DROP INDEX IF EXISTS evauth_edges_id;
+
+CREATE TABLE IF NOT EXISTS event_auth_new(
+ event_id TEXT NOT NULL,
+ auth_id TEXT NOT NULL,
+ room_id TEXT NOT NULL
+);
+
+INSERT INTO event_auth_new
+ SELECT event_id, auth_id, room_id
+ FROM event_auth;
+
+DROP TABLE event_auth;
+
+ALTER TABLE event_auth_new RENAME TO event_auth;
+
+CREATE INDEX evauth_edges_id ON event_auth(event_id);
+"""
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ for statement in get_statements(DROP_INDICES.splitlines()):
+ cur.execute(statement)
+
+ if isinstance(database_engine, PostgresEngine):
+ drop_constraint = POSTGRES_DROP_CONSTRAINT
+ else:
+ drop_constraint = SQLITE_DROP_CONSTRAINT
+
+ for statement in get_statements(drop_constraint.splitlines()):
+ cur.execute(statement)
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+ pass
diff --git a/synapse/storage/databases/main/schema/delta/37/user_threepids.sql b/synapse/storage/databases/main/schema/delta/37/user_threepids.sql
new file mode 100644
index 00000000..cf7a90dd
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/37/user_threepids.sql
@@ -0,0 +1,52 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Update any email addresses that were stored with mixed case into all
+ * lowercase
+ */
+
+ -- There may be "duplicate" emails (with different case) already in the table,
+ -- so we find them and set aside all but the most recently created binding.
+ UPDATE user_threepids
+ SET medium = 'email_old'
+ WHERE medium = 'email'
+ AND address IN (
+ -- We select all the addresses that are linked to the user_id that is NOT
+ -- the most recently created.
+ SELECT u.address
+ FROM
+ user_threepids AS u,
+ -- `duplicate_addresses` is a table of all the email addresses that
+ -- appear multiple times and when the binding was created
+ (
+ SELECT lower(u1.address) AS address, max(u1.added_at) AS max_ts
+ FROM user_threepids AS u1
+ INNER JOIN user_threepids AS u2 ON u1.medium = u2.medium AND lower(u1.address) = lower(u2.address) AND u1.address != u2.address
+ WHERE u1.medium = 'email' AND u2.medium = 'email'
+ GROUP BY lower(u1.address)
+ ) AS duplicate_addresses
+ WHERE
+ lower(u.address) = duplicate_addresses.address
+ AND u.added_at != max_ts -- NOT the most recently created
+ );
+
+
+-- This update is now safe since we've removed the duplicate addresses.
+UPDATE user_threepids SET address = LOWER(address) WHERE medium = 'email';
+
+
+/* Add an index for the select we do on password reset */
+CREATE INDEX user_threepids_medium_address on user_threepids (medium, address);
diff --git a/synapse/storage/databases/main/schema/delta/38/postgres_fts_gist.sql b/synapse/storage/databases/main/schema/delta/38/postgres_fts_gist.sql
new file mode 100644
index 00000000..515e6b8e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/38/postgres_fts_gist.sql
@@ -0,0 +1,19 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We no longer do this given we back it out again in schema 47
+
+-- INSERT into background_updates (update_name, progress_json)
+-- VALUES ('event_search_postgres_gist', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/39/appservice_room_list.sql b/synapse/storage/databases/main/schema/delta/39/appservice_room_list.sql
new file mode 100644
index 00000000..74bdc490
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/39/appservice_room_list.sql
@@ -0,0 +1,29 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE appservice_room_list(
+ appservice_id TEXT NOT NULL,
+ network_id TEXT NOT NULL,
+ room_id TEXT NOT NULL
+);
+
+-- Each appservice can have multiple published room lists associated with it,
+-- keyed off a particular network_id.
+CREATE UNIQUE INDEX appservice_room_list_idx ON appservice_room_list(
+ appservice_id, network_id, room_id
+);
+
+ALTER TABLE public_room_list_stream ADD COLUMN appservice_id TEXT;
+ALTER TABLE public_room_list_stream ADD COLUMN network_id TEXT;
diff --git a/synapse/storage/databases/main/schema/delta/39/device_federation_stream_idx.sql b/synapse/storage/databases/main/schema/delta/39/device_federation_stream_idx.sql
new file mode 100644
index 00000000..00be801e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/39/device_federation_stream_idx.sql
@@ -0,0 +1,16 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE INDEX device_federation_outbox_id ON device_federation_outbox(stream_id);
diff --git a/synapse/storage/databases/main/schema/delta/39/event_push_index.sql b/synapse/storage/databases/main/schema/delta/39/event_push_index.sql
new file mode 100644
index 00000000..de2ad93e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/39/event_push_index.sql
@@ -0,0 +1,17 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('event_push_actions_highlights_index', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/39/federation_out_position.sql b/synapse/storage/databases/main/schema/delta/39/federation_out_position.sql
new file mode 100644
index 00000000..5af81429
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/39/federation_out_position.sql
@@ -0,0 +1,22 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE federation_stream_position(
+    type TEXT NOT NULL,
+    stream_id INTEGER NOT NULL
+);
+
+INSERT INTO federation_stream_position (type, stream_id) VALUES ('federation', -1);
+INSERT INTO federation_stream_position (type, stream_id) SELECT 'events', coalesce(max(stream_ordering), -1) FROM events;
diff --git a/synapse/storage/databases/main/schema/delta/39/membership_profile.sql b/synapse/storage/databases/main/schema/delta/39/membership_profile.sql
new file mode 100644
index 00000000..1bf911c8
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/39/membership_profile.sql
@@ -0,0 +1,20 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE room_memberships ADD COLUMN display_name TEXT;
+ALTER TABLE room_memberships ADD COLUMN avatar_url TEXT;
+
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('room_membership_profile_update', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/40/current_state_idx.sql b/synapse/storage/databases/main/schema/delta/40/current_state_idx.sql
new file mode 100644
index 00000000..7ffa189f
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/40/current_state_idx.sql
@@ -0,0 +1,17 @@
+/* Copyright 2017 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('current_state_members_idx', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/40/device_inbox.sql b/synapse/storage/databases/main/schema/delta/40/device_inbox.sql
new file mode 100644
index 00000000..b9fe1f04
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/40/device_inbox.sql
@@ -0,0 +1,21 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Turn the pre-fill startup query into an index-only scan on PostgreSQL.
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('device_inbox_stream_index', '{}');
+
+INSERT into background_updates (update_name, progress_json, depends_on)
+ VALUES ('device_inbox_stream_drop', '{}', 'device_inbox_stream_index');
diff --git a/synapse/storage/databases/main/schema/delta/40/device_list_streams.sql b/synapse/storage/databases/main/schema/delta/40/device_list_streams.sql
new file mode 100644
index 00000000..dd6dcb65
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/40/device_list_streams.sql
@@ -0,0 +1,60 @@
+/* Copyright 2017 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Cache of remote devices.
+CREATE TABLE device_lists_remote_cache (
+ user_id TEXT NOT NULL,
+ device_id TEXT NOT NULL,
+ content TEXT NOT NULL
+);
+
+-- The last update we got for a user. Empty if we're not receiving updates for
+-- that user.
+CREATE TABLE device_lists_remote_extremeties (
+ user_id TEXT NOT NULL,
+ stream_id TEXT NOT NULL
+);
+
+-- we used to create non-unique indexes on these tables, but as of update 52 we create
+-- unique indexes concurrently:
+--
+-- CREATE INDEX device_lists_remote_cache_id ON device_lists_remote_cache(user_id, device_id);
+-- CREATE INDEX device_lists_remote_extremeties_id ON device_lists_remote_extremeties(user_id, stream_id);
+
+
+-- Stream of device list updates. Includes both local and remote devices.
+CREATE TABLE device_lists_stream (
+ stream_id BIGINT NOT NULL,
+ user_id TEXT NOT NULL,
+ device_id TEXT NOT NULL
+);
+
+CREATE INDEX device_lists_stream_id ON device_lists_stream(stream_id, user_id);
+
+
+-- The stream of updates to send to other servers. We keep at least one row
+-- per user that was sent so that the prev_id for any new updates can be
+-- calculated
+CREATE TABLE device_lists_outbound_pokes (
+ destination TEXT NOT NULL,
+ stream_id BIGINT NOT NULL,
+ user_id TEXT NOT NULL,
+ device_id TEXT NOT NULL,
+ sent BOOLEAN NOT NULL,
+ ts BIGINT NOT NULL -- So that in future we can clear out pokes to dead servers
+);
+
+CREATE INDEX device_lists_outbound_pokes_id ON device_lists_outbound_pokes(destination, stream_id);
+CREATE INDEX device_lists_outbound_pokes_user ON device_lists_outbound_pokes(destination, user_id);
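
To calculate the prev_id mentioned above, the sender needs the stream_ids it
last successfully sent to a destination for a given user. A minimal sketch of
that lookup; the sent-flag comparison is parameterised to stay portable
between SQLite and Postgres:

-- Latest update already sent to this destination for this user,
-- used as the prev_id on the next outbound update
-- (served by device_lists_outbound_pokes_user).
SELECT MAX(stream_id)
  FROM device_lists_outbound_pokes
 WHERE destination = ? AND user_id = ? AND sent = ?;
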
diff --git a/synapse/storage/databases/main/schema/delta/40/event_push_summary.sql b/synapse/storage/databases/main/schema/delta/40/event_push_summary.sql
new file mode 100644
index 00000000..3918f0b7
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/40/event_push_summary.sql
@@ -0,0 +1,37 @@
+/* Copyright 2017 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Aggregate of old notification counts that have been deleted out of the
+-- main event_push_actions table. This count does not include those that were
+-- highlights, as they remain in the event_push_actions table.
+CREATE TABLE event_push_summary (
+ user_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ notif_count BIGINT NOT NULL,
+ stream_ordering BIGINT NOT NULL
+);
+
+CREATE INDEX event_push_summary_user_rm ON event_push_summary(user_id, room_id);
+
+
+-- The stream ordering up to which we have aggregated the event_push_actions
+-- table into event_push_summary
+CREATE TABLE event_push_summary_stream_ordering (
+ Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row.
+ stream_ordering BIGINT NOT NULL,
+ CHECK (Lock='X')
+);
+
+INSERT INTO event_push_summary_stream_ordering (stream_ordering) VALUES (0);
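+
+-- because of the single-row lock above, later code advances the aggregated
+-- position with an UPDATE rather than an INSERT; roughly (a sketch):
+--
+--   UPDATE event_push_summary_stream_ordering SET stream_ordering = ?;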
diff --git a/synapse/storage/databases/main/schema/delta/40/pushers.sql b/synapse/storage/databases/main/schema/delta/40/pushers.sql
new file mode 100644
index 00000000..054a223f
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/40/pushers.sql
@@ -0,0 +1,39 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS pushers2 (
+ id BIGINT PRIMARY KEY,
+ user_name TEXT NOT NULL,
+ access_token BIGINT DEFAULT NULL,
+ profile_tag TEXT NOT NULL,
+ kind TEXT NOT NULL,
+ app_id TEXT NOT NULL,
+ app_display_name TEXT NOT NULL,
+ device_display_name TEXT NOT NULL,
+ pushkey TEXT NOT NULL,
+ ts BIGINT NOT NULL,
+ lang TEXT,
+ data TEXT,
+ last_stream_ordering INTEGER,
+ last_success BIGINT,
+ failing_since BIGINT,
+ UNIQUE (app_id, pushkey, user_name)
+);
+
+INSERT INTO pushers2 SELECT * FROM pushers;
+
+DROP TABLE pushers;
+
+ALTER TABLE pushers2 RENAME TO pushers;
diff --git a/synapse/storage/databases/main/schema/delta/41/device_list_stream_idx.sql b/synapse/storage/databases/main/schema/delta/41/device_list_stream_idx.sql
new file mode 100644
index 00000000..b7bee8b6
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/41/device_list_stream_idx.sql
@@ -0,0 +1,17 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('device_lists_stream_idx', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/41/device_outbound_index.sql b/synapse/storage/databases/main/schema/delta/41/device_outbound_index.sql
new file mode 100644
index 00000000..62f0b989
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/41/device_outbound_index.sql
@@ -0,0 +1,16 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE INDEX device_lists_outbound_pokes_stream ON device_lists_outbound_pokes(stream_id);
diff --git a/synapse/storage/databases/main/schema/delta/41/event_search_event_id_idx.sql b/synapse/storage/databases/main/schema/delta/41/event_search_event_id_idx.sql
new file mode 100644
index 00000000..5d9cfecf
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/41/event_search_event_id_idx.sql
@@ -0,0 +1,17 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('event_search_event_id_idx', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/41/ratelimit.sql b/synapse/storage/databases/main/schema/delta/41/ratelimit.sql
new file mode 100644
index 00000000..a194bf02
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/41/ratelimit.sql
@@ -0,0 +1,22 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE ratelimit_override (
+ user_id TEXT NOT NULL,
+ messages_per_second BIGINT,
+ burst_count BIGINT
+);
+
+CREATE UNIQUE INDEX ratelimit_override_idx ON ratelimit_override(user_id);
diff --git a/synapse/storage/databases/main/schema/delta/42/current_state_delta.sql b/synapse/storage/databases/main/schema/delta/42/current_state_delta.sql
new file mode 100644
index 00000000..d28851af
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/42/current_state_delta.sql
@@ -0,0 +1,26 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE current_state_delta_stream (
+ stream_id BIGINT NOT NULL,
+ room_id TEXT NOT NULL,
+ type TEXT NOT NULL,
+ state_key TEXT NOT NULL,
+ event_id TEXT, -- Is null if the key was removed
+ prev_event_id TEXT -- Is null if the key was added
+);
+
+CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream(stream_id);
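+
+-- for illustration: an added key is recorded as (event_id, prev_event_id) =
+-- (new_event, NULL), a changed key as (new_event, old_event), and a removed
+-- key as (NULL, old_event).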
diff --git a/synapse/storage/databases/main/schema/delta/42/device_list_last_id.sql b/synapse/storage/databases/main/schema/delta/42/device_list_last_id.sql
new file mode 100644
index 00000000..9ab8c14f
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/42/device_list_last_id.sql
@@ -0,0 +1,33 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- Table of last stream_id that we sent to destination for user_id. This is
+-- used to fill out the `prev_id` fields of outbound device list updates.
+CREATE TABLE device_lists_outbound_last_success (
+ destination TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ stream_id BIGINT NOT NULL
+);
+
+INSERT INTO device_lists_outbound_last_success
+ SELECT destination, user_id, coalesce(max(stream_id), 0) as stream_id
+ FROM device_lists_outbound_pokes
+ WHERE sent = (1 = 1) -- sqlite doesn't have inbuilt boolean values
+ GROUP BY destination, user_id;
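+
+-- ((1 = 1) evaluates to TRUE on postgres and to 1 on sqlite, so the
+-- comparison above matches rows inserted with either engine's representation
+-- of a true `sent` flag.)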
+
+CREATE INDEX device_lists_outbound_last_success_idx ON device_lists_outbound_last_success(
+ destination, user_id, stream_id
+);
diff --git a/synapse/storage/databases/main/schema/delta/42/event_auth_state_only.sql b/synapse/storage/databases/main/schema/delta/42/event_auth_state_only.sql
new file mode 100644
index 00000000..b8821ac7
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/42/event_auth_state_only.sql
@@ -0,0 +1,17 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('event_auth_state_only', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/42/user_dir.py b/synapse/storage/databases/main/schema/delta/42/user_dir.py
new file mode 100644
index 00000000..506f326f
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/42/user_dir.py
@@ -0,0 +1,84 @@
+# Copyright 2017 Vector Creations Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from synapse.storage.engines import PostgresEngine, Sqlite3Engine
+from synapse.storage.prepare_database import get_statements
+
+logger = logging.getLogger(__name__)
+
+
+BOTH_TABLES = """
+CREATE TABLE user_directory_stream_pos (
+ Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row.
+ stream_id BIGINT,
+ CHECK (Lock='X')
+);
+
+INSERT INTO user_directory_stream_pos (stream_id) VALUES (null);
+
+CREATE TABLE user_directory (
+ user_id TEXT NOT NULL,
+ room_id TEXT NOT NULL, -- A room_id that we know the user is joined to
+ display_name TEXT,
+ avatar_url TEXT
+);
+
+CREATE INDEX user_directory_room_idx ON user_directory(room_id);
+CREATE UNIQUE INDEX user_directory_user_idx ON user_directory(user_id);
+
+CREATE TABLE users_in_pubic_room (
+ user_id TEXT NOT NULL,
+ room_id TEXT NOT NULL -- A room_id that we know is public
+);
+
+CREATE INDEX users_in_pubic_room_room_idx ON users_in_pubic_room(room_id);
+CREATE UNIQUE INDEX users_in_pubic_room_user_idx ON users_in_pubic_room(user_id);
+"""
+
+
+POSTGRES_TABLE = """
+CREATE TABLE user_directory_search (
+ user_id TEXT NOT NULL,
+ vector tsvector
+);
+
+CREATE INDEX user_directory_search_fts_idx ON user_directory_search USING gin(vector);
+CREATE UNIQUE INDEX user_directory_search_user_idx ON user_directory_search(user_id);
+"""
+
+
+SQLITE_TABLE = """
+CREATE VIRTUAL TABLE user_directory_search
+ USING fts4 ( user_id, value );
+"""
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ for statement in get_statements(BOTH_TABLES.splitlines()):
+ cur.execute(statement)
+
+ if isinstance(database_engine, PostgresEngine):
+ for statement in get_statements(POSTGRES_TABLE.splitlines()):
+ cur.execute(statement)
+ elif isinstance(database_engine, Sqlite3Engine):
+ for statement in get_statements(SQLITE_TABLE.splitlines()):
+ cur.execute(statement)
+ else:
+ raise Exception("Unrecognized database engine")
+
+
+def run_upgrade(*args, **kwargs):
+ pass
diff --git a/synapse/storage/databases/main/schema/delta/43/blocked_rooms.sql b/synapse/storage/databases/main/schema/delta/43/blocked_rooms.sql
new file mode 100644
index 00000000..0e3cd143
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/43/blocked_rooms.sql
@@ -0,0 +1,21 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE blocked_rooms (
+ room_id TEXT NOT NULL,
+ user_id TEXT NOT NULL -- Admin who blocked the room
+);
+
+CREATE UNIQUE INDEX blocked_rooms_idx ON blocked_rooms(room_id);
diff --git a/synapse/storage/databases/main/schema/delta/43/quarantine_media.sql b/synapse/storage/databases/main/schema/delta/43/quarantine_media.sql
new file mode 100644
index 00000000..630907ec
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/43/quarantine_media.sql
@@ -0,0 +1,17 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE local_media_repository ADD COLUMN quarantined_by TEXT;
+ALTER TABLE remote_media_cache ADD COLUMN quarantined_by TEXT;
diff --git a/synapse/storage/databases/main/schema/delta/43/url_cache.sql b/synapse/storage/databases/main/schema/delta/43/url_cache.sql
new file mode 100644
index 00000000..45ebe020
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/43/url_cache.sql
@@ -0,0 +1,16 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE local_media_repository ADD COLUMN url_cache TEXT;
diff --git a/synapse/storage/databases/main/schema/delta/43/user_share.sql b/synapse/storage/databases/main/schema/delta/43/user_share.sql
new file mode 100644
index 00000000..ee7062ab
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/43/user_share.sql
@@ -0,0 +1,33 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Table keeping track of who shares a room with whom. We only keep track
+-- of this for local users, so `user_id` is always a local user (but we do
+-- keep track of which remote users share a room).
+CREATE TABLE users_who_share_rooms (
+ user_id TEXT NOT NULL,
+ other_user_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ share_private BOOLEAN NOT NULL -- is the shared room private? i.e. they share a private room
+);
+
+
+CREATE UNIQUE INDEX users_who_share_rooms_u_idx ON users_who_share_rooms(user_id, other_user_id);
+CREATE INDEX users_who_share_rooms_r_idx ON users_who_share_rooms(room_id);
+CREATE INDEX users_who_share_rooms_o_idx ON users_who_share_rooms(other_user_id);
+
+
+-- Make sure that we populate the table initially
+UPDATE user_directory_stream_pos SET stream_id = NULL;
diff --git a/synapse/storage/databases/main/schema/delta/44/expire_url_cache.sql b/synapse/storage/databases/main/schema/delta/44/expire_url_cache.sql
new file mode 100644
index 00000000..b12f9b2e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/44/expire_url_cache.sql
@@ -0,0 +1,41 @@
+/* Copyright 2017 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- this didn't work on SQLite 3.7 (because of lack of partial indexes), so was
+-- removed and replaced with 46/local_media_repository_url_idx.sql.
+--
+-- CREATE INDEX local_media_repository_url_idx ON local_media_repository(created_ts) WHERE url_cache IS NOT NULL;
+
+-- we need to change `expires` to `expires_ts` so that we can index on it. SQLite doesn't support
+-- indices on expressions until 3.9.
+CREATE TABLE local_media_repository_url_cache_new(
+ url TEXT,
+ response_code INTEGER,
+ etag TEXT,
+ expires_ts BIGINT,
+ og TEXT,
+ media_id TEXT,
+ download_ts BIGINT
+);
+
+INSERT INTO local_media_repository_url_cache_new
+ SELECT url, response_code, etag, expires + download_ts, og, media_id, download_ts FROM local_media_repository_url_cache;
+
+DROP TABLE local_media_repository_url_cache;
+ALTER TABLE local_media_repository_url_cache_new RENAME TO local_media_repository_url_cache;
+
+CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache(expires_ts);
+CREATE INDEX local_media_repository_url_cache_by_url_download_ts ON local_media_repository_url_cache(url, download_ts);
+CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repository_url_cache(media_id);
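+
+-- for illustration only: the expires_ts index above exists so that expired
+-- entries can be swept with something like
+--
+--   DELETE FROM local_media_repository_url_cache WHERE expires_ts < ?;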
diff --git a/synapse/storage/databases/main/schema/delta/45/group_server.sql b/synapse/storage/databases/main/schema/delta/45/group_server.sql
new file mode 100644
index 00000000..b2333848
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/45/group_server.sql
@@ -0,0 +1,167 @@
+/* Copyright 2017 Vector Creations Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE groups (
+ group_id TEXT NOT NULL,
+ name TEXT, -- the display name of the room
+ avatar_url TEXT,
+ short_description TEXT,
+ long_description TEXT
+);
+
+CREATE UNIQUE INDEX groups_idx ON groups(group_id);
+
+
+-- list of users the group server thinks are joined
+CREATE TABLE group_users (
+ group_id TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ is_admin BOOLEAN NOT NULL,
+    is_public BOOLEAN NOT NULL  -- whether the user's membership can be seen by everyone
+);
+
+
+CREATE INDEX groups_users_g_idx ON group_users(group_id, user_id);
+CREATE INDEX groups_users_u_idx ON group_users(user_id);
+
+-- list of users the group server thinks are invited
+CREATE TABLE group_invites (
+ group_id TEXT NOT NULL,
+ user_id TEXT NOT NULL
+);
+
+CREATE INDEX groups_invites_g_idx ON group_invites(group_id, user_id);
+CREATE INDEX groups_invites_u_idx ON group_invites(user_id);
+
+
+CREATE TABLE group_rooms (
+ group_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ is_public BOOLEAN NOT NULL -- whether the room can be seen by everyone
+);
+
+CREATE UNIQUE INDEX groups_rooms_g_idx ON group_rooms(group_id, room_id);
+CREATE INDEX groups_rooms_r_idx ON group_rooms(room_id);
+
+
+-- Rooms to include in the summary
+CREATE TABLE group_summary_rooms (
+ group_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ category_id TEXT NOT NULL,
+ room_order BIGINT NOT NULL,
+    is_public BOOLEAN NOT NULL, -- whether the room should be shown to everyone
+ UNIQUE (group_id, category_id, room_id, room_order),
+ CHECK (room_order > 0)
+);
+
+CREATE UNIQUE INDEX group_summary_rooms_g_idx ON group_summary_rooms(group_id, room_id, category_id);
+
+
+-- Categories to include in the summary
+CREATE TABLE group_summary_room_categories (
+ group_id TEXT NOT NULL,
+ category_id TEXT NOT NULL,
+ cat_order BIGINT NOT NULL,
+ UNIQUE (group_id, category_id, cat_order),
+ CHECK (cat_order > 0)
+);
+
+-- The categories in the group
+CREATE TABLE group_room_categories (
+ group_id TEXT NOT NULL,
+ category_id TEXT NOT NULL,
+ profile TEXT NOT NULL,
+    is_public BOOLEAN NOT NULL, -- whether the category should be shown to everyone
+ UNIQUE (group_id, category_id)
+);
+
+-- The users to include in the group summary
+CREATE TABLE group_summary_users (
+ group_id TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ role_id TEXT NOT NULL,
+ user_order BIGINT NOT NULL,
+    is_public BOOLEAN NOT NULL  -- whether the user should be shown to everyone
+);
+
+CREATE INDEX group_summary_users_g_idx ON group_summary_users(group_id);
+
+-- The roles to include in the group summary
+CREATE TABLE group_summary_roles (
+ group_id TEXT NOT NULL,
+ role_id TEXT NOT NULL,
+ role_order BIGINT NOT NULL,
+ UNIQUE (group_id, role_id, role_order),
+ CHECK (role_order > 0)
+);
+
+
+-- The roles in a group
+CREATE TABLE group_roles (
+ group_id TEXT NOT NULL,
+ role_id TEXT NOT NULL,
+ profile TEXT NOT NULL,
+    is_public BOOLEAN NOT NULL, -- whether the role should be shown to everyone
+ UNIQUE (group_id, role_id)
+);
+
+
+-- List of attestations we've given out and need to renew
+CREATE TABLE group_attestations_renewals (
+ group_id TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ valid_until_ms BIGINT NOT NULL
+);
+
+CREATE INDEX group_attestations_renewals_g_idx ON group_attestations_renewals(group_id, user_id);
+CREATE INDEX group_attestations_renewals_u_idx ON group_attestations_renewals(user_id);
+CREATE INDEX group_attestations_renewals_v_idx ON group_attestations_renewals(valid_until_ms);
+
+
+-- List of attestations we've received from remotes and are interested in.
+CREATE TABLE group_attestations_remote (
+ group_id TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ valid_until_ms BIGINT NOT NULL,
+ attestation_json TEXT NOT NULL
+);
+
+CREATE INDEX group_attestations_remote_g_idx ON group_attestations_remote(group_id, user_id);
+CREATE INDEX group_attestations_remote_u_idx ON group_attestations_remote(user_id);
+CREATE INDEX group_attestations_remote_v_idx ON group_attestations_remote(valid_until_ms);
+
+
+-- The group membership for the HS's users
+CREATE TABLE local_group_membership (
+ group_id TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ is_admin BOOLEAN NOT NULL,
+ membership TEXT NOT NULL,
+ is_publicised BOOLEAN NOT NULL, -- if the user is publicising their membership
+ content TEXT NOT NULL
+);
+
+CREATE INDEX local_group_membership_u_idx ON local_group_membership(user_id, group_id);
+CREATE INDEX local_group_membership_g_idx ON local_group_membership(group_id);
+
+
+CREATE TABLE local_group_updates (
+ stream_id BIGINT NOT NULL,
+ group_id TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ type TEXT NOT NULL,
+ content TEXT NOT NULL
+);
diff --git a/synapse/storage/databases/main/schema/delta/45/profile_cache.sql b/synapse/storage/databases/main/schema/delta/45/profile_cache.sql
new file mode 100644
index 00000000..e5ddc84d
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/45/profile_cache.sql
@@ -0,0 +1,28 @@
+/* Copyright 2017 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- A subset of remote users whose profiles we have cached.
+-- Whether a user is in this table or not is defined by the storage function
+-- `is_subscribed_remote_profile_for_user`
+CREATE TABLE remote_profile_cache (
+ user_id TEXT NOT NULL,
+ displayname TEXT,
+ avatar_url TEXT,
+ last_check BIGINT NOT NULL
+);
+
+CREATE UNIQUE INDEX remote_profile_cache_user_id ON remote_profile_cache(user_id);
+CREATE INDEX remote_profile_cache_time ON remote_profile_cache(last_check);
diff --git a/synapse/storage/databases/main/schema/delta/46/drop_refresh_tokens.sql b/synapse/storage/databases/main/schema/delta/46/drop_refresh_tokens.sql
new file mode 100644
index 00000000..68c48a89
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/46/drop_refresh_tokens.sql
@@ -0,0 +1,17 @@
+/* Copyright 2017 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* we no longer use (or create) the refresh_tokens table */
+DROP TABLE IF EXISTS refresh_tokens;
diff --git a/synapse/storage/databases/main/schema/delta/46/drop_unique_deleted_pushers.sql b/synapse/storage/databases/main/schema/delta/46/drop_unique_deleted_pushers.sql
new file mode 100644
index 00000000..bb307889
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/46/drop_unique_deleted_pushers.sql
@@ -0,0 +1,35 @@
+/* Copyright 2017 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- drop the unique constraint on deleted_pushers so that we can just insert
+-- into it rather than upserting.
+
+CREATE TABLE deleted_pushers2 (
+ stream_id BIGINT NOT NULL,
+ app_id TEXT NOT NULL,
+ pushkey TEXT NOT NULL,
+ user_id TEXT NOT NULL
+);
+
+INSERT INTO deleted_pushers2 (stream_id, app_id, pushkey, user_id)
+ SELECT stream_id, app_id, pushkey, user_id from deleted_pushers;
+
+DROP TABLE deleted_pushers;
+ALTER TABLE deleted_pushers2 RENAME TO deleted_pushers;
+
+-- create the index after doing the inserts because that's more efficient.
+-- it also means we can give it the same name as the old one without renaming.
+CREATE INDEX deleted_pushers_stream_id ON deleted_pushers (stream_id);
+
diff --git a/synapse/storage/databases/main/schema/delta/46/group_server.sql b/synapse/storage/databases/main/schema/delta/46/group_server.sql
new file mode 100644
index 00000000..097679bc
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/46/group_server.sql
@@ -0,0 +1,32 @@
+/* Copyright 2017 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE groups_new (
+ group_id TEXT NOT NULL,
+ name TEXT, -- the display name of the room
+ avatar_url TEXT,
+ short_description TEXT,
+ long_description TEXT,
+ is_public BOOL NOT NULL -- whether non-members can access group APIs
+);
+
+-- NB: awful hack to get the default to be true on postgres and 1 on sqlite
+INSERT INTO groups_new
+ SELECT group_id, name, avatar_url, short_description, long_description, (1=1) FROM groups;
+
+DROP TABLE groups;
+ALTER TABLE groups_new RENAME TO groups;
+
+CREATE UNIQUE INDEX groups_idx ON groups(group_id);
diff --git a/synapse/storage/databases/main/schema/delta/46/local_media_repository_url_idx.sql b/synapse/storage/databases/main/schema/delta/46/local_media_repository_url_idx.sql
new file mode 100644
index 00000000..bbfc7f5d
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/46/local_media_repository_url_idx.sql
@@ -0,0 +1,24 @@
+/* Copyright 2017 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- register a background update which will recreate the
+-- local_media_repository_url_idx index.
+--
+-- We do this as a bg update not because it is a particularly onerous
+-- operation, but because we'd like it to be a partial index if possible, and
+-- the background_index_update code will understand whether we are on
+-- postgres or sqlite and behave accordingly.
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('local_media_repository_url_idx', '{}');
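+
+-- on postgres the result is expected to be the partial index that
+-- 44/expire_url_cache.sql could not create, i.e. roughly:
+--
+--   CREATE INDEX local_media_repository_url_idx
+--       ON local_media_repository(created_ts) WHERE url_cache IS NOT NULL;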
diff --git a/synapse/storage/databases/main/schema/delta/46/user_dir_null_room_ids.sql b/synapse/storage/databases/main/schema/delta/46/user_dir_null_room_ids.sql
new file mode 100644
index 00000000..cb0d5a25
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/46/user_dir_null_room_ids.sql
@@ -0,0 +1,35 @@
+/* Copyright 2017 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- change the user_directory table to also cover global local user profiles
+-- rather than just profiles within specific rooms.
+
+CREATE TABLE user_directory2 (
+ user_id TEXT NOT NULL,
+ room_id TEXT,
+ display_name TEXT,
+ avatar_url TEXT
+);
+
+INSERT INTO user_directory2(user_id, room_id, display_name, avatar_url)
+ SELECT user_id, room_id, display_name, avatar_url from user_directory;
+
+DROP TABLE user_directory;
+ALTER TABLE user_directory2 RENAME TO user_directory;
+
+-- create indexes after doing the inserts because that's more efficient.
+-- it also means we can give them the same names as the old ones without renaming.
+CREATE INDEX user_directory_room_idx ON user_directory(room_id);
+CREATE UNIQUE INDEX user_directory_user_idx ON user_directory(user_id);
diff --git a/synapse/storage/databases/main/schema/delta/46/user_dir_typos.sql b/synapse/storage/databases/main/schema/delta/46/user_dir_typos.sql
new file mode 100644
index 00000000..d9505f8d
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/46/user_dir_typos.sql
@@ -0,0 +1,24 @@
+/* Copyright 2017 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- this is just embarrassing :|
+ALTER TABLE users_in_pubic_room RENAME TO users_in_public_rooms;
+
+-- this is only 300K rows on matrix.org and takes ~3s to generate the index,
+-- so is hopefully not going to block anyone else for that long...
+CREATE INDEX users_in_public_rooms_room_idx ON users_in_public_rooms(room_id);
+CREATE UNIQUE INDEX users_in_public_rooms_user_idx ON users_in_public_rooms(user_id);
+DROP INDEX users_in_pubic_room_room_idx;
+DROP INDEX users_in_pubic_room_user_idx;
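+
+-- (renaming a table does not rename its indexes, hence the explicit creation
+-- of correctly-named indexes and the drop of the misspelled ones above)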
diff --git a/synapse/storage/databases/main/schema/delta/47/last_access_media.sql b/synapse/storage/databases/main/schema/delta/47/last_access_media.sql
new file mode 100644
index 00000000..f505fb22
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/47/last_access_media.sql
@@ -0,0 +1,16 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE local_media_repository ADD COLUMN last_access_ts BIGINT;
diff --git a/synapse/storage/databases/main/schema/delta/47/postgres_fts_gin.sql b/synapse/storage/databases/main/schema/delta/47/postgres_fts_gin.sql
new file mode 100644
index 00000000..31d7a817
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/47/postgres_fts_gin.sql
@@ -0,0 +1,17 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('event_search_postgres_gin', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/47/push_actions_staging.sql b/synapse/storage/databases/main/schema/delta/47/push_actions_staging.sql
new file mode 100644
index 00000000..edccf4a9
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/47/push_actions_staging.sql
@@ -0,0 +1,28 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Temporary staging area for push actions that have been calculated for an
+-- event, but the event hasn't yet been persisted.
+-- When the event is persisted the rows are moved over to the
+-- event_push_actions table.
+CREATE TABLE event_push_actions_staging (
+ event_id TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ actions TEXT NOT NULL,
+ notif SMALLINT NOT NULL,
+ highlight SMALLINT NOT NULL
+);
+
+CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id);
diff --git a/synapse/storage/databases/main/schema/delta/48/add_user_consent.sql b/synapse/storage/databases/main/schema/delta/48/add_user_consent.sql
new file mode 100644
index 00000000..52374915
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/48/add_user_consent.sql
@@ -0,0 +1,18 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* record the version of the privacy policy the user has consented to
+ */
+ALTER TABLE users ADD COLUMN consent_version TEXT;
diff --git a/synapse/storage/databases/main/schema/delta/48/add_user_ips_last_seen_index.sql b/synapse/storage/databases/main/schema/delta/48/add_user_ips_last_seen_index.sql
new file mode 100644
index 00000000..9248b0b2
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/48/add_user_ips_last_seen_index.sql
@@ -0,0 +1,17 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('user_ips_last_seen_index', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/48/deactivated_users.sql b/synapse/storage/databases/main/schema/delta/48/deactivated_users.sql
new file mode 100644
index 00000000..e9013a69
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/48/deactivated_users.sql
@@ -0,0 +1,25 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Store any accounts that have been requested to be deactivated.
+ * We part the account from all the rooms it's in when it's
+ * deactivated. This can take some time and synapse may be restarted
+ * before it completes, so store the user IDs here until the process
+ * is complete.
+ */
+CREATE TABLE users_pending_deactivation (
+ user_id TEXT NOT NULL
+);
diff --git a/synapse/storage/databases/main/schema/delta/48/group_unique_indexes.py b/synapse/storage/databases/main/schema/delta/48/group_unique_indexes.py
new file mode 100644
index 00000000..49f5f2c0
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/48/group_unique_indexes.py
@@ -0,0 +1,63 @@
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.storage.engines import PostgresEngine
+from synapse.storage.prepare_database import get_statements
+
+FIX_INDEXES = """
+-- rebuild indexes as uniques
+DROP INDEX groups_invites_g_idx;
+CREATE UNIQUE INDEX group_invites_g_idx ON group_invites(group_id, user_id);
+DROP INDEX groups_users_g_idx;
+CREATE UNIQUE INDEX group_users_g_idx ON group_users(group_id, user_id);
+
+-- rename other indexes to actually match their table names..
+DROP INDEX groups_users_u_idx;
+CREATE INDEX group_users_u_idx ON group_users(user_id);
+DROP INDEX groups_invites_u_idx;
+CREATE INDEX group_invites_u_idx ON group_invites(user_id);
+DROP INDEX groups_rooms_g_idx;
+CREATE UNIQUE INDEX group_rooms_g_idx ON group_rooms(group_id, room_id);
+DROP INDEX groups_rooms_r_idx;
+CREATE INDEX group_rooms_r_idx ON group_rooms(room_id);
+"""
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ rowid = "ctid" if isinstance(database_engine, PostgresEngine) else "rowid"
+
+ # remove duplicates from group_users & group_invites tables
+ cur.execute(
+ """
+ DELETE FROM group_users WHERE %s NOT IN (
+ SELECT min(%s) FROM group_users GROUP BY group_id, user_id
+ );
+ """
+ % (rowid, rowid)
+ )
+ cur.execute(
+ """
+ DELETE FROM group_invites WHERE %s NOT IN (
+ SELECT min(%s) FROM group_invites GROUP BY group_id, user_id
+ );
+ """
+ % (rowid, rowid)
+ )
+
+ for statement in get_statements(FIX_INDEXES.splitlines()):
+ cur.execute(statement)
+
+
+def run_upgrade(*args, **kwargs):
+ pass
diff --git a/synapse/storage/databases/main/schema/delta/48/groups_joinable.sql b/synapse/storage/databases/main/schema/delta/48/groups_joinable.sql
new file mode 100644
index 00000000..ce26eaf0
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/48/groups_joinable.sql
@@ -0,0 +1,22 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This isn't a real ENUM because sqlite doesn't support it.
+ * We use a default of NULL for inserted rows and interpret
+ * NULL at the python store level as necessary so that existing
+ * rows are given the correct default policy.
+ */
+ALTER TABLE groups ADD COLUMN join_policy TEXT NOT NULL DEFAULT 'invite';
diff --git a/synapse/storage/databases/main/schema/delta/49/add_user_consent_server_notice_sent.sql b/synapse/storage/databases/main/schema/delta/49/add_user_consent_server_notice_sent.sql
new file mode 100644
index 00000000..14dcf18d
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/49/add_user_consent_server_notice_sent.sql
@@ -0,0 +1,20 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* record whether we have sent a server notice about consenting to the
+ * privacy policy. Specifically records the version of the policy we sent
+ * a message about.
+ */
+ALTER TABLE users ADD COLUMN consent_server_notice_sent TEXT;
diff --git a/synapse/storage/databases/main/schema/delta/49/add_user_daily_visits.sql b/synapse/storage/databases/main/schema/delta/49/add_user_daily_visits.sql
new file mode 100644
index 00000000..3dd47819
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/49/add_user_daily_visits.sql
@@ -0,0 +1,21 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+CREATE TABLE user_daily_visits (
+    user_id TEXT NOT NULL,
+    device_id TEXT,
+    timestamp BIGINT NOT NULL
+);
+CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits(user_id, timestamp);
+CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits(timestamp);
diff --git a/synapse/storage/databases/main/schema/delta/49/add_user_ips_last_seen_only_index.sql b/synapse/storage/databases/main/schema/delta/49/add_user_ips_last_seen_only_index.sql
new file mode 100644
index 00000000..3a4ed59b
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/49/add_user_ips_last_seen_only_index.sql
@@ -0,0 +1,17 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('user_ips_last_seen_only_index', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/50/add_creation_ts_users_index.sql b/synapse/storage/databases/main/schema/delta/50/add_creation_ts_users_index.sql
new file mode 100644
index 00000000..c93ae475
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/50/add_creation_ts_users_index.sql
@@ -0,0 +1,19 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('users_creation_ts', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/50/erasure_store.sql b/synapse/storage/databases/main/schema/delta/50/erasure_store.sql
new file mode 100644
index 00000000..5d8641a9
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/50/erasure_store.sql
@@ -0,0 +1,21 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- a table of users who have requested that their details be erased
+CREATE TABLE erased_users (
+ user_id TEXT NOT NULL
+);
+
+CREATE UNIQUE INDEX erased_users_user ON erased_users(user_id);
diff --git a/synapse/storage/databases/main/schema/delta/50/make_event_content_nullable.py b/synapse/storage/databases/main/schema/delta/50/make_event_content_nullable.py
new file mode 100644
index 00000000..b1684a84
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/50/make_event_content_nullable.py
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+We want to stop populating 'event.content', so we need to make it nullable.
+
+If this has to be rolled back, then the following should populate the missing data:
+
+Postgres:
+
+ UPDATE events SET content=(ej.json::json)->'content' FROM event_json ej
+ WHERE ej.event_id = events.event_id AND
+ stream_ordering < (
+ SELECT stream_ordering FROM events WHERE content IS NOT NULL
+ ORDER BY stream_ordering LIMIT 1
+ );
+
+ UPDATE events SET content=(ej.json::json)->'content' FROM event_json ej
+ WHERE ej.event_id = events.event_id AND
+ stream_ordering > (
+ SELECT stream_ordering FROM events WHERE content IS NOT NULL
+ ORDER BY stream_ordering DESC LIMIT 1
+ );
+
+SQLite:
+
+ UPDATE events SET content=(
+ SELECT json_extract(json,'$.content') FROM event_json ej
+ WHERE ej.event_id = events.event_id
+ )
+ WHERE
+ stream_ordering < (
+ SELECT stream_ordering FROM events WHERE content IS NOT NULL
+ ORDER BY stream_ordering LIMIT 1
+ )
+ OR stream_ordering > (
+ SELECT stream_ordering FROM events WHERE content IS NOT NULL
+ ORDER BY stream_ordering DESC LIMIT 1
+ );
+
+"""
+
+import logging
+
+from synapse.storage.engines import PostgresEngine
+
+logger = logging.getLogger(__name__)
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ pass
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+ if isinstance(database_engine, PostgresEngine):
+ cur.execute(
+ """
+ ALTER TABLE events ALTER COLUMN content DROP NOT NULL;
+ """
+ )
+ return
+
+ # sqlite is an arse about this. ref: https://www.sqlite.org/lang_altertable.html
+
+ cur.execute(
+ "SELECT sql FROM sqlite_master WHERE tbl_name='events' AND type='table'"
+ )
+ (oldsql,) = cur.fetchone()
+
+ sql = oldsql.replace("content TEXT NOT NULL", "content TEXT")
+ if sql == oldsql:
+ raise Exception("Couldn't find null constraint to drop in %s" % oldsql)
+
+ logger.info("Replacing definition of 'events' with: %s", sql)
+
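+    # sqlite has no ALTER COLUMN, so instead we rewrite the table definition
+    # stored in sqlite_master via writable_schema, then bump schema_version so
+    # that connections reload the modified schema.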
+ cur.execute("PRAGMA schema_version")
+ (oldver,) = cur.fetchone()
+ cur.execute("PRAGMA writable_schema=ON")
+ cur.execute(
+ "UPDATE sqlite_master SET sql=? WHERE tbl_name='events' AND type='table'",
+ (sql,),
+ )
+ cur.execute("PRAGMA schema_version=%i" % (oldver + 1,))
+ cur.execute("PRAGMA writable_schema=OFF")
diff --git a/synapse/storage/databases/main/schema/delta/51/e2e_room_keys.sql b/synapse/storage/databases/main/schema/delta/51/e2e_room_keys.sql
new file mode 100644
index 00000000..c0e66a69
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/51/e2e_room_keys.sql
@@ -0,0 +1,39 @@
+/* Copyright 2017 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- users' optionally backed up encrypted e2e sessions
+CREATE TABLE e2e_room_keys (
+ user_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ session_id TEXT NOT NULL,
+ version TEXT NOT NULL,
+ first_message_index INT,
+ forwarded_count INT,
+ is_verified BOOLEAN,
+ session_data TEXT NOT NULL
+);
+
+CREATE UNIQUE INDEX e2e_room_keys_idx ON e2e_room_keys(user_id, room_id, session_id);
+
+-- the metadata for each generation of encrypted e2e session backups
+CREATE TABLE e2e_room_keys_versions (
+ user_id TEXT NOT NULL,
+ version TEXT NOT NULL,
+ algorithm TEXT NOT NULL,
+ auth_data TEXT NOT NULL,
+ deleted SMALLINT DEFAULT 0 NOT NULL
+);
+
+CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions(user_id, version);
diff --git a/synapse/storage/databases/main/schema/delta/51/monthly_active_users.sql b/synapse/storage/databases/main/schema/delta/51/monthly_active_users.sql
new file mode 100644
index 00000000..c9d537d5
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/51/monthly_active_users.sql
@@ -0,0 +1,27 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- a table of monthly active users, for use where blocking based on mau limits
+CREATE TABLE monthly_active_users (
+ user_id TEXT NOT NULL,
+ -- Last time we saw the user. Not guaranteed to be accurate due to rate limiting
+ -- on updates. Granularity of updates is governed by
+ -- synapse.storage.monthly_active_users.LAST_SEEN_GRANULARITY
+ -- Measured in ms since epoch.
+ timestamp BIGINT NOT NULL
+);
+
+CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users(user_id);
+CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users(timestamp);
diff --git a/synapse/storage/databases/main/schema/delta/52/add_event_to_state_group_index.sql b/synapse/storage/databases/main/schema/delta/52/add_event_to_state_group_index.sql
new file mode 100644
index 00000000..91e03d13
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/52/add_event_to_state_group_index.sql
@@ -0,0 +1,19 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- This is needed to efficiently check for unreferenced state groups during
+-- purge. This adds an event_to_state_groups(state_group) index.
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('event_to_state_groups_sg_index', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/52/device_list_streams_unique_idx.sql b/synapse/storage/databases/main/schema/delta/52/device_list_streams_unique_idx.sql
new file mode 100644
index 00000000..bfa49e6f
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/52/device_list_streams_unique_idx.sql
@@ -0,0 +1,36 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- register a background update which will create a unique index on
+-- device_lists_remote_cache
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('device_lists_remote_cache_unique_idx', '{}');
+
+-- and one on device_lists_remote_extremeties
+INSERT into background_updates (update_name, progress_json, depends_on)
+ VALUES (
+ 'device_lists_remote_extremeties_unique_idx', '{}',
+
+ -- doesn't really depend on this, but we need to make sure both happen
+ -- before we drop the old indexes.
+ 'device_lists_remote_cache_unique_idx'
+ );
+
+-- once they complete, we can drop the old indexes.
+INSERT into background_updates (update_name, progress_json, depends_on)
+ VALUES (
+ 'drop_device_list_streams_non_unique_indexes', '{}',
+ 'device_lists_remote_extremeties_unique_idx'
+ );
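
For illustration only: while these updates are pending, the depends_on chain registered above can be inspected with a query along these lines:

    SELECT update_name, depends_on FROM background_updates
    ORDER BY update_name;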
diff --git a/synapse/storage/databases/main/schema/delta/52/e2e_room_keys.sql b/synapse/storage/databases/main/schema/delta/52/e2e_room_keys.sql
new file mode 100644
index 00000000..db687ccc
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/52/e2e_room_keys.sql
@@ -0,0 +1,53 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Change version column to an integer so we can do MAX() sensibly
+ */
+CREATE TABLE e2e_room_keys_versions_new (
+ user_id TEXT NOT NULL,
+ version BIGINT NOT NULL,
+ algorithm TEXT NOT NULL,
+ auth_data TEXT NOT NULL,
+ deleted SMALLINT DEFAULT 0 NOT NULL
+);
+
+INSERT INTO e2e_room_keys_versions_new
+ SELECT user_id, CAST(version as BIGINT), algorithm, auth_data, deleted FROM e2e_room_keys_versions;
+
+DROP TABLE e2e_room_keys_versions;
+ALTER TABLE e2e_room_keys_versions_new RENAME TO e2e_room_keys_versions;
+
+CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions(user_id, version);
+
+/* Change e2e_rooms_keys to match
+ */
+CREATE TABLE e2e_room_keys_new (
+ user_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ session_id TEXT NOT NULL,
+ version BIGINT NOT NULL,
+ first_message_index INT,
+ forwarded_count INT,
+ is_verified BOOLEAN,
+ session_data TEXT NOT NULL
+);
+
+INSERT INTO e2e_room_keys_new
+ SELECT user_id, room_id, session_id, CAST(version as BIGINT), first_message_index, forwarded_count, is_verified, session_data FROM e2e_room_keys;
+
+DROP TABLE e2e_room_keys;
+ALTER TABLE e2e_room_keys_new RENAME TO e2e_room_keys;
+
+CREATE UNIQUE INDEX e2e_room_keys_idx ON e2e_room_keys(user_id, room_id, session_id);
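
With version now a BIGINT, the MAX() lookup mentioned in the comment above works as intended; a hypothetical example (the user id is a placeholder):

    SELECT MAX(version) FROM e2e_room_keys_versions
    WHERE user_id = '@alice:example.com' AND deleted = 0;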
diff --git a/synapse/storage/databases/main/schema/delta/53/add_user_type_to_users.sql b/synapse/storage/databases/main/schema/delta/53/add_user_type_to_users.sql
new file mode 100644
index 00000000..88ec2f83
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/53/add_user_type_to_users.sql
@@ -0,0 +1,19 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* The type of the user: NULL for a regular user, or one of the constants in
+ * synapse.api.constants.UserTypes
+ */
+ALTER TABLE users ADD COLUMN user_type TEXT DEFAULT NULL;
diff --git a/synapse/storage/databases/main/schema/delta/53/drop_sent_transactions.sql b/synapse/storage/databases/main/schema/delta/53/drop_sent_transactions.sql
new file mode 100644
index 00000000..e372f5a4
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/53/drop_sent_transactions.sql
@@ -0,0 +1,16 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+DROP TABLE IF EXISTS sent_transactions;
diff --git a/synapse/storage/databases/main/schema/delta/53/event_format_version.sql b/synapse/storage/databases/main/schema/delta/53/event_format_version.sql
new file mode 100644
index 00000000..1d977c28
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/53/event_format_version.sql
@@ -0,0 +1,16 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE event_json ADD COLUMN format_version INTEGER;
diff --git a/synapse/storage/databases/main/schema/delta/53/user_dir_populate.sql b/synapse/storage/databases/main/schema/delta/53/user_dir_populate.sql
new file mode 100644
index 00000000..ffcc896b
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/53/user_dir_populate.sql
@@ -0,0 +1,30 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Set up staging tables
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('populate_user_directory_createtables', '{}');
+
+-- Run through each room and update the user directory according to who is in it
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables');
+
+-- Insert all users, if search_all_users is on
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('populate_user_directory_process_users', '{}', 'populate_user_directory_process_rooms');
+
+-- Clean up staging tables
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_users');
diff --git a/synapse/storage/databases/main/schema/delta/53/user_ips_index.sql b/synapse/storage/databases/main/schema/delta/53/user_ips_index.sql
new file mode 100644
index 00000000..b812c579
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/53/user_ips_index.sql
@@ -0,0 +1,30 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ -- analyze user_ips, to help ensure the correct indices are used
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('user_ips_analyze', '{}');
+
+-- delete duplicates
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('user_ips_remove_dupes', '{}', 'user_ips_analyze');
+
+-- add a new unique index to user_ips table
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('user_ips_device_unique_index', '{}', 'user_ips_remove_dupes');
+
+-- drop the old original index
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index');
diff --git a/synapse/storage/databases/main/schema/delta/53/user_share.sql b/synapse/storage/databases/main/schema/delta/53/user_share.sql
new file mode 100644
index 00000000..5831b1a6
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/53/user_share.sql
@@ -0,0 +1,44 @@
+/* Copyright 2017 Vector Creations Ltd, 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Old disused version of the tables below.
+DROP TABLE IF EXISTS users_who_share_rooms;
+
+-- Tables keeping track of what users share rooms. This is a map of local users
+-- to local or remote users, per room. Remote users cannot be in the user_id
+-- column, only the other_user_id column. There are two tables, one for public
+-- rooms and one for private rooms.
+CREATE TABLE IF NOT EXISTS users_who_share_public_rooms (
+ user_id TEXT NOT NULL,
+ other_user_id TEXT NOT NULL,
+ room_id TEXT NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS users_who_share_private_rooms (
+ user_id TEXT NOT NULL,
+ other_user_id TEXT NOT NULL,
+ room_id TEXT NOT NULL
+);
+
+CREATE UNIQUE INDEX users_who_share_public_rooms_u_idx ON users_who_share_public_rooms(user_id, other_user_id, room_id);
+CREATE INDEX users_who_share_public_rooms_r_idx ON users_who_share_public_rooms(room_id);
+CREATE INDEX users_who_share_public_rooms_o_idx ON users_who_share_public_rooms(other_user_id);
+
+CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms(user_id, other_user_id, room_id);
+CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms(room_id);
+CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms(other_user_id);
+
+-- Make sure that we populate the tables initially by resetting the stream ID
+UPDATE user_directory_stream_pos SET stream_id = NULL;
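
As a rough illustration of how these tables are read (the user id is a placeholder), the users who share a private room with a given local user can be listed with:

    SELECT DISTINCT other_user_id FROM users_who_share_private_rooms
    WHERE user_id = '@alice:example.com';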
diff --git a/synapse/storage/databases/main/schema/delta/53/user_threepid_id.sql b/synapse/storage/databases/main/schema/delta/53/user_threepid_id.sql
new file mode 100644
index 00000000..80c2c573
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/53/user_threepid_id.sql
@@ -0,0 +1,29 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Tracks which identity server a user bound their threepid via.
+CREATE TABLE user_threepid_id_server (
+ user_id TEXT NOT NULL,
+ medium TEXT NOT NULL,
+ address TEXT NOT NULL,
+ id_server TEXT NOT NULL
+);
+
+CREATE UNIQUE INDEX user_threepid_id_server_idx ON user_threepid_id_server(
+ user_id, medium, address, id_server
+);
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('user_threepids_grandfather', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/53/users_in_public_rooms.sql b/synapse/storage/databases/main/schema/delta/53/users_in_public_rooms.sql
new file mode 100644
index 00000000..f7827ca6
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/53/users_in_public_rooms.sql
@@ -0,0 +1,28 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We don't need the old version of this table.
+DROP TABLE IF EXISTS users_in_public_rooms;
+
+-- Old version of users_in_public_rooms
+DROP TABLE IF EXISTS users_who_share_public_rooms;
+
+-- Track what users are in public rooms.
+CREATE TABLE IF NOT EXISTS users_in_public_rooms (
+ user_id TEXT NOT NULL,
+ room_id TEXT NOT NULL
+);
+
+CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id);
diff --git a/synapse/storage/databases/main/schema/delta/54/account_validity_with_renewal.sql b/synapse/storage/databases/main/schema/delta/54/account_validity_with_renewal.sql
new file mode 100644
index 00000000..0adb2ad5
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/54/account_validity_with_renewal.sql
@@ -0,0 +1,30 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We previously changed the schema for this table without renaming the file, which means
+-- that some databases might still be using the old schema. This ensures Synapse uses the
+-- right schema for the table.
+DROP TABLE IF EXISTS account_validity;
+
+-- Track when user accounts expire, and their renewal tokens, for the account validity feature.
+CREATE TABLE IF NOT EXISTS account_validity (
+ user_id TEXT PRIMARY KEY,
+ expiration_ts_ms BIGINT NOT NULL,
+ email_sent BOOLEAN NOT NULL,
+ renewal_token TEXT
+);
+
+CREATE INDEX account_validity_email_sent_idx ON account_validity(email_sent, expiration_ts_ms);
+CREATE UNIQUE INDEX account_validity_renewal_string_idx ON account_validity(renewal_token);
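
A sketch of the kind of lookup these indexes serve (the cutoff timestamp is a placeholder): finding accounts that expire soon and have not yet been sent a renewal email:

    SELECT user_id, expiration_ts_ms FROM account_validity
    WHERE email_sent = FALSE  -- on older SQLite, use 0 instead of FALSE
      AND expiration_ts_ms < 1556668800000;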
diff --git a/synapse/storage/databases/main/schema/delta/54/add_validity_to_server_keys.sql b/synapse/storage/databases/main/schema/delta/54/add_validity_to_server_keys.sql
new file mode 100644
index 00000000..c01aa9d2
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/54/add_validity_to_server_keys.sql
@@ -0,0 +1,23 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* The timestamp until which this key may be used, before we have to refresh it. */
+ALTER TABLE server_signature_keys ADD COLUMN ts_valid_until_ms BIGINT;
+
+UPDATE server_signature_keys SET ts_valid_until_ms = (
+ SELECT MAX(ts_valid_until_ms) FROM server_keys_json skj WHERE
+ skj.server_name = server_signature_keys.server_name AND
+ skj.key_id = server_signature_keys.key_id
+);
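
Once populated, ts_valid_until_ms can be used to spot keys that need refreshing; a hypothetical check (the timestamp is a placeholder):

    SELECT server_name, key_id FROM server_signature_keys
    WHERE ts_valid_until_ms IS NOT NULL AND ts_valid_until_ms < 1556668800000;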
diff --git a/synapse/storage/databases/main/schema/delta/54/delete_forward_extremities.sql b/synapse/storage/databases/main/schema/delta/54/delete_forward_extremities.sql
new file mode 100644
index 00000000..b062ec84
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/54/delete_forward_extremities.sql
@@ -0,0 +1,23 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Start a background job to cleanup extremities that were incorrectly added
+-- by bug #5269.
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('delete_soft_failed_extremities', '{}');
+
+DROP TABLE IF EXISTS _extremities_to_check; -- To make this delta schema file idempotent.
+CREATE TABLE _extremities_to_check AS SELECT event_id FROM event_forward_extremities;
+CREATE INDEX _extremities_to_check_id ON _extremities_to_check(event_id);
diff --git a/synapse/storage/databases/main/schema/delta/54/drop_legacy_tables.sql b/synapse/storage/databases/main/schema/delta/54/drop_legacy_tables.sql
new file mode 100644
index 00000000..dbbe6826
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/54/drop_legacy_tables.sql
@@ -0,0 +1,30 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- we need to do this first due to foreign key constraints
+DROP TABLE IF EXISTS application_services_regex;
+
+DROP TABLE IF EXISTS application_services;
+DROP TABLE IF EXISTS transaction_id_to_pdu;
+DROP TABLE IF EXISTS stats_reporting;
+DROP TABLE IF EXISTS current_state_resets;
+DROP TABLE IF EXISTS event_content_hashes;
+DROP TABLE IF EXISTS event_destinations;
+DROP TABLE IF EXISTS event_edge_hashes;
+DROP TABLE IF EXISTS event_signatures;
+DROP TABLE IF EXISTS feedback;
+DROP TABLE IF EXISTS room_hosts;
+DROP TABLE IF EXISTS server_tls_certificates;
+DROP TABLE IF EXISTS state_forward_extremities;
diff --git a/synapse/storage/databases/main/schema/delta/54/drop_presence_list.sql b/synapse/storage/databases/main/schema/delta/54/drop_presence_list.sql
new file mode 100644
index 00000000..e6ee70c6
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/54/drop_presence_list.sql
@@ -0,0 +1,16 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+DROP TABLE IF EXISTS presence_list;
diff --git a/synapse/storage/databases/main/schema/delta/54/relations.sql b/synapse/storage/databases/main/schema/delta/54/relations.sql
new file mode 100644
index 00000000..134862b8
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/54/relations.sql
@@ -0,0 +1,27 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Tracks related events, like reactions, replies, edits, etc. Note that things
+-- in this table are not necessarily "valid", e.g. it may contain edits from
+-- people who don't have the power to edit other people's events.
+CREATE TABLE IF NOT EXISTS event_relations (
+ event_id TEXT NOT NULL,
+ relates_to_id TEXT NOT NULL,
+ relation_type TEXT NOT NULL,
+ aggregation_key TEXT
+);
+
+CREATE UNIQUE INDEX event_relations_id ON event_relations(event_id);
+CREATE INDEX event_relations_relates ON event_relations(relates_to_id, relation_type, aggregation_key);
diff --git a/synapse/storage/databases/main/schema/delta/54/stats.sql b/synapse/storage/databases/main/schema/delta/54/stats.sql
new file mode 100644
index 00000000..652e5830
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/54/stats.sql
@@ -0,0 +1,80 @@
+/* Copyright 2018 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE stats_stream_pos (
+ Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row.
+ stream_id BIGINT,
+ CHECK (Lock='X')
+);
+
+INSERT INTO stats_stream_pos (stream_id) VALUES (null);
+
+CREATE TABLE user_stats (
+ user_id TEXT NOT NULL,
+ ts BIGINT NOT NULL,
+ bucket_size INT NOT NULL,
+ public_rooms INT NOT NULL,
+ private_rooms INT NOT NULL
+);
+
+CREATE UNIQUE INDEX user_stats_user_ts ON user_stats(user_id, ts);
+
+CREATE TABLE room_stats (
+ room_id TEXT NOT NULL,
+ ts BIGINT NOT NULL,
+ bucket_size INT NOT NULL,
+ current_state_events INT NOT NULL,
+ joined_members INT NOT NULL,
+ invited_members INT NOT NULL,
+ left_members INT NOT NULL,
+ banned_members INT NOT NULL,
+ state_events INT NOT NULL
+);
+
+CREATE UNIQUE INDEX room_stats_room_ts ON room_stats(room_id, ts);
+
+-- cache of current room state; useful for the publicRooms list
+CREATE TABLE room_state (
+ room_id TEXT NOT NULL,
+ join_rules TEXT,
+ history_visibility TEXT,
+ encryption TEXT,
+ name TEXT,
+ topic TEXT,
+ avatar TEXT,
+ canonical_alias TEXT
+ -- get aliases straight from the right table
+);
+
+CREATE UNIQUE INDEX room_state_room ON room_state(room_id);
+
+CREATE TABLE room_stats_earliest_token (
+ room_id TEXT NOT NULL,
+ token BIGINT NOT NULL
+);
+
+CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token(room_id);
+
+-- Set up staging tables
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('populate_stats_createtables', '{}');
+
+-- Run through each room and update stats
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('populate_stats_process_rooms', '{}', 'populate_stats_createtables');
+
+-- Clean up staging tables
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('populate_stats_cleanup', '{}', 'populate_stats_process_rooms');
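
For orientation only, the most recent stats bucket for a room would be fetched roughly like this (the room id is made up):

    SELECT ts, joined_members, invited_members, banned_members
    FROM room_stats
    WHERE room_id = '!abc123:example.com'
    ORDER BY ts DESC LIMIT 1;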
diff --git a/synapse/storage/databases/main/schema/delta/54/stats2.sql b/synapse/storage/databases/main/schema/delta/54/stats2.sql
new file mode 100644
index 00000000..3b2d4844
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/54/stats2.sql
@@ -0,0 +1,28 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- This delta file gets run after `54/stats.sql` delta.
+
+-- We want to add some indices to the temporary stats table, so we re-insert
+-- 'populate_stats_createtables' if we are still processing the rooms update.
+INSERT INTO background_updates (update_name, progress_json)
+ SELECT 'populate_stats_createtables', '{}'
+ WHERE
+ 'populate_stats_process_rooms' IN (
+ SELECT update_name FROM background_updates
+ )
+ AND 'populate_stats_createtables' NOT IN ( -- don't insert if already exists
+ SELECT update_name FROM background_updates
+ );
diff --git a/synapse/storage/databases/main/schema/delta/55/access_token_expiry.sql b/synapse/storage/databases/main/schema/delta/55/access_token_expiry.sql
new file mode 100644
index 00000000..4590604b
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/55/access_token_expiry.sql
@@ -0,0 +1,18 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- when this access token can be used until, in ms since the epoch. NULL means the token
+-- never expires.
+ALTER TABLE access_tokens ADD COLUMN valid_until_ms BIGINT;
diff --git a/synapse/storage/databases/main/schema/delta/55/track_threepid_validations.sql b/synapse/storage/databases/main/schema/delta/55/track_threepid_validations.sql
new file mode 100644
index 00000000..a8eced2e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/55/track_threepid_validations.sql
@@ -0,0 +1,31 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+CREATE TABLE IF NOT EXISTS threepid_validation_session (
+ session_id TEXT PRIMARY KEY,
+ medium TEXT NOT NULL,
+ address TEXT NOT NULL,
+ client_secret TEXT NOT NULL,
+ last_send_attempt BIGINT NOT NULL,
+ validated_at BIGINT
+);
+
+CREATE TABLE IF NOT EXISTS threepid_validation_token (
+ token TEXT PRIMARY KEY,
+ session_id TEXT NOT NULL,
+ next_link TEXT,
+ expires BIGINT NOT NULL
+);
+
+CREATE INDEX threepid_validation_token_session_id ON threepid_validation_token(session_id);
diff --git a/synapse/storage/databases/main/schema/delta/55/users_alter_deactivated.sql b/synapse/storage/databases/main/schema/delta/55/users_alter_deactivated.sql
new file mode 100644
index 00000000..dabdde48
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/55/users_alter_deactivated.sql
@@ -0,0 +1,19 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE users ADD deactivated SMALLINT DEFAULT 0 NOT NULL;
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('users_set_deactivated_flag', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/56/add_spans_to_device_lists.sql b/synapse/storage/databases/main/schema/delta/56/add_spans_to_device_lists.sql
new file mode 100644
index 00000000..41807eb1
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/add_spans_to_device_lists.sql
@@ -0,0 +1,20 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Opentracing context data for inclusion in the device_list_update EDUs, as a
+ * json-encoded dictionary. NULL if opentracing is disabled (or not enabled for this destination).
+ */
+ALTER TABLE device_lists_outbound_pokes ADD opentracing_context TEXT;
diff --git a/synapse/storage/databases/main/schema/delta/56/current_state_events_membership.sql b/synapse/storage/databases/main/schema/delta/56/current_state_events_membership.sql
new file mode 100644
index 00000000..47301867
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/current_state_events_membership.sql
@@ -0,0 +1,22 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We add membership to current state so that we don't need to join against
+-- room_memberships, which can be surprisingly costly (we do such queries
+-- very frequently).
+-- This will be null for non-membership events and the content.membership key
+-- for membership events. (Will also be null for membership events until the
+-- background update job has finished).
+ALTER TABLE current_state_events ADD membership TEXT;
diff --git a/synapse/storage/databases/main/schema/delta/56/current_state_events_membership_mk2.sql b/synapse/storage/databases/main/schema/delta/56/current_state_events_membership_mk2.sql
new file mode 100644
index 00000000..3133d42d
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/current_state_events_membership_mk2.sql
@@ -0,0 +1,24 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We add membership to current state so that we don't need to join against
+-- room_memberships, which can be surprisingly costly (we do such queries
+-- very frequently).
+-- This will be null for non-membership events and the content.membership key
+-- for membership events. (Will also be null for membership events until the
+-- background update job has finished).
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('current_state_events_membership', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/56/delete_keys_from_deleted_backups.sql b/synapse/storage/databases/main/schema/delta/56/delete_keys_from_deleted_backups.sql
new file mode 100644
index 00000000..1d2ddb1b
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/delete_keys_from_deleted_backups.sql
@@ -0,0 +1,25 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* delete room keys that belong to deleted room key version, or to room key
+ * versions that don't exist (anymore)
+ */
+DELETE FROM e2e_room_keys
+WHERE version NOT IN (
+ SELECT version
+ FROM e2e_room_keys_versions
+ WHERE e2e_room_keys.user_id = e2e_room_keys_versions.user_id
+ AND e2e_room_keys_versions.deleted = 0
+);
diff --git a/synapse/storage/databases/main/schema/delta/56/destinations_failure_ts.sql b/synapse/storage/databases/main/schema/delta/56/destinations_failure_ts.sql
new file mode 100644
index 00000000..f0088929
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/destinations_failure_ts.sql
@@ -0,0 +1,25 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Record the timestamp when a given server started failing
+ */
+ALTER TABLE destinations ADD failure_ts BIGINT;
+
+/* as a rough approximation, we assume that the server started failing at
+ * retry_interval before the last retry
+ */
+UPDATE destinations SET failure_ts = retry_last_ts - retry_interval
+ WHERE retry_last_ts > 0;
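
A hedged example of how failure_ts might be consulted afterwards, e.g. to list the destinations that have been failing the longest (assuming the usual destination column on that table):

    SELECT destination, failure_ts FROM destinations
    WHERE failure_ts IS NOT NULL
    ORDER BY failure_ts ASC;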
diff --git a/synapse/storage/databases/main/schema/delta/56/destinations_retry_interval_type.sql.postgres b/synapse/storage/databases/main/schema/delta/56/destinations_retry_interval_type.sql.postgres
new file mode 100644
index 00000000..b9bbb18a
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/destinations_retry_interval_type.sql.postgres
@@ -0,0 +1,18 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We want to store large retry intervals so we upgrade the column from INT
+-- to BIGINT. We don't need to do this on SQLite.
+ALTER TABLE destinations ALTER retry_interval SET DATA TYPE BIGINT;
diff --git a/synapse/storage/databases/main/schema/delta/56/device_stream_id_insert.sql b/synapse/storage/databases/main/schema/delta/56/device_stream_id_insert.sql
new file mode 100644
index 00000000..c2f557fd
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/device_stream_id_insert.sql
@@ -0,0 +1,20 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- This line already existed in deltas/35/device_stream_id but was not included in the
+-- 54 full schema SQL. Add some SQL here to insert the missing row if it does not exist
+INSERT INTO device_max_stream_id (stream_id) SELECT 0 WHERE NOT EXISTS (
+ SELECT * from device_max_stream_id
+);
\ No newline at end of file
diff --git a/synapse/storage/databases/main/schema/delta/56/devices_last_seen.sql b/synapse/storage/databases/main/schema/delta/56/devices_last_seen.sql
new file mode 100644
index 00000000..dfa902d0
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/devices_last_seen.sql
@@ -0,0 +1,24 @@
+/* Copyright 2019 Matrix.org Foundation CIC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Track last seen information for a device in the devices table, rather
+-- than relying on it being in the user_ips table (which we want to be able
+-- to purge old entries from)
+ALTER TABLE devices ADD COLUMN last_seen BIGINT;
+ALTER TABLE devices ADD COLUMN ip TEXT;
+ALTER TABLE devices ADD COLUMN user_agent TEXT;
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('devices_last_seen', '{}');
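
After the background update has run, the per-device data can be read directly from devices rather than user_ips; an illustrative query (the user id is a placeholder):

    SELECT device_id, last_seen, ip, user_agent FROM devices
    WHERE user_id = '@alice:example.com';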
diff --git a/synapse/storage/databases/main/schema/delta/56/drop_unused_event_tables.sql b/synapse/storage/databases/main/schema/delta/56/drop_unused_event_tables.sql
new file mode 100644
index 00000000..9f09922c
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/drop_unused_event_tables.sql
@@ -0,0 +1,20 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- these tables are never used.
+DROP TABLE IF EXISTS room_names;
+DROP TABLE IF EXISTS topics;
+DROP TABLE IF EXISTS history_visibility;
+DROP TABLE IF EXISTS guest_access;
diff --git a/synapse/storage/databases/main/schema/delta/56/event_expiry.sql b/synapse/storage/databases/main/schema/delta/56/event_expiry.sql
new file mode 100644
index 00000000..81a36a8b
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/event_expiry.sql
@@ -0,0 +1,21 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS event_expiry (
+ event_id TEXT PRIMARY KEY,
+ expiry_ts BIGINT NOT NULL
+);
+
+CREATE INDEX event_expiry_expiry_ts_idx ON event_expiry(expiry_ts);
diff --git a/synapse/storage/databases/main/schema/delta/56/event_labels.sql b/synapse/storage/databases/main/schema/delta/56/event_labels.sql
new file mode 100644
index 00000000..5e29c1da
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/event_labels.sql
@@ -0,0 +1,30 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- room_id and topological_ordering are denormalised from the events table in order to
+-- make the index work.
+CREATE TABLE IF NOT EXISTS event_labels (
+ event_id TEXT,
+ label TEXT,
+ room_id TEXT NOT NULL,
+ topological_ordering BIGINT NOT NULL,
+ PRIMARY KEY(event_id, label)
+);
+
+
+-- This index enables an event pagination looking for a particular label to index the
+-- event_labels table first, which is much quicker than scanning the events table and then
+-- filtering by label, if the label is rarely used relative to the size of the room.
+CREATE INDEX event_labels_room_id_label_idx ON event_labels(room_id, label, topological_ordering);
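
The pagination described above would hit this index with a query shaped roughly like the following (the room id and label are placeholders):

    SELECT event_id FROM event_labels
    WHERE room_id = '!abc123:example.com' AND label = 'm.work'
    ORDER BY topological_ordering DESC
    LIMIT 10;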
diff --git a/synapse/storage/databases/main/schema/delta/56/event_labels_background_update.sql b/synapse/storage/databases/main/schema/delta/56/event_labels_background_update.sql
new file mode 100644
index 00000000..5f5e0499
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/event_labels_background_update.sql
@@ -0,0 +1,17 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('event_store_labels', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/56/fix_room_keys_index.sql b/synapse/storage/databases/main/schema/delta/56/fix_room_keys_index.sql
new file mode 100644
index 00000000..014cb3b5
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/fix_room_keys_index.sql
@@ -0,0 +1,18 @@
+/* Copyright 2019 Matrix.org Foundation CIC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- version is supposed to be part of the room keys index
+CREATE UNIQUE INDEX e2e_room_keys_with_version_idx ON e2e_room_keys(user_id, version, room_id, session_id);
+DROP INDEX IF EXISTS e2e_room_keys_idx;
diff --git a/synapse/storage/databases/main/schema/delta/56/hidden_devices.sql b/synapse/storage/databases/main/schema/delta/56/hidden_devices.sql
new file mode 100644
index 00000000..67f8b202
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/hidden_devices.sql
@@ -0,0 +1,18 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- device list needs to know which ones are "real" devices, and which ones are
+-- just used to avoid collisions
+ALTER TABLE devices ADD COLUMN hidden BOOLEAN DEFAULT FALSE;
diff --git a/synapse/storage/databases/main/schema/delta/56/hidden_devices_fix.sql.sqlite b/synapse/storage/databases/main/schema/delta/56/hidden_devices_fix.sql.sqlite
new file mode 100644
index 00000000..e8b1fd35
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/hidden_devices_fix.sql.sqlite
@@ -0,0 +1,42 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Change the hidden column from a default value of FALSE to a default value of
+ * 0, because sqlite3 prior to 3.23.0 caused the hidden column to contain the
+ * string 'FALSE', which is truthy.
+ *
+ * Since sqlite doesn't allow us to just change the default value, we have to
+ * recreate the table, copy the data, fix the rows that have incorrect data, and
+ * replace the old table with the new table.
+ */
+
+CREATE TABLE IF NOT EXISTS devices2 (
+ user_id TEXT NOT NULL,
+ device_id TEXT NOT NULL,
+ display_name TEXT,
+ last_seen BIGINT,
+ ip TEXT,
+ user_agent TEXT,
+ hidden BOOLEAN DEFAULT 0,
+ CONSTRAINT device_uniqueness UNIQUE (user_id, device_id)
+);
+
+INSERT INTO devices2 SELECT * FROM devices;
+
+UPDATE devices2 SET hidden = 0 WHERE hidden = 'FALSE';
+
+DROP TABLE devices;
+
+ALTER TABLE devices2 RENAME TO devices;
diff --git a/synapse/storage/databases/main/schema/delta/56/nuke_empty_communities_from_db.sql b/synapse/storage/databases/main/schema/delta/56/nuke_empty_communities_from_db.sql
new file mode 100644
index 00000000..4f24c140
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/nuke_empty_communities_from_db.sql
@@ -0,0 +1,29 @@
+/* Copyright 2019 Werner Sembach
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Groups/communities now get deleted when the last member leaves. This is a one-time cleanup to remove old groups/communities that were already empty before that change was made.
+DELETE FROM group_attestations_remote WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
+DELETE FROM group_attestations_renewals WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
+DELETE FROM group_invites WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
+DELETE FROM group_roles WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
+DELETE FROM group_room_categories WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
+DELETE FROM group_rooms WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
+DELETE FROM group_summary_roles WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
+DELETE FROM group_summary_room_categories WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
+DELETE FROM group_summary_rooms WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
+DELETE FROM group_summary_users WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
+DELETE FROM local_group_membership WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
+DELETE FROM local_group_updates WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
+DELETE FROM groups WHERE group_id IN (SELECT group_id FROM groups WHERE NOT EXISTS (SELECT group_id FROM group_users WHERE group_id = groups.group_id));
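
Each DELETE above repeats the same subquery; shown on its own (purely for illustration), the set of empty groups being removed is:

    SELECT group_id FROM groups
    WHERE NOT EXISTS (
        SELECT group_id FROM group_users WHERE group_id = groups.group_id
    );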
diff --git a/synapse/storage/databases/main/schema/delta/56/public_room_list_idx.sql b/synapse/storage/databases/main/schema/delta/56/public_room_list_idx.sql
new file mode 100644
index 00000000..7be31ffe
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/public_room_list_idx.sql
@@ -0,0 +1,16 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE INDEX public_room_list_stream_network ON public_room_list_stream (appservice_id, network_id, room_id);
diff --git a/synapse/storage/databases/main/schema/delta/56/redaction_censor.sql b/synapse/storage/databases/main/schema/delta/56/redaction_censor.sql
new file mode 100644
index 00000000..ea95db0e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/redaction_censor.sql
@@ -0,0 +1,16 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE redactions ADD COLUMN have_censored BOOL NOT NULL DEFAULT false;
diff --git a/synapse/storage/databases/main/schema/delta/56/redaction_censor2.sql b/synapse/storage/databases/main/schema/delta/56/redaction_censor2.sql
new file mode 100644
index 00000000..49ce35d7
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/redaction_censor2.sql
@@ -0,0 +1,22 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ALTER TABLE redactions ADD COLUMN received_ts BIGINT;
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('redactions_received_ts', '{}');
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('redactions_have_censored_ts_idx', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/56/redaction_censor3_fix_update.sql.postgres b/synapse/storage/databases/main/schema/delta/56/redaction_censor3_fix_update.sql.postgres
new file mode 100644
index 00000000..67471f3e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/redaction_censor3_fix_update.sql.postgres
@@ -0,0 +1,25 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- There was a bug where we may have updated censored redactions as bytes,
+-- which can (somehow) cause JSON to be inserted hex encoded. These updates go
+-- and undo any such hex-encoded JSON.
+
+INSERT INTO background_updates (update_name, progress_json)
+ VALUES ('event_fix_redactions_bytes_create_index', '{}');
+
+INSERT INTO background_updates (update_name, progress_json, depends_on)
+ VALUES ('event_fix_redactions_bytes', '{}', 'event_fix_redactions_bytes_create_index');
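+
+-- (Illustrative note, not part of the upstream migration.) `depends_on` makes
+-- the background updater run the index-creation job before the fix-up job; a
+-- sketch of inspecting the queued chain might look like:
+--   SELECT update_name, depends_on FROM background_updates ORDER BY update_name;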
diff --git a/synapse/storage/databases/main/schema/delta/56/redaction_censor4.sql b/synapse/storage/databases/main/schema/delta/56/redaction_censor4.sql
new file mode 100644
index 00000000..b7550f6f
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/redaction_censor4.sql
@@ -0,0 +1,16 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+DROP INDEX IF EXISTS redactions_have_censored;
diff --git a/synapse/storage/databases/main/schema/delta/56/remove_tombstoned_rooms_from_directory.sql b/synapse/storage/databases/main/schema/delta/56/remove_tombstoned_rooms_from_directory.sql
new file mode 100644
index 00000000..aeb17813
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/remove_tombstoned_rooms_from_directory.sql
@@ -0,0 +1,18 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Now that #6232 has landed, we can remove tombstoned rooms from the directory.
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('remove_tombstoned_rooms_from_directory', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/56/room_key_etag.sql b/synapse/storage/databases/main/schema/delta/56/room_key_etag.sql
new file mode 100644
index 00000000..7d70dd07
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/room_key_etag.sql
@@ -0,0 +1,17 @@
+/* Copyright 2019 Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- store the current etag of the backup version
+ALTER TABLE e2e_room_keys_versions ADD COLUMN etag BIGINT;
diff --git a/synapse/storage/databases/main/schema/delta/56/room_membership_idx.sql b/synapse/storage/databases/main/schema/delta/56/room_membership_idx.sql
new file mode 100644
index 00000000..92ab1f5e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/room_membership_idx.sql
@@ -0,0 +1,18 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Adds an index on room_memberships for fetching all forgotten rooms for a user
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('room_membership_forgotten_idx', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/56/room_retention.sql b/synapse/storage/databases/main/schema/delta/56/room_retention.sql
new file mode 100644
index 00000000..ee6cdf7a
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/room_retention.sql
@@ -0,0 +1,33 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Tracks the retention policy of a room.
+-- A NULL max_lifetime or min_lifetime means that the matching property is not defined in
+-- the room's retention policy state event.
+-- If a room doesn't have a retention policy state event in its state, both max_lifetime
+-- and min_lifetime are NULL.
+CREATE TABLE IF NOT EXISTS room_retention(
+ room_id TEXT,
+ event_id TEXT,
+ min_lifetime BIGINT,
+ max_lifetime BIGINT,
+
+ PRIMARY KEY(room_id, event_id)
+);
+
+CREATE INDEX room_retention_max_lifetime_idx ON room_retention(max_lifetime);
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('insert_room_retention', '{}');
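+
+-- (Illustrative note, not part of the upstream migration.) The max_lifetime
+-- index lets purge jobs cheaply find rooms whose retention policy caps event
+-- lifetime at or below some duration; a sketch of such a lookup might look like:
+--   SELECT room_id, max_lifetime FROM room_retention
+--   WHERE max_lifetime IS NOT NULL AND max_lifetime <= 2592000000; -- <= 30 days, in ms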
diff --git a/synapse/storage/databases/main/schema/delta/56/signing_keys.sql b/synapse/storage/databases/main/schema/delta/56/signing_keys.sql
new file mode 100644
index 00000000..5c5fffca
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/signing_keys.sql
@@ -0,0 +1,56 @@
+/* Copyright 2019 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- cross-signing keys
+CREATE TABLE IF NOT EXISTS e2e_cross_signing_keys (
+ user_id TEXT NOT NULL,
+ -- the type of cross-signing key (master, user_signing, or self_signing)
+ keytype TEXT NOT NULL,
+ -- the full key information, as a json-encoded dict
+ keydata TEXT NOT NULL,
+ -- for keeping the keys in order, so that we can fetch the latest one
+ stream_id BIGINT NOT NULL
+);
+
+CREATE UNIQUE INDEX e2e_cross_signing_keys_idx ON e2e_cross_signing_keys(user_id, keytype, stream_id);
+
+-- cross-signing signatures
+CREATE TABLE IF NOT EXISTS e2e_cross_signing_signatures (
+ -- user who did the signing
+ user_id TEXT NOT NULL,
+ -- key used to sign
+ key_id TEXT NOT NULL,
+ -- user who was signed
+ target_user_id TEXT NOT NULL,
+ -- device/key that was signed
+ target_device_id TEXT NOT NULL,
+ -- the actual signature
+ signature TEXT NOT NULL
+);
+
+-- replaced by the index created in signing_keys_nonunique_signatures.sql
+-- CREATE UNIQUE INDEX e2e_cross_signing_signatures_idx ON e2e_cross_signing_signatures(user_id, target_user_id, target_device_id);
+
+-- stream of user signature updates
+CREATE TABLE IF NOT EXISTS user_signature_stream (
+ -- uses the same stream ID as device list stream
+ stream_id BIGINT NOT NULL,
+ -- user who did the signing
+ from_user_id TEXT NOT NULL,
+ -- list of users who were signed, as a JSON array
+ user_ids TEXT NOT NULL
+);
+
+CREATE UNIQUE INDEX user_signature_stream_idx ON user_signature_stream(stream_id);
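+
+-- (Illustrative note, not part of the upstream migration.) The stream_id
+-- column orders successive uploads, so fetching a user's current master key is
+-- a "latest row wins" query; a sketch might look like:
+--   SELECT keydata FROM e2e_cross_signing_keys
+--   WHERE user_id = '@alice:example.com' AND keytype = 'master'
+--   ORDER BY stream_id DESC LIMIT 1;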
diff --git a/synapse/storage/databases/main/schema/delta/56/signing_keys_nonunique_signatures.sql b/synapse/storage/databases/main/schema/delta/56/signing_keys_nonunique_signatures.sql
new file mode 100644
index 00000000..0aa90ebf
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/signing_keys_nonunique_signatures.sql
@@ -0,0 +1,22 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* The cross-signing signatures index should not be a unique index, because a
+ * user may upload multiple signatures for the same target user. The previous
+ * index was unique, so delete it if it's there and create a new non-unique
+ * index. */
+
+DROP INDEX IF EXISTS e2e_cross_signing_signatures_idx;
+CREATE INDEX IF NOT EXISTS e2e_cross_signing_signatures2_idx
+    ON e2e_cross_signing_signatures(user_id, target_user_id, target_device_id);
diff --git a/synapse/storage/databases/main/schema/delta/56/stats_separated.sql b/synapse/storage/databases/main/schema/delta/56/stats_separated.sql
new file mode 100644
index 00000000..bbdde121
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/stats_separated.sql
@@ -0,0 +1,156 @@
+/* Copyright 2018 New Vector Ltd
+ * Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+----- First clean up from previous versions of room stats.
+
+-- First remove old stats stuff
+DROP TABLE IF EXISTS room_stats;
+DROP TABLE IF EXISTS room_state;
+DROP TABLE IF EXISTS room_stats_state;
+DROP TABLE IF EXISTS user_stats;
+DROP TABLE IF EXISTS room_stats_earliest_tokens;
+DROP TABLE IF EXISTS _temp_populate_stats_position;
+DROP TABLE IF EXISTS _temp_populate_stats_rooms;
+DROP TABLE IF EXISTS stats_stream_pos;
+
+-- Unschedule old background updates if they're still scheduled
+DELETE FROM background_updates WHERE update_name IN (
+ 'populate_stats_createtables',
+ 'populate_stats_process_rooms',
+ 'populate_stats_process_users',
+ 'populate_stats_cleanup'
+);
+
+-- this relies on current_state_events.membership having been populated, so add
+-- a dependency on current_state_events_membership.
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('populate_stats_process_rooms', '{}', 'current_state_events_membership');
+
+-- this also relies on current_state_events.membership having been populated, but
+-- we get that as a side-effect of depending on populate_stats_process_rooms.
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+ ('populate_stats_process_users', '{}', 'populate_stats_process_rooms');
+
+----- Create tables for our version of room stats.
+
+-- single-row table to track position of incremental updates
+DROP TABLE IF EXISTS stats_incremental_position;
+CREATE TABLE stats_incremental_position (
+ Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row.
+ stream_id BIGINT NOT NULL,
+ CHECK (Lock='X')
+);
+
+-- insert a single row, seeded from the current maximum event stream ordering
+-- (or zero if there are no events).
+INSERT INTO stats_incremental_position (
+    stream_id
+) SELECT COALESCE(MAX(stream_ordering), 0) FROM events;
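+
+-- (Illustrative note, not part of the upstream migration.) The Lock/CHECK
+-- combination is a standard single-row-table trick: every writer targets the
+-- same fixed key, so advancing the position might look like:
+--   UPDATE stats_incremental_position SET stream_id = 12345 WHERE Lock = 'X';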
+
+-- represents PRESENT room statistics for a room
+-- only holds absolute fields
+DROP TABLE IF EXISTS room_stats_current;
+CREATE TABLE room_stats_current (
+ room_id TEXT NOT NULL PRIMARY KEY,
+
+ -- These are absolute counts
+ current_state_events INT NOT NULL,
+ joined_members INT NOT NULL,
+ invited_members INT NOT NULL,
+ left_members INT NOT NULL,
+ banned_members INT NOT NULL,
+
+ local_users_in_room INT NOT NULL,
+
+ -- The maximum delta stream position that this row takes into account.
+ completed_delta_stream_id BIGINT NOT NULL
+);
+
+
+-- represents HISTORICAL room statistics for a room
+DROP TABLE IF EXISTS room_stats_historical;
+CREATE TABLE room_stats_historical (
+ room_id TEXT NOT NULL,
+ -- These stats cover the time from (end_ts - bucket_size)...end_ts (in ms).
+ -- Note that end_ts is quantised.
+ end_ts BIGINT NOT NULL,
+ bucket_size BIGINT NOT NULL,
+
+ -- These stats are absolute counts
+ current_state_events BIGINT NOT NULL,
+ joined_members BIGINT NOT NULL,
+ invited_members BIGINT NOT NULL,
+ left_members BIGINT NOT NULL,
+ banned_members BIGINT NOT NULL,
+ local_users_in_room BIGINT NOT NULL,
+
+ -- These stats are per time slice
+ total_events BIGINT NOT NULL,
+ total_event_bytes BIGINT NOT NULL,
+
+ PRIMARY KEY (room_id, end_ts)
+);
+
+-- We use this index to speed up deletion of ancient room stats.
+CREATE INDEX room_stats_historical_end_ts ON room_stats_historical (end_ts);
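+
+-- (Illustrative note, not part of the upstream migration.) As a worked example
+-- of the quantised buckets: with bucket_size = 86400000 (one day in ms), a row
+-- with end_ts = 1577923200000 covers 1577836800000..1577923200000, i.e. the
+-- whole of 2020-01-01 UTC.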
+
+-- represents PRESENT statistics for a user
+-- only holds absolute fields
+DROP TABLE IF EXISTS user_stats_current;
+CREATE TABLE user_stats_current (
+ user_id TEXT NOT NULL PRIMARY KEY,
+
+ joined_rooms BIGINT NOT NULL,
+
+ -- The maximum delta stream position that this row takes into account.
+ completed_delta_stream_id BIGINT NOT NULL
+);
+
+-- represents HISTORICAL statistics for a user
+DROP TABLE IF EXISTS user_stats_historical;
+CREATE TABLE user_stats_historical (
+ user_id TEXT NOT NULL,
+ end_ts BIGINT NOT NULL,
+ bucket_size BIGINT NOT NULL,
+
+ joined_rooms BIGINT NOT NULL,
+
+ invites_sent BIGINT NOT NULL,
+ rooms_created BIGINT NOT NULL,
+ total_events BIGINT NOT NULL,
+ total_event_bytes BIGINT NOT NULL,
+
+ PRIMARY KEY (user_id, end_ts)
+);
+
+-- We use this index to speed up deletion of ancient user stats.
+CREATE INDEX user_stats_historical_end_ts ON user_stats_historical (end_ts);
+
+
+CREATE TABLE room_stats_state (
+ room_id TEXT NOT NULL,
+ name TEXT,
+ canonical_alias TEXT,
+ join_rules TEXT,
+ history_visibility TEXT,
+ encryption TEXT,
+ avatar TEXT,
+ guest_access TEXT,
+ is_federatable BOOLEAN,
+ topic TEXT
+);
+
+CREATE UNIQUE INDEX room_stats_state_room ON room_stats_state(room_id);
diff --git a/synapse/storage/databases/main/schema/delta/56/unique_user_filter_index.py b/synapse/storage/databases/main/schema/delta/56/unique_user_filter_index.py
new file mode 100644
index 00000000..1de8b549
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/unique_user_filter_index.py
@@ -0,0 +1,52 @@
+import logging
+
+from synapse.storage.engines import PostgresEngine
+
+logger = logging.getLogger(__name__)
+
+
+"""
+This migration updates the user_filters table as follows:
+
+ - drops any (user_id, filter_id) duplicates
+ - makes the columns NON-NULLable
+ - turns the index into a UNIQUE index
+"""
+
+
+def run_upgrade(cur, database_engine, *args, **kwargs):
+ pass
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ if isinstance(database_engine, PostgresEngine):
+ select_clause = """
+ SELECT DISTINCT ON (user_id, filter_id) user_id, filter_id, filter_json
+ FROM user_filters
+ """
+ else:
+ select_clause = """
+ SELECT * FROM user_filters GROUP BY user_id, filter_id
+ """
+ sql = """
+ DROP TABLE IF EXISTS user_filters_migration;
+ DROP INDEX IF EXISTS user_filters_unique;
+ CREATE TABLE user_filters_migration (
+ user_id TEXT NOT NULL,
+ filter_id BIGINT NOT NULL,
+ filter_json BYTEA NOT NULL
+ );
+ INSERT INTO user_filters_migration (user_id, filter_id, filter_json)
+ %s;
+ CREATE UNIQUE INDEX user_filters_unique ON user_filters_migration
+ (user_id, filter_id);
+ DROP TABLE user_filters;
+ ALTER TABLE user_filters_migration RENAME TO user_filters;
+ """ % (
+ select_clause,
+ )
+
+ if isinstance(database_engine, PostgresEngine):
+ cur.execute(sql)
+ else:
+ cur.executescript(sql)
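+
+
+# (Illustrative note, not part of the upstream migration.) Both branches keep
+# exactly one row per (user_id, filter_id): PostgreSQL's DISTINCT ON keeps one
+# row per group (an arbitrary one, since no ORDER BY is given), and SQLite's
+# bare GROUP BY with non-aggregated columns likewise keeps an arbitrary row
+# from each group. Either survivor is fine here, as duplicates were redundant.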
diff --git a/synapse/storage/databases/main/schema/delta/56/user_external_ids.sql b/synapse/storage/databases/main/schema/delta/56/user_external_ids.sql
new file mode 100644
index 00000000..91390c45
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/user_external_ids.sql
@@ -0,0 +1,24 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * a table which records mappings from external auth providers to mxids
+ */
+CREATE TABLE IF NOT EXISTS user_external_ids (
+ auth_provider TEXT NOT NULL,
+ external_id TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ UNIQUE (auth_provider, external_id)
+);
diff --git a/synapse/storage/databases/main/schema/delta/56/users_in_public_rooms_idx.sql b/synapse/storage/databases/main/schema/delta/56/users_in_public_rooms_idx.sql
new file mode 100644
index 00000000..149f8be8
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/56/users_in_public_rooms_idx.sql
@@ -0,0 +1,17 @@
+/* Copyright 2019 Matrix.org Foundation CIC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- this was apparently forgotten when the table was created back in delta 53.
+CREATE INDEX users_in_public_rooms_r_idx ON users_in_public_rooms(room_id);
diff --git a/synapse/storage/databases/main/schema/delta/57/delete_old_current_state_events.sql b/synapse/storage/databases/main/schema/delta/57/delete_old_current_state_events.sql
new file mode 100644
index 00000000..aec06c82
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/57/delete_old_current_state_events.sql
@@ -0,0 +1,22 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Add background update to go and delete current state events for rooms the
+-- server is no longer in.
+--
+-- this relies on the 'membership' column of current_state_events, so make sure
+-- that's populated first!
+INSERT INTO background_updates (update_name, progress_json, depends_on)
+ VALUES ('delete_old_current_state_events', '{}', 'current_state_events_membership');
diff --git a/synapse/storage/databases/main/schema/delta/57/device_list_remote_cache_stale.sql b/synapse/storage/databases/main/schema/delta/57/device_list_remote_cache_stale.sql
new file mode 100644
index 00000000..c3b6de20
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/57/device_list_remote_cache_stale.sql
@@ -0,0 +1,25 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Records whether the server thinks that a remote user's cached device list
+-- may be out of date (e.g. if we have received a to-device message from a
+-- device we don't know about).
+CREATE TABLE IF NOT EXISTS device_lists_remote_resync (
+ user_id TEXT NOT NULL,
+ added_ts BIGINT NOT NULL
+);
+
+CREATE UNIQUE INDEX device_lists_remote_resync_idx ON device_lists_remote_resync (user_id);
+CREATE INDEX device_lists_remote_resync_ts_idx ON device_lists_remote_resync (added_ts);
diff --git a/synapse/storage/databases/main/schema/delta/57/local_current_membership.py b/synapse/storage/databases/main/schema/delta/57/local_current_membership.py
new file mode 100644
index 00000000..63b5acdc
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/57/local_current_membership.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# We create a new table called `local_current_membership` that stores the latest
+# membership state of local users in rooms, which helps track leaves/bans/etc
+# even if the server has left the room (and so has deleted the room from
+# `current_state_events`). This will also include outstanding invites for local
+# users for rooms the server isn't in.
+#
+# If the server isn't and hasn't been in the room then it will only include
+# outstanding invites, and not e.g. pre-emptive bans of local users.
+#
+# If the server later rejoins a room, `local_current_membership` can simply be
+# replaced with the new current state of the room (which results in the
+# equivalent behaviour as if the server had remained in the room).
+
+
+def run_upgrade(cur, database_engine, config, *args, **kwargs):
+    # We need to do the insert in the `run_upgrade` section as we don't have
+    # access to `config` in `run_create`.
+
+    # This upgrade may take a bit of time for large servers (e.g. one minute for
+    # matrix.org) but means we avoid a lot of the bookkeeping required to do it
+    # as a background update.
+
+    # We check whether `current_state_events.membership` is up to date by
+    # checking if the relevant background update has finished. If it has
+    # finished we can avoid doing a join against `room_memberships`, which
+    # speeds things up.
+ cur.execute(
+ """SELECT 1 FROM background_updates
+ WHERE update_name = 'current_state_events_membership'
+ """
+ )
+ current_state_membership_up_to_date = not bool(cur.fetchone())
+
+ # Cheekily drop and recreate indices, as that is faster.
+ cur.execute("DROP INDEX local_current_membership_idx")
+ cur.execute("DROP INDEX local_current_membership_room_idx")
+
+ if current_state_membership_up_to_date:
+ sql = """
+ INSERT INTO local_current_membership (room_id, user_id, event_id, membership)
+ SELECT c.room_id, state_key AS user_id, event_id, c.membership
+ FROM current_state_events AS c
+ WHERE type = 'm.room.member' AND c.membership IS NOT NULL AND state_key LIKE ?
+ """
+ else:
+ # We can't rely on the membership column, so we need to join against
+ # `room_memberships`.
+ sql = """
+ INSERT INTO local_current_membership (room_id, user_id, event_id, membership)
+ SELECT c.room_id, state_key AS user_id, event_id, r.membership
+ FROM current_state_events AS c
+ INNER JOIN room_memberships AS r USING (event_id)
+ WHERE type = 'm.room.member' AND state_key LIKE ?
+ """
+ sql = database_engine.convert_param_style(sql)
+ cur.execute(sql, ("%:" + config.server_name,))
+
+ cur.execute(
+ "CREATE UNIQUE INDEX local_current_membership_idx ON local_current_membership(user_id, room_id)"
+ )
+ cur.execute(
+ "CREATE INDEX local_current_membership_room_idx ON local_current_membership(room_id)"
+ )
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ cur.execute(
+ """
+ CREATE TABLE local_current_membership (
+ room_id TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ event_id TEXT NOT NULL,
+ membership TEXT NOT NULL
+ )"""
+ )
+
+ cur.execute(
+ "CREATE UNIQUE INDEX local_current_membership_idx ON local_current_membership(user_id, room_id)"
+ )
+ cur.execute(
+ "CREATE INDEX local_current_membership_room_idx ON local_current_membership(room_id)"
+ )
diff --git a/synapse/storage/databases/main/schema/delta/57/remove_sent_outbound_pokes.sql b/synapse/storage/databases/main/schema/delta/57/remove_sent_outbound_pokes.sql
new file mode 100644
index 00000000..133d80af
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/57/remove_sent_outbound_pokes.sql
@@ -0,0 +1,21 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- we no longer keep sent outbound device pokes in the db; clear them out
+-- so that we don't have to worry about them.
+--
+-- This is a sequential scan, but it doesn't take too long.
+
+DELETE FROM device_lists_outbound_pokes WHERE sent;
diff --git a/synapse/storage/databases/main/schema/delta/57/rooms_version_column.sql b/synapse/storage/databases/main/schema/delta/57/rooms_version_column.sql
new file mode 100644
index 00000000..352a66f5
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/57/rooms_version_column.sql
@@ -0,0 +1,24 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- We want to start storing the room version independently of
+-- `current_state_events` so that we can delete stale entries from it without
+-- losing the information.
+ALTER TABLE rooms ADD COLUMN room_version TEXT;
+
+
+INSERT INTO background_updates (update_name, progress_json)
+ VALUES ('add_rooms_room_version_column', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/57/rooms_version_column_2.sql.postgres b/synapse/storage/databases/main/schema/delta/57/rooms_version_column_2.sql.postgres
new file mode 100644
index 00000000..c601cff6
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/57/rooms_version_column_2.sql.postgres
@@ -0,0 +1,35 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- when we first added the room_version column, it was populated via a background
+-- update. We now need it to be populated before synapse starts, so we populate
+-- any remaining rows which still have a NULL room version now. For servers which
+-- have completed the background update, this will be pretty quick.
+
+-- the following query will set room_version to NULL if no create event is found for
+-- the room in current_state_events, and will set it to '1' if a create event with no
+-- room_version is found.
+
+UPDATE rooms SET room_version=(
+ SELECT COALESCE(json::json->'content'->>'room_version','1')
+ FROM current_state_events cse INNER JOIN event_json ej USING (event_id)
+ WHERE cse.room_id=rooms.room_id AND cse.type='m.room.create' AND cse.state_key=''
+) WHERE rooms.room_version IS NULL;
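+
+-- (Illustrative note, not part of the upstream migration.) As a worked example
+-- of the extraction: for an event_json row whose json column holds
+-- '{"content": {"room_version": "5"}, ...}', the expression
+-- json::json->'content'->>'room_version' yields the text '5'; if the create
+-- event predates room versions and has no such key, COALESCE falls back to '1'.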
+
+-- we still allow the background update to complete: it has the useful side-effect of
+-- populating `rooms` with any missing rooms (based on the current_state_events table).
+
+-- see also rooms_version_column_2.sql.sqlite which has a copy of the above query, using
+-- sqlite syntax for the json extraction.
diff --git a/synapse/storage/databases/main/schema/delta/57/rooms_version_column_2.sql.sqlite b/synapse/storage/databases/main/schema/delta/57/rooms_version_column_2.sql.sqlite
new file mode 100644
index 00000000..335c6f20
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/57/rooms_version_column_2.sql.sqlite
@@ -0,0 +1,22 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- see rooms_version_column_2.sql.postgres for details of what's going on here.
+
+UPDATE rooms SET room_version=(
+ SELECT COALESCE(json_extract(ej.json, '$.content.room_version'), '1')
+ FROM current_state_events cse INNER JOIN event_json ej USING (event_id)
+ WHERE cse.room_id=rooms.room_id AND cse.type='m.room.create' AND cse.state_key=''
+) WHERE rooms.room_version IS NULL;
diff --git a/synapse/storage/databases/main/schema/delta/57/rooms_version_column_3.sql.postgres b/synapse/storage/databases/main/schema/delta/57/rooms_version_column_3.sql.postgres
new file mode 100644
index 00000000..92aaadde
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/57/rooms_version_column_3.sql.postgres
@@ -0,0 +1,39 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- When we first added the room_version column to the rooms table, it was populated from
+-- the current_state_events table. However, there was an issue causing a background
+-- update to clean up the current_state_events table for rooms where the server is no
+-- longer participating, before that column could be populated. Therefore, some rooms had
+-- a NULL room_version.
+
+-- The rooms_version_column_2.sql.* delta files were introduced to make this
+-- population synchronous instead of running it in a background update, which
+-- fixed this issue. However, all of the instances of Synapse installed or
+-- updated in the meantime got their rooms table corrupted with NULL room_versions.
+
+-- This query fishes out the room versions from the create event using the state_events
+-- table instead of the current_state_events one, as the former still has all of the
+-- create events.
+
+UPDATE rooms SET room_version=(
+ SELECT COALESCE(json::json->'content'->>'room_version','1')
+ FROM state_events se INNER JOIN event_json ej USING (event_id)
+ WHERE se.room_id=rooms.room_id AND se.type='m.room.create' AND se.state_key=''
+ LIMIT 1
+) WHERE rooms.room_version IS NULL;
+
+-- see also rooms_version_column_3.sql.sqlite which has a copy of the above query, using
+-- sqlite syntax for the json extraction.
diff --git a/synapse/storage/databases/main/schema/delta/57/rooms_version_column_3.sql.sqlite b/synapse/storage/databases/main/schema/delta/57/rooms_version_column_3.sql.sqlite
new file mode 100644
index 00000000..e19dab97
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/57/rooms_version_column_3.sql.sqlite
@@ -0,0 +1,23 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- see rooms_version_column_3.sql.postgres for details of what's going on here.
+
+UPDATE rooms SET room_version=(
+ SELECT COALESCE(json_extract(ej.json, '$.content.room_version'), '1')
+ FROM state_events se INNER JOIN event_json ej USING (event_id)
+ WHERE se.room_id=rooms.room_id AND se.type='m.room.create' AND se.state_key=''
+ LIMIT 1
+) WHERE rooms.room_version IS NULL;
diff --git a/synapse/storage/databases/main/schema/delta/58/02remove_dup_outbound_pokes.sql b/synapse/storage/databases/main/schema/delta/58/02remove_dup_outbound_pokes.sql
new file mode 100644
index 00000000..fdc39e9b
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/02remove_dup_outbound_pokes.sql
@@ -0,0 +1,22 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /* For some reason, we have accumulated duplicate entries in
+ * device_lists_outbound_pokes, which makes prune_outbound_device_list_pokes less
+ * efficient.
+ */
+
+INSERT INTO background_updates (ordering, update_name, progress_json)
+ VALUES (5800, 'remove_dup_outbound_pokes', '{}');
diff --git a/synapse/storage/databases/main/schema/delta/58/03persist_ui_auth.sql b/synapse/storage/databases/main/schema/delta/58/03persist_ui_auth.sql
new file mode 100644
index 00000000..dcb593fc
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/03persist_ui_auth.sql
@@ -0,0 +1,36 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS ui_auth_sessions(
+ session_id TEXT NOT NULL, -- The session ID passed to the client.
+ creation_time BIGINT NOT NULL, -- The time this session was created (epoch time in milliseconds).
+ serverdict TEXT NOT NULL, -- A JSON dictionary of arbitrary data added by Synapse.
+ clientdict TEXT NOT NULL, -- A JSON dictionary of arbitrary data from the client.
+ uri TEXT NOT NULL, -- The URI the UI authentication session is using.
+ method TEXT NOT NULL, -- The HTTP method the UI authentication session is using.
+    -- The clientdict, uri, and method make up a tuple that must be immutable
+    -- throughout the lifetime of the UI Auth session.
+    description TEXT NOT NULL, -- A human-readable description of the operation which caused the UI Auth flow to occur.
+ UNIQUE (session_id)
+);
+
+CREATE TABLE IF NOT EXISTS ui_auth_sessions_credentials(
+ session_id TEXT NOT NULL, -- The corresponding UI Auth session.
+ stage_type TEXT NOT NULL, -- The stage type.
+ result TEXT NOT NULL, -- The result of the stage verification, stored as JSON.
+ UNIQUE (session_id, stage_type),
+ FOREIGN KEY (session_id)
+ REFERENCES ui_auth_sessions (session_id)
+);
diff --git a/synapse/storage/databases/main/schema/delta/58/05cache_instance.sql.postgres b/synapse/storage/databases/main/schema/delta/58/05cache_instance.sql.postgres
new file mode 100644
index 00000000..aa46eb0e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/05cache_instance.sql.postgres
@@ -0,0 +1,30 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We keep the old table here to enable us to roll back. It doesn't matter
+-- that we have dropped all the data here.
+TRUNCATE cache_invalidation_stream;
+
+CREATE TABLE cache_invalidation_stream_by_instance (
+ stream_id BIGINT NOT NULL,
+ instance_name TEXT NOT NULL,
+ cache_func TEXT NOT NULL,
+ keys TEXT[],
+ invalidation_ts BIGINT
+);
+
+CREATE UNIQUE INDEX cache_invalidation_stream_by_instance_id ON cache_invalidation_stream_by_instance(stream_id);
+
+CREATE SEQUENCE cache_invalidation_stream_seq;
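+
+-- (Illustrative note, not part of the upstream migration.) Each writer can
+-- then allocate globally-unique stream ids from the sequence, e.g.:
+--   SELECT nextval('cache_invalidation_stream_seq');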
diff --git a/synapse/storage/databases/main/schema/delta/58/06dlols_unique_idx.py b/synapse/storage/databases/main/schema/delta/58/06dlols_unique_idx.py
new file mode 100644
index 00000000..d353f2bc
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/06dlols_unique_idx.py
@@ -0,0 +1,80 @@
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This migration rebuilds the device_lists_outbound_last_success table without duplicate
+entries, and with a UNIQUE index.
+"""
+
+import logging
+from io import StringIO
+
+from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
+from synapse.storage.prepare_database import execute_statements_from_stream
+from synapse.storage.types import Cursor
+
+logger = logging.getLogger(__name__)
+
+
+def run_upgrade(*args, **kwargs):
+ pass
+
+
+def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
+ # some instances might already have this index, in which case we can skip this
+ if isinstance(database_engine, PostgresEngine):
+ cur.execute(
+ """
+ SELECT 1 FROM pg_class WHERE relkind = 'i'
+ AND relname = 'device_lists_outbound_last_success_unique_idx'
+ """
+ )
+
+ if cur.rowcount:
+ logger.info(
+ "Unique index exists on device_lists_outbound_last_success: "
+ "skipping rebuild"
+ )
+ return
+
+ logger.info("Rebuilding device_lists_outbound_last_success with unique index")
+ execute_statements_from_stream(cur, StringIO(_rebuild_commands))
+
+
+# there might be duplicates, so the easiest way to achieve this is to create a new
+# table with the right data, and rename it into place
+
+_rebuild_commands = """
+DROP TABLE IF EXISTS device_lists_outbound_last_success_new;
+
+CREATE TABLE device_lists_outbound_last_success_new (
+ destination TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ stream_id BIGINT NOT NULL
+);
+
+-- this took about 30 seconds on matrix.org's 16 million rows.
+INSERT INTO device_lists_outbound_last_success_new
+ SELECT destination, user_id, MAX(stream_id) FROM device_lists_outbound_last_success
+ GROUP BY destination, user_id;
+
+-- and this takes another 30 seconds.
+CREATE UNIQUE INDEX device_lists_outbound_last_success_unique_idx
+ ON device_lists_outbound_last_success_new (destination, user_id);
+
+DROP TABLE device_lists_outbound_last_success;
+
+ALTER TABLE device_lists_outbound_last_success_new
+ RENAME TO device_lists_outbound_last_success;
+"""
diff --git a/synapse/storage/databases/main/schema/delta/58/08_media_safe_from_quarantine.sql.postgres b/synapse/storage/databases/main/schema/delta/58/08_media_safe_from_quarantine.sql.postgres
new file mode 100644
index 00000000..597f2ffd
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/08_media_safe_from_quarantine.sql.postgres
@@ -0,0 +1,18 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- The local_media_repository may contain files which should never be
+-- quarantined, e.g. files from sticker packs.
+ALTER TABLE local_media_repository ADD COLUMN safe_from_quarantine BOOLEAN NOT NULL DEFAULT FALSE;
diff --git a/synapse/storage/databases/main/schema/delta/58/08_media_safe_from_quarantine.sql.sqlite b/synapse/storage/databases/main/schema/delta/58/08_media_safe_from_quarantine.sql.sqlite
new file mode 100644
index 00000000..69db89ac
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/08_media_safe_from_quarantine.sql.sqlite
@@ -0,0 +1,18 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- The local_media_repository may contain files which should never be
+-- quarantined, e.g. files from sticker packs.
+ALTER TABLE local_media_repository ADD COLUMN safe_from_quarantine BOOLEAN NOT NULL DEFAULT 0;
diff --git a/synapse/storage/databases/main/schema/delta/58/10drop_local_rejections_stream.sql b/synapse/storage/databases/main/schema/delta/58/10drop_local_rejections_stream.sql
new file mode 100644
index 00000000..eb57203e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/10drop_local_rejections_stream.sql
@@ -0,0 +1,22 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+The release of Synapse 1.16.0 on PyPI incorrectly contained a migration which
+added a table called 'local_rejections_stream'. This table is not used, and
+we drop it here for anyone who was affected.
+*/
+
+DROP TABLE IF EXISTS local_rejections_stream;
diff --git a/synapse/storage/databases/main/schema/delta/58/10federation_pos_instance_name.sql b/synapse/storage/databases/main/schema/delta/58/10federation_pos_instance_name.sql
new file mode 100644
index 00000000..1cc2633a
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/10federation_pos_instance_name.sql
@@ -0,0 +1,22 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We need to store the stream positions by instance in a sharded config world.
+--
+-- We default to 'master' as we want the column to be NOT NULL; the instance
+-- name is correctly reset to match the config each time we start up.
+ALTER TABLE federation_stream_position ADD COLUMN instance_name TEXT NOT NULL DEFAULT 'master';
+
+CREATE UNIQUE INDEX federation_stream_position_instance ON federation_stream_position(type, instance_name);
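+
+-- (Illustrative note, not part of the upstream migration.) With the new unique
+-- key, each federation sender reads and advances its own row; a sketch of the
+-- per-instance lookup might look like:
+--   SELECT stream_id FROM federation_stream_position
+--   WHERE type = 'federation' AND instance_name = 'master';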
diff --git a/synapse/storage/databases/main/schema/delta/58/11user_id_seq.py b/synapse/storage/databases/main/schema/delta/58/11user_id_seq.py
new file mode 100644
index 00000000..4310ec12
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/11user_id_seq.py
@@ -0,0 +1,34 @@
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Adds a postgres SEQUENCE for generating guest user IDs.
+"""
+
+from synapse.storage.databases.main.registration import (
+ find_max_generated_user_id_localpart,
+)
+from synapse.storage.engines import PostgresEngine
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ if not isinstance(database_engine, PostgresEngine):
+ return
+
+ next_id = find_max_generated_user_id_localpart(cur) + 1
+ cur.execute("CREATE SEQUENCE user_id_seq START WITH %s", (next_id,))
+
+
+def run_upgrade(*args, **kwargs):
+ pass
diff --git a/synapse/storage/databases/main/schema/delta/58/12room_stats.sql b/synapse/storage/databases/main/schema/delta/58/12room_stats.sql
new file mode 100644
index 00000000..cade5dcc
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/12room_stats.sql
@@ -0,0 +1,32 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Recalculate the stats for all rooms after the fix to joined_members erroneously
+-- incrementing on per-room profile changes.
+
+-- Note that the populate_stats_process_rooms background update is already set to
+-- run if you're upgrading from Synapse <1.0.0.
+
+-- Additionally, if you upgraded to v1.18.0 (which doesn't include this fix),
+-- let this bg job run, and then updated to v1.19.0, you'd end up with only half
+-- of your rooms having their stats recalculated after this fix was in place.
+
+-- So we've switched the old `populate_stats_process_rooms` background job to a
+-- no-op, and kicked off a bg job with a new name, but with the same
+-- functionality as the old one. This effectively restarts the background job
+-- from the beginning, without running it twice in a row, supporting both
+-- upgrade use cases.
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('populate_stats_process_rooms_2', '{}');
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/application_services.sql b/synapse/storage/databases/main/schema/full_schemas/16/application_services.sql
new file mode 100644
index 00000000..883fcd10
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/application_services.sql
@@ -0,0 +1,37 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* We used to create tables called application_services and
+ * application_services_regex, but these are no longer used and are removed in
+ * delta 54.
+ */
+
+
+CREATE TABLE IF NOT EXISTS application_services_state(
+ as_id TEXT PRIMARY KEY,
+ state VARCHAR(5),
+ last_txn INTEGER
+);
+
+CREATE TABLE IF NOT EXISTS application_services_txns(
+ as_id TEXT NOT NULL,
+ txn_id INTEGER NOT NULL,
+ event_ids TEXT NOT NULL,
+ UNIQUE(as_id, txn_id)
+);
+
+CREATE INDEX application_services_txns_id ON application_services_txns (
+ as_id
+);
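
An illustrative sketch (not part of the upstream files) of how the transaction table above might be read to find the oldest outstanding transaction for an application service, assuming a psycopg2-style cursor:

def oldest_unsent_txn(cur, as_id):
    # Assuming transactions are delivered in txn_id order (the last_txn column
    # in application_services_state suggests this), the lowest txn_id is the
    # next one to (re)send; event_ids is stored as serialised text.
    cur.execute(
        "SELECT txn_id, event_ids FROM application_services_txns"
        " WHERE as_id = %s ORDER BY txn_id ASC LIMIT 1",
        (as_id,),
    )
    return cur.fetchone()
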
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/event_edges.sql b/synapse/storage/databases/main/schema/full_schemas/16/event_edges.sql
new file mode 100644
index 00000000..10ce2aa7
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/event_edges.sql
@@ -0,0 +1,70 @@
+/* Copyright 2014-2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* We used to create tables called event_destinations and
+ * state_forward_extremities, but these are no longer used and are removed in
+ * delta 54.
+ */
+
+CREATE TABLE IF NOT EXISTS event_forward_extremities(
+ event_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ UNIQUE (event_id, room_id)
+);
+
+CREATE INDEX ev_extrem_room ON event_forward_extremities(room_id);
+CREATE INDEX ev_extrem_id ON event_forward_extremities(event_id);
+
+
+CREATE TABLE IF NOT EXISTS event_backward_extremities(
+ event_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ UNIQUE (event_id, room_id)
+);
+
+CREATE INDEX ev_b_extrem_room ON event_backward_extremities(room_id);
+CREATE INDEX ev_b_extrem_id ON event_backward_extremities(event_id);
+
+
+CREATE TABLE IF NOT EXISTS event_edges(
+ event_id TEXT NOT NULL,
+ prev_event_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ is_state BOOL NOT NULL, -- true if this is a prev_state edge rather than a regular
+ -- event dag edge.
+ UNIQUE (event_id, prev_event_id, room_id, is_state)
+);
+
+CREATE INDEX ev_edges_id ON event_edges(event_id);
+CREATE INDEX ev_edges_prev_id ON event_edges(prev_event_id);
+
+
+CREATE TABLE IF NOT EXISTS room_depth(
+ room_id TEXT NOT NULL,
+ min_depth INTEGER NOT NULL,
+ UNIQUE (room_id)
+);
+
+CREATE INDEX room_depth_room ON room_depth(room_id);
+
+CREATE TABLE IF NOT EXISTS event_auth(
+ event_id TEXT NOT NULL,
+ auth_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ UNIQUE (event_id, auth_id, room_id)
+);
+
+CREATE INDEX evauth_edges_id ON event_auth(event_id);
+CREATE INDEX evauth_edges_auth_id ON event_auth(auth_id);
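
The is_state flag above distinguishes prev_state edges from ordinary DAG edges; a hedged sketch of reading an event's parents under this schema, with a psycopg2-style cursor:

def get_prev_event_ids(cur, event_id):
    # Parents of an event in the room DAG, excluding prev_state edges
    # (is_state = TRUE marks those, per the column comment above).
    cur.execute(
        "SELECT prev_event_id FROM event_edges"
        " WHERE event_id = %s AND NOT is_state",
        (event_id,),
    )
    return [row[0] for row in cur.fetchall()]
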
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/event_signatures.sql b/synapse/storage/databases/main/schema/full_schemas/16/event_signatures.sql
new file mode 100644
index 00000000..95826da4
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/event_signatures.sql
@@ -0,0 +1,38 @@
+/* Copyright 2014-2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /* We used to create tables called event_content_hashes and event_edge_hashes,
+ * but these are no longer used and are removed in delta 54.
+ */
+
+CREATE TABLE IF NOT EXISTS event_reference_hashes (
+ event_id TEXT,
+ algorithm TEXT,
+ hash bytea,
+ UNIQUE (event_id, algorithm)
+);
+
+CREATE INDEX event_reference_hashes_id ON event_reference_hashes(event_id);
+
+
+CREATE TABLE IF NOT EXISTS event_signatures (
+ event_id TEXT,
+ signature_name TEXT,
+ key_id TEXT,
+ signature bytea,
+ UNIQUE (event_id, signature_name, key_id)
+);
+
+CREATE INDEX event_signatures_id ON event_signatures(event_id);
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/im.sql b/synapse/storage/databases/main/schema/full_schemas/16/im.sql
new file mode 100644
index 00000000..a1a2aa8e
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/im.sql
@@ -0,0 +1,120 @@
+/* Copyright 2014-2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* We used to create tables called room_hosts and feedback,
+ * but these are no longer used and are removed in delta 54.
+ */
+
+CREATE TABLE IF NOT EXISTS events(
+ stream_ordering INTEGER PRIMARY KEY,
+ topological_ordering BIGINT NOT NULL,
+ event_id TEXT NOT NULL,
+ type TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+
+ -- 'content' used to be created NULLable, but as of delta 50 we drop that constraint.
+ -- the hack we use to drop the constraint doesn't work for an in-memory sqlite
+ -- database, which breaks the sytests. Hence, we no longer make it nullable.
+ content TEXT,
+
+ unrecognized_keys TEXT,
+ processed BOOL NOT NULL,
+ outlier BOOL NOT NULL,
+ depth BIGINT DEFAULT 0 NOT NULL,
+ UNIQUE (event_id)
+);
+
+CREATE INDEX events_stream_ordering ON events (stream_ordering);
+CREATE INDEX events_topological_ordering ON events (topological_ordering);
+CREATE INDEX events_order ON events (topological_ordering, stream_ordering);
+CREATE INDEX events_room_id ON events (room_id);
+CREATE INDEX events_order_room ON events (
+ room_id, topological_ordering, stream_ordering
+);
+
+
+CREATE TABLE IF NOT EXISTS event_json(
+ event_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ internal_metadata TEXT NOT NULL,
+ json TEXT NOT NULL,
+ UNIQUE (event_id)
+);
+
+CREATE INDEX event_json_room_id ON event_json(room_id);
+
+
+CREATE TABLE IF NOT EXISTS state_events(
+ event_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ type TEXT NOT NULL,
+ state_key TEXT NOT NULL,
+ prev_state TEXT,
+ UNIQUE (event_id)
+);
+
+CREATE INDEX state_events_room_id ON state_events (room_id);
+CREATE INDEX state_events_type ON state_events (type);
+CREATE INDEX state_events_state_key ON state_events (state_key);
+
+
+CREATE TABLE IF NOT EXISTS current_state_events(
+ event_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ type TEXT NOT NULL,
+ state_key TEXT NOT NULL,
+ UNIQUE (event_id),
+ UNIQUE (room_id, type, state_key)
+);
+
+CREATE INDEX current_state_events_room_id ON current_state_events (room_id);
+CREATE INDEX current_state_events_type ON current_state_events (type);
+CREATE INDEX current_state_events_state_key ON current_state_events (state_key);
+
+CREATE TABLE IF NOT EXISTS room_memberships(
+ event_id TEXT NOT NULL,
+ user_id TEXT NOT NULL,
+ sender TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ membership TEXT NOT NULL,
+ UNIQUE (event_id)
+);
+
+CREATE INDEX room_memberships_room_id ON room_memberships (room_id);
+CREATE INDEX room_memberships_user_id ON room_memberships (user_id);
+
+CREATE TABLE IF NOT EXISTS topics(
+ event_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ topic TEXT NOT NULL,
+ UNIQUE (event_id)
+);
+
+CREATE INDEX topics_room_id ON topics(room_id);
+
+CREATE TABLE IF NOT EXISTS room_names(
+ event_id TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ name TEXT NOT NULL,
+ UNIQUE (event_id)
+);
+
+CREATE INDEX room_names_room_id ON room_names(room_id);
+
+CREATE TABLE IF NOT EXISTS rooms(
+ room_id TEXT PRIMARY KEY NOT NULL,
+ is_public BOOL,
+ creator TEXT
+);
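
The events_order_room index above exists so that timeline pagination within a room can walk the index directly; an illustrative query shape that would use it, assuming a psycopg2-style cursor:

def latest_events_in_room(cur, room_id, limit=20):
    # Newest events first, ordered the same way as the
    # (room_id, topological_ordering, stream_ordering) index.
    cur.execute(
        "SELECT event_id FROM events WHERE room_id = %s"
        " ORDER BY topological_ordering DESC, stream_ordering DESC"
        " LIMIT %s",
        (room_id, limit),
    )
    return [row[0] for row in cur.fetchall()]
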
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/keys.sql b/synapse/storage/databases/main/schema/full_schemas/16/keys.sql
new file mode 100644
index 00000000..11cdffdb
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/keys.sql
@@ -0,0 +1,26 @@
+/* Copyright 2014-2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- we used to create a table called server_tls_certificates, but this is no
+-- longer used, and is removed in delta 54.
+
+CREATE TABLE IF NOT EXISTS server_signature_keys(
+ server_name TEXT, -- Server name.
+ key_id TEXT, -- Key version.
+ from_server TEXT, -- Which key server the key was fetched from.
+ ts_added_ms BIGINT, -- When the key was added.
+ verify_key bytea, -- NACL verification key.
+ UNIQUE (server_name, key_id)
+);
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/media_repository.sql b/synapse/storage/databases/main/schema/full_schemas/16/media_repository.sql
new file mode 100644
index 00000000..8f3759bb
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/media_repository.sql
@@ -0,0 +1,68 @@
+/* Copyright 2014-2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS local_media_repository (
+ media_id TEXT, -- The id used to refer to the media.
+ media_type TEXT, -- The MIME-type of the media.
+ media_length INTEGER, -- Length of the media in bytes.
+ created_ts BIGINT, -- When the content was uploaded in ms.
+ upload_name TEXT, -- The name the media was uploaded with.
+ user_id TEXT, -- The user who uploaded the file.
+ UNIQUE (media_id)
+);
+
+CREATE TABLE IF NOT EXISTS local_media_repository_thumbnails (
+ media_id TEXT, -- The id used to refer to the media.
+ thumbnail_width INTEGER, -- The width of the thumbnail in pixels.
+ thumbnail_height INTEGER, -- The height of the thumbnail in pixels.
+ thumbnail_type TEXT, -- The MIME-type of the thumbnail.
+ thumbnail_method TEXT, -- The method used to make the thumbnail.
+ thumbnail_length INTEGER, -- The length of the thumbnail in bytes.
+ UNIQUE (
+ media_id, thumbnail_width, thumbnail_height, thumbnail_type
+ )
+);
+
+CREATE INDEX local_media_repository_thumbnails_media_id
+ ON local_media_repository_thumbnails (media_id);
+
+CREATE TABLE IF NOT EXISTS remote_media_cache (
+ media_origin TEXT, -- The remote HS the media came from.
+ media_id TEXT, -- The id used to refer to the media on that server.
+ media_type TEXT, -- The MIME-type of the media.
+ created_ts BIGINT, -- When the content was uploaded in ms.
+ upload_name TEXT, -- The name the media was uploaded with.
+ media_length INTEGER, -- Length of the media in bytes.
+ filesystem_id TEXT, -- The name used to store the media on disk.
+ UNIQUE (media_origin, media_id)
+);
+
+CREATE TABLE IF NOT EXISTS remote_media_cache_thumbnails (
+ media_origin TEXT, -- The remote HS the media came from.
+ media_id TEXT, -- The id used to refer to the media.
+ thumbnail_width INTEGER, -- The width of the thumbnail in pixels.
+ thumbnail_height INTEGER, -- The height of the thumbnail in pixels.
+ thumbnail_method TEXT, -- The method used to make the thumbnail
+ thumbnail_type TEXT, -- The MIME-type of the thumbnail.
+ thumbnail_length INTEGER, -- The length of the thumbnail in bytes.
+ filesystem_id TEXT, -- The name used to store the media on disk.
+ UNIQUE (
+ media_origin, media_id, thumbnail_width, thumbnail_height,
+ thumbnail_type
+ )
+);
+
+CREATE INDEX remote_media_cache_thumbnails_media_id
+ ON remote_media_cache_thumbnails (media_id);
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/presence.sql b/synapse/storage/databases/main/schema/full_schemas/16/presence.sql
new file mode 100644
index 00000000..01d2d8f8
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/presence.sql
@@ -0,0 +1,32 @@
+/* Copyright 2014-2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+CREATE TABLE IF NOT EXISTS presence(
+ user_id TEXT NOT NULL,
+ state VARCHAR(20),
+ status_msg TEXT,
+ mtime BIGINT, -- milliseconds since last state change
+ UNIQUE (user_id)
+);
+
+-- For each of /my/ users which possibly-remote users are allowed to see their
+-- presence state
+CREATE TABLE IF NOT EXISTS presence_allow_inbound(
+ observed_user_id TEXT NOT NULL,
+ observer_user_id TEXT NOT NULL, -- a UserID,
+ UNIQUE (observed_user_id, observer_user_id)
+);
+
+-- We used to create a table called presence_list, but this is no longer used
+-- and is removed in delta 54.
\ No newline at end of file
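
An illustrative check against presence_allow_inbound above: may a (possibly remote) observer see a local user's presence? Assumes a psycopg2-style cursor:

def can_see_presence(cur, observed_user_id, observer_user_id):
    # A row in presence_allow_inbound grants the observer visibility of the
    # observed local user's presence state.
    cur.execute(
        "SELECT 1 FROM presence_allow_inbound"
        " WHERE observed_user_id = %s AND observer_user_id = %s",
        (observed_user_id, observer_user_id),
    )
    return cur.fetchone() is not None
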
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/profiles.sql b/synapse/storage/databases/main/schema/full_schemas/16/profiles.sql
new file mode 100644
index 00000000..c04f4747
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/profiles.sql
@@ -0,0 +1,20 @@
+/* Copyright 2014-2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+CREATE TABLE IF NOT EXISTS profiles(
+ user_id TEXT NOT NULL,
+ displayname TEXT,
+ avatar_url TEXT,
+ UNIQUE(user_id)
+);
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/push.sql b/synapse/storage/databases/main/schema/full_schemas/16/push.sql
new file mode 100644
index 00000000..e44465cf
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/push.sql
@@ -0,0 +1,74 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS rejections(
+ event_id TEXT NOT NULL,
+ reason TEXT NOT NULL,
+ last_check TEXT NOT NULL,
+ UNIQUE (event_id)
+);
+
+-- Push notification endpoints that users have configured
+CREATE TABLE IF NOT EXISTS pushers (
+ id BIGINT PRIMARY KEY,
+ user_name TEXT NOT NULL,
+ access_token BIGINT DEFAULT NULL,
+ profile_tag VARCHAR(32) NOT NULL,
+ kind VARCHAR(8) NOT NULL,
+ app_id VARCHAR(64) NOT NULL,
+ app_display_name VARCHAR(64) NOT NULL,
+ device_display_name VARCHAR(128) NOT NULL,
+ pushkey bytea NOT NULL,
+ ts BIGINT NOT NULL,
+ lang VARCHAR(8),
+ data bytea,
+ last_token TEXT,
+ last_success BIGINT,
+ failing_since BIGINT,
+ UNIQUE (app_id, pushkey)
+);
+
+CREATE TABLE IF NOT EXISTS push_rules (
+ id BIGINT PRIMARY KEY,
+ user_name TEXT NOT NULL,
+ rule_id TEXT NOT NULL,
+ priority_class SMALLINT NOT NULL,
+ priority INTEGER NOT NULL DEFAULT 0,
+ conditions TEXT NOT NULL,
+ actions TEXT NOT NULL,
+ UNIQUE(user_name, rule_id)
+);
+
+CREATE INDEX push_rules_user_name on push_rules (user_name);
+
+CREATE TABLE IF NOT EXISTS user_filters(
+ user_id TEXT,
+ filter_id BIGINT,
+ filter_json bytea
+);
+
+CREATE INDEX user_filters_by_user_id_filter_id ON user_filters(
+ user_id, filter_id
+);
+
+CREATE TABLE IF NOT EXISTS push_rules_enable (
+ id BIGINT PRIMARY KEY,
+ user_name TEXT NOT NULL,
+ rule_id TEXT NOT NULL,
+ enabled SMALLINT,
+ UNIQUE(user_name, rule_id)
+);
+
+CREATE INDEX push_rules_enable_user_name on push_rules_enable (user_name);
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/redactions.sql b/synapse/storage/databases/main/schema/full_schemas/16/redactions.sql
new file mode 100644
index 00000000..318f0d9a
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/redactions.sql
@@ -0,0 +1,22 @@
+/* Copyright 2014-2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+CREATE TABLE IF NOT EXISTS redactions (
+ event_id TEXT NOT NULL,
+ redacts TEXT NOT NULL,
+ UNIQUE (event_id)
+);
+
+CREATE INDEX redactions_event_id ON redactions (event_id);
+CREATE INDEX redactions_redacts ON redactions (redacts);
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/room_aliases.sql b/synapse/storage/databases/main/schema/full_schemas/16/room_aliases.sql
new file mode 100644
index 00000000..d47da3b1
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/room_aliases.sql
@@ -0,0 +1,29 @@
+/* Copyright 2014-2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS room_aliases(
+ room_alias TEXT NOT NULL,
+ room_id TEXT NOT NULL,
+ UNIQUE (room_alias)
+);
+
+CREATE INDEX room_aliases_id ON room_aliases(room_id);
+
+CREATE TABLE IF NOT EXISTS room_alias_servers(
+ room_alias TEXT NOT NULL,
+ server TEXT NOT NULL
+);
+
+CREATE INDEX room_alias_servers_alias ON room_alias_servers(room_alias);
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/state.sql b/synapse/storage/databases/main/schema/full_schemas/16/state.sql
new file mode 100644
index 00000000..96391a8f
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/state.sql
@@ -0,0 +1,40 @@
+/* Copyright 2014-2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE IF NOT EXISTS state_groups(
+ id BIGINT PRIMARY KEY,
+ room_id TEXT NOT NULL,
+ event_id TEXT NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS state_groups_state(
+ state_group BIGINT NOT NULL,
+ room_id TEXT NOT NULL,
+ type TEXT NOT NULL,
+ state_key TEXT NOT NULL,
+ event_id TEXT NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS event_to_state_groups(
+ event_id TEXT NOT NULL,
+ state_group BIGINT NOT NULL,
+ UNIQUE (event_id)
+);
+
+CREATE INDEX state_groups_id ON state_groups(id);
+
+CREATE INDEX state_groups_state_id ON state_groups_state(state_group);
+CREATE INDEX state_groups_state_tuple ON state_groups_state(room_id, type, state_key);
+CREATE INDEX event_to_state_groups_id ON event_to_state_groups(event_id);
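
Under this old schema, state_groups_state appears to hold each group's full state (there is no delta/edges table in this file), so resolving the state at an event would be a single join; a hedged sketch with a psycopg2-style cursor:

def state_at_event(cur, event_id):
    # (type, state_key) -> event_id for the state group the event belongs to,
    # assuming state_groups_state stores complete (non-delta) state as this
    # schema suggests.
    cur.execute(
        "SELECT s.type, s.state_key, s.event_id"
        " FROM event_to_state_groups AS e"
        " JOIN state_groups_state AS s ON s.state_group = e.state_group"
        " WHERE e.event_id = %s",
        (event_id,),
    )
    return {(typ, key): ev_id for typ, key, ev_id in cur.fetchall()}
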
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/transactions.sql b/synapse/storage/databases/main/schema/full_schemas/16/transactions.sql
new file mode 100644
index 00000000..17e67bed
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/transactions.sql
@@ -0,0 +1,44 @@
+/* Copyright 2014-2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-- Stores what transaction ids we have received and what our response was
+CREATE TABLE IF NOT EXISTS received_transactions(
+ transaction_id TEXT,
+ origin TEXT,
+ ts BIGINT,
+ response_code INTEGER,
+ response_json bytea,
+ has_been_referenced smallint default 0, -- Whether this has been referenced by a prev_tx
+ UNIQUE (transaction_id, origin)
+);
+
+CREATE INDEX transactions_have_ref ON received_transactions(origin, has_been_referenced);-- WHERE has_been_referenced = 0;
+
+-- For sent transactions only.
+CREATE TABLE IF NOT EXISTS transaction_id_to_pdu(
+ transaction_id INTEGER,
+ destination TEXT,
+ pdu_id TEXT,
+ pdu_origin TEXT,
+ UNIQUE (transaction_id, destination)
+);
+
+CREATE INDEX transaction_id_to_pdu_dest ON transaction_id_to_pdu(destination);
+
+-- To track destination health
+CREATE TABLE IF NOT EXISTS destinations(
+ destination TEXT PRIMARY KEY,
+ retry_last_ts BIGINT,
+ retry_interval INTEGER
+);
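
A hedged sketch of how the destination health columns above might be used to decide whether a remote server is still in backoff; field semantics are assumed from the column names and the millisecond timestamps used elsewhere in this schema:

import time

def destination_in_backoff(cur, destination):
    # True if the last recorded failure for this destination is more recent
    # than its retry interval allows.
    cur.execute(
        "SELECT retry_last_ts, retry_interval FROM destinations"
        " WHERE destination = %s",
        (destination,),
    )
    row = cur.fetchone()
    if row is None or row[0] is None:
        return False
    retry_last_ts, retry_interval = row
    return time.time() * 1000 < retry_last_ts + (retry_interval or 0)
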
diff --git a/synapse/storage/databases/main/schema/full_schemas/16/users.sql b/synapse/storage/databases/main/schema/full_schemas/16/users.sql
new file mode 100644
index 00000000..f013aa8b
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/16/users.sql
@@ -0,0 +1,42 @@
+/* Copyright 2014-2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+CREATE TABLE IF NOT EXISTS users(
+ name TEXT,
+ password_hash TEXT,
+ creation_ts BIGINT,
+ admin SMALLINT DEFAULT 0 NOT NULL,
+ UNIQUE(name)
+);
+
+CREATE TABLE IF NOT EXISTS access_tokens(
+ id BIGINT PRIMARY KEY,
+ user_id TEXT NOT NULL,
+ device_id TEXT,
+ token TEXT NOT NULL,
+ last_used BIGINT,
+ UNIQUE(token)
+);
+
+CREATE TABLE IF NOT EXISTS user_ips (
+ user_id TEXT NOT NULL,
+ access_token TEXT NOT NULL,
+ device_id TEXT,
+ ip TEXT NOT NULL,
+ user_agent TEXT NOT NULL,
+ last_seen BIGINT NOT NULL
+);
+
+CREATE INDEX user_ips_user ON user_ips(user_id);
+CREATE INDEX user_ips_user_ip ON user_ips(user_id, access_token, ip);
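
An illustrative read of the user_ips table above, returning the most recent sightings for a user (psycopg2-style cursor assumed):

def last_seen_ips(cur, user_id):
    # One row per recorded (access_token, ip) sighting; last_seen is assumed
    # to be in milliseconds, like the other *_ts columns in this schema.
    cur.execute(
        "SELECT ip, user_agent, device_id, last_seen FROM user_ips"
        " WHERE user_id = %s ORDER BY last_seen DESC",
        (user_id,),
    )
    return cur.fetchall()
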
diff --git a/synapse/storage/databases/main/schema/full_schemas/54/full.sql.postgres b/synapse/storage/databases/main/schema/full_schemas/54/full.sql.postgres
new file mode 100644
index 00000000..889a9a0c
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/54/full.sql.postgres
@@ -0,0 +1,1983 @@
+
+
+
+
+
+CREATE TABLE access_tokens (
+ id bigint NOT NULL,
+ user_id text NOT NULL,
+ device_id text,
+ token text NOT NULL,
+ last_used bigint
+);
+
+
+
+CREATE TABLE account_data (
+ user_id text NOT NULL,
+ account_data_type text NOT NULL,
+ stream_id bigint NOT NULL,
+ content text NOT NULL
+);
+
+
+
+CREATE TABLE account_data_max_stream_id (
+ lock character(1) DEFAULT 'X'::bpchar NOT NULL,
+ stream_id bigint NOT NULL,
+ CONSTRAINT private_user_data_max_stream_id_lock_check CHECK ((lock = 'X'::bpchar))
+);
+
+
+
+CREATE TABLE account_validity (
+ user_id text NOT NULL,
+ expiration_ts_ms bigint NOT NULL,
+ email_sent boolean NOT NULL,
+ renewal_token text
+);
+
+
+
+CREATE TABLE application_services_state (
+ as_id text NOT NULL,
+ state character varying(5),
+ last_txn integer
+);
+
+
+
+CREATE TABLE application_services_txns (
+ as_id text NOT NULL,
+ txn_id integer NOT NULL,
+ event_ids text NOT NULL
+);
+
+
+
+CREATE TABLE appservice_room_list (
+ appservice_id text NOT NULL,
+ network_id text NOT NULL,
+ room_id text NOT NULL
+);
+
+
+
+CREATE TABLE appservice_stream_position (
+ lock character(1) DEFAULT 'X'::bpchar NOT NULL,
+ stream_ordering bigint,
+ CONSTRAINT appservice_stream_position_lock_check CHECK ((lock = 'X'::bpchar))
+);
+
+
+CREATE TABLE blocked_rooms (
+ room_id text NOT NULL,
+ user_id text NOT NULL
+);
+
+
+
+CREATE TABLE cache_invalidation_stream (
+ stream_id bigint,
+ cache_func text,
+ keys text[],
+ invalidation_ts bigint
+);
+
+
+
+CREATE TABLE current_state_delta_stream (
+ stream_id bigint NOT NULL,
+ room_id text NOT NULL,
+ type text NOT NULL,
+ state_key text NOT NULL,
+ event_id text,
+ prev_event_id text
+);
+
+
+
+CREATE TABLE current_state_events (
+ event_id text NOT NULL,
+ room_id text NOT NULL,
+ type text NOT NULL,
+ state_key text NOT NULL
+);
+
+
+
+CREATE TABLE deleted_pushers (
+ stream_id bigint NOT NULL,
+ app_id text NOT NULL,
+ pushkey text NOT NULL,
+ user_id text NOT NULL
+);
+
+
+
+CREATE TABLE destinations (
+ destination text NOT NULL,
+ retry_last_ts bigint,
+ retry_interval integer
+);
+
+
+
+CREATE TABLE device_federation_inbox (
+ origin text NOT NULL,
+ message_id text NOT NULL,
+ received_ts bigint NOT NULL
+);
+
+
+
+CREATE TABLE device_federation_outbox (
+ destination text NOT NULL,
+ stream_id bigint NOT NULL,
+ queued_ts bigint NOT NULL,
+ messages_json text NOT NULL
+);
+
+
+
+CREATE TABLE device_inbox (
+ user_id text NOT NULL,
+ device_id text NOT NULL,
+ stream_id bigint NOT NULL,
+ message_json text NOT NULL
+);
+
+
+
+CREATE TABLE device_lists_outbound_last_success (
+ destination text NOT NULL,
+ user_id text NOT NULL,
+ stream_id bigint NOT NULL
+);
+
+
+
+CREATE TABLE device_lists_outbound_pokes (
+ destination text NOT NULL,
+ stream_id bigint NOT NULL,
+ user_id text NOT NULL,
+ device_id text NOT NULL,
+ sent boolean NOT NULL,
+ ts bigint NOT NULL
+);
+
+
+
+CREATE TABLE device_lists_remote_cache (
+ user_id text NOT NULL,
+ device_id text NOT NULL,
+ content text NOT NULL
+);
+
+
+
+CREATE TABLE device_lists_remote_extremeties (
+ user_id text NOT NULL,
+ stream_id text NOT NULL
+);
+
+
+
+CREATE TABLE device_lists_stream (
+ stream_id bigint NOT NULL,
+ user_id text NOT NULL,
+ device_id text NOT NULL
+);
+
+
+
+CREATE TABLE device_max_stream_id (
+ stream_id bigint NOT NULL
+);
+
+
+
+CREATE TABLE devices (
+ user_id text NOT NULL,
+ device_id text NOT NULL,
+ display_name text
+);
+
+
+
+CREATE TABLE e2e_device_keys_json (
+ user_id text NOT NULL,
+ device_id text NOT NULL,
+ ts_added_ms bigint NOT NULL,
+ key_json text NOT NULL
+);
+
+
+
+CREATE TABLE e2e_one_time_keys_json (
+ user_id text NOT NULL,
+ device_id text NOT NULL,
+ algorithm text NOT NULL,
+ key_id text NOT NULL,
+ ts_added_ms bigint NOT NULL,
+ key_json text NOT NULL
+);
+
+
+
+CREATE TABLE e2e_room_keys (
+ user_id text NOT NULL,
+ room_id text NOT NULL,
+ session_id text NOT NULL,
+ version bigint NOT NULL,
+ first_message_index integer,
+ forwarded_count integer,
+ is_verified boolean,
+ session_data text NOT NULL
+);
+
+
+
+CREATE TABLE e2e_room_keys_versions (
+ user_id text NOT NULL,
+ version bigint NOT NULL,
+ algorithm text NOT NULL,
+ auth_data text NOT NULL,
+ deleted smallint DEFAULT 0 NOT NULL
+);
+
+
+
+CREATE TABLE erased_users (
+ user_id text NOT NULL
+);
+
+
+
+CREATE TABLE event_auth (
+ event_id text NOT NULL,
+ auth_id text NOT NULL,
+ room_id text NOT NULL
+);
+
+
+
+CREATE TABLE event_backward_extremities (
+ event_id text NOT NULL,
+ room_id text NOT NULL
+);
+
+
+
+CREATE TABLE event_edges (
+ event_id text NOT NULL,
+ prev_event_id text NOT NULL,
+ room_id text NOT NULL,
+ is_state boolean NOT NULL
+);
+
+
+
+CREATE TABLE event_forward_extremities (
+ event_id text NOT NULL,
+ room_id text NOT NULL
+);
+
+
+
+CREATE TABLE event_json (
+ event_id text NOT NULL,
+ room_id text NOT NULL,
+ internal_metadata text NOT NULL,
+ json text NOT NULL,
+ format_version integer
+);
+
+
+
+CREATE TABLE event_push_actions (
+ room_id text NOT NULL,
+ event_id text NOT NULL,
+ user_id text NOT NULL,
+ profile_tag character varying(32),
+ actions text NOT NULL,
+ topological_ordering bigint,
+ stream_ordering bigint,
+ notif smallint,
+ highlight smallint
+);
+
+
+
+CREATE TABLE event_push_actions_staging (
+ event_id text NOT NULL,
+ user_id text NOT NULL,
+ actions text NOT NULL,
+ notif smallint NOT NULL,
+ highlight smallint NOT NULL
+);
+
+
+
+CREATE TABLE event_push_summary (
+ user_id text NOT NULL,
+ room_id text NOT NULL,
+ notif_count bigint NOT NULL,
+ stream_ordering bigint NOT NULL
+);
+
+
+
+CREATE TABLE event_push_summary_stream_ordering (
+ lock character(1) DEFAULT 'X'::bpchar NOT NULL,
+ stream_ordering bigint NOT NULL,
+ CONSTRAINT event_push_summary_stream_ordering_lock_check CHECK ((lock = 'X'::bpchar))
+);
+
+
+
+CREATE TABLE event_reference_hashes (
+ event_id text,
+ algorithm text,
+ hash bytea
+);
+
+
+
+CREATE TABLE event_relations (
+ event_id text NOT NULL,
+ relates_to_id text NOT NULL,
+ relation_type text NOT NULL,
+ aggregation_key text
+);
+
+
+
+CREATE TABLE event_reports (
+ id bigint NOT NULL,
+ received_ts bigint NOT NULL,
+ room_id text NOT NULL,
+ event_id text NOT NULL,
+ user_id text NOT NULL,
+ reason text,
+ content text
+);
+
+
+
+CREATE TABLE event_search (
+ event_id text,
+ room_id text,
+ sender text,
+ key text,
+ vector tsvector,
+ origin_server_ts bigint,
+ stream_ordering bigint
+);
+
+
+
+CREATE TABLE event_to_state_groups (
+ event_id text NOT NULL,
+ state_group bigint NOT NULL
+);
+
+
+
+CREATE TABLE events (
+ stream_ordering integer NOT NULL,
+ topological_ordering bigint NOT NULL,
+ event_id text NOT NULL,
+ type text NOT NULL,
+ room_id text NOT NULL,
+ content text,
+ unrecognized_keys text,
+ processed boolean NOT NULL,
+ outlier boolean NOT NULL,
+ depth bigint DEFAULT 0 NOT NULL,
+ origin_server_ts bigint,
+ received_ts bigint,
+ sender text,
+ contains_url boolean
+);
+
+
+
+CREATE TABLE ex_outlier_stream (
+ event_stream_ordering bigint NOT NULL,
+ event_id text NOT NULL,
+ state_group bigint NOT NULL
+);
+
+
+
+CREATE TABLE federation_stream_position (
+ type text NOT NULL,
+ stream_id integer NOT NULL
+);
+
+
+
+CREATE TABLE group_attestations_remote (
+ group_id text NOT NULL,
+ user_id text NOT NULL,
+ valid_until_ms bigint NOT NULL,
+ attestation_json text NOT NULL
+);
+
+
+
+CREATE TABLE group_attestations_renewals (
+ group_id text NOT NULL,
+ user_id text NOT NULL,
+ valid_until_ms bigint NOT NULL
+);
+
+
+
+CREATE TABLE group_invites (
+ group_id text NOT NULL,
+ user_id text NOT NULL
+);
+
+
+
+CREATE TABLE group_roles (
+ group_id text NOT NULL,
+ role_id text NOT NULL,
+ profile text NOT NULL,
+ is_public boolean NOT NULL
+);
+
+
+
+CREATE TABLE group_room_categories (
+ group_id text NOT NULL,
+ category_id text NOT NULL,
+ profile text NOT NULL,
+ is_public boolean NOT NULL
+);
+
+
+
+CREATE TABLE group_rooms (
+ group_id text NOT NULL,
+ room_id text NOT NULL,
+ is_public boolean NOT NULL
+);
+
+
+
+CREATE TABLE group_summary_roles (
+ group_id text NOT NULL,
+ role_id text NOT NULL,
+ role_order bigint NOT NULL,
+ CONSTRAINT group_summary_roles_role_order_check CHECK ((role_order > 0))
+);
+
+
+
+CREATE TABLE group_summary_room_categories (
+ group_id text NOT NULL,
+ category_id text NOT NULL,
+ cat_order bigint NOT NULL,
+ CONSTRAINT group_summary_room_categories_cat_order_check CHECK ((cat_order > 0))
+);
+
+
+
+CREATE TABLE group_summary_rooms (
+ group_id text NOT NULL,
+ room_id text NOT NULL,
+ category_id text NOT NULL,
+ room_order bigint NOT NULL,
+ is_public boolean NOT NULL,
+ CONSTRAINT group_summary_rooms_room_order_check CHECK ((room_order > 0))
+);
+
+
+
+CREATE TABLE group_summary_users (
+ group_id text NOT NULL,
+ user_id text NOT NULL,
+ role_id text NOT NULL,
+ user_order bigint NOT NULL,
+ is_public boolean NOT NULL
+);
+
+
+
+CREATE TABLE group_users (
+ group_id text NOT NULL,
+ user_id text NOT NULL,
+ is_admin boolean NOT NULL,
+ is_public boolean NOT NULL
+);
+
+
+
+CREATE TABLE groups (
+ group_id text NOT NULL,
+ name text,
+ avatar_url text,
+ short_description text,
+ long_description text,
+ is_public boolean NOT NULL,
+ join_policy text DEFAULT 'invite'::text NOT NULL
+);
+
+
+
+CREATE TABLE guest_access (
+ event_id text NOT NULL,
+ room_id text NOT NULL,
+ guest_access text NOT NULL
+);
+
+
+
+CREATE TABLE history_visibility (
+ event_id text NOT NULL,
+ room_id text NOT NULL,
+ history_visibility text NOT NULL
+);
+
+
+
+CREATE TABLE local_group_membership (
+ group_id text NOT NULL,
+ user_id text NOT NULL,
+ is_admin boolean NOT NULL,
+ membership text NOT NULL,
+ is_publicised boolean NOT NULL,
+ content text NOT NULL
+);
+
+
+
+CREATE TABLE local_group_updates (
+ stream_id bigint NOT NULL,
+ group_id text NOT NULL,
+ user_id text NOT NULL,
+ type text NOT NULL,
+ content text NOT NULL
+);
+
+
+
+CREATE TABLE local_invites (
+ stream_id bigint NOT NULL,
+ inviter text NOT NULL,
+ invitee text NOT NULL,
+ event_id text NOT NULL,
+ room_id text NOT NULL,
+ locally_rejected text,
+ replaced_by text
+);
+
+
+
+CREATE TABLE local_media_repository (
+ media_id text,
+ media_type text,
+ media_length integer,
+ created_ts bigint,
+ upload_name text,
+ user_id text,
+ quarantined_by text,
+ url_cache text,
+ last_access_ts bigint
+);
+
+
+
+CREATE TABLE local_media_repository_thumbnails (
+ media_id text,
+ thumbnail_width integer,
+ thumbnail_height integer,
+ thumbnail_type text,
+ thumbnail_method text,
+ thumbnail_length integer
+);
+
+
+
+CREATE TABLE local_media_repository_url_cache (
+ url text,
+ response_code integer,
+ etag text,
+ expires_ts bigint,
+ og text,
+ media_id text,
+ download_ts bigint
+);
+
+
+
+CREATE TABLE monthly_active_users (
+ user_id text NOT NULL,
+ "timestamp" bigint NOT NULL
+);
+
+
+
+CREATE TABLE open_id_tokens (
+ token text NOT NULL,
+ ts_valid_until_ms bigint NOT NULL,
+ user_id text NOT NULL
+);
+
+
+
+CREATE TABLE presence (
+ user_id text NOT NULL,
+ state character varying(20),
+ status_msg text,
+ mtime bigint
+);
+
+
+
+CREATE TABLE presence_allow_inbound (
+ observed_user_id text NOT NULL,
+ observer_user_id text NOT NULL
+);
+
+
+
+CREATE TABLE presence_stream (
+ stream_id bigint,
+ user_id text,
+ state text,
+ last_active_ts bigint,
+ last_federation_update_ts bigint,
+ last_user_sync_ts bigint,
+ status_msg text,
+ currently_active boolean
+);
+
+
+
+CREATE TABLE profiles (
+ user_id text NOT NULL,
+ displayname text,
+ avatar_url text
+);
+
+
+
+CREATE TABLE public_room_list_stream (
+ stream_id bigint NOT NULL,
+ room_id text NOT NULL,
+ visibility boolean NOT NULL,
+ appservice_id text,
+ network_id text
+);
+
+
+
+CREATE TABLE push_rules (
+ id bigint NOT NULL,
+ user_name text NOT NULL,
+ rule_id text NOT NULL,
+ priority_class smallint NOT NULL,
+ priority integer DEFAULT 0 NOT NULL,
+ conditions text NOT NULL,
+ actions text NOT NULL
+);
+
+
+
+CREATE TABLE push_rules_enable (
+ id bigint NOT NULL,
+ user_name text NOT NULL,
+ rule_id text NOT NULL,
+ enabled smallint
+);
+
+
+
+CREATE TABLE push_rules_stream (
+ stream_id bigint NOT NULL,
+ event_stream_ordering bigint NOT NULL,
+ user_id text NOT NULL,
+ rule_id text NOT NULL,
+ op text NOT NULL,
+ priority_class smallint,
+ priority integer,
+ conditions text,
+ actions text
+);
+
+
+
+CREATE TABLE pusher_throttle (
+ pusher bigint NOT NULL,
+ room_id text NOT NULL,
+ last_sent_ts bigint,
+ throttle_ms bigint
+);
+
+
+
+CREATE TABLE pushers (
+ id bigint NOT NULL,
+ user_name text NOT NULL,
+ access_token bigint,
+ profile_tag text NOT NULL,
+ kind text NOT NULL,
+ app_id text NOT NULL,
+ app_display_name text NOT NULL,
+ device_display_name text NOT NULL,
+ pushkey text NOT NULL,
+ ts bigint NOT NULL,
+ lang text,
+ data text,
+ last_stream_ordering integer,
+ last_success bigint,
+ failing_since bigint
+);
+
+
+
+CREATE TABLE ratelimit_override (
+ user_id text NOT NULL,
+ messages_per_second bigint,
+ burst_count bigint
+);
+
+
+
+CREATE TABLE receipts_graph (
+ room_id text NOT NULL,
+ receipt_type text NOT NULL,
+ user_id text NOT NULL,
+ event_ids text NOT NULL,
+ data text NOT NULL
+);
+
+
+
+CREATE TABLE receipts_linearized (
+ stream_id bigint NOT NULL,
+ room_id text NOT NULL,
+ receipt_type text NOT NULL,
+ user_id text NOT NULL,
+ event_id text NOT NULL,
+ data text NOT NULL
+);
+
+
+
+CREATE TABLE received_transactions (
+ transaction_id text,
+ origin text,
+ ts bigint,
+ response_code integer,
+ response_json bytea,
+ has_been_referenced smallint DEFAULT 0
+);
+
+
+
+CREATE TABLE redactions (
+ event_id text NOT NULL,
+ redacts text NOT NULL
+);
+
+
+
+CREATE TABLE rejections (
+ event_id text NOT NULL,
+ reason text NOT NULL,
+ last_check text NOT NULL
+);
+
+
+
+CREATE TABLE remote_media_cache (
+ media_origin text,
+ media_id text,
+ media_type text,
+ created_ts bigint,
+ upload_name text,
+ media_length integer,
+ filesystem_id text,
+ last_access_ts bigint,
+ quarantined_by text
+);
+
+
+
+CREATE TABLE remote_media_cache_thumbnails (
+ media_origin text,
+ media_id text,
+ thumbnail_width integer,
+ thumbnail_height integer,
+ thumbnail_method text,
+ thumbnail_type text,
+ thumbnail_length integer,
+ filesystem_id text
+);
+
+
+
+CREATE TABLE remote_profile_cache (
+ user_id text NOT NULL,
+ displayname text,
+ avatar_url text,
+ last_check bigint NOT NULL
+);
+
+
+
+CREATE TABLE room_account_data (
+ user_id text NOT NULL,
+ room_id text NOT NULL,
+ account_data_type text NOT NULL,
+ stream_id bigint NOT NULL,
+ content text NOT NULL
+);
+
+
+
+CREATE TABLE room_alias_servers (
+ room_alias text NOT NULL,
+ server text NOT NULL
+);
+
+
+
+CREATE TABLE room_aliases (
+ room_alias text NOT NULL,
+ room_id text NOT NULL,
+ creator text
+);
+
+
+
+CREATE TABLE room_depth (
+ room_id text NOT NULL,
+ min_depth integer NOT NULL
+);
+
+
+
+CREATE TABLE room_memberships (
+ event_id text NOT NULL,
+ user_id text NOT NULL,
+ sender text NOT NULL,
+ room_id text NOT NULL,
+ membership text NOT NULL,
+ forgotten integer DEFAULT 0,
+ display_name text,
+ avatar_url text
+);
+
+
+
+CREATE TABLE room_names (
+ event_id text NOT NULL,
+ room_id text NOT NULL,
+ name text NOT NULL
+);
+
+
+
+CREATE TABLE room_state (
+ room_id text NOT NULL,
+ join_rules text,
+ history_visibility text,
+ encryption text,
+ name text,
+ topic text,
+ avatar text,
+ canonical_alias text
+);
+
+
+
+CREATE TABLE room_stats (
+ room_id text NOT NULL,
+ ts bigint NOT NULL,
+ bucket_size integer NOT NULL,
+ current_state_events integer NOT NULL,
+ joined_members integer NOT NULL,
+ invited_members integer NOT NULL,
+ left_members integer NOT NULL,
+ banned_members integer NOT NULL,
+ state_events integer NOT NULL
+);
+
+
+
+CREATE TABLE room_stats_earliest_token (
+ room_id text NOT NULL,
+ token bigint NOT NULL
+);
+
+
+
+CREATE TABLE room_tags (
+ user_id text NOT NULL,
+ room_id text NOT NULL,
+ tag text NOT NULL,
+ content text NOT NULL
+);
+
+
+
+CREATE TABLE room_tags_revisions (
+ user_id text NOT NULL,
+ room_id text NOT NULL,
+ stream_id bigint NOT NULL
+);
+
+
+
+CREATE TABLE rooms (
+ room_id text NOT NULL,
+ is_public boolean,
+ creator text
+);
+
+
+
+CREATE TABLE server_keys_json (
+ server_name text NOT NULL,
+ key_id text NOT NULL,
+ from_server text NOT NULL,
+ ts_added_ms bigint NOT NULL,
+ ts_valid_until_ms bigint NOT NULL,
+ key_json bytea NOT NULL
+);
+
+
+
+CREATE TABLE server_signature_keys (
+ server_name text,
+ key_id text,
+ from_server text,
+ ts_added_ms bigint,
+ verify_key bytea,
+ ts_valid_until_ms bigint
+);
+
+
+
+CREATE TABLE state_events (
+ event_id text NOT NULL,
+ room_id text NOT NULL,
+ type text NOT NULL,
+ state_key text NOT NULL,
+ prev_state text
+);
+
+
+
+CREATE TABLE stats_stream_pos (
+ lock character(1) DEFAULT 'X'::bpchar NOT NULL,
+ stream_id bigint,
+ CONSTRAINT stats_stream_pos_lock_check CHECK ((lock = 'X'::bpchar))
+);
+
+
+
+CREATE TABLE stream_ordering_to_exterm (
+ stream_ordering bigint NOT NULL,
+ room_id text NOT NULL,
+ event_id text NOT NULL
+);
+
+
+
+CREATE TABLE threepid_guest_access_tokens (
+ medium text,
+ address text,
+ guest_access_token text,
+ first_inviter text
+);
+
+
+
+CREATE TABLE topics (
+ event_id text NOT NULL,
+ room_id text NOT NULL,
+ topic text NOT NULL
+);
+
+
+
+CREATE TABLE user_daily_visits (
+ user_id text NOT NULL,
+ device_id text,
+ "timestamp" bigint NOT NULL
+);
+
+
+
+CREATE TABLE user_directory (
+ user_id text NOT NULL,
+ room_id text,
+ display_name text,
+ avatar_url text
+);
+
+
+
+CREATE TABLE user_directory_search (
+ user_id text NOT NULL,
+ vector tsvector
+);
+
+
+
+CREATE TABLE user_directory_stream_pos (
+ lock character(1) DEFAULT 'X'::bpchar NOT NULL,
+ stream_id bigint,
+ CONSTRAINT user_directory_stream_pos_lock_check CHECK ((lock = 'X'::bpchar))
+);
+
+
+
+CREATE TABLE user_filters (
+ user_id text,
+ filter_id bigint,
+ filter_json bytea
+);
+
+
+
+CREATE TABLE user_ips (
+ user_id text NOT NULL,
+ access_token text NOT NULL,
+ device_id text,
+ ip text NOT NULL,
+ user_agent text NOT NULL,
+ last_seen bigint NOT NULL
+);
+
+
+
+CREATE TABLE user_stats (
+ user_id text NOT NULL,
+ ts bigint NOT NULL,
+ bucket_size integer NOT NULL,
+ public_rooms integer NOT NULL,
+ private_rooms integer NOT NULL
+);
+
+
+
+CREATE TABLE user_threepid_id_server (
+ user_id text NOT NULL,
+ medium text NOT NULL,
+ address text NOT NULL,
+ id_server text NOT NULL
+);
+
+
+
+CREATE TABLE user_threepids (
+ user_id text NOT NULL,
+ medium text NOT NULL,
+ address text NOT NULL,
+ validated_at bigint NOT NULL,
+ added_at bigint NOT NULL
+);
+
+
+
+CREATE TABLE users (
+ name text,
+ password_hash text,
+ creation_ts bigint,
+ admin smallint DEFAULT 0 NOT NULL,
+ upgrade_ts bigint,
+ is_guest smallint DEFAULT 0 NOT NULL,
+ appservice_id text,
+ consent_version text,
+ consent_server_notice_sent text,
+ user_type text
+);
+
+
+
+CREATE TABLE users_in_public_rooms (
+ user_id text NOT NULL,
+ room_id text NOT NULL
+);
+
+
+
+CREATE TABLE users_pending_deactivation (
+ user_id text NOT NULL
+);
+
+
+
+CREATE TABLE users_who_share_private_rooms (
+ user_id text NOT NULL,
+ other_user_id text NOT NULL,
+ room_id text NOT NULL
+);
+
+
+
+ALTER TABLE ONLY access_tokens
+ ADD CONSTRAINT access_tokens_pkey PRIMARY KEY (id);
+
+
+
+ALTER TABLE ONLY access_tokens
+ ADD CONSTRAINT access_tokens_token_key UNIQUE (token);
+
+
+
+ALTER TABLE ONLY account_data
+ ADD CONSTRAINT account_data_uniqueness UNIQUE (user_id, account_data_type);
+
+
+
+ALTER TABLE ONLY account_validity
+ ADD CONSTRAINT account_validity_pkey PRIMARY KEY (user_id);
+
+
+
+ALTER TABLE ONLY application_services_state
+ ADD CONSTRAINT application_services_state_pkey PRIMARY KEY (as_id);
+
+
+
+ALTER TABLE ONLY application_services_txns
+ ADD CONSTRAINT application_services_txns_as_id_txn_id_key UNIQUE (as_id, txn_id);
+
+
+
+ALTER TABLE ONLY appservice_stream_position
+ ADD CONSTRAINT appservice_stream_position_lock_key UNIQUE (lock);
+
+
+
+ALTER TABLE ONLY current_state_events
+ ADD CONSTRAINT current_state_events_event_id_key UNIQUE (event_id);
+
+
+
+ALTER TABLE ONLY current_state_events
+ ADD CONSTRAINT current_state_events_room_id_type_state_key_key UNIQUE (room_id, type, state_key);
+
+
+
+ALTER TABLE ONLY destinations
+ ADD CONSTRAINT destinations_pkey PRIMARY KEY (destination);
+
+
+
+ALTER TABLE ONLY devices
+ ADD CONSTRAINT device_uniqueness UNIQUE (user_id, device_id);
+
+
+
+ALTER TABLE ONLY e2e_device_keys_json
+ ADD CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id);
+
+
+
+ALTER TABLE ONLY e2e_one_time_keys_json
+ ADD CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id);
+
+
+
+ALTER TABLE ONLY event_backward_extremities
+ ADD CONSTRAINT event_backward_extremities_event_id_room_id_key UNIQUE (event_id, room_id);
+
+
+
+ALTER TABLE ONLY event_edges
+ ADD CONSTRAINT event_edges_event_id_prev_event_id_room_id_is_state_key UNIQUE (event_id, prev_event_id, room_id, is_state);
+
+
+
+ALTER TABLE ONLY event_forward_extremities
+ ADD CONSTRAINT event_forward_extremities_event_id_room_id_key UNIQUE (event_id, room_id);
+
+
+
+ALTER TABLE ONLY event_push_actions
+ ADD CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag);
+
+
+
+ALTER TABLE ONLY event_json
+ ADD CONSTRAINT event_json_event_id_key UNIQUE (event_id);
+
+
+
+ALTER TABLE ONLY event_push_summary_stream_ordering
+ ADD CONSTRAINT event_push_summary_stream_ordering_lock_key UNIQUE (lock);
+
+
+
+ALTER TABLE ONLY event_reference_hashes
+ ADD CONSTRAINT event_reference_hashes_event_id_algorithm_key UNIQUE (event_id, algorithm);
+
+
+
+ALTER TABLE ONLY event_reports
+ ADD CONSTRAINT event_reports_pkey PRIMARY KEY (id);
+
+
+
+ALTER TABLE ONLY event_to_state_groups
+ ADD CONSTRAINT event_to_state_groups_event_id_key UNIQUE (event_id);
+
+
+
+ALTER TABLE ONLY events
+ ADD CONSTRAINT events_event_id_key UNIQUE (event_id);
+
+
+
+ALTER TABLE ONLY events
+ ADD CONSTRAINT events_pkey PRIMARY KEY (stream_ordering);
+
+
+
+ALTER TABLE ONLY ex_outlier_stream
+ ADD CONSTRAINT ex_outlier_stream_pkey PRIMARY KEY (event_stream_ordering);
+
+
+
+ALTER TABLE ONLY group_roles
+ ADD CONSTRAINT group_roles_group_id_role_id_key UNIQUE (group_id, role_id);
+
+
+
+ALTER TABLE ONLY group_room_categories
+ ADD CONSTRAINT group_room_categories_group_id_category_id_key UNIQUE (group_id, category_id);
+
+
+
+ALTER TABLE ONLY group_summary_roles
+ ADD CONSTRAINT group_summary_roles_group_id_role_id_role_order_key UNIQUE (group_id, role_id, role_order);
+
+
+
+ALTER TABLE ONLY group_summary_room_categories
+ ADD CONSTRAINT group_summary_room_categories_group_id_category_id_cat_orde_key UNIQUE (group_id, category_id, cat_order);
+
+
+
+ALTER TABLE ONLY group_summary_rooms
+ ADD CONSTRAINT group_summary_rooms_group_id_category_id_room_id_room_order_key UNIQUE (group_id, category_id, room_id, room_order);
+
+
+
+ALTER TABLE ONLY guest_access
+ ADD CONSTRAINT guest_access_event_id_key UNIQUE (event_id);
+
+
+
+ALTER TABLE ONLY history_visibility
+ ADD CONSTRAINT history_visibility_event_id_key UNIQUE (event_id);
+
+
+
+ALTER TABLE ONLY local_media_repository
+ ADD CONSTRAINT local_media_repository_media_id_key UNIQUE (media_id);
+
+
+
+ALTER TABLE ONLY local_media_repository_thumbnails
+ ADD CONSTRAINT local_media_repository_thumbn_media_id_thumbnail_width_thum_key UNIQUE (media_id, thumbnail_width, thumbnail_height, thumbnail_type);
+
+
+
+ALTER TABLE ONLY user_threepids
+ ADD CONSTRAINT medium_address UNIQUE (medium, address);
+
+
+
+ALTER TABLE ONLY open_id_tokens
+ ADD CONSTRAINT open_id_tokens_pkey PRIMARY KEY (token);
+
+
+
+ALTER TABLE ONLY presence_allow_inbound
+ ADD CONSTRAINT presence_allow_inbound_observed_user_id_observer_user_id_key UNIQUE (observed_user_id, observer_user_id);
+
+
+
+ALTER TABLE ONLY presence
+ ADD CONSTRAINT presence_user_id_key UNIQUE (user_id);
+
+
+
+ALTER TABLE ONLY account_data_max_stream_id
+ ADD CONSTRAINT private_user_data_max_stream_id_lock_key UNIQUE (lock);
+
+
+
+ALTER TABLE ONLY profiles
+ ADD CONSTRAINT profiles_user_id_key UNIQUE (user_id);
+
+
+
+ALTER TABLE ONLY push_rules_enable
+ ADD CONSTRAINT push_rules_enable_pkey PRIMARY KEY (id);
+
+
+
+ALTER TABLE ONLY push_rules_enable
+ ADD CONSTRAINT push_rules_enable_user_name_rule_id_key UNIQUE (user_name, rule_id);
+
+
+
+ALTER TABLE ONLY push_rules
+ ADD CONSTRAINT push_rules_pkey PRIMARY KEY (id);
+
+
+
+ALTER TABLE ONLY push_rules
+ ADD CONSTRAINT push_rules_user_name_rule_id_key UNIQUE (user_name, rule_id);
+
+
+
+ALTER TABLE ONLY pusher_throttle
+ ADD CONSTRAINT pusher_throttle_pkey PRIMARY KEY (pusher, room_id);
+
+
+
+ALTER TABLE ONLY pushers
+ ADD CONSTRAINT pushers2_app_id_pushkey_user_name_key UNIQUE (app_id, pushkey, user_name);
+
+
+
+ALTER TABLE ONLY pushers
+ ADD CONSTRAINT pushers2_pkey PRIMARY KEY (id);
+
+
+
+ALTER TABLE ONLY receipts_graph
+ ADD CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id);
+
+
+
+ALTER TABLE ONLY receipts_linearized
+ ADD CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id);
+
+
+
+ALTER TABLE ONLY received_transactions
+ ADD CONSTRAINT received_transactions_transaction_id_origin_key UNIQUE (transaction_id, origin);
+
+
+
+ALTER TABLE ONLY redactions
+ ADD CONSTRAINT redactions_event_id_key UNIQUE (event_id);
+
+
+
+ALTER TABLE ONLY rejections
+ ADD CONSTRAINT rejections_event_id_key UNIQUE (event_id);
+
+
+
+ALTER TABLE ONLY remote_media_cache
+ ADD CONSTRAINT remote_media_cache_media_origin_media_id_key UNIQUE (media_origin, media_id);
+
+
+
+ALTER TABLE ONLY remote_media_cache_thumbnails
+ ADD CONSTRAINT remote_media_cache_thumbnails_media_origin_media_id_thumbna_key UNIQUE (media_origin, media_id, thumbnail_width, thumbnail_height, thumbnail_type);
+
+
+
+ALTER TABLE ONLY room_account_data
+ ADD CONSTRAINT room_account_data_uniqueness UNIQUE (user_id, room_id, account_data_type);
+
+
+
+ALTER TABLE ONLY room_aliases
+ ADD CONSTRAINT room_aliases_room_alias_key UNIQUE (room_alias);
+
+
+
+ALTER TABLE ONLY room_depth
+ ADD CONSTRAINT room_depth_room_id_key UNIQUE (room_id);
+
+
+
+ALTER TABLE ONLY room_memberships
+ ADD CONSTRAINT room_memberships_event_id_key UNIQUE (event_id);
+
+
+
+ALTER TABLE ONLY room_names
+ ADD CONSTRAINT room_names_event_id_key UNIQUE (event_id);
+
+
+
+ALTER TABLE ONLY room_tags_revisions
+ ADD CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id);
+
+
+
+ALTER TABLE ONLY room_tags
+ ADD CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag);
+
+
+
+ALTER TABLE ONLY rooms
+ ADD CONSTRAINT rooms_pkey PRIMARY KEY (room_id);
+
+
+
+ALTER TABLE ONLY server_keys_json
+ ADD CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server);
+
+
+
+ALTER TABLE ONLY server_signature_keys
+ ADD CONSTRAINT server_signature_keys_server_name_key_id_key UNIQUE (server_name, key_id);
+
+
+
+ALTER TABLE ONLY state_events
+ ADD CONSTRAINT state_events_event_id_key UNIQUE (event_id);
+
+
+ALTER TABLE ONLY stats_stream_pos
+ ADD CONSTRAINT stats_stream_pos_lock_key UNIQUE (lock);
+
+
+
+ALTER TABLE ONLY topics
+ ADD CONSTRAINT topics_event_id_key UNIQUE (event_id);
+
+
+
+ALTER TABLE ONLY user_directory_stream_pos
+ ADD CONSTRAINT user_directory_stream_pos_lock_key UNIQUE (lock);
+
+
+
+ALTER TABLE ONLY users
+ ADD CONSTRAINT users_name_key UNIQUE (name);
+
+
+
+CREATE INDEX access_tokens_device_id ON access_tokens USING btree (user_id, device_id);
+
+
+
+CREATE INDEX account_data_stream_id ON account_data USING btree (user_id, stream_id);
+
+
+
+CREATE INDEX application_services_txns_id ON application_services_txns USING btree (as_id);
+
+
+
+CREATE UNIQUE INDEX appservice_room_list_idx ON appservice_room_list USING btree (appservice_id, network_id, room_id);
+
+
+
+CREATE UNIQUE INDEX blocked_rooms_idx ON blocked_rooms USING btree (room_id);
+
+
+
+CREATE INDEX cache_invalidation_stream_id ON cache_invalidation_stream USING btree (stream_id);
+
+
+
+CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream USING btree (stream_id);
+
+
+
+CREATE INDEX current_state_events_member_index ON current_state_events USING btree (state_key) WHERE (type = 'm.room.member'::text);
+
+
+
+CREATE INDEX deleted_pushers_stream_id ON deleted_pushers USING btree (stream_id);
+
+
+
+CREATE INDEX device_federation_inbox_sender_id ON device_federation_inbox USING btree (origin, message_id);
+
+
+
+CREATE INDEX device_federation_outbox_destination_id ON device_federation_outbox USING btree (destination, stream_id);
+
+
+
+CREATE INDEX device_federation_outbox_id ON device_federation_outbox USING btree (stream_id);
+
+
+
+CREATE INDEX device_inbox_stream_id_user_id ON device_inbox USING btree (stream_id, user_id);
+
+
+
+CREATE INDEX device_inbox_user_stream_id ON device_inbox USING btree (user_id, device_id, stream_id);
+
+
+
+CREATE INDEX device_lists_outbound_last_success_idx ON device_lists_outbound_last_success USING btree (destination, user_id, stream_id);
+
+
+
+CREATE INDEX device_lists_outbound_pokes_id ON device_lists_outbound_pokes USING btree (destination, stream_id);
+
+
+
+CREATE INDEX device_lists_outbound_pokes_stream ON device_lists_outbound_pokes USING btree (stream_id);
+
+
+
+CREATE INDEX device_lists_outbound_pokes_user ON device_lists_outbound_pokes USING btree (destination, user_id);
+
+
+
+CREATE UNIQUE INDEX device_lists_remote_cache_unique_id ON device_lists_remote_cache USING btree (user_id, device_id);
+
+
+
+CREATE UNIQUE INDEX device_lists_remote_extremeties_unique_idx ON device_lists_remote_extremeties USING btree (user_id);
+
+
+
+CREATE INDEX device_lists_stream_id ON device_lists_stream USING btree (stream_id, user_id);
+
+
+
+CREATE INDEX device_lists_stream_user_id ON device_lists_stream USING btree (user_id, device_id);
+
+
+
+CREATE UNIQUE INDEX e2e_room_keys_idx ON e2e_room_keys USING btree (user_id, room_id, session_id);
+
+
+
+CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions USING btree (user_id, version);
+
+
+
+CREATE UNIQUE INDEX erased_users_user ON erased_users USING btree (user_id);
+
+
+
+CREATE INDEX ev_b_extrem_id ON event_backward_extremities USING btree (event_id);
+
+
+
+CREATE INDEX ev_b_extrem_room ON event_backward_extremities USING btree (room_id);
+
+
+
+CREATE INDEX ev_edges_id ON event_edges USING btree (event_id);
+
+
+
+CREATE INDEX ev_edges_prev_id ON event_edges USING btree (prev_event_id);
+
+
+
+CREATE INDEX ev_extrem_id ON event_forward_extremities USING btree (event_id);
+
+
+
+CREATE INDEX ev_extrem_room ON event_forward_extremities USING btree (room_id);
+
+
+
+CREATE INDEX evauth_edges_id ON event_auth USING btree (event_id);
+
+
+
+CREATE INDEX event_contains_url_index ON events USING btree (room_id, topological_ordering, stream_ordering) WHERE ((contains_url = true) AND (outlier = false));
+
+
+
+CREATE INDEX event_json_room_id ON event_json USING btree (room_id);
+
+
+
+CREATE INDEX event_push_actions_highlights_index ON event_push_actions USING btree (user_id, room_id, topological_ordering, stream_ordering) WHERE (highlight = 1);
+
+
+
+CREATE INDEX event_push_actions_rm_tokens ON event_push_actions USING btree (user_id, room_id, topological_ordering, stream_ordering);
+
+
+
+CREATE INDEX event_push_actions_room_id_user_id ON event_push_actions USING btree (room_id, user_id);
+
+
+
+CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging USING btree (event_id);
+
+
+
+CREATE INDEX event_push_actions_stream_ordering ON event_push_actions USING btree (stream_ordering, user_id);
+
+
+
+CREATE INDEX event_push_actions_u_highlight ON event_push_actions USING btree (user_id, stream_ordering);
+
+
+
+CREATE INDEX event_push_summary_user_rm ON event_push_summary USING btree (user_id, room_id);
+
+
+
+CREATE INDEX event_reference_hashes_id ON event_reference_hashes USING btree (event_id);
+
+
+
+CREATE UNIQUE INDEX event_relations_id ON event_relations USING btree (event_id);
+
+
+
+CREATE INDEX event_relations_relates ON event_relations USING btree (relates_to_id, relation_type, aggregation_key);
+
+
+
+CREATE INDEX event_search_ev_ridx ON event_search USING btree (room_id);
+
+
+
+CREATE UNIQUE INDEX event_search_event_id_idx ON event_search USING btree (event_id);
+
+
+
+CREATE INDEX event_search_fts_idx ON event_search USING gin (vector);
+
+
+
+CREATE INDEX event_to_state_groups_sg_index ON event_to_state_groups USING btree (state_group);
+
+
+
+CREATE INDEX events_order_room ON events USING btree (room_id, topological_ordering, stream_ordering);
+
+
+
+CREATE INDEX events_room_stream ON events USING btree (room_id, stream_ordering);
+
+
+
+CREATE INDEX events_ts ON events USING btree (origin_server_ts, stream_ordering);
+
+
+
+CREATE INDEX group_attestations_remote_g_idx ON group_attestations_remote USING btree (group_id, user_id);
+
+
+
+CREATE INDEX group_attestations_remote_u_idx ON group_attestations_remote USING btree (user_id);
+
+
+
+CREATE INDEX group_attestations_remote_v_idx ON group_attestations_remote USING btree (valid_until_ms);
+
+
+
+CREATE INDEX group_attestations_renewals_g_idx ON group_attestations_renewals USING btree (group_id, user_id);
+
+
+
+CREATE INDEX group_attestations_renewals_u_idx ON group_attestations_renewals USING btree (user_id);
+
+
+
+CREATE INDEX group_attestations_renewals_v_idx ON group_attestations_renewals USING btree (valid_until_ms);
+
+
+
+CREATE UNIQUE INDEX group_invites_g_idx ON group_invites USING btree (group_id, user_id);
+
+
+
+CREATE INDEX group_invites_u_idx ON group_invites USING btree (user_id);
+
+
+
+CREATE UNIQUE INDEX group_rooms_g_idx ON group_rooms USING btree (group_id, room_id);
+
+
+
+CREATE INDEX group_rooms_r_idx ON group_rooms USING btree (room_id);
+
+
+
+CREATE UNIQUE INDEX group_summary_rooms_g_idx ON group_summary_rooms USING btree (group_id, room_id, category_id);
+
+
+
+CREATE INDEX group_summary_users_g_idx ON group_summary_users USING btree (group_id);
+
+
+
+CREATE UNIQUE INDEX group_users_g_idx ON group_users USING btree (group_id, user_id);
+
+
+
+CREATE INDEX group_users_u_idx ON group_users USING btree (user_id);
+
+
+
+CREATE UNIQUE INDEX groups_idx ON groups USING btree (group_id);
+
+
+
+CREATE INDEX local_group_membership_g_idx ON local_group_membership USING btree (group_id);
+
+
+
+CREATE INDEX local_group_membership_u_idx ON local_group_membership USING btree (user_id, group_id);
+
+
+
+CREATE INDEX local_invites_for_user_idx ON local_invites USING btree (invitee, locally_rejected, replaced_by, room_id);
+
+
+
+CREATE INDEX local_invites_id ON local_invites USING btree (stream_id);
+
+
+
+CREATE INDEX local_media_repository_thumbnails_media_id ON local_media_repository_thumbnails USING btree (media_id);
+
+
+
+CREATE INDEX local_media_repository_url_cache_by_url_download_ts ON local_media_repository_url_cache USING btree (url, download_ts);
+
+
+
+CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache USING btree (expires_ts);
+
+
+
+CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repository_url_cache USING btree (media_id);
+
+
+
+CREATE INDEX local_media_repository_url_idx ON local_media_repository USING btree (created_ts) WHERE (url_cache IS NOT NULL);
+
+
+
+CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users USING btree ("timestamp");
+
+
+
+CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users USING btree (user_id);
+
+
+
+CREATE INDEX open_id_tokens_ts_valid_until_ms ON open_id_tokens USING btree (ts_valid_until_ms);
+
+
+
+CREATE INDEX presence_stream_id ON presence_stream USING btree (stream_id, user_id);
+
+
+
+CREATE INDEX presence_stream_user_id ON presence_stream USING btree (user_id);
+
+
+
+CREATE INDEX public_room_index ON rooms USING btree (is_public);
+
+
+
+CREATE INDEX public_room_list_stream_idx ON public_room_list_stream USING btree (stream_id);
+
+
+
+CREATE INDEX public_room_list_stream_rm_idx ON public_room_list_stream USING btree (room_id, stream_id);
+
+
+
+CREATE INDEX push_rules_enable_user_name ON push_rules_enable USING btree (user_name);
+
+
+
+CREATE INDEX push_rules_stream_id ON push_rules_stream USING btree (stream_id);
+
+
+
+CREATE INDEX push_rules_stream_user_stream_id ON push_rules_stream USING btree (user_id, stream_id);
+
+
+
+CREATE INDEX push_rules_user_name ON push_rules USING btree (user_name);
+
+
+
+CREATE UNIQUE INDEX ratelimit_override_idx ON ratelimit_override USING btree (user_id);
+
+
+
+CREATE INDEX receipts_linearized_id ON receipts_linearized USING btree (stream_id);
+
+
+
+CREATE INDEX receipts_linearized_room_stream ON receipts_linearized USING btree (room_id, stream_id);
+
+
+
+CREATE INDEX receipts_linearized_user ON receipts_linearized USING btree (user_id);
+
+
+
+CREATE INDEX received_transactions_ts ON received_transactions USING btree (ts);
+
+
+
+CREATE INDEX redactions_redacts ON redactions USING btree (redacts);
+
+
+
+CREATE INDEX remote_profile_cache_time ON remote_profile_cache USING btree (last_check);
+
+
+
+CREATE UNIQUE INDEX remote_profile_cache_user_id ON remote_profile_cache USING btree (user_id);
+
+
+
+CREATE INDEX room_account_data_stream_id ON room_account_data USING btree (user_id, stream_id);
+
+
+
+CREATE INDEX room_alias_servers_alias ON room_alias_servers USING btree (room_alias);
+
+
+
+CREATE INDEX room_aliases_id ON room_aliases USING btree (room_id);
+
+
+
+CREATE INDEX room_depth_room ON room_depth USING btree (room_id);
+
+
+
+CREATE INDEX room_memberships_room_id ON room_memberships USING btree (room_id);
+
+
+
+CREATE INDEX room_memberships_user_id ON room_memberships USING btree (user_id);
+
+
+
+CREATE INDEX room_names_room_id ON room_names USING btree (room_id);
+
+
+
+CREATE UNIQUE INDEX room_state_room ON room_state USING btree (room_id);
+
+
+
+CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token USING btree (room_id);
+
+
+
+CREATE UNIQUE INDEX room_stats_room_ts ON room_stats USING btree (room_id, ts);
+
+
+
+CREATE INDEX stream_ordering_to_exterm_idx ON stream_ordering_to_exterm USING btree (stream_ordering);
+
+
+
+CREATE INDEX stream_ordering_to_exterm_rm_idx ON stream_ordering_to_exterm USING btree (room_id, stream_ordering);
+
+
+
+CREATE UNIQUE INDEX threepid_guest_access_tokens_index ON threepid_guest_access_tokens USING btree (medium, address);
+
+
+
+CREATE INDEX topics_room_id ON topics USING btree (room_id);
+
+
+
+CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits USING btree ("timestamp");
+
+
+
+CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits USING btree (user_id, "timestamp");
+
+
+
+CREATE INDEX user_directory_room_idx ON user_directory USING btree (room_id);
+
+
+
+CREATE INDEX user_directory_search_fts_idx ON user_directory_search USING gin (vector);
+
+
+
+CREATE UNIQUE INDEX user_directory_search_user_idx ON user_directory_search USING btree (user_id);
+
+
+
+CREATE UNIQUE INDEX user_directory_user_idx ON user_directory USING btree (user_id);
+
+
+
+CREATE INDEX user_filters_by_user_id_filter_id ON user_filters USING btree (user_id, filter_id);
+
+
+
+CREATE INDEX user_ips_device_id ON user_ips USING btree (user_id, device_id, last_seen);
+
+
+
+CREATE INDEX user_ips_last_seen ON user_ips USING btree (user_id, last_seen);
+
+
+
+CREATE INDEX user_ips_last_seen_only ON user_ips USING btree (last_seen);
+
+
+
+CREATE UNIQUE INDEX user_ips_user_token_ip_unique_index ON user_ips USING btree (user_id, access_token, ip);
+
+
+
+CREATE UNIQUE INDEX user_stats_user_ts ON user_stats USING btree (user_id, ts);
+
+
+
+CREATE UNIQUE INDEX user_threepid_id_server_idx ON user_threepid_id_server USING btree (user_id, medium, address, id_server);
+
+
+
+CREATE INDEX user_threepids_medium_address ON user_threepids USING btree (medium, address);
+
+
+
+CREATE INDEX user_threepids_user_id ON user_threepids USING btree (user_id);
+
+
+
+CREATE INDEX users_creation_ts ON users USING btree (creation_ts);
+
+
+
+CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms USING btree (user_id, room_id);
+
+
+
+CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms USING btree (other_user_id);
+
+
+
+CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms USING btree (room_id);
+
+
+
+CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms USING btree (user_id, other_user_id, room_id);
diff --git a/synapse/storage/databases/main/schema/full_schemas/54/full.sql.sqlite b/synapse/storage/databases/main/schema/full_schemas/54/full.sql.sqlite
new file mode 100644
index 00000000..a0411ede
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/54/full.sql.sqlite
@@ -0,0 +1,253 @@
+CREATE TABLE application_services_state( as_id TEXT PRIMARY KEY, state VARCHAR(5), last_txn INTEGER );
+CREATE TABLE application_services_txns( as_id TEXT NOT NULL, txn_id INTEGER NOT NULL, event_ids TEXT NOT NULL, UNIQUE(as_id, txn_id) );
+CREATE INDEX application_services_txns_id ON application_services_txns ( as_id );
+CREATE TABLE presence( user_id TEXT NOT NULL, state VARCHAR(20), status_msg TEXT, mtime BIGINT, UNIQUE (user_id) );
+CREATE TABLE presence_allow_inbound( observed_user_id TEXT NOT NULL, observer_user_id TEXT NOT NULL, UNIQUE (observed_user_id, observer_user_id) );
+CREATE TABLE users( name TEXT, password_hash TEXT, creation_ts BIGINT, admin SMALLINT DEFAULT 0 NOT NULL, upgrade_ts BIGINT, is_guest SMALLINT DEFAULT 0 NOT NULL, appservice_id TEXT, consent_version TEXT, consent_server_notice_sent TEXT, user_type TEXT DEFAULT NULL, UNIQUE(name) );
+CREATE TABLE access_tokens( id BIGINT PRIMARY KEY, user_id TEXT NOT NULL, device_id TEXT, token TEXT NOT NULL, last_used BIGINT, UNIQUE(token) );
+CREATE TABLE user_ips ( user_id TEXT NOT NULL, access_token TEXT NOT NULL, device_id TEXT, ip TEXT NOT NULL, user_agent TEXT NOT NULL, last_seen BIGINT NOT NULL );
+CREATE TABLE profiles( user_id TEXT NOT NULL, displayname TEXT, avatar_url TEXT, UNIQUE(user_id) );
+CREATE TABLE received_transactions( transaction_id TEXT, origin TEXT, ts BIGINT, response_code INTEGER, response_json bytea, has_been_referenced smallint default 0, UNIQUE (transaction_id, origin) );
+CREATE TABLE destinations( destination TEXT PRIMARY KEY, retry_last_ts BIGINT, retry_interval INTEGER );
+CREATE TABLE events( stream_ordering INTEGER PRIMARY KEY, topological_ordering BIGINT NOT NULL, event_id TEXT NOT NULL, type TEXT NOT NULL, room_id TEXT NOT NULL, content TEXT, unrecognized_keys TEXT, processed BOOL NOT NULL, outlier BOOL NOT NULL, depth BIGINT DEFAULT 0 NOT NULL, origin_server_ts BIGINT, received_ts BIGINT, sender TEXT, contains_url BOOLEAN, UNIQUE (event_id) );
+CREATE INDEX events_order_room ON events ( room_id, topological_ordering, stream_ordering );
+CREATE TABLE event_json( event_id TEXT NOT NULL, room_id TEXT NOT NULL, internal_metadata TEXT NOT NULL, json TEXT NOT NULL, format_version INTEGER, UNIQUE (event_id) );
+CREATE INDEX event_json_room_id ON event_json(room_id);
+CREATE TABLE state_events( event_id TEXT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, prev_state TEXT, UNIQUE (event_id) );
+CREATE TABLE current_state_events( event_id TEXT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, UNIQUE (event_id), UNIQUE (room_id, type, state_key) );
+CREATE TABLE room_memberships( event_id TEXT NOT NULL, user_id TEXT NOT NULL, sender TEXT NOT NULL, room_id TEXT NOT NULL, membership TEXT NOT NULL, forgotten INTEGER DEFAULT 0, display_name TEXT, avatar_url TEXT, UNIQUE (event_id) );
+CREATE INDEX room_memberships_room_id ON room_memberships (room_id);
+CREATE INDEX room_memberships_user_id ON room_memberships (user_id);
+CREATE TABLE topics( event_id TEXT NOT NULL, room_id TEXT NOT NULL, topic TEXT NOT NULL, UNIQUE (event_id) );
+CREATE INDEX topics_room_id ON topics(room_id);
+CREATE TABLE room_names( event_id TEXT NOT NULL, room_id TEXT NOT NULL, name TEXT NOT NULL, UNIQUE (event_id) );
+CREATE INDEX room_names_room_id ON room_names(room_id);
+CREATE TABLE rooms( room_id TEXT PRIMARY KEY NOT NULL, is_public BOOL, creator TEXT );
+CREATE TABLE server_signature_keys( server_name TEXT, key_id TEXT, from_server TEXT, ts_added_ms BIGINT, verify_key bytea, ts_valid_until_ms BIGINT, UNIQUE (server_name, key_id) );
+CREATE TABLE rejections( event_id TEXT NOT NULL, reason TEXT NOT NULL, last_check TEXT NOT NULL, UNIQUE (event_id) );
+CREATE TABLE push_rules ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, rule_id TEXT NOT NULL, priority_class SMALLINT NOT NULL, priority INTEGER NOT NULL DEFAULT 0, conditions TEXT NOT NULL, actions TEXT NOT NULL, UNIQUE(user_name, rule_id) );
+CREATE INDEX push_rules_user_name on push_rules (user_name);
+CREATE TABLE user_filters( user_id TEXT, filter_id BIGINT, filter_json bytea );
+CREATE INDEX user_filters_by_user_id_filter_id ON user_filters( user_id, filter_id );
+CREATE TABLE push_rules_enable ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, rule_id TEXT NOT NULL, enabled SMALLINT, UNIQUE(user_name, rule_id) );
+CREATE INDEX push_rules_enable_user_name on push_rules_enable (user_name);
+CREATE TABLE event_forward_extremities( event_id TEXT NOT NULL, room_id TEXT NOT NULL, UNIQUE (event_id, room_id) );
+CREATE INDEX ev_extrem_room ON event_forward_extremities(room_id);
+CREATE INDEX ev_extrem_id ON event_forward_extremities(event_id);
+CREATE TABLE event_backward_extremities( event_id TEXT NOT NULL, room_id TEXT NOT NULL, UNIQUE (event_id, room_id) );
+CREATE INDEX ev_b_extrem_room ON event_backward_extremities(room_id);
+CREATE INDEX ev_b_extrem_id ON event_backward_extremities(event_id);
+CREATE TABLE event_edges( event_id TEXT NOT NULL, prev_event_id TEXT NOT NULL, room_id TEXT NOT NULL, is_state BOOL NOT NULL, UNIQUE (event_id, prev_event_id, room_id, is_state) );
+CREATE INDEX ev_edges_id ON event_edges(event_id);
+CREATE INDEX ev_edges_prev_id ON event_edges(prev_event_id);
+CREATE TABLE room_depth( room_id TEXT NOT NULL, min_depth INTEGER NOT NULL, UNIQUE (room_id) );
+CREATE INDEX room_depth_room ON room_depth(room_id);
+CREATE TABLE event_to_state_groups( event_id TEXT NOT NULL, state_group BIGINT NOT NULL, UNIQUE (event_id) );
+CREATE TABLE local_media_repository ( media_id TEXT, media_type TEXT, media_length INTEGER, created_ts BIGINT, upload_name TEXT, user_id TEXT, quarantined_by TEXT, url_cache TEXT, last_access_ts BIGINT, UNIQUE (media_id) );
+CREATE TABLE local_media_repository_thumbnails ( media_id TEXT, thumbnail_width INTEGER, thumbnail_height INTEGER, thumbnail_type TEXT, thumbnail_method TEXT, thumbnail_length INTEGER, UNIQUE ( media_id, thumbnail_width, thumbnail_height, thumbnail_type ) );
+CREATE INDEX local_media_repository_thumbnails_media_id ON local_media_repository_thumbnails (media_id);
+CREATE TABLE remote_media_cache ( media_origin TEXT, media_id TEXT, media_type TEXT, created_ts BIGINT, upload_name TEXT, media_length INTEGER, filesystem_id TEXT, last_access_ts BIGINT, quarantined_by TEXT, UNIQUE (media_origin, media_id) );
+CREATE TABLE remote_media_cache_thumbnails ( media_origin TEXT, media_id TEXT, thumbnail_width INTEGER, thumbnail_height INTEGER, thumbnail_method TEXT, thumbnail_type TEXT, thumbnail_length INTEGER, filesystem_id TEXT, UNIQUE ( media_origin, media_id, thumbnail_width, thumbnail_height, thumbnail_type ) );
+CREATE TABLE redactions ( event_id TEXT NOT NULL, redacts TEXT NOT NULL, UNIQUE (event_id) );
+CREATE INDEX redactions_redacts ON redactions (redacts);
+CREATE TABLE room_aliases( room_alias TEXT NOT NULL, room_id TEXT NOT NULL, creator TEXT, UNIQUE (room_alias) );
+CREATE INDEX room_aliases_id ON room_aliases(room_id);
+CREATE TABLE room_alias_servers( room_alias TEXT NOT NULL, server TEXT NOT NULL );
+CREATE INDEX room_alias_servers_alias ON room_alias_servers(room_alias);
+CREATE TABLE event_reference_hashes ( event_id TEXT, algorithm TEXT, hash bytea, UNIQUE (event_id, algorithm) );
+CREATE INDEX event_reference_hashes_id ON event_reference_hashes(event_id);
+CREATE TABLE IF NOT EXISTS "server_keys_json" ( server_name TEXT NOT NULL, key_id TEXT NOT NULL, from_server TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, ts_valid_until_ms BIGINT NOT NULL, key_json bytea NOT NULL, CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server) );
+CREATE TABLE e2e_device_keys_json ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, key_json TEXT NOT NULL, CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id) );
+CREATE TABLE e2e_one_time_keys_json ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, algorithm TEXT NOT NULL, key_id TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, key_json TEXT NOT NULL, CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id) );
+CREATE TABLE receipts_graph( room_id TEXT NOT NULL, receipt_type TEXT NOT NULL, user_id TEXT NOT NULL, event_ids TEXT NOT NULL, data TEXT NOT NULL, CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id) );
+CREATE TABLE receipts_linearized ( stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, receipt_type TEXT NOT NULL, user_id TEXT NOT NULL, event_id TEXT NOT NULL, data TEXT NOT NULL, CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id) );
+CREATE INDEX receipts_linearized_id ON receipts_linearized( stream_id );
+CREATE INDEX receipts_linearized_room_stream ON receipts_linearized( room_id, stream_id );
+CREATE TABLE IF NOT EXISTS "user_threepids" ( user_id TEXT NOT NULL, medium TEXT NOT NULL, address TEXT NOT NULL, validated_at BIGINT NOT NULL, added_at BIGINT NOT NULL, CONSTRAINT medium_address UNIQUE (medium, address) );
+CREATE INDEX user_threepids_user_id ON user_threepids(user_id);
+CREATE VIRTUAL TABLE event_search USING fts4 ( event_id, room_id, sender, key, value )
+/* event_search(event_id,room_id,sender,"key",value) */;
+CREATE TABLE IF NOT EXISTS 'event_search_content'(docid INTEGER PRIMARY KEY, 'c0event_id', 'c1room_id', 'c2sender', 'c3key', 'c4value');
+CREATE TABLE IF NOT EXISTS 'event_search_segments'(blockid INTEGER PRIMARY KEY, block BLOB);
+CREATE TABLE IF NOT EXISTS 'event_search_segdir'(level INTEGER,idx INTEGER,start_block INTEGER,leaves_end_block INTEGER,end_block INTEGER,root BLOB,PRIMARY KEY(level, idx));
+CREATE TABLE IF NOT EXISTS 'event_search_docsize'(docid INTEGER PRIMARY KEY, size BLOB);
+CREATE TABLE IF NOT EXISTS 'event_search_stat'(id INTEGER PRIMARY KEY, value BLOB);
+CREATE TABLE guest_access( event_id TEXT NOT NULL, room_id TEXT NOT NULL, guest_access TEXT NOT NULL, UNIQUE (event_id) );
+CREATE TABLE history_visibility( event_id TEXT NOT NULL, room_id TEXT NOT NULL, history_visibility TEXT NOT NULL, UNIQUE (event_id) );
+CREATE TABLE room_tags( user_id TEXT NOT NULL, room_id TEXT NOT NULL, tag TEXT NOT NULL, content TEXT NOT NULL, CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag) );
+CREATE TABLE room_tags_revisions ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, stream_id BIGINT NOT NULL, CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id) );
+CREATE TABLE IF NOT EXISTS "account_data_max_stream_id"( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_id BIGINT NOT NULL, CHECK (Lock='X') );
+CREATE TABLE account_data( user_id TEXT NOT NULL, account_data_type TEXT NOT NULL, stream_id BIGINT NOT NULL, content TEXT NOT NULL, CONSTRAINT account_data_uniqueness UNIQUE (user_id, account_data_type) );
+CREATE TABLE room_account_data( user_id TEXT NOT NULL, room_id TEXT NOT NULL, account_data_type TEXT NOT NULL, stream_id BIGINT NOT NULL, content TEXT NOT NULL, CONSTRAINT room_account_data_uniqueness UNIQUE (user_id, room_id, account_data_type) );
+CREATE INDEX account_data_stream_id on account_data(user_id, stream_id);
+CREATE INDEX room_account_data_stream_id on room_account_data(user_id, stream_id);
+CREATE INDEX events_ts ON events(origin_server_ts, stream_ordering);
+CREATE TABLE event_push_actions( room_id TEXT NOT NULL, event_id TEXT NOT NULL, user_id TEXT NOT NULL, profile_tag VARCHAR(32), actions TEXT NOT NULL, topological_ordering BIGINT, stream_ordering BIGINT, notif SMALLINT, highlight SMALLINT, CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag) );
+CREATE INDEX event_push_actions_room_id_user_id on event_push_actions(room_id, user_id);
+CREATE INDEX events_room_stream on events(room_id, stream_ordering);
+CREATE INDEX public_room_index on rooms(is_public);
+CREATE INDEX receipts_linearized_user ON receipts_linearized( user_id );
+CREATE INDEX event_push_actions_rm_tokens on event_push_actions( user_id, room_id, topological_ordering, stream_ordering );
+CREATE TABLE presence_stream( stream_id BIGINT, user_id TEXT, state TEXT, last_active_ts BIGINT, last_federation_update_ts BIGINT, last_user_sync_ts BIGINT, status_msg TEXT, currently_active BOOLEAN );
+CREATE INDEX presence_stream_id ON presence_stream(stream_id, user_id);
+CREATE INDEX presence_stream_user_id ON presence_stream(user_id);
+CREATE TABLE push_rules_stream( stream_id BIGINT NOT NULL, event_stream_ordering BIGINT NOT NULL, user_id TEXT NOT NULL, rule_id TEXT NOT NULL, op TEXT NOT NULL, priority_class SMALLINT, priority INTEGER, conditions TEXT, actions TEXT );
+CREATE INDEX push_rules_stream_id ON push_rules_stream(stream_id);
+CREATE INDEX push_rules_stream_user_stream_id on push_rules_stream(user_id, stream_id);
+CREATE TABLE ex_outlier_stream( event_stream_ordering BIGINT PRIMARY KEY NOT NULL, event_id TEXT NOT NULL, state_group BIGINT NOT NULL );
+CREATE TABLE threepid_guest_access_tokens( medium TEXT, address TEXT, guest_access_token TEXT, first_inviter TEXT );
+CREATE UNIQUE INDEX threepid_guest_access_tokens_index ON threepid_guest_access_tokens(medium, address);
+CREATE TABLE local_invites( stream_id BIGINT NOT NULL, inviter TEXT NOT NULL, invitee TEXT NOT NULL, event_id TEXT NOT NULL, room_id TEXT NOT NULL, locally_rejected TEXT, replaced_by TEXT );
+CREATE INDEX local_invites_id ON local_invites(stream_id);
+CREATE INDEX local_invites_for_user_idx ON local_invites(invitee, locally_rejected, replaced_by, room_id);
+CREATE INDEX event_push_actions_stream_ordering on event_push_actions( stream_ordering, user_id );
+CREATE TABLE open_id_tokens ( token TEXT NOT NULL PRIMARY KEY, ts_valid_until_ms bigint NOT NULL, user_id TEXT NOT NULL, UNIQUE (token) );
+CREATE INDEX open_id_tokens_ts_valid_until_ms ON open_id_tokens(ts_valid_until_ms);
+CREATE TABLE pusher_throttle( pusher BIGINT NOT NULL, room_id TEXT NOT NULL, last_sent_ts BIGINT, throttle_ms BIGINT, PRIMARY KEY (pusher, room_id) );
+CREATE TABLE event_reports( id BIGINT NOT NULL PRIMARY KEY, received_ts BIGINT NOT NULL, room_id TEXT NOT NULL, event_id TEXT NOT NULL, user_id TEXT NOT NULL, reason TEXT, content TEXT );
+CREATE TABLE devices ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, display_name TEXT, CONSTRAINT device_uniqueness UNIQUE (user_id, device_id) );
+CREATE TABLE appservice_stream_position( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_ordering BIGINT, CHECK (Lock='X') );
+CREATE TABLE device_inbox ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, stream_id BIGINT NOT NULL, message_json TEXT NOT NULL );
+CREATE INDEX device_inbox_user_stream_id ON device_inbox(user_id, device_id, stream_id);
+CREATE INDEX received_transactions_ts ON received_transactions(ts);
+CREATE TABLE device_federation_outbox ( destination TEXT NOT NULL, stream_id BIGINT NOT NULL, queued_ts BIGINT NOT NULL, messages_json TEXT NOT NULL );
+CREATE INDEX device_federation_outbox_destination_id ON device_federation_outbox(destination, stream_id);
+CREATE TABLE device_federation_inbox ( origin TEXT NOT NULL, message_id TEXT NOT NULL, received_ts BIGINT NOT NULL );
+CREATE INDEX device_federation_inbox_sender_id ON device_federation_inbox(origin, message_id);
+CREATE TABLE device_max_stream_id ( stream_id BIGINT NOT NULL );
+CREATE TABLE public_room_list_stream ( stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, visibility BOOLEAN NOT NULL , appservice_id TEXT, network_id TEXT);
+CREATE INDEX public_room_list_stream_idx on public_room_list_stream( stream_id );
+CREATE INDEX public_room_list_stream_rm_idx on public_room_list_stream( room_id, stream_id );
+CREATE TABLE stream_ordering_to_exterm ( stream_ordering BIGINT NOT NULL, room_id TEXT NOT NULL, event_id TEXT NOT NULL );
+CREATE INDEX stream_ordering_to_exterm_idx on stream_ordering_to_exterm( stream_ordering );
+CREATE INDEX stream_ordering_to_exterm_rm_idx on stream_ordering_to_exterm( room_id, stream_ordering );
+CREATE TABLE IF NOT EXISTS "event_auth"( event_id TEXT NOT NULL, auth_id TEXT NOT NULL, room_id TEXT NOT NULL );
+CREATE INDEX evauth_edges_id ON event_auth(event_id);
+CREATE INDEX user_threepids_medium_address on user_threepids (medium, address);
+CREATE TABLE appservice_room_list( appservice_id TEXT NOT NULL, network_id TEXT NOT NULL, room_id TEXT NOT NULL );
+CREATE UNIQUE INDEX appservice_room_list_idx ON appservice_room_list( appservice_id, network_id, room_id );
+CREATE INDEX device_federation_outbox_id ON device_federation_outbox(stream_id);
+CREATE TABLE federation_stream_position( type TEXT NOT NULL, stream_id INTEGER NOT NULL );
+CREATE TABLE device_lists_remote_cache ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, content TEXT NOT NULL );
+CREATE TABLE device_lists_remote_extremeties ( user_id TEXT NOT NULL, stream_id TEXT NOT NULL );
+CREATE TABLE device_lists_stream ( stream_id BIGINT NOT NULL, user_id TEXT NOT NULL, device_id TEXT NOT NULL );
+CREATE INDEX device_lists_stream_id ON device_lists_stream(stream_id, user_id);
+CREATE TABLE device_lists_outbound_pokes ( destination TEXT NOT NULL, stream_id BIGINT NOT NULL, user_id TEXT NOT NULL, device_id TEXT NOT NULL, sent BOOLEAN NOT NULL, ts BIGINT NOT NULL );
+CREATE INDEX device_lists_outbound_pokes_id ON device_lists_outbound_pokes(destination, stream_id);
+CREATE INDEX device_lists_outbound_pokes_user ON device_lists_outbound_pokes(destination, user_id);
+CREATE TABLE event_push_summary ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, notif_count BIGINT NOT NULL, stream_ordering BIGINT NOT NULL );
+CREATE INDEX event_push_summary_user_rm ON event_push_summary(user_id, room_id);
+CREATE TABLE event_push_summary_stream_ordering ( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_ordering BIGINT NOT NULL, CHECK (Lock='X') );
+CREATE TABLE IF NOT EXISTS "pushers" ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, access_token BIGINT DEFAULT NULL, profile_tag TEXT NOT NULL, kind TEXT NOT NULL, app_id TEXT NOT NULL, app_display_name TEXT NOT NULL, device_display_name TEXT NOT NULL, pushkey TEXT NOT NULL, ts BIGINT NOT NULL, lang TEXT, data TEXT, last_stream_ordering INTEGER, last_success BIGINT, failing_since BIGINT, UNIQUE (app_id, pushkey, user_name) );
+CREATE INDEX device_lists_outbound_pokes_stream ON device_lists_outbound_pokes(stream_id);
+CREATE TABLE ratelimit_override ( user_id TEXT NOT NULL, messages_per_second BIGINT, burst_count BIGINT );
+CREATE UNIQUE INDEX ratelimit_override_idx ON ratelimit_override(user_id);
+CREATE TABLE current_state_delta_stream ( stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, event_id TEXT, prev_event_id TEXT );
+CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream(stream_id);
+CREATE TABLE device_lists_outbound_last_success ( destination TEXT NOT NULL, user_id TEXT NOT NULL, stream_id BIGINT NOT NULL );
+CREATE INDEX device_lists_outbound_last_success_idx ON device_lists_outbound_last_success( destination, user_id, stream_id );
+CREATE TABLE user_directory_stream_pos ( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_id BIGINT, CHECK (Lock='X') );
+CREATE VIRTUAL TABLE user_directory_search USING fts4 ( user_id, value )
+/* user_directory_search(user_id,value) */;
+CREATE TABLE IF NOT EXISTS 'user_directory_search_content'(docid INTEGER PRIMARY KEY, 'c0user_id', 'c1value');
+CREATE TABLE IF NOT EXISTS 'user_directory_search_segments'(blockid INTEGER PRIMARY KEY, block BLOB);
+CREATE TABLE IF NOT EXISTS 'user_directory_search_segdir'(level INTEGER,idx INTEGER,start_block INTEGER,leaves_end_block INTEGER,end_block INTEGER,root BLOB,PRIMARY KEY(level, idx));
+CREATE TABLE IF NOT EXISTS 'user_directory_search_docsize'(docid INTEGER PRIMARY KEY, size BLOB);
+CREATE TABLE IF NOT EXISTS 'user_directory_search_stat'(id INTEGER PRIMARY KEY, value BLOB);
+CREATE TABLE blocked_rooms ( room_id TEXT NOT NULL, user_id TEXT NOT NULL );
+CREATE UNIQUE INDEX blocked_rooms_idx ON blocked_rooms(room_id);
+CREATE TABLE IF NOT EXISTS "local_media_repository_url_cache"( url TEXT, response_code INTEGER, etag TEXT, expires_ts BIGINT, og TEXT, media_id TEXT, download_ts BIGINT );
+CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache(expires_ts);
+CREATE INDEX local_media_repository_url_cache_by_url_download_ts ON local_media_repository_url_cache(url, download_ts);
+CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repository_url_cache(media_id);
+CREATE TABLE group_users ( group_id TEXT NOT NULL, user_id TEXT NOT NULL, is_admin BOOLEAN NOT NULL, is_public BOOLEAN NOT NULL );
+CREATE TABLE group_invites ( group_id TEXT NOT NULL, user_id TEXT NOT NULL );
+CREATE TABLE group_rooms ( group_id TEXT NOT NULL, room_id TEXT NOT NULL, is_public BOOLEAN NOT NULL );
+CREATE TABLE group_summary_rooms ( group_id TEXT NOT NULL, room_id TEXT NOT NULL, category_id TEXT NOT NULL, room_order BIGINT NOT NULL, is_public BOOLEAN NOT NULL, UNIQUE (group_id, category_id, room_id, room_order), CHECK (room_order > 0) );
+CREATE UNIQUE INDEX group_summary_rooms_g_idx ON group_summary_rooms(group_id, room_id, category_id);
+CREATE TABLE group_summary_room_categories ( group_id TEXT NOT NULL, category_id TEXT NOT NULL, cat_order BIGINT NOT NULL, UNIQUE (group_id, category_id, cat_order), CHECK (cat_order > 0) );
+CREATE TABLE group_room_categories ( group_id TEXT NOT NULL, category_id TEXT NOT NULL, profile TEXT NOT NULL, is_public BOOLEAN NOT NULL, UNIQUE (group_id, category_id) );
+CREATE TABLE group_summary_users ( group_id TEXT NOT NULL, user_id TEXT NOT NULL, role_id TEXT NOT NULL, user_order BIGINT NOT NULL, is_public BOOLEAN NOT NULL );
+CREATE INDEX group_summary_users_g_idx ON group_summary_users(group_id);
+CREATE TABLE group_summary_roles ( group_id TEXT NOT NULL, role_id TEXT NOT NULL, role_order BIGINT NOT NULL, UNIQUE (group_id, role_id, role_order), CHECK (role_order > 0) );
+CREATE TABLE group_roles ( group_id TEXT NOT NULL, role_id TEXT NOT NULL, profile TEXT NOT NULL, is_public BOOLEAN NOT NULL, UNIQUE (group_id, role_id) );
+CREATE TABLE group_attestations_renewals ( group_id TEXT NOT NULL, user_id TEXT NOT NULL, valid_until_ms BIGINT NOT NULL );
+CREATE INDEX group_attestations_renewals_g_idx ON group_attestations_renewals(group_id, user_id);
+CREATE INDEX group_attestations_renewals_u_idx ON group_attestations_renewals(user_id);
+CREATE INDEX group_attestations_renewals_v_idx ON group_attestations_renewals(valid_until_ms);
+CREATE TABLE group_attestations_remote ( group_id TEXT NOT NULL, user_id TEXT NOT NULL, valid_until_ms BIGINT NOT NULL, attestation_json TEXT NOT NULL );
+CREATE INDEX group_attestations_remote_g_idx ON group_attestations_remote(group_id, user_id);
+CREATE INDEX group_attestations_remote_u_idx ON group_attestations_remote(user_id);
+CREATE INDEX group_attestations_remote_v_idx ON group_attestations_remote(valid_until_ms);
+CREATE TABLE local_group_membership ( group_id TEXT NOT NULL, user_id TEXT NOT NULL, is_admin BOOLEAN NOT NULL, membership TEXT NOT NULL, is_publicised BOOLEAN NOT NULL, content TEXT NOT NULL );
+CREATE INDEX local_group_membership_u_idx ON local_group_membership(user_id, group_id);
+CREATE INDEX local_group_membership_g_idx ON local_group_membership(group_id);
+CREATE TABLE local_group_updates ( stream_id BIGINT NOT NULL, group_id TEXT NOT NULL, user_id TEXT NOT NULL, type TEXT NOT NULL, content TEXT NOT NULL );
+CREATE TABLE remote_profile_cache ( user_id TEXT NOT NULL, displayname TEXT, avatar_url TEXT, last_check BIGINT NOT NULL );
+CREATE UNIQUE INDEX remote_profile_cache_user_id ON remote_profile_cache(user_id);
+CREATE INDEX remote_profile_cache_time ON remote_profile_cache(last_check);
+CREATE TABLE IF NOT EXISTS "deleted_pushers" ( stream_id BIGINT NOT NULL, app_id TEXT NOT NULL, pushkey TEXT NOT NULL, user_id TEXT NOT NULL );
+CREATE INDEX deleted_pushers_stream_id ON deleted_pushers (stream_id);
+CREATE TABLE IF NOT EXISTS "groups" ( group_id TEXT NOT NULL, name TEXT, avatar_url TEXT, short_description TEXT, long_description TEXT, is_public BOOL NOT NULL , join_policy TEXT NOT NULL DEFAULT 'invite');
+CREATE UNIQUE INDEX groups_idx ON groups(group_id);
+CREATE TABLE IF NOT EXISTS "user_directory" ( user_id TEXT NOT NULL, room_id TEXT, display_name TEXT, avatar_url TEXT );
+CREATE INDEX user_directory_room_idx ON user_directory(room_id);
+CREATE UNIQUE INDEX user_directory_user_idx ON user_directory(user_id);
+CREATE TABLE event_push_actions_staging ( event_id TEXT NOT NULL, user_id TEXT NOT NULL, actions TEXT NOT NULL, notif SMALLINT NOT NULL, highlight SMALLINT NOT NULL );
+CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id);
+CREATE TABLE users_pending_deactivation ( user_id TEXT NOT NULL );
+CREATE UNIQUE INDEX group_invites_g_idx ON group_invites(group_id, user_id);
+CREATE UNIQUE INDEX group_users_g_idx ON group_users(group_id, user_id);
+CREATE INDEX group_users_u_idx ON group_users(user_id);
+CREATE INDEX group_invites_u_idx ON group_invites(user_id);
+CREATE UNIQUE INDEX group_rooms_g_idx ON group_rooms(group_id, room_id);
+CREATE INDEX group_rooms_r_idx ON group_rooms(room_id);
+CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL, device_id TEXT, timestamp BIGINT NOT NULL );
+CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits(user_id, timestamp);
+CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits(timestamp);
+CREATE TABLE erased_users ( user_id TEXT NOT NULL );
+CREATE UNIQUE INDEX erased_users_user ON erased_users(user_id);
+CREATE TABLE monthly_active_users ( user_id TEXT NOT NULL, timestamp BIGINT NOT NULL );
+CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users(user_id);
+CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users(timestamp);
+CREATE TABLE IF NOT EXISTS "e2e_room_keys_versions" ( user_id TEXT NOT NULL, version BIGINT NOT NULL, algorithm TEXT NOT NULL, auth_data TEXT NOT NULL, deleted SMALLINT DEFAULT 0 NOT NULL );
+CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions(user_id, version);
+CREATE TABLE IF NOT EXISTS "e2e_room_keys" ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, session_id TEXT NOT NULL, version BIGINT NOT NULL, first_message_index INT, forwarded_count INT, is_verified BOOLEAN, session_data TEXT NOT NULL );
+CREATE UNIQUE INDEX e2e_room_keys_idx ON e2e_room_keys(user_id, room_id, session_id);
+CREATE TABLE users_who_share_private_rooms ( user_id TEXT NOT NULL, other_user_id TEXT NOT NULL, room_id TEXT NOT NULL );
+CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms(user_id, other_user_id, room_id);
+CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms(room_id);
+CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms(other_user_id);
+CREATE TABLE user_threepid_id_server ( user_id TEXT NOT NULL, medium TEXT NOT NULL, address TEXT NOT NULL, id_server TEXT NOT NULL );
+CREATE UNIQUE INDEX user_threepid_id_server_idx ON user_threepid_id_server( user_id, medium, address, id_server );
+CREATE TABLE users_in_public_rooms ( user_id TEXT NOT NULL, room_id TEXT NOT NULL );
+CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id);
+CREATE TABLE account_validity ( user_id TEXT PRIMARY KEY, expiration_ts_ms BIGINT NOT NULL, email_sent BOOLEAN NOT NULL, renewal_token TEXT );
+CREATE TABLE event_relations ( event_id TEXT NOT NULL, relates_to_id TEXT NOT NULL, relation_type TEXT NOT NULL, aggregation_key TEXT );
+CREATE UNIQUE INDEX event_relations_id ON event_relations(event_id);
+CREATE INDEX event_relations_relates ON event_relations(relates_to_id, relation_type, aggregation_key);
+CREATE TABLE stats_stream_pos ( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_id BIGINT, CHECK (Lock='X') );
+CREATE TABLE user_stats ( user_id TEXT NOT NULL, ts BIGINT NOT NULL, bucket_size INT NOT NULL, public_rooms INT NOT NULL, private_rooms INT NOT NULL );
+CREATE UNIQUE INDEX user_stats_user_ts ON user_stats(user_id, ts);
+CREATE TABLE room_stats ( room_id TEXT NOT NULL, ts BIGINT NOT NULL, bucket_size INT NOT NULL, current_state_events INT NOT NULL, joined_members INT NOT NULL, invited_members INT NOT NULL, left_members INT NOT NULL, banned_members INT NOT NULL, state_events INT NOT NULL );
+CREATE UNIQUE INDEX room_stats_room_ts ON room_stats(room_id, ts);
+CREATE TABLE room_state ( room_id TEXT NOT NULL, join_rules TEXT, history_visibility TEXT, encryption TEXT, name TEXT, topic TEXT, avatar TEXT, canonical_alias TEXT );
+CREATE UNIQUE INDEX room_state_room ON room_state(room_id);
+CREATE TABLE room_stats_earliest_token ( room_id TEXT NOT NULL, token BIGINT NOT NULL );
+CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token(room_id);
+CREATE INDEX access_tokens_device_id ON access_tokens (user_id, device_id);
+CREATE INDEX user_ips_device_id ON user_ips (user_id, device_id, last_seen);
+CREATE INDEX event_contains_url_index ON events (room_id, topological_ordering, stream_ordering);
+CREATE INDEX event_push_actions_u_highlight ON event_push_actions (user_id, stream_ordering);
+CREATE INDEX event_push_actions_highlights_index ON event_push_actions (user_id, room_id, topological_ordering, stream_ordering);
+CREATE INDEX current_state_events_member_index ON current_state_events (state_key);
+CREATE INDEX device_inbox_stream_id_user_id ON device_inbox (stream_id, user_id);
+CREATE INDEX device_lists_stream_user_id ON device_lists_stream (user_id, device_id);
+CREATE INDEX local_media_repository_url_idx ON local_media_repository (created_ts);
+CREATE INDEX user_ips_last_seen ON user_ips (user_id, last_seen);
+CREATE INDEX user_ips_last_seen_only ON user_ips (last_seen);
+CREATE INDEX users_creation_ts ON users (creation_ts);
+CREATE INDEX event_to_state_groups_sg_index ON event_to_state_groups (state_group);
+CREATE UNIQUE INDEX device_lists_remote_cache_unique_id ON device_lists_remote_cache (user_id, device_id);
+CREATE UNIQUE INDEX device_lists_remote_extremeties_unique_idx ON device_lists_remote_extremeties (user_id);
+CREATE UNIQUE INDEX user_ips_user_token_ip_unique_index ON user_ips (user_id, access_token, ip);
diff --git a/synapse/storage/databases/main/schema/full_schemas/54/stream_positions.sql b/synapse/storage/databases/main/schema/full_schemas/54/stream_positions.sql
new file mode 100644
index 00000000..91d21b29
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/54/stream_positions.sql
@@ -0,0 +1,8 @@
+
+INSERT INTO appservice_stream_position (stream_ordering) SELECT COALESCE(MAX(stream_ordering), 0) FROM events;
+INSERT INTO federation_stream_position (type, stream_id) VALUES ('federation', -1);
+INSERT INTO federation_stream_position (type, stream_id) SELECT 'events', coalesce(max(stream_ordering), -1) FROM events;
+INSERT INTO user_directory_stream_pos (stream_id) VALUES (0);
+INSERT INTO stats_stream_pos (stream_id) VALUES (0);
+INSERT INTO event_push_summary_stream_ordering (stream_ordering) VALUES (0);
+-- device_max_stream_id is handled separately in 56/device_stream_id_insert.sql
\ No newline at end of file
diff --git a/synapse/storage/databases/main/schema/full_schemas/README.md b/synapse/storage/databases/main/schema/full_schemas/README.md
new file mode 100644
index 00000000..c00f2871
--- /dev/null
+++ b/synapse/storage/databases/main/schema/full_schemas/README.md
@@ -0,0 +1,21 @@
+# Synapse Database Schemas
+
+These schemas are used as a basis to create brand new Synapse databases, on both
+SQLite3 and Postgres.
+
+## Building full schema dumps
+
+If you want to recreate these schemas, they need to be made from a database that
+has had all background updates run.
+
+To do so, use `scripts-dev/make_full_schema.sh`. This will produce new
+`full.sql.postgres` and `full.sql.sqlite` files.
+
+Ensure Postgres is installed and that your user is able to run commands such as
+`createdb`, then call
+
+ ./scripts-dev/make_full_schema.sh -p postgres_username -o output_dir/
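+
+As a rough check (a sketch, not an official step: `postgres_username` and
+`output_dir/` are placeholders, and it assumes the script writes the dumps into
+the output directory), the regenerated Postgres dump can then be compared with
+the checked-in snapshot:
+
+    diff -u output_dir/full.sql.postgres \
+        synapse/storage/databases/main/schema/full_schemas/54/full.sql.postgres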
+
+There are currently two folders with full-schema snapshots. `16` is a snapshot
+from 2015, for historical reference. The other contains the most recent full
+schema snapshot.
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py
new file mode 100644
index 00000000..7f8d1880
--- /dev/null
+++ b/synapse/storage/databases/main/search.py
@@ -0,0 +1,711 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015, 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import re
+from collections import namedtuple
+from typing import List, Optional
+
+from synapse.api.errors import SynapseError
+from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
+from synapse.storage.database import DatabasePool
+from synapse.storage.databases.main.events_worker import EventRedactBehaviour
+from synapse.storage.engines import PostgresEngine, Sqlite3Engine
+
+logger = logging.getLogger(__name__)
+
+SearchEntry = namedtuple(
+ "SearchEntry",
+ ["key", "value", "event_id", "room_id", "stream_ordering", "origin_server_ts"],
+)
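+# Each SearchEntry corresponds to one row to be inserted into the event_search
+# table; on SQLite only key/value/event_id/room_id are used (see
+# store_search_entries_txn below).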
+
+
+class SearchWorkerStore(SQLBaseStore):
+ def store_search_entries_txn(self, txn, entries):
+ """Add entries to the search table
+
+ Args:
+ txn (cursor):
+ entries (iterable[SearchEntry]):
+ entries to be added to the table
+ """
+ if not self.hs.config.enable_search:
+ return
+ if isinstance(self.database_engine, PostgresEngine):
+ sql = (
+ "INSERT INTO event_search"
+ " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)"
+ " VALUES (?,?,?,to_tsvector('english', ?),?,?)"
+ )
+
+ args = (
+ (
+ entry.event_id,
+ entry.room_id,
+ entry.key,
+ entry.value,
+ entry.stream_ordering,
+ entry.origin_server_ts,
+ )
+ for entry in entries
+ )
+
+ txn.executemany(sql, args)
+
+ elif isinstance(self.database_engine, Sqlite3Engine):
+ sql = (
+ "INSERT INTO event_search (event_id, room_id, key, value)"
+ " VALUES (?,?,?,?)"
+ )
+ args = (
+ (entry.event_id, entry.room_id, entry.key, entry.value)
+ for entry in entries
+ )
+
+ txn.executemany(sql, args)
+ else:
+ # This should be unreachable.
+ raise Exception("Unrecognized database engine")
+
+
+class SearchBackgroundUpdateStore(SearchWorkerStore):
+
+ EVENT_SEARCH_UPDATE_NAME = "event_search"
+ EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order"
+ EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist"
+ EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin"
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(SearchBackgroundUpdateStore, self).__init__(database, db_conn, hs)
+
+ if not hs.config.enable_search:
+ return
+
+ self.db_pool.updates.register_background_update_handler(
+ self.EVENT_SEARCH_UPDATE_NAME, self._background_reindex_search
+ )
+ self.db_pool.updates.register_background_update_handler(
+ self.EVENT_SEARCH_ORDER_UPDATE_NAME, self._background_reindex_search_order
+ )
+
+ # we used to have a background update to turn the GIN index into a
+ # GIST one; we no longer do that (obviously) because we actually want
+ # a GIN index. However, it's possible that some people might still have
+ # the background update queued, so we register a handler to clear the
+ # background update.
+ self.db_pool.updates.register_noop_background_update(
+ self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME
+ )
+
+ self.db_pool.updates.register_background_update_handler(
+ self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME, self._background_reindex_gin_search
+ )
+
+ async def _background_reindex_search(self, progress, batch_size):
+ # we work through the events table from highest stream id to lowest
+ target_min_stream_id = progress["target_min_stream_id_inclusive"]
+ max_stream_id = progress["max_stream_id_exclusive"]
+ rows_inserted = progress.get("rows_inserted", 0)
+
+ TYPES = ["m.room.name", "m.room.message", "m.room.topic"]
+
+ def reindex_search_txn(txn):
+ sql = (
+ "SELECT stream_ordering, event_id, room_id, type, json, "
+ " origin_server_ts FROM events"
+ " JOIN event_json USING (room_id, event_id)"
+ " WHERE ? <= stream_ordering AND stream_ordering < ?"
+ " AND (%s)"
+ " ORDER BY stream_ordering DESC"
+ " LIMIT ?"
+ ) % (" OR ".join("type = '%s'" % (t,) for t in TYPES),)
+
+ txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size))
+
+ # we could stream straight from the results into
+ # store_search_entries_txn with a generator function, but that
+ # would mean having two cursors open on the database at once.
+ # Instead we just build a list of results.
+ rows = self.db_pool.cursor_to_dict(txn)
+ if not rows:
+ return 0
+
+ min_stream_id = rows[-1]["stream_ordering"]
+
+ event_search_rows = []
+ for row in rows:
+ try:
+ event_id = row["event_id"]
+ room_id = row["room_id"]
+ etype = row["type"]
+ stream_ordering = row["stream_ordering"]
+ origin_server_ts = row["origin_server_ts"]
+ try:
+ event_json = db_to_json(row["json"])
+ content = event_json["content"]
+ except Exception:
+ continue
+
+ if etype == "m.room.message":
+ key = "content.body"
+ value = content["body"]
+ elif etype == "m.room.topic":
+ key = "content.topic"
+ value = content["topic"]
+ elif etype == "m.room.name":
+ key = "content.name"
+ value = content["name"]
+ else:
+ raise Exception("unexpected event type %s" % etype)
+ except (KeyError, AttributeError):
+ # If the event is missing a necessary field then
+ # skip over it.
+ continue
+
+ if not isinstance(value, str):
+ # If the event body, name or topic isn't a string
+ # then skip over it
+ continue
+
+ event_search_rows.append(
+ SearchEntry(
+ key=key,
+ value=value,
+ event_id=event_id,
+ room_id=room_id,
+ stream_ordering=stream_ordering,
+ origin_server_ts=origin_server_ts,
+ )
+ )
+
+ self.store_search_entries_txn(txn, event_search_rows)
+
+ progress = {
+ "target_min_stream_id_inclusive": target_min_stream_id,
+ "max_stream_id_exclusive": min_stream_id,
+ "rows_inserted": rows_inserted + len(event_search_rows),
+ }
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, self.EVENT_SEARCH_UPDATE_NAME, progress
+ )
+
+ return len(event_search_rows)
+
+ result = await self.db_pool.runInteraction(
+ self.EVENT_SEARCH_UPDATE_NAME, reindex_search_txn
+ )
+
+ if not result:
+ await self.db_pool.updates._end_background_update(
+ self.EVENT_SEARCH_UPDATE_NAME
+ )
+
+ return result
+
+ async def _background_reindex_gin_search(self, progress, batch_size):
+ """This handles old synapses which used GIST indexes, if any;
+ converting them back to be GIN as per the actual schema.
+ """
+
+ def create_index(conn):
+ conn.rollback()
+
+ # we have to set autocommit, because postgres refuses to
+ # CREATE INDEX CONCURRENTLY without it.
+ conn.set_session(autocommit=True)
+
+ try:
+ c = conn.cursor()
+
+ # if we skipped the conversion to GIST, we may already/still
+ # have an event_search_fts_idx; unfortunately postgres 9.4
+ # doesn't support CREATE INDEX IF EXISTS so we just catch the
+ # exception and ignore it.
+ import psycopg2
+
+ try:
+ c.execute(
+ "CREATE INDEX CONCURRENTLY event_search_fts_idx"
+ " ON event_search USING GIN (vector)"
+ )
+ except psycopg2.ProgrammingError as e:
+ logger.warning(
+ "Ignoring error %r when trying to switch from GIST to GIN", e
+ )
+
+ # we should now be able to delete the GIST index.
+ c.execute("DROP INDEX IF EXISTS event_search_fts_idx_gist")
+ finally:
+ conn.set_session(autocommit=False)
+
+ if isinstance(self.database_engine, PostgresEngine):
+ await self.db_pool.runWithConnection(create_index)
+
+ await self.db_pool.updates._end_background_update(
+ self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME
+ )
+ return 1
+
+ async def _background_reindex_search_order(self, progress, batch_size):
+ target_min_stream_id = progress["target_min_stream_id_inclusive"]
+ max_stream_id = progress["max_stream_id_exclusive"]
+ rows_inserted = progress.get("rows_inserted", 0)
+ have_added_index = progress["have_added_indexes"]
+
+ if not have_added_index:
+
+ def create_index(conn):
+ conn.rollback()
+ conn.set_session(autocommit=True)
+ c = conn.cursor()
+
+ # We create with NULLS FIRST so that when we search *backwards*
+                # we get the ones with non-null origin_server_ts *first*
+ c.execute(
+ "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search("
+ "room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
+ )
+ c.execute(
+ "CREATE INDEX CONCURRENTLY event_search_order ON event_search("
+ "origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
+ )
+ conn.set_session(autocommit=False)
+
+ await self.db_pool.runWithConnection(create_index)
+
+ pg = dict(progress)
+ pg["have_added_indexes"] = True
+
+ await self.db_pool.runInteraction(
+ self.EVENT_SEARCH_ORDER_UPDATE_NAME,
+ self.db_pool.updates._background_update_progress_txn,
+ self.EVENT_SEARCH_ORDER_UPDATE_NAME,
+ pg,
+ )
+
+ def reindex_search_txn(txn):
+ sql = (
+ "UPDATE event_search AS es SET stream_ordering = e.stream_ordering,"
+ " origin_server_ts = e.origin_server_ts"
+ " FROM events AS e"
+ " WHERE e.event_id = es.event_id"
+ " AND ? <= e.stream_ordering AND e.stream_ordering < ?"
+ " RETURNING es.stream_ordering"
+ )
+
+ min_stream_id = max_stream_id - batch_size
+ txn.execute(sql, (min_stream_id, max_stream_id))
+ rows = txn.fetchall()
+
+ if min_stream_id < target_min_stream_id:
+                # We've reached the end.
+ return len(rows), False
+
+ progress = {
+ "target_min_stream_id_inclusive": target_min_stream_id,
+ "max_stream_id_exclusive": min_stream_id,
+ "rows_inserted": rows_inserted + len(rows),
+ "have_added_indexes": True,
+ }
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, self.EVENT_SEARCH_ORDER_UPDATE_NAME, progress
+ )
+
+ return len(rows), True
+
+ num_rows, finished = await self.db_pool.runInteraction(
+ self.EVENT_SEARCH_ORDER_UPDATE_NAME, reindex_search_txn
+ )
+
+ if not finished:
+ await self.db_pool.updates._end_background_update(
+ self.EVENT_SEARCH_ORDER_UPDATE_NAME
+ )
+
+ return num_rows
+
+
+class SearchStore(SearchBackgroundUpdateStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(SearchStore, self).__init__(database, db_conn, hs)
+
+ async def search_msgs(self, room_ids, search_term, keys):
+ """Performs a full text search over events with given keys.
+
+ Args:
+ room_ids (list): List of room ids to search in
+ search_term (str): Search term to search for
+ keys (list): List of keys to search in, currently supports
+ "content.body", "content.name", "content.topic"
+
+ Returns:
+ list of dicts
+ """
+ clauses = []
+
+ search_query = _parse_query(self.database_engine, search_term)
+
+ args = []
+
+ # Make sure we don't explode because the person is in too many rooms.
+ # We filter the results below regardless.
+ if len(room_ids) < 500:
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "room_id", room_ids
+ )
+ clauses = [clause]
+
+ local_clauses = []
+ for key in keys:
+ local_clauses.append("key = ?")
+ args.append(key)
+
+ clauses.append("(%s)" % (" OR ".join(local_clauses),))
+
+ count_args = args
+ count_clauses = clauses
+
+ if isinstance(self.database_engine, PostgresEngine):
+ sql = (
+ "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) AS rank,"
+ " room_id, event_id"
+ " FROM event_search"
+ " WHERE vector @@ to_tsquery('english', ?)"
+ )
+ args = [search_query, search_query] + args
+
+ count_sql = (
+ "SELECT room_id, count(*) as count FROM event_search"
+ " WHERE vector @@ to_tsquery('english', ?)"
+ )
+ count_args = [search_query] + count_args
+ elif isinstance(self.database_engine, Sqlite3Engine):
+ sql = (
+ "SELECT rank(matchinfo(event_search)) as rank, room_id, event_id"
+ " FROM event_search"
+ " WHERE value MATCH ?"
+ )
+ args = [search_query] + args
+
+ count_sql = (
+ "SELECT room_id, count(*) as count FROM event_search"
+ " WHERE value MATCH ?"
+ )
+ count_args = [search_term] + count_args
+ else:
+ # This should be unreachable.
+ raise Exception("Unrecognized database engine")
+
+ for clause in clauses:
+ sql += " AND " + clause
+
+ for clause in count_clauses:
+ count_sql += " AND " + clause
+
+ # We add an arbitrary limit here to ensure we don't try to pull the
+ # entire table from the database.
+ sql += " ORDER BY rank DESC LIMIT 500"
+
+ results = await self.db_pool.execute(
+ "search_msgs", self.db_pool.cursor_to_dict, sql, *args
+ )
+
+ results = list(filter(lambda row: row["room_id"] in room_ids, results))
+
+ # We set redact_behaviour to BLOCK here to prevent redacted events being returned in
+ # search results (which is a data leak)
+ events = await self.get_events_as_list(
+ [r["event_id"] for r in results],
+ redact_behaviour=EventRedactBehaviour.BLOCK,
+ )
+
+ event_map = {ev.event_id: ev for ev in events}
+
+ highlights = None
+ if isinstance(self.database_engine, PostgresEngine):
+ highlights = await self._find_highlights_in_postgres(search_query, events)
+
+ count_sql += " GROUP BY room_id"
+
+ count_results = await self.db_pool.execute(
+ "search_rooms_count", self.db_pool.cursor_to_dict, count_sql, *count_args
+ )
+
+ count = sum(row["count"] for row in count_results if row["room_id"] in room_ids)
+
+ return {
+ "results": [
+ {"event": event_map[r["event_id"]], "rank": r["rank"]}
+ for r in results
+ if r["event_id"] in event_map
+ ],
+ "highlights": highlights,
+ "count": count,
+ }
+
+ async def search_rooms(
+ self,
+ room_ids: List[str],
+ search_term: str,
+ keys: List[str],
+ limit,
+ pagination_token: Optional[str] = None,
+ ) -> List[dict]:
+ """Performs a full text search over events with given keys.
+
+ Args:
+ room_ids: The room_ids to search in
+ search_term: Search term to search for
+ keys: List of keys to search in, currently supports "content.body",
+ "content.name", "content.topic"
+ pagination_token: A pagination token previously returned
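+                (of the form "<origin_server_ts>,<stream_ordering>", as produced below)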
+
+ Returns:
+ Each match as a dictionary.
+ """
+ clauses = []
+
+ search_query = _parse_query(self.database_engine, search_term)
+
+ args = []
+
+ # Make sure we don't explode because the person is in too many rooms.
+ # We filter the results below regardless.
+ if len(room_ids) < 500:
+ clause, args = make_in_list_sql_clause(
+ self.database_engine, "room_id", room_ids
+ )
+ clauses = [clause]
+
+ local_clauses = []
+ for key in keys:
+ local_clauses.append("key = ?")
+ args.append(key)
+
+ clauses.append("(%s)" % (" OR ".join(local_clauses),))
+
+ # take copies of the current args and clauses lists, before adding
+ # pagination clauses to main query.
+ count_args = list(args)
+ count_clauses = list(clauses)
+
+ if pagination_token:
+ try:
+ origin_server_ts, stream = pagination_token.split(",")
+ origin_server_ts = int(origin_server_ts)
+ stream = int(stream)
+ except Exception:
+ raise SynapseError(400, "Invalid pagination token")
+
+ clauses.append(
+ "(origin_server_ts < ?"
+ " OR (origin_server_ts = ? AND stream_ordering < ?))"
+ )
+ args.extend([origin_server_ts, origin_server_ts, stream])
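+        # The pagination token is of the form "<origin_server_ts>,<stream_ordering>"
+        # (e.g. "1596000000000,12345"), matching the tokens generated for each
+        # result returned below.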
+
+ if isinstance(self.database_engine, PostgresEngine):
+ sql = (
+ "SELECT ts_rank_cd(vector, to_tsquery('english', ?)) as rank,"
+ " origin_server_ts, stream_ordering, room_id, event_id"
+ " FROM event_search"
+ " WHERE vector @@ to_tsquery('english', ?) AND "
+ )
+ args = [search_query, search_query] + args
+
+ count_sql = (
+ "SELECT room_id, count(*) as count FROM event_search"
+ " WHERE vector @@ to_tsquery('english', ?) AND "
+ )
+ count_args = [search_query] + count_args
+ elif isinstance(self.database_engine, Sqlite3Engine):
+ # We use CROSS JOIN here to ensure we use the right indexes.
+ # https://sqlite.org/optoverview.html#crossjoin
+ #
+ # We want to use the full text search index on event_search to
+ # extract all possible matches first, then lookup those matches
+ # in the events table to get the topological ordering. We need
+ # to use the indexes in this order because sqlite refuses to
+ # MATCH unless it uses the full text search index
+ sql = (
+ "SELECT rank(matchinfo) as rank, room_id, event_id,"
+ " origin_server_ts, stream_ordering"
+ " FROM (SELECT key, event_id, matchinfo(event_search) as matchinfo"
+ " FROM event_search"
+ " WHERE value MATCH ?"
+ " )"
+ " CROSS JOIN events USING (event_id)"
+ " WHERE "
+ )
+ args = [search_query] + args
+
+ count_sql = (
+ "SELECT room_id, count(*) as count FROM event_search"
+ " WHERE value MATCH ? AND "
+ )
+ count_args = [search_term] + count_args
+ else:
+ # This should be unreachable.
+ raise Exception("Unrecognized database engine")
+
+ sql += " AND ".join(clauses)
+ count_sql += " AND ".join(count_clauses)
+
+ # We add an arbitrary limit here to ensure we don't try to pull the
+ # entire table from the database.
+ if isinstance(self.database_engine, PostgresEngine):
+ sql += (
+ " ORDER BY origin_server_ts DESC NULLS LAST,"
+ " stream_ordering DESC NULLS LAST LIMIT ?"
+ )
+ elif isinstance(self.database_engine, Sqlite3Engine):
+ sql += " ORDER BY origin_server_ts DESC, stream_ordering DESC LIMIT ?"
+ else:
+ raise Exception("Unrecognized database engine")
+
+ args.append(limit)
+
+ results = await self.db_pool.execute(
+ "search_rooms", self.db_pool.cursor_to_dict, sql, *args
+ )
+
+ results = list(filter(lambda row: row["room_id"] in room_ids, results))
+
+ # We set redact_behaviour to BLOCK here to prevent redacted events being returned in
+ # search results (which is a data leak)
+ events = await self.get_events_as_list(
+ [r["event_id"] for r in results],
+ redact_behaviour=EventRedactBehaviour.BLOCK,
+ )
+
+ event_map = {ev.event_id: ev for ev in events}
+
+ highlights = None
+ if isinstance(self.database_engine, PostgresEngine):
+ highlights = await self._find_highlights_in_postgres(search_query, events)
+
+ count_sql += " GROUP BY room_id"
+
+ count_results = await self.db_pool.execute(
+ "search_rooms_count", self.db_pool.cursor_to_dict, count_sql, *count_args
+ )
+
+ count = sum(row["count"] for row in count_results if row["room_id"] in room_ids)
+
+ return {
+ "results": [
+ {
+ "event": event_map[r["event_id"]],
+ "rank": r["rank"],
+ "pagination_token": "%s,%s"
+ % (r["origin_server_ts"], r["stream_ordering"]),
+ }
+ for r in results
+ if r["event_id"] in event_map
+ ],
+ "highlights": highlights,
+ "count": count,
+ }
+
+ def _find_highlights_in_postgres(self, search_query, events):
+ """Given a list of events and a search term, return a list of words
+ that match from the content of the event.
+
+ This is used to give a list of words that clients can match against to
+ highlight the matching parts.
+
+ Args:
+ search_query (str)
+ events (list): A list of events
+
+        Returns:
+            Deferred[set[str]]: The set of matching words to highlight.
+ """
+
+ def f(txn):
+ highlight_words = set()
+ for event in events:
+ # As a hack we simply join values of all possible keys. This is
+ # fine since we're only using them to find possible highlights.
+ values = []
+ for key in ("body", "name", "topic"):
+ v = event.content.get(key, None)
+ if v:
+ values.append(v)
+
+ if not values:
+ continue
+
+ value = " ".join(values)
+
+ # We need to find some values for StartSel and StopSel that
+ # aren't in the value so that we can pick results out.
+ start_sel = "<"
+ stop_sel = ">"
+
+ while start_sel in value:
+ start_sel += "<"
+ while stop_sel in value:
+ stop_sel += ">"
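+                # e.g. if the text already contains "<", start_sel grows to "<<"
+                # (and so on) until neither delimiter appears anywhere in the text.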
+
+ query = "SELECT ts_headline(?, to_tsquery('english', ?), %s)" % (
+ _to_postgres_options(
+ {
+ "StartSel": start_sel,
+ "StopSel": stop_sel,
+ "MaxFragments": "50",
+ }
+ )
+ )
+ txn.execute(query, (value, search_query))
+ (headline,) = txn.fetchall()[0]
+
+                # Now we need to pick the possible highlights out of the headline
+                # result.
+ matcher_regex = "%s(.*?)%s" % (
+ re.escape(start_sel),
+ re.escape(stop_sel),
+ )
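+                # e.g. with start_sel "<" and stop_sel ">", a headline such as
+                # "foo <bar> baz" yields the candidate highlight "bar".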
+
+ res = re.findall(matcher_regex, headline)
+ highlight_words.update([r.lower() for r in res])
+
+ return highlight_words
+
+ return self.db_pool.runInteraction("_find_highlights", f)
+
+
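+# Renders an options dict into the quoted options string expected by ts_headline(),
+# e.g. {"StartSel": "<", "StopSel": ">"} becomes "'StartSel=<,StopSel=>'".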
+def _to_postgres_options(options_dict):
+ return "'%s'" % (",".join("%s=%s" % (k, v) for k, v in options_dict.items()),)
+
+
+def _parse_query(database_engine, search_term):
+ """Takes a plain unicode string from the user and converts it into a form
+ that can be passed to database.
+ We use this so that we can add prefix matching, which isn't something
+ that is supported by default.
+ """
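+    # For example, the term "foo bar" becomes "foo:* & bar:*" on Postgres and
+    # "foo* & bar*" on SQLite, so that each word is prefix-matched.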
+
+ # Pull out the individual words, discarding any non-word characters.
+ results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
+
+ if isinstance(database_engine, PostgresEngine):
+ return " & ".join(result + ":*" for result in results)
+ elif isinstance(database_engine, Sqlite3Engine):
+ return " & ".join(result + "*" for result in results)
+ else:
+ # This should be unreachable.
+ raise Exception("Unrecognized database engine")
diff --git a/synapse/storage/databases/main/signatures.py b/synapse/storage/databases/main/signatures.py
new file mode 100644
index 00000000..be191dd8
--- /dev/null
+++ b/synapse/storage/databases/main/signatures.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unpaddedbase64 import encode_base64
+
+from synapse.storage._base import SQLBaseStore
+from synapse.util.caches.descriptors import cached, cachedList
+
+
+class SignatureWorkerStore(SQLBaseStore):
+ @cached()
+ def get_event_reference_hash(self, event_id):
+ # This is a dummy function to allow get_event_reference_hashes
+ # to use its cache
+ raise NotImplementedError()
+
+ @cachedList(
+ cached_method_name="get_event_reference_hash", list_name="event_ids", num_args=1
+ )
+ def get_event_reference_hashes(self, event_ids):
+ def f(txn):
+ return {
+ event_id: self._get_event_reference_hashes_txn(txn, event_id)
+ for event_id in event_ids
+ }
+
+ return self.db_pool.runInteraction("get_event_reference_hashes", f)
+
+ async def add_event_hashes(self, event_ids):
+ hashes = await self.get_event_reference_hashes(event_ids)
+ hashes = {
+ e_id: {k: encode_base64(v) for k, v in h.items() if k == "sha256"}
+ for e_id, h in hashes.items()
+ }
+
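+        # Each entry in the returned list is a tuple of
+        # (event_id, {"sha256": <unpadded base64 hash>}).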
+ return list(hashes.items())
+
+ def _get_event_reference_hashes_txn(self, txn, event_id):
+ """Get all the hashes for a given PDU.
+ Args:
+ txn (cursor):
+ event_id (str): Id for the Event.
+ Returns:
+ A dict[unicode, bytes] of algorithm -> hash.
+ """
+ query = (
+ "SELECT algorithm, hash"
+ " FROM event_reference_hashes"
+ " WHERE event_id = ?"
+ )
+ txn.execute(query, (event_id,))
+ return {k: v for k, v in txn}
+
+
+class SignatureStore(SignatureWorkerStore):
+ """Persistence for event signatures and hashes"""
diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py
new file mode 100644
index 00000000..96e0378e
--- /dev/null
+++ b/synapse/storage/databases/main/state.py
@@ -0,0 +1,509 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import collections.abc
+import logging
+from collections import namedtuple
+from typing import Iterable, Optional, Set
+
+from synapse.api.constants import EventTypes, Membership
+from synapse.api.errors import NotFoundError, UnsupportedRoomVersionError
+from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion
+from synapse.events import EventBase
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.database import DatabasePool
+from synapse.storage.databases.main.events_worker import EventsWorkerStore
+from synapse.storage.databases.main.roommember import RoomMemberWorkerStore
+from synapse.storage.state import StateFilter
+from synapse.util.caches import intern_string
+from synapse.util.caches.descriptors import cached, cachedList
+
+logger = logging.getLogger(__name__)
+
+
+MAX_STATE_DELTA_HOPS = 100
+
+
+class _GetStateGroupDelta(
+ namedtuple("_GetStateGroupDelta", ("prev_group", "delta_ids"))
+):
+    """Return type of get_state_group_delta that implements __len__, which lets
+    us use the iterable flag when caching.
+    """
+
+ __slots__ = []
+
+ def __len__(self):
+ return len(self.delta_ids) if self.delta_ids else 0
+
+
+# this inherits from EventsWorkerStore because it calls self.get_events
+class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
+ """The parts of StateGroupStore that can be called from workers.
+ """
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(StateGroupWorkerStore, self).__init__(database, db_conn, hs)
+
+ async def get_room_version(self, room_id: str) -> RoomVersion:
+ """Get the room_version of a given room
+
+ Raises:
+ NotFoundError: if the room is unknown
+
+ UnsupportedRoomVersionError: if the room uses an unknown room version.
+ Typically this happens if support for the room's version has been
+ removed from Synapse.
+ """
+ room_version_id = await self.get_room_version_id(room_id)
+ v = KNOWN_ROOM_VERSIONS.get(room_version_id)
+
+ if not v:
+ raise UnsupportedRoomVersionError(
+ "Room %s uses a room version %s which is no longer supported"
+ % (room_id, room_version_id)
+ )
+
+ return v
+
+ @cached(max_entries=10000)
+ async def get_room_version_id(self, room_id: str) -> str:
+ """Get the room_version of a given room
+
+ Raises:
+ NotFoundError: if the room is unknown
+ """
+
+ # First we try looking up room version from the database, but for old
+ # rooms we might not have added the room version to it yet so we fall
+ # back to previous behaviour and look in current state events.
+
+ # We really should have an entry in the rooms table for every room we
+ # care about, but let's be a bit paranoid (at least while the background
+ # update is happening) to avoid breaking existing rooms.
+ version = await self.db_pool.simple_select_one_onecol(
+ table="rooms",
+ keyvalues={"room_id": room_id},
+ retcol="room_version",
+ desc="get_room_version",
+ allow_none=True,
+ )
+
+ if version is not None:
+ return version
+
+ # Retrieve the room's create event
+ create_event = await self.get_create_event_for_room(room_id)
+ return create_event.content.get("room_version", "1")
+
+ async def get_room_predecessor(self, room_id: str) -> Optional[dict]:
+ """Get the predecessor of an upgraded room if it exists.
+ Otherwise return None.
+
+ Args:
+ room_id: The room ID.
+
+ Returns:
+            A dictionary containing the structure of the predecessor
+            field from the room's create event. The structure is not guaranteed,
+            as it is set by the server which created the room, but it is expected to be:
+ * room_id (str): The room ID of the predecessor room
+ * event_id (str): The ID of the tombstone event in the predecessor room
+
+ None if a predecessor key is not found, or is not a dictionary.
+
+ Raises:
+ NotFoundError if the given room is unknown
+ """
+ # Retrieve the room's create event
+ create_event = await self.get_create_event_for_room(room_id)
+
+ # Retrieve the predecessor key of the create event
+ predecessor = create_event.content.get("predecessor", None)
+
+ # Ensure the key is a dictionary
+ if not isinstance(predecessor, collections.abc.Mapping):
+ return None
+
+ return predecessor
+
+ async def get_create_event_for_room(self, room_id: str) -> EventBase:
+ """Get the create state event for a room.
+
+ Args:
+ room_id: The room ID.
+
+ Returns:
+ The room creation event.
+
+ Raises:
+ NotFoundError if the room is unknown
+ """
+ state_ids = await self.get_current_state_ids(room_id)
+ create_id = state_ids.get((EventTypes.Create, ""))
+
+ # If we can't find the create event, assume we've hit a dead end
+ if not create_id:
+ raise NotFoundError("Unknown room %s" % (room_id,))
+
+ # Retrieve the room's create event and return
+ create_event = await self.get_event(create_id)
+ return create_event
+
+ @cached(max_entries=100000, iterable=True)
+ def get_current_state_ids(self, room_id):
+ """Get the current state event ids for a room based on the
+ current_state_events table.
+
+ Args:
+ room_id (str)
+
+ Returns:
+ deferred: dict of (type, state_key) -> event_id
+ """
+
+ def _get_current_state_ids_txn(txn):
+ txn.execute(
+ """SELECT type, state_key, event_id FROM current_state_events
+ WHERE room_id = ?
+ """,
+ (room_id,),
+ )
+
+ return {(intern_string(r[0]), intern_string(r[1])): r[2] for r in txn}
+
+ return self.db_pool.runInteraction(
+ "get_current_state_ids", _get_current_state_ids_txn
+ )
+
+ # FIXME: how should this be cached?
+ def get_filtered_current_state_ids(
+ self, room_id: str, state_filter: StateFilter = StateFilter.all()
+ ):
+ """Get the current state event of a given type for a room based on the
+ current_state_events table. This may not be as up-to-date as the result
+ of doing a fresh state resolution as per state_handler.get_current_state
+
+ Args:
+ room_id
+ state_filter: The state filter used to fetch state
+ from the database.
+
+ Returns:
+ defer.Deferred[StateMap[str]]: Map from type/state_key to event ID.
+ """
+
+ where_clause, where_args = state_filter.make_sql_filter_clause()
+
+ if not where_clause:
+ # We delegate to the cached version
+ return self.get_current_state_ids(room_id)
+
+ def _get_filtered_current_state_ids_txn(txn):
+ results = {}
+ sql = """
+ SELECT type, state_key, event_id FROM current_state_events
+ WHERE room_id = ?
+ """
+
+ if where_clause:
+ sql += " AND (%s)" % (where_clause,)
+
+ args = [room_id]
+ args.extend(where_args)
+ txn.execute(sql, args)
+ for row in txn:
+ typ, state_key, event_id = row
+ key = (intern_string(typ), intern_string(state_key))
+ results[key] = event_id
+
+ return results
+
+ return self.db_pool.runInteraction(
+ "get_filtered_current_state_ids", _get_filtered_current_state_ids_txn
+ )
+
+ async def get_canonical_alias_for_room(self, room_id: str) -> Optional[str]:
+ """Get canonical alias for room, if any
+
+ Args:
+ room_id: The room ID
+
+ Returns:
+ The canonical alias, if any
+ """
+
+ state = await self.get_filtered_current_state_ids(
+ room_id, StateFilter.from_types([(EventTypes.CanonicalAlias, "")])
+ )
+
+ event_id = state.get((EventTypes.CanonicalAlias, ""))
+ if not event_id:
+ return
+
+ event = await self.get_event(event_id, allow_none=True)
+ if not event:
+ return
+
+ return event.content.get("canonical_alias")
+
+ @cached(max_entries=50000)
+ def _get_state_group_for_event(self, event_id):
+ return self.db_pool.simple_select_one_onecol(
+ table="event_to_state_groups",
+ keyvalues={"event_id": event_id},
+ retcol="state_group",
+ allow_none=True,
+ desc="_get_state_group_for_event",
+ )
+
+ @cachedList(
+ cached_method_name="_get_state_group_for_event",
+ list_name="event_ids",
+ num_args=1,
+ inlineCallbacks=True,
+ )
+ def _get_state_group_for_events(self, event_ids):
+ """Returns mapping event_id -> state_group
+ """
+ rows = yield self.db_pool.simple_select_many_batch(
+ table="event_to_state_groups",
+ column="event_id",
+ iterable=event_ids,
+ keyvalues={},
+ retcols=("event_id", "state_group"),
+ desc="_get_state_group_for_events",
+ )
+
+ return {row["event_id"]: row["state_group"] for row in rows}
+
+ async def get_referenced_state_groups(
+ self, state_groups: Iterable[int]
+ ) -> Set[int]:
+ """Check if the state groups are referenced by events.
+
+ Args:
+ state_groups
+
+ Returns:
+ The subset of state groups that are referenced.
+ """
+
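+        # NB: passing "DISTINCT state_group" as the retcol means the generated
+        # SELECT de-duplicates the state groups for us.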
+ rows = await self.db_pool.simple_select_many_batch(
+ table="event_to_state_groups",
+ column="state_group",
+ iterable=state_groups,
+ keyvalues={},
+ retcols=("DISTINCT state_group",),
+ desc="get_referenced_state_groups",
+ )
+
+ return {row["state_group"] for row in rows}
+
+
+class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
+
+ CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx"
+ EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index"
+ DELETE_CURRENT_STATE_UPDATE_NAME = "delete_old_current_state_events"
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(MainStateBackgroundUpdateStore, self).__init__(database, db_conn, hs)
+
+ self.server_name = hs.hostname
+
+ self.db_pool.updates.register_background_index_update(
+ self.CURRENT_STATE_INDEX_UPDATE_NAME,
+ index_name="current_state_events_member_index",
+ table="current_state_events",
+ columns=["state_key"],
+ where_clause="type='m.room.member'",
+ )
+ self.db_pool.updates.register_background_index_update(
+ self.EVENT_STATE_GROUP_INDEX_UPDATE_NAME,
+ index_name="event_to_state_groups_sg_index",
+ table="event_to_state_groups",
+ columns=["state_group"],
+ )
+ self.db_pool.updates.register_background_update_handler(
+ self.DELETE_CURRENT_STATE_UPDATE_NAME, self._background_remove_left_rooms,
+ )
+
+ async def _background_remove_left_rooms(self, progress, batch_size):
+ """Background update to delete rows from `current_state_events` and
+ `event_forward_extremities` tables of rooms that the server is no
+ longer joined to.
+ """
+
+ last_room_id = progress.get("last_room_id", "")
+
+ def _background_remove_left_rooms_txn(txn):
+ # get a batch of room ids to consider
+ sql = """
+ SELECT DISTINCT room_id FROM current_state_events
+ WHERE room_id > ? ORDER BY room_id LIMIT ?
+ """
+
+ txn.execute(sql, (last_room_id, batch_size))
+ room_ids = [row[0] for row in txn]
+ if not room_ids:
+ return True, set()
+
+ ###########################################################################
+ #
+ # exclude rooms where we have active members
+
+ sql = """
+ SELECT room_id
+ FROM local_current_membership
+ WHERE
+ room_id > ? AND room_id <= ?
+ AND membership = 'join'
+ GROUP BY room_id
+ """
+
+ txn.execute(sql, (last_room_id, room_ids[-1]))
+ joined_room_ids = {row[0] for row in txn}
+ to_delete = set(room_ids) - joined_room_ids
+
+ ###########################################################################
+ #
+ # exclude rooms which we are in the process of constructing; these otherwise
+ # qualify as "rooms with no local users", and would have their
+ # forward extremities cleaned up.
+
+ # the following query will return a list of rooms which have forward
+ # extremities that are *not* also the create event in the room - ie
+ # those that are not being created currently.
+
+ sql = """
+ SELECT DISTINCT efe.room_id
+ FROM event_forward_extremities efe
+ LEFT JOIN current_state_events cse ON
+ cse.event_id = efe.event_id
+ AND cse.type = 'm.room.create'
+ AND cse.state_key = ''
+ WHERE
+ cse.event_id IS NULL
+ AND efe.room_id > ? AND efe.room_id <= ?
+ """
+
+ txn.execute(sql, (last_room_id, room_ids[-1]))
+
+ # build a set of those rooms within `to_delete` that do not appear in
+ # the above, leaving us with the rooms in `to_delete` that *are* being
+ # created.
+ creating_rooms = to_delete.difference(row[0] for row in txn)
+ logger.info("skipping rooms which are being created: %s", creating_rooms)
+
+ # now remove the rooms being created from the list of those to delete.
+ #
+ # (we could have just taken the intersection of `to_delete` with the result
+ # of the sql query, but it's useful to be able to log `creating_rooms`; and
+ # having done so, it's quicker to remove the (few) creating rooms from
+ # `to_delete` than it is to form the intersection with the (larger) list of
+ # not-creating-rooms)
+
+ to_delete -= creating_rooms
+
+ ###########################################################################
+ #
+ # now clear the state for the rooms
+
+ logger.info("Deleting current state left rooms: %r", to_delete)
+
+ # First we get all users that we still think were joined to the
+ # room. This is so that we can mark those device lists as
+ # potentially stale, since there may have been a period where the
+ # server didn't share a room with the remote user and therefore may
+ # have missed any device updates.
+ rows = self.db_pool.simple_select_many_txn(
+ txn,
+ table="current_state_events",
+ column="room_id",
+ iterable=to_delete,
+ keyvalues={"type": EventTypes.Member, "membership": Membership.JOIN},
+ retcols=("state_key",),
+ )
+
+ potentially_left_users = {row["state_key"] for row in rows}
+
+ # Now lets actually delete the rooms from the DB.
+ self.db_pool.simple_delete_many_txn(
+ txn,
+ table="current_state_events",
+ column="room_id",
+ iterable=to_delete,
+ keyvalues={},
+ )
+
+ self.db_pool.simple_delete_many_txn(
+ txn,
+ table="event_forward_extremities",
+ column="room_id",
+ iterable=to_delete,
+ keyvalues={},
+ )
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn,
+ self.DELETE_CURRENT_STATE_UPDATE_NAME,
+ {"last_room_id": room_ids[-1]},
+ )
+
+ return False, potentially_left_users
+
+ finished, potentially_left_users = await self.db_pool.runInteraction(
+ "_background_remove_left_rooms", _background_remove_left_rooms_txn
+ )
+
+ if finished:
+ await self.db_pool.updates._end_background_update(
+ self.DELETE_CURRENT_STATE_UPDATE_NAME
+ )
+
+ # Now go and check if we still share a room with the remote users in
+ # the deleted rooms. If not mark their device lists as stale.
+ joined_users = await self.get_users_server_still_shares_room_with(
+ potentially_left_users
+ )
+
+ for user_id in potentially_left_users - joined_users:
+ await self.mark_remote_user_device_list_as_unsubscribed(user_id)
+
+ return batch_size
+
+
+class StateStore(StateGroupWorkerStore, MainStateBackgroundUpdateStore):
+ """ Keeps track of the state at a given event.
+
+ This is done by the concept of `state groups`. Every event is a assigned
+    This is done by the concept of `state groups`. Every event is assigned
+    a state group (identified by an arbitrary string), which references a
+    collection of state events. The current state of an event is then the
+    collection of state events referenced by the event's state group.
+
+    Hence, every change in the current state causes a new state group to be
+    generated. However, if no change happens (e.g. if we get a message event
+    with only one parent), it inherits the state group from its parent.
+ There are three tables:
+ * `state_groups`: Stores group name, first event with in the group and
+      * `state_groups`: Stores group name, first event within the group and
+ * `event_to_state_groups`: Maps events to state groups.
+ * `state_groups_state`: Maps state group to state events.
+ """
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(StateStore, self).__init__(database, db_conn, hs)
diff --git a/synapse/storage/databases/main/state_deltas.py b/synapse/storage/databases/main/state_deltas.py
new file mode 100644
index 00000000..0d963c98
--- /dev/null
+++ b/synapse/storage/databases/main/state_deltas.py
@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 Vector Creations Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from twisted.internet import defer
+
+from synapse.storage._base import SQLBaseStore
+
+logger = logging.getLogger(__name__)
+
+
+class StateDeltasStore(SQLBaseStore):
+ def get_current_state_deltas(self, prev_stream_id: int, max_stream_id: int):
+ """Fetch a list of room state changes since the given stream id
+
+ Each entry in the result contains the following fields:
+ - stream_id (int)
+ - room_id (str)
+ - type (str): event type
+ - state_key (str):
+ - event_id (str|None): new event_id for this state key. None if the
+ state has been deleted.
+ - prev_event_id (str|None): previous event_id for this state key. None
+ if it's new state.
+
+ Args:
+ prev_stream_id (int): point to get changes since (exclusive)
+ max_stream_id (int): the point that we know has been correctly persisted
+ - ie, an upper limit to return changes from.
+
+ Returns:
+            Deferred[tuple[int, list[dict]]]: A tuple consisting of:
+ - the stream id which these results go up to
+ - list of current_state_delta_stream rows. If it is empty, we are
+ up to date.
+ """
+ prev_stream_id = int(prev_stream_id)
+
+ # check we're not going backwards
+ assert prev_stream_id <= max_stream_id
+
+ if not self._curr_state_delta_stream_cache.has_any_entity_changed(
+ prev_stream_id
+ ):
+ # if the CSDs haven't changed between prev_stream_id and now, we
+ # know for certain that they haven't changed between prev_stream_id and
+ # max_stream_id.
+ return defer.succeed((max_stream_id, []))
+
+ def get_current_state_deltas_txn(txn):
+ # First we calculate the max stream id that will give us less than
+ # N results.
+            # We arbitrarily limit to 100 stream_id entries to ensure we don't
+            # select too many.
+ sql = """
+ SELECT stream_id, count(*)
+ FROM current_state_delta_stream
+ WHERE stream_id > ? AND stream_id <= ?
+ GROUP BY stream_id
+ ORDER BY stream_id ASC
+ LIMIT 100
+ """
+ txn.execute(sql, (prev_stream_id, max_stream_id))
+
+ total = 0
+
+ for stream_id, count in txn:
+ total += count
+ if total > 100:
+                    # We arbitrarily limit to 100 entries to ensure we don't
+                    # select too many.
+ logger.debug(
+ "Clipping current_state_delta_stream rows to stream_id %i",
+ stream_id,
+ )
+ clipped_stream_id = stream_id
+ break
+ else:
+ # if there's no problem, we may as well go right up to the max_stream_id
+ clipped_stream_id = max_stream_id
+
+ # Now actually get the deltas
+ sql = """
+ SELECT stream_id, room_id, type, state_key, event_id, prev_event_id
+ FROM current_state_delta_stream
+ WHERE ? < stream_id AND stream_id <= ?
+ ORDER BY stream_id ASC
+ """
+ txn.execute(sql, (prev_stream_id, clipped_stream_id))
+ return clipped_stream_id, self.db_pool.cursor_to_dict(txn)
+
+ return self.db_pool.runInteraction(
+ "get_current_state_deltas", get_current_state_deltas_txn
+ )
+
+ def _get_max_stream_id_in_current_state_deltas_txn(self, txn):
+ return self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="current_state_delta_stream",
+ keyvalues={},
+ retcol="COALESCE(MAX(stream_id), -1)",
+ )
+
+ def get_max_stream_id_in_current_state_deltas(self):
+ return self.db_pool.runInteraction(
+ "get_max_stream_id_in_current_state_deltas",
+ self._get_max_stream_id_in_current_state_deltas_txn,
+ )
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
new file mode 100644
index 00000000..802c9019
--- /dev/null
+++ b/synapse/storage/databases/main/stats.py
@@ -0,0 +1,886 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018, 2019 New Vector Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from itertools import chain
+from typing import Tuple
+
+from twisted.internet.defer import DeferredLock
+
+from synapse.api.constants import EventTypes, Membership
+from synapse.storage.database import DatabasePool
+from synapse.storage.databases.main.state_deltas import StateDeltasStore
+from synapse.storage.engines import PostgresEngine
+from synapse.util.caches.descriptors import cached
+
+logger = logging.getLogger(__name__)
+
+# these fields track absolutes (e.g. total number of rooms on the server)
+# You can think of these as Prometheus Gauges.
+# You can draw these stats on a line graph.
+# Example: number of users in a room
+ABSOLUTE_STATS_FIELDS = {
+ "room": (
+ "current_state_events",
+ "joined_members",
+ "invited_members",
+ "left_members",
+ "banned_members",
+ "local_users_in_room",
+ ),
+ "user": ("joined_rooms",),
+}
+
+# these fields are per-timeslice and so should be reset to 0 upon a new slice
+# You can draw these stats on a histogram.
+# Example: number of events sent locally during a time slice
+PER_SLICE_FIELDS = {
+ "room": ("total_events", "total_event_bytes"),
+ "user": ("invites_sent", "rooms_created", "total_events", "total_event_bytes"),
+}
+
+TYPE_TO_TABLE = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user_id")}
+
+# these are the tables (& ID columns) which contain our actual subjects
+TYPE_TO_ORIGIN_TABLE = {"room": ("rooms", "room_id"), "user": ("users", "name")}
+
+
+class StatsStore(StateDeltasStore):
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(StatsStore, self).__init__(database, db_conn, hs)
+
+ self.server_name = hs.hostname
+ self.clock = self.hs.get_clock()
+ self.stats_enabled = hs.config.stats_enabled
+ self.stats_bucket_size = hs.config.stats_bucket_size
+
+ self.stats_delta_processing_lock = DeferredLock()
+
+ self.db_pool.updates.register_background_update_handler(
+ "populate_stats_process_rooms", self._populate_stats_process_rooms
+ )
+ self.db_pool.updates.register_background_update_handler(
+ "populate_stats_process_rooms_2", self._populate_stats_process_rooms_2
+ )
+ self.db_pool.updates.register_background_update_handler(
+ "populate_stats_process_users", self._populate_stats_process_users
+ )
+ # we no longer need to perform clean-up, but we will give ourselves
+ # the potential to reintroduce it in the future – so documentation
+ # will still encourage the use of this no-op handler.
+ self.db_pool.updates.register_noop_background_update("populate_stats_cleanup")
+ self.db_pool.updates.register_noop_background_update("populate_stats_prepare")
+
+ def quantise_stats_time(self, ts):
+ """
+ Quantises a timestamp to be a multiple of the bucket size.
+
+ Args:
+ ts (int): the timestamp to quantise, in milliseconds since the Unix
+ Epoch
+
+ Returns:
+ int: a timestamp which
+ - is divisible by the bucket size;
+ - is no later than `ts`; and
+ - is the largest such timestamp.
+ """
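+        # e.g. with a bucket size of 86400000 ms (one day), a ts of
+        # 1234567890123 is quantised down to 1234483200000.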
+ return (ts // self.stats_bucket_size) * self.stats_bucket_size
+
+ async def _populate_stats_process_users(self, progress, batch_size):
+ """
+ This is a background update which regenerates statistics for users.
+ """
+ if not self.stats_enabled:
+ await self.db_pool.updates._end_background_update(
+ "populate_stats_process_users"
+ )
+ return 1
+
+ last_user_id = progress.get("last_user_id", "")
+
+ def _get_next_batch(txn):
+ sql = """
+ SELECT DISTINCT name FROM users
+ WHERE name > ?
+ ORDER BY name ASC
+ LIMIT ?
+ """
+ txn.execute(sql, (last_user_id, batch_size))
+ return [r for r, in txn]
+
+ users_to_work_on = await self.db_pool.runInteraction(
+ "_populate_stats_process_users", _get_next_batch
+ )
+
+ # No more rooms -- complete the transaction.
+ if not users_to_work_on:
+ await self.db_pool.updates._end_background_update(
+ "populate_stats_process_users"
+ )
+ return 1
+
+ for user_id in users_to_work_on:
+ await self._calculate_and_set_initial_state_for_user(user_id)
+ progress["last_user_id"] = user_id
+
+ await self.db_pool.runInteraction(
+ "populate_stats_process_users",
+ self.db_pool.updates._background_update_progress_txn,
+ "populate_stats_process_users",
+ progress,
+ )
+
+ return len(users_to_work_on)
+
+ async def _populate_stats_process_rooms(self, progress, batch_size):
+ """
+ This was a background update which regenerated statistics for rooms.
+
+        It has been replaced by StatsStore._populate_stats_process_rooms_2. This background
+        job was scheduled to run as part of Synapse v1.0.0, and is scheduled again now. So
+        that someone upgrading from <v1.0.0 does not run the potentially expensive task
+        twice, this background update has been turned into a no-op.
+
+ Further context: https://github.com/matrix-org/synapse/pull/7977
+ """
+ await self.db_pool.updates._end_background_update(
+ "populate_stats_process_rooms"
+ )
+ return 1
+
+ async def _populate_stats_process_rooms_2(self, progress, batch_size):
+ """
+ This is a background update which regenerates statistics for rooms.
+
+ It replaces StatsStore._populate_stats_process_rooms. See its docstring for the
+ reasoning.
+ """
+ if not self.stats_enabled:
+ await self.db_pool.updates._end_background_update(
+ "populate_stats_process_rooms_2"
+ )
+ return 1
+
+ last_room_id = progress.get("last_room_id", "")
+
+ def _get_next_batch(txn):
+ sql = """
+ SELECT DISTINCT room_id FROM current_state_events
+ WHERE room_id > ?
+ ORDER BY room_id ASC
+ LIMIT ?
+ """
+ txn.execute(sql, (last_room_id, batch_size))
+ return [r for r, in txn]
+
+ rooms_to_work_on = await self.db_pool.runInteraction(
+ "populate_stats_rooms_2_get_batch", _get_next_batch
+ )
+
+ # No more rooms -- complete the transaction.
+ if not rooms_to_work_on:
+ await self.db_pool.updates._end_background_update(
+ "populate_stats_process_rooms_2"
+ )
+ return 1
+
+ for room_id in rooms_to_work_on:
+ await self._calculate_and_set_initial_state_for_room(room_id)
+ progress["last_room_id"] = room_id
+
+ await self.db_pool.runInteraction(
+ "_populate_stats_process_rooms_2",
+ self.db_pool.updates._background_update_progress_txn,
+ "populate_stats_process_rooms_2",
+ progress,
+ )
+
+ return len(rooms_to_work_on)
+
+ def get_stats_positions(self):
+ """
+ Returns the stats processor positions.
+ """
+ return self.db_pool.simple_select_one_onecol(
+ table="stats_incremental_position",
+ keyvalues={},
+ retcol="stream_id",
+ desc="stats_incremental_position",
+ )
+
+ def update_room_state(self, room_id, fields):
+ """
+ Args:
+ room_id (str)
+ fields (dict[str:Any])
+ """
+
+ # For whatever reason some of the fields may contain null bytes, which
+ # postgres isn't a fan of, so we replace those fields with null.
+ for col in (
+ "join_rules",
+ "history_visibility",
+ "encryption",
+ "name",
+ "topic",
+ "avatar",
+ "canonical_alias",
+ ):
+ field = fields.get(col)
+ if field and "\0" in field:
+ fields[col] = None
+
+ return self.db_pool.simple_upsert(
+ table="room_stats_state",
+ keyvalues={"room_id": room_id},
+ values=fields,
+ desc="update_room_state",
+ )
+
+ def get_statistics_for_subject(self, stats_type, stats_id, start, size=100):
+ """
+ Get statistics for a given subject.
+
+ Args:
+ stats_type (str): The type of subject
+ stats_id (str): The ID of the subject (e.g. room_id or user_id)
+ start (int): Pagination start. Number of entries, not timestamp.
+ size (int): How many entries to return.
+
+ Returns:
+ Deferred[list[dict]], where the dict has the keys of
+ ABSOLUTE_STATS_FIELDS[stats_type], and "bucket_size" and "end_ts".
+ """
+ return self.db_pool.runInteraction(
+ "get_statistics_for_subject",
+ self._get_statistics_for_subject_txn,
+ stats_type,
+ stats_id,
+ start,
+ size,
+ )
+
+ def _get_statistics_for_subject_txn(
+ self, txn, stats_type, stats_id, start, size=100
+ ):
+ """
+ Transaction-bound version of L{get_statistics_for_subject}.
+ """
+
+ table, id_col = TYPE_TO_TABLE[stats_type]
+ selected_columns = list(
+ ABSOLUTE_STATS_FIELDS[stats_type] + PER_SLICE_FIELDS[stats_type]
+ )
+
+ slice_list = self.db_pool.simple_select_list_paginate_txn(
+ txn,
+ table + "_historical",
+ "end_ts",
+ start,
+ size,
+ retcols=selected_columns + ["bucket_size", "end_ts"],
+ keyvalues={id_col: stats_id},
+ order_direction="DESC",
+ )
+
+ return slice_list
+
+ @cached()
+ def get_earliest_token_for_stats(self, stats_type, id):
+ """
+ Fetch the "earliest token". This is used by the room stats delta
+ processor to ignore deltas that have been processed between the
+ start of the background task and any particular room's stats
+ being calculated.
+
+ Returns:
+ Deferred[int]
+ """
+ table, id_col = TYPE_TO_TABLE[stats_type]
+
+ return self.db_pool.simple_select_one_onecol(
+ "%s_current" % (table,),
+ keyvalues={id_col: id},
+ retcol="completed_delta_stream_id",
+ allow_none=True,
+ )
+
+ def bulk_update_stats_delta(self, ts, updates, stream_id):
+ """Bulk update stats tables for a given stream_id and updates the stats
+ incremental position.
+
+ Args:
+ ts (int): Current timestamp in ms
+ updates(dict[str, dict[str, dict[str, Counter]]]): The updates to
+            updates (dict[str, dict[str, dict[str, Counter]]]): The updates to
+ stream_id (int): Current position.
+
+ Returns:
+ Deferred
+ """
+
+ def _bulk_update_stats_delta_txn(txn):
+ for stats_type, stats_updates in updates.items():
+ for stats_id, fields in stats_updates.items():
+ logger.debug(
+ "Updating %s stats for %s: %s", stats_type, stats_id, fields
+ )
+ self._update_stats_delta_txn(
+ txn,
+ ts=ts,
+ stats_type=stats_type,
+ stats_id=stats_id,
+ fields=fields,
+ complete_with_stream_id=stream_id,
+ )
+
+ self.db_pool.simple_update_one_txn(
+ txn,
+ table="stats_incremental_position",
+ keyvalues={},
+ updatevalues={"stream_id": stream_id},
+ )
+
+ return self.db_pool.runInteraction(
+ "bulk_update_stats_delta", _bulk_update_stats_delta_txn
+ )
+
+ def update_stats_delta(
+ self,
+ ts,
+ stats_type,
+ stats_id,
+ fields,
+ complete_with_stream_id,
+ absolute_field_overrides=None,
+ ):
+ """
+ Updates the statistics for a subject, with a delta (difference/relative
+ change).
+
+ Args:
+ ts (int): timestamp of the change
+ stats_type (str): "room" or "user" – the kind of subject
+ stats_id (str): the subject's ID (room ID or user ID)
+ fields (dict[str, int]): Deltas of stats values.
+ complete_with_stream_id (int, optional):
+ If supplied, converts an incomplete row into a complete row,
+ with the supplied stream_id marked as the stream_id where the
+ row was completed.
+ absolute_field_overrides (dict[str, int]): Current stats values
+ (i.e. not deltas) of absolute fields.
+ Does not work with per-slice fields.
+ """
+
+ return self.db_pool.runInteraction(
+ "update_stats_delta",
+ self._update_stats_delta_txn,
+ ts,
+ stats_type,
+ stats_id,
+ fields,
+ complete_with_stream_id=complete_with_stream_id,
+ absolute_field_overrides=absolute_field_overrides,
+ )
+
+ def _update_stats_delta_txn(
+ self,
+ txn,
+ ts,
+ stats_type,
+ stats_id,
+ fields,
+ complete_with_stream_id,
+ absolute_field_overrides=None,
+ ):
+ if absolute_field_overrides is None:
+ absolute_field_overrides = {}
+
+ table, id_col = TYPE_TO_TABLE[stats_type]
+
+ quantised_ts = self.quantise_stats_time(int(ts))
+ end_ts = quantised_ts + self.stats_bucket_size
+
+ # Lets be paranoid and check that all the given field names are known
+ abs_field_names = ABSOLUTE_STATS_FIELDS[stats_type]
+ slice_field_names = PER_SLICE_FIELDS[stats_type]
+ for field in chain(fields.keys(), absolute_field_overrides.keys()):
+ if field not in abs_field_names and field not in slice_field_names:
+ # guard against potential SQL injection dodginess
+ raise ValueError(
+ "%s is not a recognised field"
+ " for stats type %s" % (field, stats_type)
+ )
+
+ # Per slice fields do not get added to the _current table
+
+ # This calculates the deltas (`field = field + ?` values)
+ # for absolute fields,
+ # * defaulting to 0 if not specified
+ # (required for the INSERT part of upserting to work)
+ # * omitting overrides specified in `absolute_field_overrides`
+ deltas_of_absolute_fields = {
+ key: fields.get(key, 0)
+ for key in abs_field_names
+ if key not in absolute_field_overrides
+ }
+
+ # Keep the delta stream ID field up to date
+ absolute_field_overrides = absolute_field_overrides.copy()
+ absolute_field_overrides["completed_delta_stream_id"] = complete_with_stream_id
+
+ # first upsert the `_current` table
+ self._upsert_with_additive_relatives_txn(
+ txn=txn,
+ table=table + "_current",
+ keyvalues={id_col: stats_id},
+ absolutes=absolute_field_overrides,
+ additive_relatives=deltas_of_absolute_fields,
+ )
+
+ per_slice_additive_relatives = {
+ key: fields.get(key, 0) for key in slice_field_names
+ }
+ self._upsert_copy_from_table_with_additive_relatives_txn(
+ txn=txn,
+ into_table=table + "_historical",
+ keyvalues={id_col: stats_id},
+ extra_dst_insvalues={"bucket_size": self.stats_bucket_size},
+ extra_dst_keyvalues={"end_ts": end_ts},
+ additive_relatives=per_slice_additive_relatives,
+ src_table=table + "_current",
+ copy_columns=abs_field_names,
+ )
+
+ def _upsert_with_additive_relatives_txn(
+ self, txn, table, keyvalues, absolutes, additive_relatives
+ ):
+ """Used to update values in the stats tables.
+
+ This is basically a slightly convoluted upsert that *adds* to any
+ existing rows.
+
+ Args:
+ txn
+ table (str): Table name
+ keyvalues (dict[str, any]): Row-identifying key values
+ absolutes (dict[str, any]): Absolute (set) fields
+ additive_relatives (dict[str, int]): Fields that will be added onto
+ if existing row present.
+ """
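+        # e.g. with additive_relatives={"joined_members": 1}, an existing row
+        # with joined_members=4 ends up with joined_members=5; if no row exists
+        # yet, one is inserted with joined_members=1.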
+ if self.database_engine.can_native_upsert:
+ absolute_updates = [
+ "%(field)s = EXCLUDED.%(field)s" % {"field": field}
+ for field in absolutes.keys()
+ ]
+
+ relative_updates = [
+ "%(field)s = EXCLUDED.%(field)s + %(table)s.%(field)s"
+ % {"table": table, "field": field}
+ for field in additive_relatives.keys()
+ ]
+
+ insert_cols = []
+ qargs = []
+
+ for (key, val) in chain(
+ keyvalues.items(), absolutes.items(), additive_relatives.items()
+ ):
+ insert_cols.append(key)
+ qargs.append(val)
+
+ sql = """
+ INSERT INTO %(table)s (%(insert_cols_cs)s)
+ VALUES (%(insert_vals_qs)s)
+ ON CONFLICT (%(key_columns)s) DO UPDATE SET %(updates)s
+ """ % {
+ "table": table,
+ "insert_cols_cs": ", ".join(insert_cols),
+ "insert_vals_qs": ", ".join(
+ ["?"] * (len(keyvalues) + len(absolutes) + len(additive_relatives))
+ ),
+ "key_columns": ", ".join(keyvalues),
+ "updates": ", ".join(chain(absolute_updates, relative_updates)),
+ }
+
+ txn.execute(sql, qargs)
+ else:
+ self.database_engine.lock_table(txn, table)
+ retcols = list(chain(absolutes.keys(), additive_relatives.keys()))
+ current_row = self.db_pool.simple_select_one_txn(
+ txn, table, keyvalues, retcols, allow_none=True
+ )
+ if current_row is None:
+ merged_dict = {**keyvalues, **absolutes, **additive_relatives}
+ self.db_pool.simple_insert_txn(txn, table, merged_dict)
+ else:
+ for (key, val) in additive_relatives.items():
+ current_row[key] += val
+ current_row.update(absolutes)
+ self.db_pool.simple_update_one_txn(txn, table, keyvalues, current_row)
+
+ def _upsert_copy_from_table_with_additive_relatives_txn(
+ self,
+ txn,
+ into_table,
+ keyvalues,
+ extra_dst_keyvalues,
+ extra_dst_insvalues,
+ additive_relatives,
+ src_table,
+ copy_columns,
+ ):
+ """Updates the historic stats table with latest updates.
+
+ This involves copying "absolute" fields from the `_current` table, and
+ adding relative fields to any existing values.
+
+ Args:
+ txn: Transaction
+ into_table (str): The destination table to UPSERT the row into
+ keyvalues (dict[str, any]): Row-identifying key values
+ extra_dst_keyvalues (dict[str, any]): Additional keyvalues
+ for `into_table`.
+ extra_dst_insvalues (dict[str, any]): Additional values to insert
+ on new row creation for `into_table`.
+ additive_relatives (dict[str, any]): Fields that will be added onto
+ if existing row present. (Must be disjoint from copy_columns.)
+ src_table (str): The source table to copy from
+ copy_columns (iterable[str]): The list of columns to copy
+ """
+ if self.database_engine.can_native_upsert:
+ ins_columns = chain(
+ keyvalues,
+ copy_columns,
+ additive_relatives,
+ extra_dst_keyvalues,
+ extra_dst_insvalues,
+ )
+ sel_exprs = chain(
+ keyvalues,
+ copy_columns,
+ (
+ "?"
+ for _ in chain(
+ additive_relatives, extra_dst_keyvalues, extra_dst_insvalues
+ )
+ ),
+ )
+ keyvalues_where = ("%s = ?" % f for f in keyvalues)
+
+ sets_cc = ("%s = EXCLUDED.%s" % (f, f) for f in copy_columns)
+ sets_ar = (
+ "%s = EXCLUDED.%s + %s.%s" % (f, f, into_table, f)
+ for f in additive_relatives
+ )
+
+ sql = """
+ INSERT INTO %(into_table)s (%(ins_columns)s)
+ SELECT %(sel_exprs)s
+ FROM %(src_table)s
+ WHERE %(keyvalues_where)s
+ ON CONFLICT (%(keyvalues)s)
+ DO UPDATE SET %(sets)s
+ """ % {
+ "into_table": into_table,
+ "ins_columns": ", ".join(ins_columns),
+ "sel_exprs": ", ".join(sel_exprs),
+ "keyvalues_where": " AND ".join(keyvalues_where),
+ "src_table": src_table,
+ "keyvalues": ", ".join(
+ chain(keyvalues.keys(), extra_dst_keyvalues.keys())
+ ),
+ "sets": ", ".join(chain(sets_cc, sets_ar)),
+ }
+
+ qargs = list(
+ chain(
+ additive_relatives.values(),
+ extra_dst_keyvalues.values(),
+ extra_dst_insvalues.values(),
+ keyvalues.values(),
+ )
+ )
+ txn.execute(sql, qargs)
+ else:
+ self.database_engine.lock_table(txn, into_table)
+ src_row = self.db_pool.simple_select_one_txn(
+ txn, src_table, keyvalues, copy_columns
+ )
+ all_dest_keyvalues = {**keyvalues, **extra_dst_keyvalues}
+ dest_current_row = self.db_pool.simple_select_one_txn(
+ txn,
+ into_table,
+ keyvalues=all_dest_keyvalues,
+ retcols=list(chain(additive_relatives.keys(), copy_columns)),
+ allow_none=True,
+ )
+
+ if dest_current_row is None:
+ merged_dict = {
+ **keyvalues,
+ **extra_dst_keyvalues,
+ **extra_dst_insvalues,
+ **src_row,
+ **additive_relatives,
+ }
+ self.db_pool.simple_insert_txn(txn, into_table, merged_dict)
+ else:
+ for (key, val) in additive_relatives.items():
+ src_row[key] = dest_current_row[key] + val
+ self.db_pool.simple_update_txn(
+ txn, into_table, all_dest_keyvalues, src_row
+ )
+
+ def get_changes_room_total_events_and_bytes(self, min_pos, max_pos):
+ """Fetches the counts of events in the given range of stream IDs.
+
+ Args:
+ min_pos (int)
+ max_pos (int)
+
+ Returns:
+            Deferred[tuple[dict[str, dict[str, int]], dict[str, dict[str, int]]]]:
+            The room and user deltas for total_events/total_event_bytes, each as
+            a mapping of stats_id to field changes.
+ """
+
+ return self.db_pool.runInteraction(
+ "stats_incremental_total_events_and_bytes",
+ self.get_changes_room_total_events_and_bytes_txn,
+ min_pos,
+ max_pos,
+ )
+
+ def get_changes_room_total_events_and_bytes_txn(self, txn, low_pos, high_pos):
+ """Gets the total_events and total_event_bytes counts for rooms and
+ senders, in a range of stream_orderings (including backfilled events).
+
+ Args:
+ txn
+ low_pos (int): Low stream ordering
+ high_pos (int): High stream ordering
+
+ Returns:
+ tuple[dict[str, dict[str, int]], dict[str, dict[str, int]]]: The
+ room and user deltas for total_events/total_event_bytes in the
+ format of `stats_id` -> fields
+ """
+
+ if low_pos >= high_pos:
+ # nothing to do here.
+ return {}, {}
+
+ if isinstance(self.database_engine, PostgresEngine):
+ new_bytes_expression = "OCTET_LENGTH(json)"
+ else:
+ new_bytes_expression = "LENGTH(CAST(json AS BLOB))"
+
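+        # Backfilled events are stored with negative stream orderings, so the
+        # second range in the WHERE clause (bounded by -high_pos and -low_pos)
+        # picks up those events as well.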
+ sql = """
+ SELECT events.room_id, COUNT(*) AS new_events, SUM(%s) AS new_bytes
+ FROM events INNER JOIN event_json USING (event_id)
+ WHERE (? < stream_ordering AND stream_ordering <= ?)
+ OR (? <= stream_ordering AND stream_ordering <= ?)
+ GROUP BY events.room_id
+ """ % (
+ new_bytes_expression,
+ )
+
+ txn.execute(sql, (low_pos, high_pos, -high_pos, -low_pos))
+
+ room_deltas = {
+ room_id: {"total_events": new_events, "total_event_bytes": new_bytes}
+ for room_id, new_events, new_bytes in txn
+ }
+
+ sql = """
+ SELECT events.sender, COUNT(*) AS new_events, SUM(%s) AS new_bytes
+ FROM events INNER JOIN event_json USING (event_id)
+ WHERE (? < stream_ordering AND stream_ordering <= ?)
+ OR (? <= stream_ordering AND stream_ordering <= ?)
+ GROUP BY events.sender
+ """ % (
+ new_bytes_expression,
+ )
+
+ txn.execute(sql, (low_pos, high_pos, -high_pos, -low_pos))
+
+ user_deltas = {
+ user_id: {"total_events": new_events, "total_event_bytes": new_bytes}
+ for user_id, new_events, new_bytes in txn
+ if self.hs.is_mine_id(user_id)
+ }
+
+ return room_deltas, user_deltas
+
+ async def _calculate_and_set_initial_state_for_room(
+ self, room_id: str
+ ) -> Tuple[dict, dict, int]:
+ """Calculate and insert an entry into room_stats_current.
+
+ Args:
+ room_id: The room ID under calculation.
+
+ Returns:
+ A tuple of room state, membership counts and stream position.
+ """
+
+ def _fetch_current_state_stats(txn):
+ pos = self.get_room_max_stream_ordering()
+
+ rows = self.db_pool.simple_select_many_txn(
+ txn,
+ table="current_state_events",
+ column="type",
+ iterable=[
+ EventTypes.Create,
+ EventTypes.JoinRules,
+ EventTypes.RoomHistoryVisibility,
+ EventTypes.RoomEncryption,
+ EventTypes.Name,
+ EventTypes.Topic,
+ EventTypes.RoomAvatar,
+ EventTypes.CanonicalAlias,
+ ],
+ keyvalues={"room_id": room_id, "state_key": ""},
+ retcols=["event_id"],
+ )
+
+ event_ids = [row["event_id"] for row in rows]
+
+ txn.execute(
+ """
+ SELECT membership, count(*) FROM current_state_events
+ WHERE room_id = ? AND type = 'm.room.member'
+ GROUP BY membership
+ """,
+ (room_id,),
+ )
+ membership_counts = {membership: cnt for membership, cnt in txn}
+
+ txn.execute(
+ """
+ SELECT COALESCE(count(*), 0) FROM current_state_events
+ WHERE room_id = ?
+ """,
+ (room_id,),
+ )
+
+ (current_state_events_count,) = txn.fetchone()
+
+ users_in_room = self.get_users_in_room_txn(txn, room_id)
+
+ return (
+ event_ids,
+ membership_counts,
+ current_state_events_count,
+ users_in_room,
+ pos,
+ )
+
+ (
+ event_ids,
+ membership_counts,
+ current_state_events_count,
+ users_in_room,
+ pos,
+ ) = await self.db_pool.runInteraction(
+ "get_initial_state_for_room", _fetch_current_state_stats
+ )
+
+ state_event_map = await self.get_events(event_ids, get_prev_content=False)
+
+ room_state = {
+ "join_rules": None,
+ "history_visibility": None,
+ "encryption": None,
+ "name": None,
+ "topic": None,
+ "avatar": None,
+ "canonical_alias": None,
+ "is_federatable": True,
+ }
+
+ for event in state_event_map.values():
+ if event.type == EventTypes.JoinRules:
+ room_state["join_rules"] = event.content.get("join_rule")
+ elif event.type == EventTypes.RoomHistoryVisibility:
+ room_state["history_visibility"] = event.content.get(
+ "history_visibility"
+ )
+ elif event.type == EventTypes.RoomEncryption:
+ room_state["encryption"] = event.content.get("algorithm")
+ elif event.type == EventTypes.Name:
+ room_state["name"] = event.content.get("name")
+ elif event.type == EventTypes.Topic:
+ room_state["topic"] = event.content.get("topic")
+ elif event.type == EventTypes.RoomAvatar:
+ room_state["avatar"] = event.content.get("url")
+ elif event.type == EventTypes.CanonicalAlias:
+ room_state["canonical_alias"] = event.content.get("alias")
+ elif event.type == EventTypes.Create:
+ room_state["is_federatable"] = (
+ event.content.get("m.federate", True) is True
+ )
+
+ await self.update_room_state(room_id, room_state)
+
+ local_users_in_room = [u for u in users_in_room if self.hs.is_mine_id(u)]
+
+ await self.update_stats_delta(
+ ts=self.clock.time_msec(),
+ stats_type="room",
+ stats_id=room_id,
+ fields={},
+ complete_with_stream_id=pos,
+ absolute_field_overrides={
+ "current_state_events": current_state_events_count,
+ "joined_members": membership_counts.get(Membership.JOIN, 0),
+ "invited_members": membership_counts.get(Membership.INVITE, 0),
+ "left_members": membership_counts.get(Membership.LEAVE, 0),
+ "banned_members": membership_counts.get(Membership.BAN, 0),
+ "local_users_in_room": len(local_users_in_room),
+ },
+ )
+
+ async def _calculate_and_set_initial_state_for_user(self, user_id):
+ def _calculate_and_set_initial_state_for_user_txn(txn):
+ pos = self._get_max_stream_id_in_current_state_deltas_txn(txn)
+
+ txn.execute(
+ """
+ SELECT COUNT(distinct room_id) FROM current_state_events
+ WHERE type = 'm.room.member' AND state_key = ?
+ AND membership = 'join'
+ """,
+ (user_id,),
+ )
+ (count,) = txn.fetchone()
+ return count, pos
+
+ joined_rooms, pos = await self.db_pool.runInteraction(
+ "calculate_and_set_initial_state_for_user",
+ _calculate_and_set_initial_state_for_user_txn,
+ )
+
+ await self.update_stats_delta(
+ ts=self.clock.time_msec(),
+ stats_type="user",
+ stats_id=user_id,
+ fields={},
+ complete_with_stream_id=pos,
+ absolute_field_overrides={"joined_rooms": joined_rooms},
+ )
diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py
new file mode 100644
index 00000000..aaf22589
--- /dev/null
+++ b/synapse/storage/databases/main/stream.py
@@ -0,0 +1,1064 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2017 Vector Creations Ltd
+# Copyright 2018-2019 New Vector Ltd
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""" This module is responsible for getting events from the DB for pagination
+and event streaming.
+
+The order in which events are returned depends on whether we are streaming
+forwards or paginating backwards. We do this because we want to handle
+out-of-order messages nicely, while still returning them in the correct order
+when we paginate backwards.
+
+This is implemented by keeping two ordering columns: stream_ordering and
+topological_ordering. Stream ordering is basically insertion/received order
+(except for events from backfill requests). The topological_ordering is a
+weak ordering of events based on the pdu graph.
+
+This means that we have to have two different types of tokens, depending on
+what sort order was used:
+ - stream tokens are of the form: "s%d", which maps directly to the column
+ - topological tokens: "t%d-%d", where the integers map to the topological
+   and stream ordering columns respectively.
+"""
+
+import abc
+import logging
+from collections import namedtuple
+from typing import Optional
+
+from twisted.internet import defer
+
+from synapse.logging.context import make_deferred_yieldable, run_in_background
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.database import DatabasePool, make_in_list_sql_clause
+from synapse.storage.databases.main.events_worker import EventsWorkerStore
+from synapse.storage.engines import PostgresEngine
+from synapse.types import RoomStreamToken
+from synapse.util.caches.stream_change_cache import StreamChangeCache
+
+logger = logging.getLogger(__name__)
+
+
+MAX_STREAM_SIZE = 1000
+
+
+_STREAM_TOKEN = "stream"
+_TOPOLOGICAL_TOKEN = "topological"
+
+
+# Used as return values for pagination APIs
+_EventDictReturn = namedtuple(
+ "_EventDictReturn", ("event_id", "topological_ordering", "stream_ordering")
+)
+
+
+def generate_pagination_where_clause(
+ direction, column_names, from_token, to_token, engine
+):
+ """Creates an SQL expression to bound the columns by the pagination
+ tokens.
+
+ For example creates an SQL expression like:
+
+ (6, 7) >= (topological_ordering, stream_ordering)
+ AND (5, 3) < (topological_ordering, stream_ordering)
+
+ would be generated for dir=b, from_token=(6, 7) and to_token=(5, 3).
+
+ Note that tokens are considered to be after the row they are in, e.g. if
+ a row A has a token T, then we consider A to be before T. This convention
+ is important when figuring out inequalities for the generated SQL, and
+ produces the following result:
+ - If paginating forwards then we exclude any rows matching the from
+ token, but include those that match the to token.
+ - If paginating backwards then we include any rows matching the from
+ token, but exclude those that match the to token.
+
+ Args:
+ direction (str): Whether we're paginating backwards("b") or
+ forwards ("f").
+ column_names (tuple[str, str]): The column names to bound. Must *not*
+ be user defined as these get inserted directly into the SQL
+ statement without escapes.
+ from_token (tuple[int, int]|None): The start point for the pagination.
+ This is an exclusive minimum bound if direction is "f", and an
+ inclusive maximum bound if direction is "b".
+ to_token (tuple[int, int]|None): The end point for the pagination.
+ This is an inclusive maximum bound if direction is "f", and an
+ exclusive minimum bound if direction is "b".
+ engine: The database engine to generate the clauses for
+
+ Returns:
+ str: The sql expression
+ """
+ assert direction in ("b", "f")
+
+ where_clause = []
+ if from_token:
+ where_clause.append(
+ _make_generic_sql_bound(
+ bound=">=" if direction == "b" else "<",
+ column_names=column_names,
+ values=from_token,
+ engine=engine,
+ )
+ )
+
+ if to_token:
+ where_clause.append(
+ _make_generic_sql_bound(
+ bound="<" if direction == "b" else ">=",
+ column_names=column_names,
+ values=to_token,
+ engine=engine,
+ )
+ )
+
+ return " AND ".join(where_clause)
+
+
+def _make_generic_sql_bound(bound, column_names, values, engine):
+ """Create an SQL expression that bounds the given column names by the
+ values, e.g. create the equivalent of `(1, 2) < (col1, col2)`.
+
+ Only works with two columns.
+
+ Older versions of SQLite don't support that syntax so we have to expand it
+ out manually.
+
+ Args:
+ bound (str): The comparison operator to use. One of ">", "<", ">=",
+ "<=", where the values are on the left and columns on the right.
+ column_names (tuple[str, str]): The column names. Must *not* be user defined
+ as these get inserted directly into the SQL statement without
+ escapes.
+ values (tuple[int|None, int]): The values to bound the columns by. If
+ the first value is None then only creates a bound on the second
+ column.
+ engine: The database engine to generate the SQL for
+
+ Returns:
+ str
+ """
+
+ assert bound in (">", "<", ">=", "<=")
+
+ name1, name2 = column_names
+ val1, val2 = values
+
+ if val1 is None:
+ val2 = int(val2)
+ return "(%d %s %s)" % (val2, bound, name2)
+
+ val1 = int(val1)
+ val2 = int(val2)
+
+ if isinstance(engine, PostgresEngine):
+ # Postgres doesn't optimise ``(x < a) OR (x=a AND y<b)`` as well
+ # as it optimises ``(x,y) < (a,b)`` on multicolumn indexes. So we
+ # use the latter form when running against postgres.
+ return "((%d,%d) %s (%s,%s))" % (val1, val2, bound, name1, name2)
+
+ # We want to generate queries of e.g. the form:
+ #
+ # (val1 < name1 OR (val1 = name1 AND val2 <= name2))
+ #
+ # which is equivalent to (val1, val2) < (name1, name2)
+
+ return """(
+ {val1:d} {strict_bound} {name1}
+ OR ({val1:d} = {name1} AND {val2:d} {bound} {name2})
+ )""".format(
+ name1=name1,
+ val1=val1,
+ name2=name2,
+ val2=val2,
+ strict_bound=bound[0], # The first comparison must always be strict here
+ bound=bound,
+ )
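+
+# Worked example (illustrative only): bound="<=", column_names=
+# ("topological_ordering", "stream_ordering") and values=(5, 3) produce
+# "((5,3) <= (topological_ordering,stream_ordering))" on Postgres, and the
+# expanded SQLite form
+# (5 < topological_ordering OR (5 = topological_ordering AND 3 <= stream_ordering)),
+# which is equivalent.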
+
+
+def filter_to_clause(event_filter):
+ # NB: This may create SQL clauses that don't optimise well (and we don't
+ # have indices on all possible clauses). E.g. it may create
+ # "room_id == X AND room_id != X", which postgres doesn't optimise.
+
+ if not event_filter:
+ return "", []
+
+ clauses = []
+ args = []
+
+ if event_filter.types:
+ clauses.append("(%s)" % " OR ".join("type = ?" for _ in event_filter.types))
+ args.extend(event_filter.types)
+
+ for typ in event_filter.not_types:
+ clauses.append("type != ?")
+ args.append(typ)
+
+ if event_filter.senders:
+ clauses.append("(%s)" % " OR ".join("sender = ?" for _ in event_filter.senders))
+ args.extend(event_filter.senders)
+
+ for sender in event_filter.not_senders:
+ clauses.append("sender != ?")
+ args.append(sender)
+
+ if event_filter.rooms:
+ clauses.append("(%s)" % " OR ".join("room_id = ?" for _ in event_filter.rooms))
+ args.extend(event_filter.rooms)
+
+ for room_id in event_filter.not_rooms:
+ clauses.append("room_id != ?")
+ args.append(room_id)
+
+ if event_filter.contains_url:
+ clauses.append("contains_url = ?")
+ args.append(event_filter.contains_url)
+
+ # We're only applying the "labels" filter on the database query, because applying the
+ # "not_labels" filter via a SQL query is non-trivial. Instead, we let
+ # event_filter.check_fields apply it, which is not as efficient but makes the
+ # implementation simpler.
+ if event_filter.labels:
+ clauses.append("(%s)" % " OR ".join("label = ?" for _ in event_filter.labels))
+ args.extend(event_filter.labels)
+
+ return " AND ".join(clauses), args
+
+
+class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
+ """This is an abstract base class where subclasses must implement
+ `get_room_max_stream_ordering` and `get_room_min_stream_ordering`
+ which can be called in the initializer.
+ """
+
+ __metaclass__ = abc.ABCMeta
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(StreamWorkerStore, self).__init__(database, db_conn, hs)
+
+ self._instance_name = hs.get_instance_name()
+ self._send_federation = hs.should_send_federation()
+ self._federation_shard_config = hs.config.worker.federation_shard_config
+
+ # If we're a process that sends federation we may need to reset the
+ # `federation_stream_position` table to match the current sharding
+ # config. We don't do this now as otherwise two processes could conflict
+ # during startup which would cause one to die.
+ self._need_to_reset_federation_stream_positions = self._send_federation
+
+ events_max = self.get_room_max_stream_ordering()
+ event_cache_prefill, min_event_val = self.db_pool.get_cache_dict(
+ db_conn,
+ "events",
+ entity_column="room_id",
+ stream_column="stream_ordering",
+ max_value=events_max,
+ )
+ self._events_stream_cache = StreamChangeCache(
+ "EventsRoomStreamChangeCache",
+ min_event_val,
+ prefilled_cache=event_cache_prefill,
+ )
+ self._membership_stream_cache = StreamChangeCache(
+ "MembershipStreamChangeCache", events_max
+ )
+
+ self._stream_order_on_start = self.get_room_max_stream_ordering()
+
+ @abc.abstractmethod
+ def get_room_max_stream_ordering(self):
+ raise NotImplementedError()
+
+ @abc.abstractmethod
+ def get_room_min_stream_ordering(self):
+ raise NotImplementedError()
+
+ @defer.inlineCallbacks
+ def get_room_events_stream_for_rooms(
+ self, room_ids, from_key, to_key, limit=0, order="DESC"
+ ):
+ """Get new room events in stream ordering since `from_key`.
+
+ Args:
+ room_ids (list[str])
+ from_key (str): Token from which no events are returned before
+ to_key (str): Token from which no events are returned after. (This
+ is typically the current stream token)
+ limit (int): Maximum number of events to return
+ order (str): Either "DESC" or "ASC". Determines which events are
+ returned when the result is limited. If "DESC" then the most
+ recent `limit` events are returned, otherwise returns the
+ oldest `limit` events.
+
+ Returns:
+ Deferred[dict[str,tuple[list[FrozenEvent], str]]]
+ A map from room id to a tuple containing:
+ - list of recent events in the room
+ - stream ordering key for the start of the chunk of events returned.
+ """
+ from_id = RoomStreamToken.parse_stream_token(from_key).stream
+
+ room_ids = yield self._events_stream_cache.get_entities_changed(
+ room_ids, from_id
+ )
+
+ if not room_ids:
+ return {}
+
+ results = {}
+ room_ids = list(room_ids)
+ for rm_ids in (room_ids[i : i + 20] for i in range(0, len(room_ids), 20)):
+ res = yield make_deferred_yieldable(
+ defer.gatherResults(
+ [
+ run_in_background(
+ self.get_room_events_stream_for_room,
+ room_id,
+ from_key,
+ to_key,
+ limit,
+ order=order,
+ )
+ for room_id in rm_ids
+ ],
+ consumeErrors=True,
+ )
+ )
+ results.update(dict(zip(rm_ids, res)))
+
+ return results
+
+ def get_rooms_that_changed(self, room_ids, from_key):
+ """Given a list of rooms and a token, return rooms where there may have
+ been changes.
+
+ Args:
+ room_ids (list)
+ from_key (str): The room_key portion of a StreamToken
+ """
+ from_key = RoomStreamToken.parse_stream_token(from_key).stream
+ return {
+ room_id
+ for room_id in room_ids
+ if self._events_stream_cache.has_entity_changed(room_id, from_key)
+ }
+
+ @defer.inlineCallbacks
+ def get_room_events_stream_for_room(
+ self, room_id, from_key, to_key, limit=0, order="DESC"
+ ):
+ """Get new room events in stream ordering since `from_key`.
+
+ Args:
+ room_id (str)
+ from_key (str): Token from which no events are returned before
+ to_key (str): Token from which no events are returned after. (This
+ is typically the current stream token)
+ limit (int): Maximum number of events to return
+ order (str): Either "DESC" or "ASC". Determines which events are
+ returned when the result is limited. If "DESC" then the most
+ recent `limit` events are returned, otherwise returns the
+ oldest `limit` events.
+
+ Returns:
+ Deferred[tuple[list[FrozenEvent], str]]: Returns the list of
+ events (in ascending order) and the token from the start of
+ the chunk of events returned.
+ """
+ if from_key == to_key:
+ return [], from_key
+
+ from_id = RoomStreamToken.parse_stream_token(from_key).stream
+ to_id = RoomStreamToken.parse_stream_token(to_key).stream
+
+ has_changed = yield self._events_stream_cache.has_entity_changed(
+ room_id, from_id
+ )
+
+ if not has_changed:
+ return [], from_key
+
+ def f(txn):
+ sql = (
+ "SELECT event_id, stream_ordering FROM events WHERE"
+ " room_id = ?"
+ " AND not outlier"
+ " AND stream_ordering > ? AND stream_ordering <= ?"
+ " ORDER BY stream_ordering %s LIMIT ?"
+ ) % (order,)
+ txn.execute(sql, (room_id, from_id, to_id, limit))
+
+ rows = [_EventDictReturn(row[0], None, row[1]) for row in txn]
+ return rows
+
+ rows = yield self.db_pool.runInteraction("get_room_events_stream_for_room", f)
+
+ ret = yield self.get_events_as_list(
+ [r.event_id for r in rows], get_prev_content=True
+ )
+
+ self._set_before_and_after(ret, rows, topo_order=from_id is None)
+
+ if order.lower() == "desc":
+ ret.reverse()
+
+ if rows:
+ key = "s%d" % min(r.stream_ordering for r in rows)
+ else:
+ # Assume we didn't get anything because there was nothing to
+ # get.
+ key = from_key
+
+ return ret, key
+
+ @defer.inlineCallbacks
+ def get_membership_changes_for_user(self, user_id, from_key, to_key):
+ from_id = RoomStreamToken.parse_stream_token(from_key).stream
+ to_id = RoomStreamToken.parse_stream_token(to_key).stream
+
+ if from_key == to_key:
+ return []
+
+ if from_id:
+ has_changed = self._membership_stream_cache.has_entity_changed(
+ user_id, int(from_id)
+ )
+ if not has_changed:
+ return []
+
+ def f(txn):
+ sql = (
+ "SELECT m.event_id, stream_ordering FROM events AS e,"
+ " room_memberships AS m"
+ " WHERE e.event_id = m.event_id"
+ " AND m.user_id = ?"
+ " AND e.stream_ordering > ? AND e.stream_ordering <= ?"
+ " ORDER BY e.stream_ordering ASC"
+ )
+ txn.execute(sql, (user_id, from_id, to_id))
+
+ rows = [_EventDictReturn(row[0], None, row[1]) for row in txn]
+
+ return rows
+
+ rows = yield self.db_pool.runInteraction("get_membership_changes_for_user", f)
+
+ ret = yield self.get_events_as_list(
+ [r.event_id for r in rows], get_prev_content=True
+ )
+
+ self._set_before_and_after(ret, rows, topo_order=False)
+
+ return ret
+
+ @defer.inlineCallbacks
+ def get_recent_events_for_room(self, room_id, limit, end_token):
+ """Get the most recent events in the room in topological ordering.
+
+ Args:
+ room_id (str)
+ limit (int)
+ end_token (str): The stream token representing now.
+
+ Returns:
+ Deferred[tuple[list[FrozenEvent], str]]: Returns a list of
+ events and a token pointing to the start of the returned
+ events.
+ The events returned are in ascending order.
+ """
+
+ rows, token = yield self.get_recent_event_ids_for_room(
+ room_id, limit, end_token
+ )
+
+ events = yield self.get_events_as_list(
+ [r.event_id for r in rows], get_prev_content=True
+ )
+
+ self._set_before_and_after(events, rows)
+
+ return (events, token)
+
+ @defer.inlineCallbacks
+ def get_recent_event_ids_for_room(self, room_id, limit, end_token):
+ """Get the most recent events in the room in topological ordering.
+
+ Args:
+ room_id (str)
+ limit (int)
+ end_token (str): The stream token representing now.
+
+ Returns:
+ Deferred[tuple[list[_EventDictReturn], str]]: Returns a list of
+ _EventDictReturn and a token pointing to the start of the returned
+ events.
+ The events returned are in ascending order.
+ """
+ # Allow a zero limit here, and no-op.
+ if limit == 0:
+ return [], end_token
+
+ end_token = RoomStreamToken.parse(end_token)
+
+ rows, token = yield self.db_pool.runInteraction(
+ "get_recent_event_ids_for_room",
+ self._paginate_room_events_txn,
+ room_id,
+ from_token=end_token,
+ limit=limit,
+ )
+
+ # We want to return the results in ascending order.
+ rows.reverse()
+
+ return rows, token
+
+ def get_room_event_before_stream_ordering(self, room_id, stream_ordering):
+ """Gets details of the first event in a room at or before a stream ordering
+
+ Args:
+ room_id (str):
+ stream_ordering (int):
+
+ Returns:
+ Deferred[(int, int, str)]:
+ (stream ordering, topological ordering, event_id)
+ """
+
+ def _f(txn):
+ sql = (
+ "SELECT stream_ordering, topological_ordering, event_id"
+ " FROM events"
+ " WHERE room_id = ? AND stream_ordering <= ?"
+ " AND NOT outlier"
+ " ORDER BY stream_ordering DESC"
+ " LIMIT 1"
+ )
+ txn.execute(sql, (room_id, stream_ordering))
+ return txn.fetchone()
+
+ return self.db_pool.runInteraction("get_room_event_before_stream_ordering", _f)
+
+ async def get_room_events_max_id(self, room_id: Optional[str] = None) -> str:
+ """Returns the current token for rooms stream.
+
+ By default, it returns the current global stream token. Specifying a
+ `room_id` causes it to return the current room specific topological
+ token.
+ """
+ token = self.get_room_max_stream_ordering()
+ if room_id is None:
+ return "s%d" % (token,)
+ else:
+ topo = await self.db_pool.runInteraction(
+ "_get_max_topological_txn", self._get_max_topological_txn, room_id
+ )
+ return "t%d-%d" % (topo, token)
+
+ def get_stream_token_for_event(self, event_id):
+ """The stream token for an event
+ Args:
+ event_id(str): The id of the event to look up a stream token for.
+ Raises:
+ StoreError if the event wasn't in the database.
+ Returns:
+ A deferred "s%d" stream token.
+ """
+ return self.db_pool.simple_select_one_onecol(
+ table="events", keyvalues={"event_id": event_id}, retcol="stream_ordering"
+ ).addCallback(lambda row: "s%d" % (row,))
+
+ def get_topological_token_for_event(self, event_id):
+ """The stream token for an event
+ Args:
+ event_id(str): The id of the event to look up a stream token for.
+ Raises:
+ StoreError if the event wasn't in the database.
+ Returns:
+ A deferred "t%d-%d" topological token.
+ """
+ return self.db_pool.simple_select_one(
+ table="events",
+ keyvalues={"event_id": event_id},
+ retcols=("stream_ordering", "topological_ordering"),
+ desc="get_topological_token_for_event",
+ ).addCallback(
+ lambda row: "t%d-%d" % (row["topological_ordering"], row["stream_ordering"])
+ )
+
+ def get_max_topological_token(self, room_id, stream_key):
+ """Get the max topological token in a room before the given stream
+ ordering.
+
+ Args:
+ room_id (str)
+ stream_key (int)
+
+ Returns:
+ Deferred[int]
+ """
+ sql = (
+ "SELECT coalesce(max(topological_ordering), 0) FROM events"
+ " WHERE room_id = ? AND stream_ordering < ?"
+ )
+ return self.db_pool.execute(
+ "get_max_topological_token", None, sql, room_id, stream_key
+ ).addCallback(lambda r: r[0][0] if r else 0)
+
+ def _get_max_topological_txn(self, txn, room_id):
+ txn.execute(
+ "SELECT MAX(topological_ordering) FROM events WHERE room_id = ?",
+ (room_id,),
+ )
+
+ rows = txn.fetchall()
+ return rows[0][0] if rows else 0
+
+ @staticmethod
+ def _set_before_and_after(events, rows, topo_order=True):
+ """Inserts ordering information to events' internal metadata from
+ the DB rows.
+
+ Args:
+ events (list[FrozenEvent])
+ rows (list[_EventDictReturn])
+ topo_order (bool): Whether the events were ordered topologically
+ or by stream ordering. If true then all rows should have a non
+ null topological_ordering.
+ """
+ for event, row in zip(events, rows):
+ stream = row.stream_ordering
+ if topo_order and row.topological_ordering:
+ topo = row.topological_ordering
+ else:
+ topo = None
+ internal = event.internal_metadata
+ internal.before = str(RoomStreamToken(topo, stream - 1))
+ internal.after = str(RoomStreamToken(topo, stream))
+ internal.order = (int(topo) if topo else 0, int(stream))
+
+ @defer.inlineCallbacks
+ def get_events_around(
+ self, room_id, event_id, before_limit, after_limit, event_filter=None
+ ):
+ """Retrieve events and pagination tokens around a given event in a
+ room.
+
+ Args:
+ room_id (str)
+ event_id (str)
+ before_limit (int)
+ after_limit (int)
+ event_filter (Filter|None)
+
+ Returns:
+ dict
+ """
+
+ results = yield self.db_pool.runInteraction(
+ "get_events_around",
+ self._get_events_around_txn,
+ room_id,
+ event_id,
+ before_limit,
+ after_limit,
+ event_filter,
+ )
+
+ events_before = yield self.get_events_as_list(
+ list(results["before"]["event_ids"]), get_prev_content=True
+ )
+
+ events_after = yield self.get_events_as_list(
+ list(results["after"]["event_ids"]), get_prev_content=True
+ )
+
+ return {
+ "events_before": events_before,
+ "events_after": events_after,
+ "start": results["before"]["token"],
+ "end": results["after"]["token"],
+ }
+
+ def _get_events_around_txn(
+ self, txn, room_id, event_id, before_limit, after_limit, event_filter
+ ):
+ """Retrieves event_ids and pagination tokens around a given event in a
+ room.
+
+ Args:
+ room_id (str)
+ event_id (str)
+ before_limit (int)
+ after_limit (int)
+ event_filter (Filter|None)
+
+ Returns:
+ dict
+ """
+
+ results = self.db_pool.simple_select_one_txn(
+ txn,
+ "events",
+ keyvalues={"event_id": event_id, "room_id": room_id},
+ retcols=["stream_ordering", "topological_ordering"],
+ )
+
+ # Paginating backwards includes the event at the token, but paginating
+ # forward doesn't.
+ before_token = RoomStreamToken(
+ results["topological_ordering"] - 1, results["stream_ordering"]
+ )
+
+ after_token = RoomStreamToken(
+ results["topological_ordering"], results["stream_ordering"]
+ )
+
+ rows, start_token = self._paginate_room_events_txn(
+ txn,
+ room_id,
+ before_token,
+ direction="b",
+ limit=before_limit,
+ event_filter=event_filter,
+ )
+ events_before = [r.event_id for r in rows]
+
+ rows, end_token = self._paginate_room_events_txn(
+ txn,
+ room_id,
+ after_token,
+ direction="f",
+ limit=after_limit,
+ event_filter=event_filter,
+ )
+ events_after = [r.event_id for r in rows]
+
+ return {
+ "before": {"event_ids": events_before, "token": start_token},
+ "after": {"event_ids": events_after, "token": end_token},
+ }
+
+ @defer.inlineCallbacks
+ def get_all_new_events_stream(self, from_id, current_id, limit):
+ """Get all new events
+
+ Returns all events with from_id < stream_ordering <= current_id.
+
+ Args:
+ from_id (int): the stream_ordering of the last event we processed
+ current_id (int): the stream_ordering of the most recently processed event
+ limit (int): the maximum number of events to return
+
+ Returns:
+ Deferred[Tuple[int, list[FrozenEvent]]]: A tuple of (next_id, events), where
+ `next_id` is the next value to pass as `from_id` (it will either be the
+ stream_ordering of the last returned event, or, if fewer than `limit` events
+ were found, `current_id`).
+ """
+
+ def get_all_new_events_stream_txn(txn):
+ sql = (
+ "SELECT e.stream_ordering, e.event_id"
+ " FROM events AS e"
+ " WHERE"
+ " ? < e.stream_ordering AND e.stream_ordering <= ?"
+ " ORDER BY e.stream_ordering ASC"
+ " LIMIT ?"
+ )
+
+ txn.execute(sql, (from_id, current_id, limit))
+ rows = txn.fetchall()
+
+ upper_bound = current_id
+ if len(rows) == limit:
+ upper_bound = rows[-1][0]
+
+ return upper_bound, [row[1] for row in rows]
+
+ upper_bound, event_ids = yield self.db_pool.runInteraction(
+ "get_all_new_events_stream", get_all_new_events_stream_txn
+ )
+
+ events = yield self.get_events_as_list(event_ids)
+
+ return upper_bound, events
+
+ async def get_federation_out_pos(self, typ: str) -> int:
+ if self._need_to_reset_federation_stream_positions:
+ await self.db_pool.runInteraction(
+ "_reset_federation_positions_txn", self._reset_federation_positions_txn
+ )
+ self._need_to_reset_federation_stream_positions = False
+
+ return await self.db_pool.simple_select_one_onecol(
+ table="federation_stream_position",
+ retcol="stream_id",
+ keyvalues={"type": typ, "instance_name": self._instance_name},
+ desc="get_federation_out_pos",
+ )
+
+ async def update_federation_out_pos(self, typ, stream_id):
+ if self._need_to_reset_federation_stream_positions:
+ await self.db_pool.runInteraction(
+ "_reset_federation_positions_txn", self._reset_federation_positions_txn
+ )
+ self._need_to_reset_federation_stream_positions = False
+
+ return await self.db_pool.simple_update_one(
+ table="federation_stream_position",
+ keyvalues={"type": typ, "instance_name": self._instance_name},
+ updatevalues={"stream_id": stream_id},
+ desc="update_federation_out_pos",
+ )
+
+ def _reset_federation_positions_txn(self, txn):
+ """Fiddles with the `federation_stream_position` table to make it match
+ the configured federation sender instances during start up.
+ """
+
+ # The federation sender instances may have changed, so we need to
+ # massage the `federation_stream_position` table to have a row per type
+ # per instance sending federation. If there is a mismatch we update the
+ # table with the correct rows using the *minimum* stream ID seen. This
+ # may result in resending of events/EDUs to remote servers, but that is
+ # preferable to dropping them.
+
+ if not self._send_federation:
+ return
+
+ # Pull out the configured instances. If we don't have a shard config then
+ # we assume that we're the only instance sending.
+ configured_instances = self._federation_shard_config.instances
+ if not configured_instances:
+ configured_instances = [self._instance_name]
+ elif self._instance_name not in configured_instances:
+ return
+
+ instances_in_table = self.db_pool.simple_select_onecol_txn(
+ txn,
+ table="federation_stream_position",
+ keyvalues={},
+ retcol="instance_name",
+ )
+
+ if set(instances_in_table) == set(configured_instances):
+ # Nothing to do
+ return
+
+ sql = """
+ SELECT type, MIN(stream_id) FROM federation_stream_position
+ GROUP BY type
+ """
+ txn.execute(sql)
+ min_positions = dict(txn) # Map from type -> min position
+
+ # Ensure we do actually have some values here
+ assert set(min_positions) == {"federation", "events"}
+
+ sql = """
+ DELETE FROM federation_stream_position
+ WHERE NOT (%s)
+ """
+ clause, args = make_in_list_sql_clause(
+ txn.database_engine, "instance_name", configured_instances
+ )
+ txn.execute(sql % (clause,), args)
+
+ for typ, stream_id in min_positions.items():
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="federation_stream_position",
+ keyvalues={"type": typ, "instance_name": self._instance_name},
+ values={"stream_id": stream_id},
+ )
+
+ def has_room_changed_since(self, room_id, stream_id):
+ return self._events_stream_cache.has_entity_changed(room_id, stream_id)
+
+ def _paginate_room_events_txn(
+ self,
+ txn,
+ room_id,
+ from_token,
+ to_token=None,
+ direction="b",
+ limit=-1,
+ event_filter=None,
+ ):
+ """Returns list of events before or after a given token.
+
+ Args:
+ txn
+ room_id (str)
+ from_token (RoomStreamToken): The token used to stream from
+ to_token (RoomStreamToken|None): A token which if given limits the
+ results to only those before
+ direction(char): Either 'b' or 'f' to indicate whether we are
+ paginating forwards or backwards from `from_token`.
+ limit (int): The maximum number of events to return.
+ event_filter (Filter|None): If provided filters the events to
+ those that match the filter.
+
+ Returns:
+ tuple[list[_EventDictReturn], str]: Returns the results
+ as a list of _EventDictReturn and a token that points to the end
+ of the result set. If no events are returned then the end of the
+ stream has been reached (i.e. there are no events between
+ `from_token` and `to_token`), or `limit` is zero.
+ """
+
+ assert int(limit) >= 0
+
+ # Tokens really represent positions between elements, but we use
+ # the convention of pointing to the event before the gap. Hence
+ # we have a bit of asymmetry when it comes to equalities.
+ args = [False, room_id]
+ if direction == "b":
+ order = "DESC"
+ else:
+ order = "ASC"
+
+ bounds = generate_pagination_where_clause(
+ direction=direction,
+ column_names=("topological_ordering", "stream_ordering"),
+ from_token=from_token,
+ to_token=to_token,
+ engine=self.database_engine,
+ )
+
+ filter_clause, filter_args = filter_to_clause(event_filter)
+
+ if filter_clause:
+ bounds += " AND " + filter_clause
+ args.extend(filter_args)
+
+ args.append(int(limit))
+
+ select_keywords = "SELECT"
+ join_clause = ""
+ if event_filter and event_filter.labels:
+ # If we're not filtering on a label, then joining on event_labels will
+ # return as many rows for a single event as the number of labels it has. To
+ # avoid this, only join if we're filtering on at least one label.
+ join_clause = """
+ LEFT JOIN event_labels
+ USING (event_id, room_id, topological_ordering)
+ """
+ if len(event_filter.labels) > 1:
+ # Using DISTINCT in this SELECT query is quite expensive, because it
+ # requires the engine to sort on the entire (not limited) result set,
+ # i.e. the entire events table. We only need to use it when we're
+ # filtering on more than one label, because that's the only scenario
+ # in which we can possibly get the same event ID multiple times in
+ # the results.
+ select_keywords += " DISTINCT"
+
+ sql = """
+ %(select_keywords)s event_id, topological_ordering, stream_ordering
+ FROM events
+ %(join_clause)s
+ WHERE outlier = ? AND room_id = ? AND %(bounds)s
+ ORDER BY topological_ordering %(order)s,
+ stream_ordering %(order)s LIMIT ?
+ """ % {
+ "select_keywords": select_keywords,
+ "join_clause": join_clause,
+ "bounds": bounds,
+ "order": order,
+ }
+
+ txn.execute(sql, args)
+
+ rows = [_EventDictReturn(row[0], row[1], row[2]) for row in txn]
+
+ if rows:
+ topo = rows[-1].topological_ordering
+ toke = rows[-1].stream_ordering
+ if direction == "b":
+ # Tokens are positions between events.
+ # This token points *after* the last event in the chunk.
+ # We need it to point to the event before it in the chunk
+ # when we are going backwards so we subtract one from the
+ # stream part.
+ toke -= 1
+ next_token = RoomStreamToken(topo, toke)
+ else:
+ # TODO (erikj): We should work out what to do here instead.
+ next_token = to_token if to_token else from_token
+
+ return rows, str(next_token)
+
+ @defer.inlineCallbacks
+ def paginate_room_events(
+ self, room_id, from_key, to_key=None, direction="b", limit=-1, event_filter=None
+ ):
+ """Returns list of events before or after a given token.
+
+ Args:
+ room_id (str)
+ from_key (str): The token used to stream from
+ to_key (str|None): A token which if given limits the results to
+ only those before
+ direction(char): Either 'b' or 'f' to indicate whether we are
+ paginating forwards or backwards from `from_key`.
+ limit (int): The maximum number of events to return.
+ event_filter (Filter|None): If provided filters the events to
+ those that match the filter.
+
+ Returns:
+ Deferred[tuple[list[FrozenEvent], str]]: Returns the results as a list of
+ events and a token that points to the end of the result set. If no
+ events are returned then the end of the stream has been reached
+ (i.e. there are no events between `from_key` and `to_key`).
+ """
+
+ from_key = RoomStreamToken.parse(from_key)
+ if to_key:
+ to_key = RoomStreamToken.parse(to_key)
+
+ rows, token = yield self.db_pool.runInteraction(
+ "paginate_room_events",
+ self._paginate_room_events_txn,
+ room_id,
+ from_key,
+ to_key,
+ direction,
+ limit,
+ event_filter,
+ )
+
+ events = yield self.get_events_as_list(
+ [r.event_id for r in rows], get_prev_content=True
+ )
+
+ self._set_before_and_after(events, rows)
+
+ return (events, token)
+
+
+class StreamStore(StreamWorkerStore):
+ def get_room_max_stream_ordering(self):
+ return self._stream_id_gen.get_current_token()
+
+ def get_room_min_stream_ordering(self):
+ return self._backfill_id_gen.get_current_token()
diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py
new file mode 100644
index 00000000..e4e0a0c4
--- /dev/null
+++ b/synapse/storage/databases/main/tags.py
@@ -0,0 +1,291 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import Dict, List, Tuple
+
+from canonicaljson import json
+
+from synapse.storage._base import db_to_json
+from synapse.storage.databases.main.account_data import AccountDataWorkerStore
+from synapse.types import JsonDict
+from synapse.util.caches.descriptors import cached
+
+logger = logging.getLogger(__name__)
+
+
+class TagsWorkerStore(AccountDataWorkerStore):
+ @cached()
+ async def get_tags_for_user(self, user_id: str) -> Dict[str, Dict[str, JsonDict]]:
+ """Get all the tags for a user.
+
+ Args:
+ user_id: The user to get the tags for.
+ Returns:
+ A mapping from room_id strings to dicts mapping from tag strings to
+ tag content.
+ """
+
+ rows = await self.db_pool.simple_select_list(
+ "room_tags", {"user_id": user_id}, ["room_id", "tag", "content"]
+ )
+
+ tags_by_room = {}
+ for row in rows:
+ room_tags = tags_by_room.setdefault(row["room_id"], {})
+ room_tags[row["tag"]] = db_to_json(row["content"])
+ return tags_by_room
+
+ async def get_all_updated_tags(
+ self, instance_name: str, last_id: int, current_id: int, limit: int
+ ) -> Tuple[List[Tuple[int, tuple]], int, bool]:
+ """Get updates for tags replication stream.
+
+ Args:
+ instance_name: The writer we want to fetch updates from. Unused
+ here since there is only ever one writer.
+ last_id: The token to fetch updates from. Exclusive.
+ current_id: The token to fetch updates up to. Inclusive.
+ limit: The requested limit for the number of rows to return. The
+ function may return more or fewer rows.
+
+ Returns:
+ A tuple consisting of: the updates, a token to use to fetch
+ subsequent updates, and whether we returned fewer rows than exists
+ between the requested tokens due to the limit.
+
+ The token returned can be used in a subsequent call to this
+ function to get further updates.
+
+ The updates are a list of 2-tuples of stream ID and the row data
+ """
+
+ if last_id == current_id:
+ return [], current_id, False
+
+ def get_all_updated_tags_txn(txn):
+ sql = (
+ "SELECT stream_id, user_id, room_id"
+ " FROM room_tags_revisions as r"
+ " WHERE ? < stream_id AND stream_id <= ?"
+ " ORDER BY stream_id ASC LIMIT ?"
+ )
+ txn.execute(sql, (last_id, current_id, limit))
+ return txn.fetchall()
+
+ tag_ids = await self.db_pool.runInteraction(
+ "get_all_updated_tags", get_all_updated_tags_txn
+ )
+
+ def get_tag_content(txn, tag_ids):
+ sql = "SELECT tag, content FROM room_tags WHERE user_id=? AND room_id=?"
+ results = []
+ for stream_id, user_id, room_id in tag_ids:
+ txn.execute(sql, (user_id, room_id))
+ tags = []
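+ # `content` is already a JSON-encoded string in the room_tags
+ # table, so the JSON object can be assembled by concatenation.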
+ for tag, content in txn:
+ tags.append(json.dumps(tag) + ":" + content)
+ tag_json = "{" + ",".join(tags) + "}"
+ results.append((stream_id, (user_id, room_id, tag_json)))
+
+ return results
+
+ batch_size = 50
+ results = []
+ for i in range(0, len(tag_ids), batch_size):
+ tags = await self.db_pool.runInteraction(
+ "get_all_updated_tag_content",
+ get_tag_content,
+ tag_ids[i : i + batch_size],
+ )
+ results.extend(tags)
+
+ limited = False
+ upto_token = current_id
+ if len(results) >= limit:
+ upto_token = results[-1][0]
+ limited = True
+
+ return results, upto_token, limited
+
+ async def get_updated_tags(
+ self, user_id: str, stream_id: int
+ ) -> Dict[str, List[str]]:
+ """Get all the tags for the rooms where the tags have changed since the
+ given version
+
+ Args:
+ user_id: The user to get the tags for.
+ stream_id: The earliest update to get for the user.
+
+ Returns:
+ A mapping from room_id strings to lists of tag strings for all the
+ rooms that changed since the stream_id token.
+ """
+
+ def get_updated_tags_txn(txn):
+ sql = (
+ "SELECT room_id from room_tags_revisions"
+ " WHERE user_id = ? AND stream_id > ?"
+ )
+ txn.execute(sql, (user_id, stream_id))
+ room_ids = [row[0] for row in txn]
+ return room_ids
+
+ changed = self._account_data_stream_cache.has_entity_changed(
+ user_id, int(stream_id)
+ )
+ if not changed:
+ return {}
+
+ room_ids = await self.db_pool.runInteraction(
+ "get_updated_tags", get_updated_tags_txn
+ )
+
+ results = {}
+ if room_ids:
+ tags_by_room = await self.get_tags_for_user(user_id)
+ for room_id in room_ids:
+ results[room_id] = tags_by_room.get(room_id, {})
+
+ return results
+
+ async def get_tags_for_room(
+ self, user_id: str, room_id: str
+ ) -> Dict[str, JsonDict]:
+ """Get all the tags for the given room
+
+ Args:
+ user_id: The user to get tags for
+ room_id: The room to get tags for
+
+ Returns:
+ A mapping of tags to tag content.
+ """
+ rows = await self.db_pool.simple_select_list(
+ table="room_tags",
+ keyvalues={"user_id": user_id, "room_id": room_id},
+ retcols=("tag", "content"),
+ desc="get_tags_for_room",
+ )
+ return {row["tag"]: db_to_json(row["content"]) for row in rows}
+
+
+class TagsStore(TagsWorkerStore):
+ async def add_tag_to_room(
+ self, user_id: str, room_id: str, tag: str, content: JsonDict
+ ) -> int:
+ """Add a tag to a room for a user.
+
+ Args:
+ user_id: The user to add a tag for.
+ room_id: The room to add a tag for.
+ tag: The tag name to add.
+ content: A json object to associate with the tag.
+
+ Returns:
+ The next account data ID.
+ """
+ content_json = json.dumps(content)
+
+ def add_tag_txn(txn, next_id):
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="room_tags",
+ keyvalues={"user_id": user_id, "room_id": room_id, "tag": tag},
+ values={"content": content_json},
+ )
+ self._update_revision_txn(txn, user_id, room_id, next_id)
+
+ with self._account_data_id_gen.get_next() as next_id:
+ await self.db_pool.runInteraction("add_tag", add_tag_txn, next_id)
+
+ self.get_tags_for_user.invalidate((user_id,))
+
+ return self._account_data_id_gen.get_current_token()
+
+ async def remove_tag_from_room(self, user_id: str, room_id: str, tag: str) -> int:
+ """Remove a tag from a room for a user.
+
+ Returns:
+ The next account data ID.
+ """
+
+ def remove_tag_txn(txn, next_id):
+ sql = (
+ "DELETE FROM room_tags "
+ " WHERE user_id = ? AND room_id = ? AND tag = ?"
+ )
+ txn.execute(sql, (user_id, room_id, tag))
+ self._update_revision_txn(txn, user_id, room_id, next_id)
+
+ with self._account_data_id_gen.get_next() as next_id:
+ await self.db_pool.runInteraction("remove_tag", remove_tag_txn, next_id)
+
+ self.get_tags_for_user.invalidate((user_id,))
+
+ return self._account_data_id_gen.get_current_token()
+
+ def _update_revision_txn(
+ self, txn, user_id: str, room_id: str, next_id: int
+ ) -> None:
+ """Update the latest revision of the tags for the given user and room.
+
+ Args:
+ txn: The database cursor
+ user_id: The ID of the user.
+ room_id: The ID of the room.
+ next_id: The revision to advance to.
+ """
+
+ txn.call_after(
+ self._account_data_stream_cache.entity_has_changed, user_id, next_id
+ )
+
+ # Note: This is only here for backwards compat to allow admins to
+ # roll back to a previous Synapse version. Next time we update the
+ # database version we can remove this table.
+ update_max_id_sql = (
+ "UPDATE account_data_max_stream_id"
+ " SET stream_id = ?"
+ " WHERE stream_id < ?"
+ )
+ txn.execute(update_max_id_sql, (next_id, next_id))
+
+ update_sql = (
+ "UPDATE room_tags_revisions"
+ " SET stream_id = ?"
+ " WHERE user_id = ?"
+ " AND room_id = ?"
+ )
+ txn.execute(update_sql, (next_id, user_id, room_id))
+
+ if txn.rowcount == 0:
+ insert_sql = (
+ "INSERT INTO room_tags_revisions (user_id, room_id, stream_id)"
+ " VALUES (?, ?, ?)"
+ )
+ try:
+ txn.execute(insert_sql, (user_id, room_id, next_id))
+ except self.database_engine.module.IntegrityError:
+ # Ignore insertion errors. It doesn't matter if the row wasn't
+ # inserted because if two updates happened concurrently the one
+ # with the higher stream_id will not be reported to a client
+ # unless the previous update has completed. It doesn't matter
+ # which stream_id ends up in the table, as long as it is higher
+ # than the id that the client has.
+ pass
diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py
new file mode 100644
index 00000000..52668dbd
--- /dev/null
+++ b/synapse/storage/databases/main/transactions.py
@@ -0,0 +1,266 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from collections import namedtuple
+
+from canonicaljson import encode_canonical_json
+
+from synapse.metrics.background_process_metrics import run_as_background_process
+from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.storage.database import DatabasePool
+from synapse.util.caches.expiringcache import ExpiringCache
+
+db_binary_type = memoryview
+
+logger = logging.getLogger(__name__)
+
+
+_TransactionRow = namedtuple(
+ "_TransactionRow",
+ ("id", "transaction_id", "destination", "ts", "response_code", "response_json"),
+)
+
+_UpdateTransactionRow = namedtuple(
+ "_UpdateTransactionRow", ("response_code", "response_json")
+)
+
+SENTINEL = object()
+
+
+class TransactionStore(SQLBaseStore):
+ """A collection of queries for handling PDUs.
+ """
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(TransactionStore, self).__init__(database, db_conn, hs)
+
+ self._clock.looping_call(self._start_cleanup_transactions, 30 * 60 * 1000)
+
+ self._destination_retry_cache = ExpiringCache(
+ cache_name="get_destination_retry_timings",
+ clock=self._clock,
+ expiry_ms=5 * 60 * 1000,
+ )
+
+ def get_received_txn_response(self, transaction_id, origin):
+ """For an incoming transaction from a given origin, check if we have
+ already responded to it. If so, return the response code and response
+ body (as a dict).
+
+ Args:
+ transaction_id (str)
+ origin(str)
+
+ Returns:
+ tuple: None if we have not previously responded to this transaction,
+ otherwise a 2-tuple of (response code, response body dict)
+ """
+
+ return self.db_pool.runInteraction(
+ "get_received_txn_response",
+ self._get_received_txn_response,
+ transaction_id,
+ origin,
+ )
+
+ def _get_received_txn_response(self, txn, transaction_id, origin):
+ result = self.db_pool.simple_select_one_txn(
+ txn,
+ table="received_transactions",
+ keyvalues={"transaction_id": transaction_id, "origin": origin},
+ retcols=(
+ "transaction_id",
+ "origin",
+ "ts",
+ "response_code",
+ "response_json",
+ "has_been_referenced",
+ ),
+ allow_none=True,
+ )
+
+ if result and result["response_code"]:
+ return result["response_code"], db_to_json(result["response_json"])
+
+ else:
+ return None
+
+ def set_received_txn_response(self, transaction_id, origin, code, response_dict):
+ """Persist the response we returened for an incoming transaction, and
+ should return for subsequent transactions with the same transaction_id
+ and origin.
+
+ Args:
+ transaction_id (str)
+ origin (str)
+ code (int)
+ response_dict (dict)
+ """
+
+ return self.db_pool.simple_insert(
+ table="received_transactions",
+ values={
+ "transaction_id": transaction_id,
+ "origin": origin,
+ "response_code": code,
+ "response_json": db_binary_type(encode_canonical_json(response_dict)),
+ "ts": self._clock.time_msec(),
+ },
+ or_ignore=True,
+ desc="set_received_txn_response",
+ )
+
+ async def get_destination_retry_timings(self, destination):
+ """Gets the current retry timings (if any) for a given destination.
+
+ Args:
+ destination (str)
+
+ Returns:
+ None if not retrying
+ Otherwise a dict for the retry scheme
+ """
+
+ result = self._destination_retry_cache.get(destination, SENTINEL)
+ if result is not SENTINEL:
+ return result
+
+ result = await self.db_pool.runInteraction(
+ "get_destination_retry_timings",
+ self._get_destination_retry_timings,
+ destination,
+ )
+
+ # We don't hugely care about race conditions between getting and
+ # invalidating the cache, since we time out fairly quickly anyway.
+ self._destination_retry_cache[destination] = result
+ return result
+
+ def _get_destination_retry_timings(self, txn, destination):
+ result = self.db_pool.simple_select_one_txn(
+ txn,
+ table="destinations",
+ keyvalues={"destination": destination},
+ retcols=("destination", "failure_ts", "retry_last_ts", "retry_interval"),
+ allow_none=True,
+ )
+
+ if result and result["retry_last_ts"] > 0:
+ return result
+ else:
+ return None
+
+ def set_destination_retry_timings(
+ self, destination, failure_ts, retry_last_ts, retry_interval
+ ):
+ """Sets the current retry timings for a given destination.
+ Both timings should be zero if retrying is no longer occurring.
+
+ Args:
+ destination (str)
+ failure_ts (int|None) - when the server started failing (ms since epoch)
+ retry_last_ts (int) - time of last retry attempt in unix epoch ms
+ retry_interval (int) - how long until next retry in ms
+ """
+
+ self._destination_retry_cache.pop(destination, None)
+ return self.db_pool.runInteraction(
+ "set_destination_retry_timings",
+ self._set_destination_retry_timings,
+ destination,
+ failure_ts,
+ retry_last_ts,
+ retry_interval,
+ )
+
+ def _set_destination_retry_timings(
+ self, txn, destination, failure_ts, retry_last_ts, retry_interval
+ ):
+ if self.database_engine.can_native_upsert:
+ # Upsert retry time interval if retry_interval is zero (i.e. we're
+ # resetting it) or greater than the existing retry interval.
+
+ sql = """
+ INSERT INTO destinations (
+ destination, failure_ts, retry_last_ts, retry_interval
+ )
+ VALUES (?, ?, ?, ?)
+ ON CONFLICT (destination) DO UPDATE SET
+ failure_ts = EXCLUDED.failure_ts,
+ retry_last_ts = EXCLUDED.retry_last_ts,
+ retry_interval = EXCLUDED.retry_interval
+ WHERE
+ EXCLUDED.retry_interval = 0
+ OR destinations.retry_interval < EXCLUDED.retry_interval
+ """
+
+ txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval))
+
+ return
+
+ self.database_engine.lock_table(txn, "destinations")
+
+ # We need to be careful here as the data may have changed from under us
+ # due to a worker setting the timings.
+
+ prev_row = self.db_pool.simple_select_one_txn(
+ txn,
+ table="destinations",
+ keyvalues={"destination": destination},
+ retcols=("failure_ts", "retry_last_ts", "retry_interval"),
+ allow_none=True,
+ )
+
+ if not prev_row:
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="destinations",
+ values={
+ "destination": destination,
+ "failure_ts": failure_ts,
+ "retry_last_ts": retry_last_ts,
+ "retry_interval": retry_interval,
+ },
+ )
+ elif retry_interval == 0 or prev_row["retry_interval"] < retry_interval:
+ self.db_pool.simple_update_one_txn(
+ txn,
+ "destinations",
+ keyvalues={"destination": destination},
+ updatevalues={
+ "failure_ts": failure_ts,
+ "retry_last_ts": retry_last_ts,
+ "retry_interval": retry_interval,
+ },
+ )
+
+ def _start_cleanup_transactions(self):
+ return run_as_background_process(
+ "cleanup_transactions", self._cleanup_transactions
+ )
+
+ def _cleanup_transactions(self):
+ now = self._clock.time_msec()
+ month_ago = now - 30 * 24 * 60 * 60 * 1000
+
+ def _cleanup_transactions_txn(txn):
+ txn.execute("DELETE FROM received_transactions WHERE ts < ?", (month_ago,))
+
+ return self.db_pool.runInteraction(
+ "_cleanup_transactions", _cleanup_transactions_txn
+ )
diff --git a/synapse/storage/databases/main/ui_auth.py b/synapse/storage/databases/main/ui_auth.py
new file mode 100644
index 00000000..37276f73
--- /dev/null
+++ b/synapse/storage/databases/main/ui_auth.py
@@ -0,0 +1,300 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Dict, Optional, Union
+
+import attr
+from canonicaljson import json
+
+from synapse.api.errors import StoreError
+from synapse.storage._base import SQLBaseStore, db_to_json
+from synapse.types import JsonDict
+from synapse.util import stringutils as stringutils
+
+
+@attr.s
+class UIAuthSessionData:
+ session_id = attr.ib(type=str)
+ # The dictionary from the client root level, not the 'auth' key.
+ clientdict = attr.ib(type=JsonDict)
+ # The URI and method the session was initiated with. These are checked at
+ # each stage of the authentication to ensure that the asked for operation
+ # has not changed.
+ uri = attr.ib(type=str)
+ method = attr.ib(type=str)
+ # A string description of the operation that the current authentication is
+ # authorising.
+ description = attr.ib(type=str)
+
+
+class UIAuthWorkerStore(SQLBaseStore):
+ """
+ Manage user interactive authentication sessions.
+ """
+
+ async def create_ui_auth_session(
+ self, clientdict: JsonDict, uri: str, method: str, description: str,
+ ) -> UIAuthSessionData:
+ """
+ Creates a new user interactive authentication session.
+
+ The session can be used to track the stages necessary to authenticate a
+ user across multiple HTTP requests.
+
+ Args:
+ clientdict:
+ The dictionary from the client root level, not the 'auth' key.
+ uri:
+ The URI this session was initiated with, this is checked at each
+ stage of the authentication to ensure that the asked for
+ operation has not changed.
+ method:
+ The method this session was initiated with, this is checked at each
+ stage of the authentication to ensure that the asked for
+ operation has not changed.
+ description:
+ A string description of the operation that the current
+ authentication is authorising.
+ Returns:
+ The newly created session.
+ Raises:
+ StoreError if a unique session ID cannot be generated.
+ """
+ # The clientdict gets stored as JSON.
+ clientdict_json = json.dumps(clientdict)
+
+ # autogen a session ID and try to create it. We may clash, so just
+ # try a few times till one goes through, giving up eventually.
+ attempts = 0
+ while attempts < 5:
+ session_id = stringutils.random_string(24)
+
+ try:
+ await self.db_pool.simple_insert(
+ table="ui_auth_sessions",
+ values={
+ "session_id": session_id,
+ "clientdict": clientdict_json,
+ "uri": uri,
+ "method": method,
+ "description": description,
+ "serverdict": "{}",
+ "creation_time": self.hs.get_clock().time_msec(),
+ },
+ desc="create_ui_auth_session",
+ )
+ return UIAuthSessionData(
+ session_id, clientdict, uri, method, description
+ )
+ except self.db_pool.engine.module.IntegrityError:
+ attempts += 1
+ raise StoreError(500, "Couldn't generate a session ID.")
+
+ async def get_ui_auth_session(self, session_id: str) -> UIAuthSessionData:
+ """Retrieve a UI auth session.
+
+ Args:
+ session_id: The ID of the session.
+ Returns:
+ The UIAuthSessionData for the session.
+ Raises:
+ StoreError if the session is not found.
+ """
+ result = await self.db_pool.simple_select_one(
+ table="ui_auth_sessions",
+ keyvalues={"session_id": session_id},
+ retcols=("clientdict", "uri", "method", "description"),
+ desc="get_ui_auth_session",
+ )
+
+ result["clientdict"] = db_to_json(result["clientdict"])
+
+ return UIAuthSessionData(session_id, **result)
+
+ async def mark_ui_auth_stage_complete(
+ self, session_id: str, stage_type: str, result: Union[str, bool, JsonDict],
+ ):
+ """
+ Mark a session stage as completed.
+
+ Args:
+ session_id: The ID of the corresponding session.
+ stage_type: The completed stage type.
+ result: The result of the stage verification.
+ Raises:
+ StoreError if the session cannot be found.
+ """
+ # Add (or update) the results of the current stage to the database.
+ #
+ # Note that we need to allow for the same stage to complete multiple
+ # times here so that registration is idempotent.
+ try:
+ await self.db_pool.simple_upsert(
+ table="ui_auth_sessions_credentials",
+ keyvalues={"session_id": session_id, "stage_type": stage_type},
+ values={"result": json.dumps(result)},
+ desc="mark_ui_auth_stage_complete",
+ )
+ except self.db_pool.engine.module.IntegrityError:
+ raise StoreError(400, "Unknown session ID: %s" % (session_id,))
+
+ async def get_completed_ui_auth_stages(
+ self, session_id: str
+ ) -> Dict[str, Union[str, bool, JsonDict]]:
+ """
+ Retrieve the completed stages of a UI authentication session.
+
+ Args:
+ session_id: The ID of the session.
+ Returns:
+ The completed stages mapped to the result of the verification of
+ that auth-type.
+ """
+ results = {}
+ for row in await self.db_pool.simple_select_list(
+ table="ui_auth_sessions_credentials",
+ keyvalues={"session_id": session_id},
+ retcols=("stage_type", "result"),
+ desc="get_completed_ui_auth_stages",
+ ):
+ results[row["stage_type"]] = db_to_json(row["result"])
+
+ return results
+
+ async def set_ui_auth_clientdict(
+ self, session_id: str, clientdict: JsonDict
+ ) -> None:
+ """
+ Store an updated clientdict for a given session ID.
+
+ Args:
+ session_id: The ID of this session as returned from check_auth
+ clientdict:
+ The dictionary from the client root level, not the 'auth' key.
+ """
+ # The clientdict gets stored as JSON.
+ clientdict_json = json.dumps(clientdict)
+
+ await self.db_pool.simple_update_one(
+ table="ui_auth_sessions",
+ keyvalues={"session_id": session_id},
+ updatevalues={"clientdict": clientdict_json},
+ desc="set_ui_auth_client_dict",
+ )
+
+ async def set_ui_auth_session_data(self, session_id: str, key: str, value: Any):
+ """
+ Store a key-value pair into the sessions data associated with this
+ request. This data is stored server-side and cannot be modified by
+ the client.
+
+ Args:
+ session_id: The ID of this session as returned from check_auth
+ key: The key to store the data under
+ value: The data to store
+ Raises:
+ StoreError if the session cannot be found.
+ """
+ await self.db_pool.runInteraction(
+ "set_ui_auth_session_data",
+ self._set_ui_auth_session_data_txn,
+ session_id,
+ key,
+ value,
+ )
+
+ def _set_ui_auth_session_data_txn(self, txn, session_id: str, key: str, value: Any):
+ # Get the current value.
+ result = self.db_pool.simple_select_one_txn(
+ txn,
+ table="ui_auth_sessions",
+ keyvalues={"session_id": session_id},
+ retcols=("serverdict",),
+ )
+
+ # Update it and add it back to the database.
+ serverdict = db_to_json(result["serverdict"])
+ serverdict[key] = value
+
+ self.db_pool.simple_update_one_txn(
+ txn,
+ table="ui_auth_sessions",
+ keyvalues={"session_id": session_id},
+ updatevalues={"serverdict": json.dumps(serverdict)},
+ )
+
+ async def get_ui_auth_session_data(
+ self, session_id: str, key: str, default: Optional[Any] = None
+ ) -> Any:
+ """
+ Retrieve data stored with set_ui_auth_session_data
+
+ Args:
+ session_id: The ID of this session as returned from check_auth
+ key: The key the data was stored under
+ default: Value to return if the key has not been set
+ Raises:
+ StoreError if the session cannot be found.
+ """
+ result = await self.db_pool.simple_select_one(
+ table="ui_auth_sessions",
+ keyvalues={"session_id": session_id},
+ retcols=("serverdict",),
+ desc="get_ui_auth_session_data",
+ )
+
+ serverdict = db_to_json(result["serverdict"])
+
+ return serverdict.get(key, default)
+
+
+class UIAuthStore(UIAuthWorkerStore):
+ def delete_old_ui_auth_sessions(self, expiration_time: int):
+ """
+ Remove sessions whose creation time is at or before the expiration time.
+
+ Args:
+ expiration_time: Sessions created at or before this time are removed.
+ This is an epoch time in milliseconds.
+
+ """
+ return self.db_pool.runInteraction(
+ "delete_old_ui_auth_sessions",
+ self._delete_old_ui_auth_sessions_txn,
+ expiration_time,
+ )
+
+ def _delete_old_ui_auth_sessions_txn(self, txn, expiration_time: int):
+ # Get the expired sessions.
+ sql = "SELECT session_id FROM ui_auth_sessions WHERE creation_time <= ?"
+ txn.execute(sql, [expiration_time])
+ session_ids = [r[0] for r in txn.fetchall()]
+
+ # Delete the corresponding completed credentials.
+ self.db_pool.simple_delete_many_txn(
+ txn,
+ table="ui_auth_sessions_credentials",
+ column="session_id",
+ iterable=session_ids,
+ keyvalues={},
+ )
+
+ # Finally, delete the sessions.
+ self.db_pool.simple_delete_many_txn(
+ txn,
+ table="ui_auth_sessions",
+ column="session_id",
+ iterable=session_ids,
+ keyvalues={},
+ )
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
new file mode 100644
index 00000000..af21fe45
--- /dev/null
+++ b/synapse/storage/databases/main/user_directory.py
@@ -0,0 +1,809 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 Vector Creations Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import re
+
+from synapse.api.constants import EventTypes, JoinRules
+from synapse.storage.database import DatabasePool
+from synapse.storage.databases.main.state import StateFilter
+from synapse.storage.databases.main.state_deltas import StateDeltasStore
+from synapse.storage.engines import PostgresEngine, Sqlite3Engine
+from synapse.types import get_domain_from_id, get_localpart_from_id
+from synapse.util.caches.descriptors import cached
+
+logger = logging.getLogger(__name__)
+
+
+TEMP_TABLE = "_temp_populate_user_directory"
+
+
+class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
+
+ # How many records do we calculate before sending them to
+ # add_users_who_share_private_rooms?
+ SHARE_PRIVATE_WORKING_SET = 500
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(UserDirectoryBackgroundUpdateStore, self).__init__(database, db_conn, hs)
+
+ self.server_name = hs.hostname
+
+ self.db_pool.updates.register_background_update_handler(
+ "populate_user_directory_createtables",
+ self._populate_user_directory_createtables,
+ )
+ self.db_pool.updates.register_background_update_handler(
+ "populate_user_directory_process_rooms",
+ self._populate_user_directory_process_rooms,
+ )
+ self.db_pool.updates.register_background_update_handler(
+ "populate_user_directory_process_users",
+ self._populate_user_directory_process_users,
+ )
+ self.db_pool.updates.register_background_update_handler(
+ "populate_user_directory_cleanup", self._populate_user_directory_cleanup
+ )
+
+ async def _populate_user_directory_createtables(self, progress, batch_size):
+
+ # Get all the rooms that we want to process.
+ def _make_staging_area(txn):
+ sql = (
+ "CREATE TABLE IF NOT EXISTS "
+ + TEMP_TABLE
+ + "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)"
+ )
+ txn.execute(sql)
+
+ sql = (
+ "CREATE TABLE IF NOT EXISTS "
+ + TEMP_TABLE
+ + "_position(position TEXT NOT NULL)"
+ )
+ txn.execute(sql)
+
+ # Get rooms we want to process from the database
+ sql = """
+ SELECT room_id, count(*) FROM current_state_events
+ GROUP BY room_id
+ """
+ txn.execute(sql)
+ rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()]
+ self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms)
+ del rooms
+
+ # If search all users is on, get all the users we want to add.
+ if self.hs.config.user_directory_search_all_users:
+ sql = (
+ "CREATE TABLE IF NOT EXISTS "
+ + TEMP_TABLE
+ + "_users(user_id TEXT NOT NULL)"
+ )
+ txn.execute(sql)
+
+ txn.execute("SELECT name FROM users")
+ users = [{"user_id": x[0]} for x in txn.fetchall()]
+
+ self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_users", users)
+
+ new_pos = await self.get_max_stream_id_in_current_state_deltas()
+ await self.db_pool.runInteraction(
+ "populate_user_directory_temp_build", _make_staging_area
+ )
+ await self.db_pool.simple_insert(
+ TEMP_TABLE + "_position", {"position": new_pos}
+ )
+
+ await self.db_pool.updates._end_background_update(
+ "populate_user_directory_createtables"
+ )
+ return 1
+
+ async def _populate_user_directory_cleanup(self, progress, batch_size):
+ """
+ Update the user directory stream position, then clean up the old tables.
+ """
+ position = await self.db_pool.simple_select_one_onecol(
+ TEMP_TABLE + "_position", None, "position"
+ )
+ await self.update_user_directory_stream_pos(position)
+
+ def _delete_staging_area(txn):
+ txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_rooms")
+ txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_users")
+ txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_position")
+
+ await self.db_pool.runInteraction(
+ "populate_user_directory_cleanup", _delete_staging_area
+ )
+
+ await self.db_pool.updates._end_background_update(
+ "populate_user_directory_cleanup"
+ )
+ return 1
+
+ async def _populate_user_directory_process_rooms(self, progress, batch_size):
+ """
+ Args:
+ progress (dict)
+ batch_size (int): Maximum number of state events to process
+ per cycle.
+ """
+ state = self.hs.get_state_handler()
+
+        # If we don't have any progress recorded, delete everything.
+ if not progress:
+ await self.delete_all_from_user_dir()
+
+ def _get_next_batch(txn):
+            # Only fetch 250 rooms, so we don't fetch too many at once, even
+            # if those 250 rooms have fewer than batch_size state events.
+ sql = """
+ SELECT room_id, events FROM %s
+ ORDER BY events DESC
+ LIMIT 250
+ """ % (
+ TEMP_TABLE + "_rooms",
+ )
+ txn.execute(sql)
+ rooms_to_work_on = txn.fetchall()
+
+ if not rooms_to_work_on:
+ return None
+
+ # Get how many are left to process, so we can give status on how
+ # far we are in processing
+ txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms")
+ progress["remaining"] = txn.fetchone()[0]
+
+ return rooms_to_work_on
+
+ rooms_to_work_on = await self.db_pool.runInteraction(
+ "populate_user_directory_temp_read", _get_next_batch
+ )
+
+ # No more rooms -- complete the transaction.
+ if not rooms_to_work_on:
+ await self.db_pool.updates._end_background_update(
+ "populate_user_directory_process_rooms"
+ )
+ return 1
+
+ logger.debug(
+ "Processing the next %d rooms of %d remaining"
+ % (len(rooms_to_work_on), progress["remaining"])
+ )
+
+ processed_event_count = 0
+
+ for room_id, event_count in rooms_to_work_on:
+ is_in_room = await self.is_host_joined(room_id, self.server_name)
+
+ if is_in_room:
+ is_public = await self.is_room_world_readable_or_publicly_joinable(
+ room_id
+ )
+
+ users_with_profile = await state.get_current_users_in_room(room_id)
+ user_ids = set(users_with_profile)
+
+ # Update each user in the user directory.
+ for user_id, profile in users_with_profile.items():
+ await self.update_profile_in_user_dir(
+ user_id, profile.display_name, profile.avatar_url
+ )
+
+ to_insert = set()
+
+ if is_public:
+ for user_id in user_ids:
+ if self.get_if_app_services_interested_in_user(user_id):
+ continue
+
+ to_insert.add(user_id)
+
+ if to_insert:
+ await self.add_users_in_public_rooms(room_id, to_insert)
+ to_insert.clear()
+ else:
+ for user_id in user_ids:
+ if not self.hs.is_mine_id(user_id):
+ continue
+
+ if self.get_if_app_services_interested_in_user(user_id):
+ continue
+
+ for other_user_id in user_ids:
+ if user_id == other_user_id:
+ continue
+
+ user_set = (user_id, other_user_id)
+ to_insert.add(user_set)
+
+ # If it gets too big, stop and write to the database
+ # to prevent storing too much in RAM.
+ if len(to_insert) >= self.SHARE_PRIVATE_WORKING_SET:
+ await self.add_users_who_share_private_room(
+ room_id, to_insert
+ )
+ to_insert.clear()
+
+ if to_insert:
+ await self.add_users_who_share_private_room(room_id, to_insert)
+ to_insert.clear()
+
+ # We've finished a room. Delete it from the table.
+ await self.db_pool.simple_delete_one(
+ TEMP_TABLE + "_rooms", {"room_id": room_id}
+ )
+ # Update the remaining counter.
+ progress["remaining"] -= 1
+ await self.db_pool.runInteraction(
+ "populate_user_directory",
+ self.db_pool.updates._background_update_progress_txn,
+ "populate_user_directory_process_rooms",
+ progress,
+ )
+
+ processed_event_count += event_count
+
+ if processed_event_count > batch_size:
+ # Don't process any more rooms, we've hit our batch size.
+ return processed_event_count
+
+ return processed_event_count
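+
+    # Batching sketch (hedged, illustrative numbers): with batch_size=100 and
+    # staged rooms of 60, 50 and 40 state events, one call processes the 60-
+    # and 50-event rooms (60 + 50 > 100) and returns 110; the next call
+    # resumes from whatever is left in the staging table.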
+
+ async def _populate_user_directory_process_users(self, progress, batch_size):
+ """
+ If search_all_users is enabled, add all of the users to the user directory.
+ """
+ if not self.hs.config.user_directory_search_all_users:
+ await self.db_pool.updates._end_background_update(
+ "populate_user_directory_process_users"
+ )
+ return 1
+
+ def _get_next_batch(txn):
+ sql = "SELECT user_id FROM %s LIMIT %s" % (
+ TEMP_TABLE + "_users",
+ str(batch_size),
+ )
+ txn.execute(sql)
+ users_to_work_on = txn.fetchall()
+
+ if not users_to_work_on:
+ return None
+
+ users_to_work_on = [x[0] for x in users_to_work_on]
+
+ # Get how many are left to process, so we can give status on how
+ # far we are in processing
+ sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users"
+ txn.execute(sql)
+ progress["remaining"] = txn.fetchone()[0]
+
+ return users_to_work_on
+
+ users_to_work_on = await self.db_pool.runInteraction(
+ "populate_user_directory_temp_read", _get_next_batch
+ )
+
+ # No more users -- complete the transaction.
+ if not users_to_work_on:
+ await self.db_pool.updates._end_background_update(
+ "populate_user_directory_process_users"
+ )
+ return 1
+
+ logger.debug(
+ "Processing the next %d users of %d remaining"
+ % (len(users_to_work_on), progress["remaining"])
+ )
+
+ for user_id in users_to_work_on:
+ profile = await self.get_profileinfo(get_localpart_from_id(user_id))
+ await self.update_profile_in_user_dir(
+ user_id, profile.display_name, profile.avatar_url
+ )
+
+ # We've finished processing a user. Delete it from the table.
+ await self.db_pool.simple_delete_one(
+ TEMP_TABLE + "_users", {"user_id": user_id}
+ )
+ # Update the remaining counter.
+ progress["remaining"] -= 1
+ await self.db_pool.runInteraction(
+ "populate_user_directory",
+ self.db_pool.updates._background_update_progress_txn,
+ "populate_user_directory_process_users",
+ progress,
+ )
+
+ return len(users_to_work_on)
+
+ async def is_room_world_readable_or_publicly_joinable(self, room_id):
+ """Check if the room is either world_readable or publically joinable
+ """
+
+        # Create a state filter that only queries the join rules and history
+        # visibility state event types.
+ types_to_filter = (
+ (EventTypes.JoinRules, ""),
+ (EventTypes.RoomHistoryVisibility, ""),
+ )
+
+ current_state_ids = await self.get_filtered_current_state_ids(
+ room_id, StateFilter.from_types(types_to_filter)
+ )
+
+ join_rules_id = current_state_ids.get((EventTypes.JoinRules, ""))
+ if join_rules_id:
+ join_rule_ev = await self.get_event(join_rules_id, allow_none=True)
+ if join_rule_ev:
+ if join_rule_ev.content.get("join_rule") == JoinRules.PUBLIC:
+ return True
+
+ hist_vis_id = current_state_ids.get((EventTypes.RoomHistoryVisibility, ""))
+ if hist_vis_id:
+ hist_vis_ev = await self.get_event(hist_vis_id, allow_none=True)
+ if hist_vis_ev:
+ if hist_vis_ev.content.get("history_visibility") == "world_readable":
+ return True
+
+ return False
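+
+    # For illustration, the two state events checked above look roughly like
+    #
+    #     {"type": "m.room.join_rules", "content": {"join_rule": "public"}}
+    #     {"type": "m.room.history_visibility",
+    #      "content": {"history_visibility": "world_readable"}}
+    #
+    # and either one is enough for the room's members to be listed publicly.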
+
+ def update_profile_in_user_dir(self, user_id, display_name, avatar_url):
+ """
+ Update or add a user's profile in the user directory.
+ """
+
+ def _update_profile_in_user_dir_txn(txn):
+ new_entry = self.db_pool.simple_upsert_txn(
+ txn,
+ table="user_directory",
+ keyvalues={"user_id": user_id},
+ values={"display_name": display_name, "avatar_url": avatar_url},
+                lock=False,  # We're the only inserter
+ )
+
+ if isinstance(self.database_engine, PostgresEngine):
+ # We weight the localpart most highly, then display name and finally
+ # server name
+ if self.database_engine.can_native_upsert:
+ sql = """
+ INSERT INTO user_directory_search(user_id, vector)
+ VALUES (?,
+ setweight(to_tsvector('english', ?), 'A')
+ || setweight(to_tsvector('english', ?), 'D')
+ || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
+ ) ON CONFLICT (user_id) DO UPDATE SET vector=EXCLUDED.vector
+ """
+ txn.execute(
+ sql,
+ (
+ user_id,
+ get_localpart_from_id(user_id),
+ get_domain_from_id(user_id),
+ display_name,
+ ),
+ )
+ else:
+ # TODO: Remove this code after we've bumped the minimum version
+ # of postgres to always support upserts, so we can get rid of
+ # `new_entry` usage
+ if new_entry is True:
+ sql = """
+ INSERT INTO user_directory_search(user_id, vector)
+ VALUES (?,
+ setweight(to_tsvector('english', ?), 'A')
+ || setweight(to_tsvector('english', ?), 'D')
+ || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
+ )
+ """
+ txn.execute(
+ sql,
+ (
+ user_id,
+ get_localpart_from_id(user_id),
+ get_domain_from_id(user_id),
+ display_name,
+ ),
+ )
+ elif new_entry is False:
+ sql = """
+ UPDATE user_directory_search
+ SET vector = setweight(to_tsvector('english', ?), 'A')
+ || setweight(to_tsvector('english', ?), 'D')
+ || setweight(to_tsvector('english', COALESCE(?, '')), 'B')
+ WHERE user_id = ?
+ """
+ txn.execute(
+ sql,
+ (
+ get_localpart_from_id(user_id),
+ get_domain_from_id(user_id),
+ display_name,
+ user_id,
+ ),
+ )
+ else:
+ raise RuntimeError(
+ "upsert returned None when 'can_native_upsert' is False"
+ )
+ elif isinstance(self.database_engine, Sqlite3Engine):
+ value = "%s %s" % (user_id, display_name) if display_name else user_id
+ self.db_pool.simple_upsert_txn(
+ txn,
+ table="user_directory_search",
+ keyvalues={"user_id": user_id},
+ values={"value": value},
+                lock=False,  # We're the only inserter
+ )
+ else:
+ # This should be unreachable.
+ raise Exception("Unrecognized database engine")
+
+ txn.call_after(self.get_user_in_directory.invalidate, (user_id,))
+
+ return self.db_pool.runInteraction(
+ "update_profile_in_user_dir", _update_profile_in_user_dir_txn
+ )
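+
+    # Worked example of the weighting above (hedged): for @alice:example.com
+    # with display name "Alice A.", the stored vector is roughly
+    #
+    #     setweight(to_tsvector('english', 'alice'), 'A')            -- localpart
+    #     || setweight(to_tsvector('english', 'example.com'), 'D')   -- domain
+    #     || setweight(to_tsvector('english', 'Alice A.'), 'B')      -- display name
+    #
+    # so a search for "alice" ranks the localpart match highest.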
+
+ def add_users_who_share_private_room(self, room_id, user_id_tuples):
+ """Insert entries into the users_who_share_private_rooms table. The first
+ user should be a local user.
+
+ Args:
+ room_id (str)
+            user_id_tuples([(str, str)]): iterable of 2-tuples of user IDs.
+ """
+
+ def _add_users_who_share_room_txn(txn):
+ self.db_pool.simple_upsert_many_txn(
+ txn,
+ table="users_who_share_private_rooms",
+ key_names=["user_id", "other_user_id", "room_id"],
+ key_values=[
+ (user_id, other_user_id, room_id)
+ for user_id, other_user_id in user_id_tuples
+ ],
+ value_names=(),
+ value_values=None,
+ )
+
+ return self.db_pool.runInteraction(
+ "add_users_who_share_room", _add_users_who_share_room_txn
+ )
+
+ def add_users_in_public_rooms(self, room_id, user_ids):
+ """Insert entries into the users_who_share_private_rooms table. The first
+ user should be a local user.
+
+ Args:
+ room_id (str)
+ user_ids (list[str])
+ """
+
+ def _add_users_in_public_rooms_txn(txn):
+
+ self.db_pool.simple_upsert_many_txn(
+ txn,
+ table="users_in_public_rooms",
+ key_names=["user_id", "room_id"],
+ key_values=[(user_id, room_id) for user_id in user_ids],
+ value_names=(),
+ value_values=None,
+ )
+
+ return self.db_pool.runInteraction(
+ "add_users_in_public_rooms", _add_users_in_public_rooms_txn
+ )
+
+ def delete_all_from_user_dir(self):
+ """Delete the entire user directory
+ """
+
+ def _delete_all_from_user_dir_txn(txn):
+ txn.execute("DELETE FROM user_directory")
+ txn.execute("DELETE FROM user_directory_search")
+ txn.execute("DELETE FROM users_in_public_rooms")
+ txn.execute("DELETE FROM users_who_share_private_rooms")
+ txn.call_after(self.get_user_in_directory.invalidate_all)
+
+ return self.db_pool.runInteraction(
+ "delete_all_from_user_dir", _delete_all_from_user_dir_txn
+ )
+
+ @cached()
+ def get_user_in_directory(self, user_id):
+ return self.db_pool.simple_select_one(
+ table="user_directory",
+ keyvalues={"user_id": user_id},
+ retcols=("display_name", "avatar_url"),
+ allow_none=True,
+ desc="get_user_in_directory",
+ )
+
+ def update_user_directory_stream_pos(self, stream_id):
+ return self.db_pool.simple_update_one(
+ table="user_directory_stream_pos",
+ keyvalues={},
+ updatevalues={"stream_id": stream_id},
+ desc="update_user_directory_stream_pos",
+ )
+
+
+class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
+
+    # How many records we batch up before writing them out via
+    # add_users_who_share_private_room.
+ SHARE_PRIVATE_WORKING_SET = 500
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(UserDirectoryStore, self).__init__(database, db_conn, hs)
+
+ def remove_from_user_dir(self, user_id):
+ def _remove_from_user_dir_txn(txn):
+ self.db_pool.simple_delete_txn(
+ txn, table="user_directory", keyvalues={"user_id": user_id}
+ )
+ self.db_pool.simple_delete_txn(
+ txn, table="user_directory_search", keyvalues={"user_id": user_id}
+ )
+ self.db_pool.simple_delete_txn(
+ txn, table="users_in_public_rooms", keyvalues={"user_id": user_id}
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="users_who_share_private_rooms",
+ keyvalues={"user_id": user_id},
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="users_who_share_private_rooms",
+ keyvalues={"other_user_id": user_id},
+ )
+ txn.call_after(self.get_user_in_directory.invalidate, (user_id,))
+
+ return self.db_pool.runInteraction(
+ "remove_from_user_dir", _remove_from_user_dir_txn
+ )
+
+ async def get_users_in_dir_due_to_room(self, room_id):
+ """Get all user_ids that are in the room directory because they're
+ in the given room_id
+ """
+ user_ids_share_pub = await self.db_pool.simple_select_onecol(
+ table="users_in_public_rooms",
+ keyvalues={"room_id": room_id},
+ retcol="user_id",
+ desc="get_users_in_dir_due_to_room",
+ )
+
+ user_ids_share_priv = await self.db_pool.simple_select_onecol(
+ table="users_who_share_private_rooms",
+ keyvalues={"room_id": room_id},
+ retcol="other_user_id",
+ desc="get_users_in_dir_due_to_room",
+ )
+
+ user_ids = set(user_ids_share_pub)
+ user_ids.update(user_ids_share_priv)
+
+ return user_ids
+
+ def remove_user_who_share_room(self, user_id, room_id):
+ """
+        Deletes entries in the users_who_share_private_rooms and
+        users_in_public_rooms tables. The first user should be a local user.
+
+ Args:
+ user_id (str)
+ room_id (str)
+ """
+
+ def _remove_user_who_share_room_txn(txn):
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="users_who_share_private_rooms",
+ keyvalues={"user_id": user_id, "room_id": room_id},
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="users_who_share_private_rooms",
+ keyvalues={"other_user_id": user_id, "room_id": room_id},
+ )
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="users_in_public_rooms",
+ keyvalues={"user_id": user_id, "room_id": room_id},
+ )
+
+ return self.db_pool.runInteraction(
+ "remove_user_who_share_room", _remove_user_who_share_room_txn
+ )
+
+ async def get_user_dir_rooms_user_is_in(self, user_id):
+ """
+ Returns the rooms that a user is in.
+
+ Args:
+ user_id(str): Must be a local user
+
+        Returns:
+            list: the room IDs the user is in
+ """
+ rows = await self.db_pool.simple_select_onecol(
+ table="users_who_share_private_rooms",
+ keyvalues={"user_id": user_id},
+ retcol="room_id",
+ desc="get_rooms_user_is_in",
+ )
+
+ pub_rows = await self.db_pool.simple_select_onecol(
+ table="users_in_public_rooms",
+ keyvalues={"user_id": user_id},
+ retcol="room_id",
+ desc="get_rooms_user_is_in",
+ )
+
+        rooms = set(pub_rows)
+        rooms.update(rows)
+        return list(rooms)
+
+ def get_user_directory_stream_pos(self):
+ return self.db_pool.simple_select_one_onecol(
+ table="user_directory_stream_pos",
+ keyvalues={},
+ retcol="stream_id",
+ desc="get_user_directory_stream_pos",
+ )
+
+ async def search_user_dir(self, user_id, search_term, limit):
+ """Searches for users in directory
+
+ Returns:
+ dict of the form::
+
+ {
+ "limited": <bool>, # whether there were more results or not
+ "results": [ # Ordered by best match first
+ {
+ "user_id": <user_id>,
+ "display_name": <display_name>,
+ "avatar_url": <avatar_url>
+ }
+ ]
+ }
+ """
+
+ if self.hs.config.user_directory_search_all_users:
+ join_args = (user_id,)
+ where_clause = "user_id != ?"
+ else:
+ join_args = (user_id,)
+ where_clause = """
+ (
+ EXISTS (select 1 from users_in_public_rooms WHERE user_id = t.user_id)
+ OR EXISTS (
+ SELECT 1 FROM users_who_share_private_rooms
+ WHERE user_id = ? AND other_user_id = t.user_id
+ )
+ )
+ """
+
+ if isinstance(self.database_engine, PostgresEngine):
+ full_query, exact_query, prefix_query = _parse_query_postgres(search_term)
+
+            # We order by rank and then by whether they have profile info.
+            # The ranking algorithm is hand-tweaked for "best" results. Broadly,
+            # the idea is that we give a higher weight to exact matches.
+            # The array of numbers gives the weights for the various parts of
+            # the search: (domain, _, display name, localpart)
+ sql = """
+ SELECT d.user_id AS user_id, display_name, avatar_url
+ FROM user_directory_search as t
+ INNER JOIN user_directory AS d USING (user_id)
+ WHERE
+ %s
+ AND vector @@ to_tsquery('english', ?)
+ ORDER BY
+ (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END)
+ * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END)
+ * (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END)
+ * (
+ 3 * ts_rank_cd(
+ '{0.1, 0.1, 0.9, 1.0}',
+ vector,
+ to_tsquery('english', ?),
+ 8
+ )
+ + ts_rank_cd(
+ '{0.1, 0.1, 0.9, 1.0}',
+ vector,
+ to_tsquery('english', ?),
+ 8
+ )
+ )
+ DESC,
+ display_name IS NULL,
+ avatar_url IS NULL
+ LIMIT ?
+ """ % (
+ where_clause,
+ )
+ args = join_args + (full_query, exact_query, prefix_query, limit + 1)
+ elif isinstance(self.database_engine, Sqlite3Engine):
+ search_query = _parse_query_sqlite(search_term)
+
+ sql = """
+ SELECT d.user_id AS user_id, display_name, avatar_url
+ FROM user_directory_search as t
+ INNER JOIN user_directory AS d USING (user_id)
+ WHERE
+ %s
+ AND value MATCH ?
+ ORDER BY
+ rank(matchinfo(user_directory_search)) DESC,
+ display_name IS NULL,
+ avatar_url IS NULL
+ LIMIT ?
+ """ % (
+ where_clause,
+ )
+ args = join_args + (search_query, limit + 1)
+ else:
+ # This should be unreachable.
+ raise Exception("Unrecognized database engine")
+
+ results = await self.db_pool.execute(
+ "search_user_dir", self.db_pool.cursor_to_dict, sql, *args
+ )
+
+ limited = len(results) > limit
+
+ return {"limited": limited, "results": results}
+
+
+def _parse_query_sqlite(search_term):
+ """Takes a plain unicode string from the user and converts it into a form
+    that can be passed to the database.
+ We use this so that we can add prefix matching, which isn't something
+ that is supported by default.
+
+ We specifically add both a prefix and non prefix matching term so that
+ exact matches get ranked higher.
+ """
+
+ # Pull out the individual words, discarding any non-word characters.
+ results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
+ return " & ".join("(%s* OR %s)" % (result, result) for result in results)
+
+
+def _parse_query_postgres(search_term):
+ """Takes a plain unicode string from the user and converts it into a form
+    that can be passed to the database.
+ We use this so that we can add prefix matching, which isn't something
+ that is supported by default.
+ """
+
+ # Pull out the individual words, discarding any non-word characters.
+ results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
+
+ both = " & ".join("(%s:* | %s)" % (result, result) for result in results)
+ exact = " & ".join("%s" % (result,) for result in results)
+ prefix = " & ".join("%s:*" % (result,) for result in results)
+
+ return both, exact, prefix
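+
+# For example, "foo bar" yields
+#
+#     both:   (foo:* | foo) & (bar:* | bar)
+#     exact:  foo & bar
+#     prefix: foo:* & bar:*
+#
+# which the caller feeds to to_tsquery for matching and for the two
+# ts_rank_cd terms that boost exact and prefix matches.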
diff --git a/synapse/storage/databases/main/user_erasure_store.py b/synapse/storage/databases/main/user_erasure_store.py
new file mode 100644
index 00000000..ab6cb2c1
--- /dev/null
+++ b/synapse/storage/databases/main/user_erasure_store.py
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import operator
+
+from synapse.storage._base import SQLBaseStore
+from synapse.util.caches.descriptors import cached, cachedList
+
+
+class UserErasureWorkerStore(SQLBaseStore):
+ @cached()
+ def is_user_erased(self, user_id):
+ """
+ Check if the given user id has requested erasure
+
+ Args:
+ user_id (str): full user id to check
+
+ Returns:
+ Deferred[bool]: True if the user has requested erasure
+ """
+ return self.db_pool.simple_select_onecol(
+ table="erased_users",
+ keyvalues={"user_id": user_id},
+ retcol="1",
+ desc="is_user_erased",
+ ).addCallback(operator.truth)
+
+ @cachedList(
+ cached_method_name="is_user_erased", list_name="user_ids", inlineCallbacks=True
+ )
+ def are_users_erased(self, user_ids):
+ """
+ Checks which users in a list have requested erasure
+
+ Args:
+ user_ids (iterable[str]): full user id to check
+
+ Returns:
+ Deferred[dict[str, bool]]:
+ for each user, whether the user has requested erasure.
+ """
+ # this serves the dual purpose of (a) making sure we can do len and
+ # iterate it multiple times, and (b) avoiding duplicates.
+ user_ids = tuple(set(user_ids))
+
+ rows = yield self.db_pool.simple_select_many_batch(
+ table="erased_users",
+ column="user_id",
+ iterable=user_ids,
+ retcols=("user_id",),
+ desc="are_users_erased",
+ )
+ erased_users = {row["user_id"] for row in rows}
+
+ res = {u: u in erased_users for u in user_ids}
+ return res
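+
+    # For illustration (hedged): are_users_erased(["@a:x", "@b:x"]) resolves
+    # to something like {"@a:x": True, "@b:x": False}, True for users with a
+    # row in erased_users.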
+
+
+class UserErasureStore(UserErasureWorkerStore):
+ def mark_user_erased(self, user_id: str) -> None:
+ """Indicate that user_id wishes their message history to be erased.
+
+ Args:
+ user_id: full user_id to be erased
+ """
+
+ def f(txn):
+ # first check if they are already in the list
+ txn.execute("SELECT 1 FROM erased_users WHERE user_id = ?", (user_id,))
+ if txn.fetchone():
+ return
+
+ # they are not already there: do the insert.
+ txn.execute("INSERT INTO erased_users (user_id) VALUES (?)", (user_id,))
+
+ self._invalidate_cache_and_stream(txn, self.is_user_erased, (user_id,))
+
+ return self.db_pool.runInteraction("mark_user_erased", f)
+
+ def mark_user_not_erased(self, user_id: str) -> None:
+ """Indicate that user_id is no longer erased.
+
+ Args:
+ user_id: full user_id to be un-erased
+ """
+
+ def f(txn):
+ # first check if they are already in the list
+ txn.execute("SELECT 1 FROM erased_users WHERE user_id = ?", (user_id,))
+ if not txn.fetchone():
+ return
+
+ # They are there, delete them.
+ self.simple_delete_one_txn(
+ txn, "erased_users", keyvalues={"user_id": user_id}
+ )
+
+ self._invalidate_cache_and_stream(txn, self.is_user_erased, (user_id,))
+
+ return self.db_pool.runInteraction("mark_user_not_erased", f)
diff --git a/synapse/storage/databases/state/__init__.py b/synapse/storage/databases/state/__init__.py
new file mode 100644
index 00000000..c90d0228
--- /dev/null
+++ b/synapse/storage/databases/state/__init__.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.storage.databases.state.store import StateGroupDataStore # noqa: F401
diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py
new file mode 100644
index 00000000..139085b6
--- /dev/null
+++ b/synapse/storage/databases/state/bg_updates.py
@@ -0,0 +1,370 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.database import DatabasePool
+from synapse.storage.engines import PostgresEngine
+from synapse.storage.state import StateFilter
+
+logger = logging.getLogger(__name__)
+
+
+MAX_STATE_DELTA_HOPS = 100
+
+
+class StateGroupBackgroundUpdateStore(SQLBaseStore):
+ """Defines functions related to state groups needed to run the state backgroud
+ updates.
+ """
+
+ def _count_state_group_hops_txn(self, txn, state_group):
+ """Given a state group, count how many hops there are in the tree.
+
+ This is used to ensure the delta chains don't get too long.
+ """
+ if isinstance(self.database_engine, PostgresEngine):
+ sql = """
+ WITH RECURSIVE state(state_group) AS (
+ VALUES(?::bigint)
+ UNION ALL
+ SELECT prev_state_group FROM state_group_edges e, state s
+ WHERE s.state_group = e.state_group
+ )
+ SELECT count(*) FROM state;
+ """
+
+ txn.execute(sql, (state_group,))
+ row = txn.fetchone()
+ if row and row[0]:
+ return row[0]
+ else:
+ return 0
+ else:
+ # We don't use WITH RECURSIVE on sqlite3 as there are distributions
+ # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
+ next_group = state_group
+ count = 0
+
+ while next_group:
+ next_group = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="state_group_edges",
+ keyvalues={"state_group": next_group},
+ retcol="prev_state_group",
+ allow_none=True,
+ )
+ if next_group:
+ count += 1
+
+ return count
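+
+    # Illustration of the fallback loop above: with edges 103 -> 102 and
+    # 102 -> 101 (and no edge out of 101), counting from state group 103
+    # walks two edges and returns 2.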
+
+ def _get_state_groups_from_groups_txn(
+ self, txn, groups, state_filter=StateFilter.all()
+ ):
+ results = {group: {} for group in groups}
+
+ where_clause, where_args = state_filter.make_sql_filter_clause()
+
+ # Unless the filter clause is empty, we're going to append it after an
+ # existing where clause
+ if where_clause:
+ where_clause = " AND (%s)" % (where_clause,)
+
+ if isinstance(self.database_engine, PostgresEngine):
+ # Temporarily disable sequential scans in this transaction. This is
+            # a temporary hack until we can add the right indices.
+ txn.execute("SET LOCAL enable_seqscan=off")
+
+ # The below query walks the state_group tree so that the "state"
+ # table includes all state_groups in the tree. It then joins
+ # against `state_groups_state` to fetch the latest state.
+ # It assumes that previous state groups are always numerically
+            # smaller.
+ # The PARTITION is used to get the event_id in the greatest state
+ # group for the given type, state_key.
+ # This may return multiple rows per (type, state_key), but last_value
+ # should be the same.
+ sql = """
+ WITH RECURSIVE state(state_group) AS (
+ VALUES(?::bigint)
+ UNION ALL
+ SELECT prev_state_group FROM state_group_edges e, state s
+ WHERE s.state_group = e.state_group
+ )
+ SELECT DISTINCT ON (type, state_key)
+ type, state_key, event_id
+ FROM state_groups_state
+ WHERE state_group IN (
+ SELECT state_group FROM state
+ ) %s
+ ORDER BY type, state_key, state_group DESC
+ """
+
+ for group in groups:
+ args = [group]
+ args.extend(where_args)
+
+ txn.execute(sql % (where_clause,), args)
+ for row in txn:
+ typ, state_key, event_id = row
+ key = (typ, state_key)
+ results[group][key] = event_id
+ else:
+ max_entries_returned = state_filter.max_entries_returned()
+
+ # We don't use WITH RECURSIVE on sqlite3 as there are distributions
+ # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
+ for group in groups:
+ next_group = group
+
+ while next_group:
+ # We did this before by getting the list of group ids, and
+ # then passing that list to sqlite to get latest event for
+ # each (type, state_key). However, that was terribly slow
+ # without the right indices (which we can't add until
+ # after we finish deduping state, which requires this func)
+ args = [next_group]
+ args.extend(where_args)
+
+ txn.execute(
+ "SELECT type, state_key, event_id FROM state_groups_state"
+ " WHERE state_group = ? " + where_clause,
+ args,
+ )
+ results[group].update(
+ ((typ, state_key), event_id)
+ for typ, state_key, event_id in txn
+ if (typ, state_key) not in results[group]
+ )
+
+ # If the number of entries in the (type,state_key)->event_id dict
+ # matches the number of (type,state_keys) types we were searching
+ # for, then we must have found them all, so no need to go walk
+ # further down the tree... UNLESS our types filter contained
+ # wildcards (i.e. Nones) in which case we have to do an exhaustive
+ # search
+ if (
+ max_entries_returned is not None
+ and len(results[group]) == max_entries_returned
+ ):
+ break
+
+ next_group = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="state_group_edges",
+ keyvalues={"state_group": next_group},
+ retcol="prev_state_group",
+ allow_none=True,
+ )
+
+ return results
+
+
+class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore):
+
+ STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication"
+ STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index"
+ STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx"
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(StateBackgroundUpdateStore, self).__init__(database, db_conn, hs)
+ self.db_pool.updates.register_background_update_handler(
+ self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME,
+ self._background_deduplicate_state,
+ )
+ self.db_pool.updates.register_background_update_handler(
+ self.STATE_GROUP_INDEX_UPDATE_NAME, self._background_index_state
+ )
+ self.db_pool.updates.register_background_index_update(
+ self.STATE_GROUPS_ROOM_INDEX_UPDATE_NAME,
+ index_name="state_groups_room_id_idx",
+ table="state_groups",
+ columns=["room_id"],
+ )
+
+ async def _background_deduplicate_state(self, progress, batch_size):
+ """This background update will slowly deduplicate state by reencoding
+ them as deltas.
+ """
+ last_state_group = progress.get("last_state_group", 0)
+ rows_inserted = progress.get("rows_inserted", 0)
+ max_group = progress.get("max_group", None)
+
+ BATCH_SIZE_SCALE_FACTOR = 100
+
+ batch_size = max(1, int(batch_size / BATCH_SIZE_SCALE_FACTOR))
+
+ if max_group is None:
+ rows = await self.db_pool.execute(
+ "_background_deduplicate_state",
+ None,
+ "SELECT coalesce(max(id), 0) FROM state_groups",
+ )
+ max_group = rows[0][0]
+
+ def reindex_txn(txn):
+ new_last_state_group = last_state_group
+ for count in range(batch_size):
+ txn.execute(
+ "SELECT id, room_id FROM state_groups"
+ " WHERE ? < id AND id <= ?"
+ " ORDER BY id ASC"
+ " LIMIT 1",
+ (new_last_state_group, max_group),
+ )
+ row = txn.fetchone()
+ if row:
+ state_group, room_id = row
+
+ if not row or not state_group:
+ return True, count
+
+ txn.execute(
+ "SELECT state_group FROM state_group_edges"
+ " WHERE state_group = ?",
+ (state_group,),
+ )
+
+ # If we reach a point where we've already started inserting
+ # edges we should stop.
+ if txn.fetchall():
+ return True, count
+
+ txn.execute(
+ "SELECT coalesce(max(id), 0) FROM state_groups"
+ " WHERE id < ? AND room_id = ?",
+ (state_group, room_id),
+ )
+ (prev_group,) = txn.fetchone()
+ new_last_state_group = state_group
+
+ if prev_group:
+ potential_hops = self._count_state_group_hops_txn(txn, prev_group)
+ if potential_hops >= MAX_STATE_DELTA_HOPS:
+                        # We want to ensure chains are at most this long,
+ # otherwise read performance degrades.
+ continue
+
+ prev_state = self._get_state_groups_from_groups_txn(
+ txn, [prev_group]
+ )
+ prev_state = prev_state[prev_group]
+
+ curr_state = self._get_state_groups_from_groups_txn(
+ txn, [state_group]
+ )
+ curr_state = curr_state[state_group]
+
+ if not set(prev_state.keys()) - set(curr_state.keys()):
+                    # We can only do a delta if the current state group's keys
+                    # are a superset of the previous group's keys.
+
+ delta_state = {
+ key: value
+ for key, value in curr_state.items()
+ if prev_state.get(key, None) != value
+ }
+
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="state_group_edges",
+ keyvalues={"state_group": state_group},
+ )
+
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="state_group_edges",
+ values={
+ "state_group": state_group,
+ "prev_state_group": prev_group,
+ },
+ )
+
+ self.db_pool.simple_delete_txn(
+ txn,
+ table="state_groups_state",
+ keyvalues={"state_group": state_group},
+ )
+
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="state_groups_state",
+ values=[
+ {
+ "state_group": state_group,
+ "room_id": room_id,
+ "type": key[0],
+ "state_key": key[1],
+ "event_id": state_id,
+ }
+ for key, state_id in delta_state.items()
+ ],
+ )
+
+ progress = {
+ "last_state_group": state_group,
+ "rows_inserted": rows_inserted + batch_size,
+ "max_group": max_group,
+ }
+
+ self.db_pool.updates._background_update_progress_txn(
+ txn, self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, progress
+ )
+
+ return False, batch_size
+
+ finished, result = await self.db_pool.runInteraction(
+ self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, reindex_txn
+ )
+
+ if finished:
+ await self.db_pool.updates._end_background_update(
+ self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME
+ )
+
+ return result * BATCH_SIZE_SCALE_FACTOR
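+
+    # Worked example (hedged): if group 10 stores {A: e1, B: e2} and group 11
+    # stores {A: e1, B: e3, C: e4}, the keys of 10 are a subset of those of
+    # 11, so 11 can be re-encoded as an edge 11 -> 10 plus the delta rows
+    # {B: e3, C: e4}.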
+
+ async def _background_index_state(self, progress, batch_size):
+ def reindex_txn(conn):
+ conn.rollback()
+ if isinstance(self.database_engine, PostgresEngine):
+ # postgres insists on autocommit for the index
+ conn.set_session(autocommit=True)
+ try:
+ txn = conn.cursor()
+ txn.execute(
+ "CREATE INDEX CONCURRENTLY state_groups_state_type_idx"
+ " ON state_groups_state(state_group, type, state_key)"
+ )
+ txn.execute("DROP INDEX IF EXISTS state_groups_state_id")
+ finally:
+ conn.set_session(autocommit=False)
+ else:
+ txn = conn.cursor()
+ txn.execute(
+ "CREATE INDEX state_groups_state_type_idx"
+ " ON state_groups_state(state_group, type, state_key)"
+ )
+ txn.execute("DROP INDEX IF EXISTS state_groups_state_id")
+
+ await self.db_pool.runWithConnection(reindex_txn)
+
+ await self.db_pool.updates._end_background_update(
+ self.STATE_GROUP_INDEX_UPDATE_NAME
+ )
+
+ return 1
diff --git a/synapse/storage/databases/state/schema/delta/23/drop_state_index.sql b/synapse/storage/databases/state/schema/delta/23/drop_state_index.sql
new file mode 100644
index 00000000..ae09fa00
--- /dev/null
+++ b/synapse/storage/databases/state/schema/delta/23/drop_state_index.sql
@@ -0,0 +1,16 @@
+/* Copyright 2015, 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+DROP INDEX IF EXISTS state_groups_state_tuple;
diff --git a/synapse/storage/databases/state/schema/delta/30/state_stream.sql b/synapse/storage/databases/state/schema/delta/30/state_stream.sql
new file mode 100644
index 00000000..e85699e8
--- /dev/null
+++ b/synapse/storage/databases/state/schema/delta/30/state_stream.sql
@@ -0,0 +1,33 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/* We used to create a table called current_state_resets, but this is no
+ * longer used and is removed in delta 54.
+ */
+
+/* The outlier events that have acquired a state group, typically through
+ * backfill. This is tracked separately to the events table, as assigning a
+ * state group changes the position of the existing event in the stream
+ * ordering.
+ * However since a stream_ordering is assigned in persist_event for the
+ * (event, state) pair, we can use that stream_ordering to identify when
+ * the new state was assigned for the event.
+ */
+CREATE TABLE IF NOT EXISTS ex_outlier_stream(
+ event_stream_ordering BIGINT PRIMARY KEY NOT NULL,
+ event_id TEXT NOT NULL,
+ state_group BIGINT NOT NULL
+);
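+
+-- For illustration: if a backfilled outlier event $ev later acquires state
+-- group 42 while being persisted at stream ordering 1234, the row
+-- (1234, '$ev', 42) records that transition.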
diff --git a/synapse/storage/databases/state/schema/delta/32/remove_state_indices.sql b/synapse/storage/databases/state/schema/delta/32/remove_state_indices.sql
new file mode 100644
index 00000000..1450313b
--- /dev/null
+++ b/synapse/storage/databases/state/schema/delta/32/remove_state_indices.sql
@@ -0,0 +1,19 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- The following indices are redundant, other indices are equivalent or
+-- supersets
+DROP INDEX IF EXISTS state_groups_id; -- Duplicate of PRIMARY KEY
diff --git a/synapse/storage/databases/state/schema/delta/35/add_state_index.sql b/synapse/storage/databases/state/schema/delta/35/add_state_index.sql
new file mode 100644
index 00000000..33980d02
--- /dev/null
+++ b/synapse/storage/databases/state/schema/delta/35/add_state_index.sql
@@ -0,0 +1,17 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT into background_updates (update_name, progress_json, depends_on)
+ VALUES ('state_group_state_type_index', '{}', 'state_group_state_deduplication');
diff --git a/synapse/storage/databases/state/schema/delta/35/state.sql b/synapse/storage/databases/state/schema/delta/35/state.sql
new file mode 100644
index 00000000..0f1fa68a
--- /dev/null
+++ b/synapse/storage/databases/state/schema/delta/35/state.sql
@@ -0,0 +1,22 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE state_group_edges(
+ state_group BIGINT NOT NULL,
+ prev_state_group BIGINT NOT NULL
+);
+
+CREATE INDEX state_group_edges_idx ON state_group_edges(state_group);
+CREATE INDEX state_group_edges_prev_idx ON state_group_edges(prev_state_group);
diff --git a/synapse/storage/databases/state/schema/delta/35/state_dedupe.sql b/synapse/storage/databases/state/schema/delta/35/state_dedupe.sql
new file mode 100644
index 00000000..97e5067e
--- /dev/null
+++ b/synapse/storage/databases/state/schema/delta/35/state_dedupe.sql
@@ -0,0 +1,17 @@
+/* Copyright 2016 OpenMarket Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT into background_updates (update_name, progress_json)
+ VALUES ('state_group_state_deduplication', '{}');
diff --git a/synapse/storage/databases/state/schema/delta/47/state_group_seq.py b/synapse/storage/databases/state/schema/delta/47/state_group_seq.py
new file mode 100644
index 00000000..9fd1ccf6
--- /dev/null
+++ b/synapse/storage/databases/state/schema/delta/47/state_group_seq.py
@@ -0,0 +1,34 @@
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.storage.engines import PostgresEngine
+
+
+def run_create(cur, database_engine, *args, **kwargs):
+ if isinstance(database_engine, PostgresEngine):
+ # if we already have some state groups, we want to start making new
+ # ones with a higher id.
+ cur.execute("SELECT max(id) FROM state_groups")
+ row = cur.fetchone()
+
+ if row[0] is None:
+ start_val = 1
+ else:
+ start_val = row[0] + 1
+
+ cur.execute("CREATE SEQUENCE state_group_id_seq START WITH %s", (start_val,))
+
+
+def run_upgrade(*args, **kwargs):
+ pass
diff --git a/synapse/storage/databases/state/schema/delta/56/state_group_room_idx.sql b/synapse/storage/databases/state/schema/delta/56/state_group_room_idx.sql
new file mode 100644
index 00000000..7916ef18
--- /dev/null
+++ b/synapse/storage/databases/state/schema/delta/56/state_group_room_idx.sql
@@ -0,0 +1,17 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+INSERT INTO background_updates (update_name, progress_json) VALUES
+ ('state_groups_room_id_idx', '{}');
diff --git a/synapse/storage/databases/state/schema/full_schemas/54/full.sql b/synapse/storage/databases/state/schema/full_schemas/54/full.sql
new file mode 100644
index 00000000..35f97d6b
--- /dev/null
+++ b/synapse/storage/databases/state/schema/full_schemas/54/full.sql
@@ -0,0 +1,37 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE TABLE state_groups (
+ id BIGINT PRIMARY KEY,
+ room_id TEXT NOT NULL,
+ event_id TEXT NOT NULL
+);
+
+CREATE TABLE state_groups_state (
+ state_group BIGINT NOT NULL,
+ room_id TEXT NOT NULL,
+ type TEXT NOT NULL,
+ state_key TEXT NOT NULL,
+ event_id TEXT NOT NULL
+);
+
+CREATE TABLE state_group_edges (
+ state_group BIGINT NOT NULL,
+ prev_state_group BIGINT NOT NULL
+);
+
+CREATE INDEX state_group_edges_idx ON state_group_edges (state_group);
+CREATE INDEX state_group_edges_prev_idx ON state_group_edges (prev_state_group);
+CREATE INDEX state_groups_state_type_idx ON state_groups_state (state_group, type, state_key);
diff --git a/synapse/storage/databases/state/schema/full_schemas/54/sequence.sql.postgres b/synapse/storage/databases/state/schema/full_schemas/54/sequence.sql.postgres
new file mode 100644
index 00000000..fcd926c9
--- /dev/null
+++ b/synapse/storage/databases/state/schema/full_schemas/54/sequence.sql.postgres
@@ -0,0 +1,21 @@
+/* Copyright 2019 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CREATE SEQUENCE state_group_id_seq
+ START WITH 1
+ INCREMENT BY 1
+ NO MINVALUE
+ NO MAXVALUE
+ CACHE 1;
diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py
new file mode 100644
index 00000000..7f104ad9
--- /dev/null
+++ b/synapse/storage/databases/state/store.py
@@ -0,0 +1,644 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from collections import namedtuple
+from typing import Dict, Iterable, List, Set, Tuple
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventTypes
+from synapse.storage._base import SQLBaseStore
+from synapse.storage.database import DatabasePool
+from synapse.storage.databases.state.bg_updates import StateBackgroundUpdateStore
+from synapse.storage.state import StateFilter
+from synapse.storage.types import Cursor
+from synapse.storage.util.sequence import build_sequence_generator
+from synapse.types import StateMap
+from synapse.util.caches.descriptors import cached
+from synapse.util.caches.dictionary_cache import DictionaryCache
+
+logger = logging.getLogger(__name__)
+
+
+MAX_STATE_DELTA_HOPS = 100
+
+
+class _GetStateGroupDelta(
+ namedtuple("_GetStateGroupDelta", ("prev_group", "delta_ids"))
+):
+ """Return type of get_state_group_delta that implements __len__, which lets
+ us use the itrable flag when caching
+ """
+
+ __slots__ = []
+
+ def __len__(self):
+ return len(self.delta_ids) if self.delta_ids else 0
+
+
+class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore):
+ """A data store for fetching/storing state groups.
+ """
+
+ def __init__(self, database: DatabasePool, db_conn, hs):
+ super(StateGroupDataStore, self).__init__(database, db_conn, hs)
+
+ # Originally the state store used a single DictionaryCache to cache the
+ # event IDs for the state types in a given state group to avoid hammering
+ # on the state_group* tables.
+ #
+ # The point of using a DictionaryCache is that it can cache a subset
+ # of the state events for a given state group (i.e. a subset of the keys for a
+ # given dict which is an entry in the cache for a given state group ID).
+ #
+ # However, this poses problems when performing complicated queries
+ # on the store - for instance: "give me all the state for this group, but
+ # limit members to this subset of users", as DictionaryCache's API isn't
+ # rich enough to say "please cache any of these fields, apart from this subset".
+ # This is problematic when lazy loading members, which requires this behaviour,
+ # as without it the cache has no choice but to speculatively load all
+ # state events for the group, which negates the efficiency being sought.
+ #
+ # Rather than overcomplicating DictionaryCache's API, we instead split the
+ # state_group_cache into two halves - one for tracking non-member events,
+ # and the other for tracking member_events. This means that lazy loading
+ # queries can be made in a cache-friendly manner by querying both caches
+ # separately and then merging the result. So for the example above, you
+ # would query the members cache for a specific subset of state keys
+ # (which DictionaryCache will handle efficiently and fine) and the non-members
+ # cache for all state (which DictionaryCache will similarly handle fine)
+ # and then just merge the results together.
+ #
+ # We size the non-members cache to be smaller than the members cache as the
+ # vast majority of state in Matrix (today) is member events.
+
+ self._state_group_cache = DictionaryCache(
+ "*stateGroupCache*",
+ # TODO: this hasn't been tuned yet
+ 50000,
+ )
+ self._state_group_members_cache = DictionaryCache(
+ "*stateGroupMembersCache*", 500000,
+ )
+
+ def get_max_state_group_txn(txn: Cursor):
+ txn.execute("SELECT COALESCE(max(id), 0) FROM state_groups")
+ return txn.fetchone()[0]
+
+ self._state_group_seq_gen = build_sequence_generator(
+ self.database_engine, get_max_state_group_txn, "state_group_id_seq"
+ )
+
+ @cached(max_entries=10000, iterable=True)
+ def get_state_group_delta(self, state_group):
+ """Given a state group try to return a previous group and a delta between
+ the old and the new.
+
+ Returns:
+ (prev_group, delta_ids), where both may be None.
+ """
+
+ def _get_state_group_delta_txn(txn):
+ prev_group = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="state_group_edges",
+ keyvalues={"state_group": state_group},
+ retcol="prev_state_group",
+ allow_none=True,
+ )
+
+ if not prev_group:
+ return _GetStateGroupDelta(None, None)
+
+ delta_ids = self.db_pool.simple_select_list_txn(
+ txn,
+ table="state_groups_state",
+ keyvalues={"state_group": state_group},
+ retcols=("type", "state_key", "event_id"),
+ )
+
+ return _GetStateGroupDelta(
+ prev_group,
+ {(row["type"], row["state_key"]): row["event_id"] for row in delta_ids},
+ )
+
+ return self.db_pool.runInteraction(
+ "get_state_group_delta", _get_state_group_delta_txn
+ )
+
+ async def _get_state_groups_from_groups(
+ self, groups: List[int], state_filter: StateFilter
+ ) -> Dict[int, StateMap[str]]:
+ """Returns the state groups for a given set of groups from the
+ database, filtering on types of state events.
+
+ Args:
+ groups: list of state group IDs to query
+ state_filter: The state filter used to fetch state
+ from the database.
+ Returns:
+ Dict of state group to state map.
+ """
+ results = {}
+
+ chunks = [groups[i : i + 100] for i in range(0, len(groups), 100)]
+ for chunk in chunks:
+ res = await self.db_pool.runInteraction(
+ "_get_state_groups_from_groups",
+ self._get_state_groups_from_groups_txn,
+ chunk,
+ state_filter,
+ )
+ results.update(res)
+
+ return results
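+
+    # Shape sketch (hedged): the result maps each state group ID to a
+    # StateMap of (event type, state key) -> event ID, e.g.
+    #
+    #     {42: {("m.room.join_rules", ""): "$jr_event",
+    #           ("m.room.member", "@alice:example.com"): "$member_event"}}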
+
+ def _get_state_for_group_using_cache(self, cache, group, state_filter):
+ """Checks if group is in cache. See `_get_state_for_groups`
+
+ Args:
+ cache(DictionaryCache): the state group cache to use
+ group(int): The state group to lookup
+ state_filter (StateFilter): The state filter used to fetch state
+ from the database.
+
+ Returns 2-tuple (`state_dict`, `got_all`).
+ `got_all` is a bool indicating if we successfully retrieved all
+        requested state from the cache; if False we need to query the DB for the
+ missing state.
+ """
+ is_all, known_absent, state_dict_ids = cache.get(group)
+
+ if is_all or state_filter.is_full():
+            # Either we have everything or want everything; either way
+ # `is_all` tells us whether we've gotten everything.
+ return state_filter.filter_state(state_dict_ids), is_all
+
+ # tracks whether any of our requested types are missing from the cache
+ missing_types = False
+
+ if state_filter.has_wildcards():
+ # We don't know if we fetched all the state keys for the types in
+ # the filter that are wildcards, so we have to assume that we may
+ # have missed some.
+ missing_types = True
+ else:
+ # There aren't any wild cards, so `concrete_types()` returns the
+ # complete list of event types we're wanting.
+ for key in state_filter.concrete_types():
+ if key not in state_dict_ids and key not in known_absent:
+ missing_types = True
+ break
+
+ return state_filter.filter_state(state_dict_ids), not missing_types
+
+ async def _get_state_for_groups(
+ self, groups: Iterable[int], state_filter: StateFilter = StateFilter.all()
+ ) -> Dict[int, StateMap[str]]:
+ """Gets the state at each of a list of state groups, optionally
+ filtering by type/state_key
+
+ Args:
+ groups: list of state groups for which we want
+ to get the state.
+ state_filter: The state filter used to fetch state
+ from the database.
+ Returns:
+ Dict of state group to state map.
+ """
+
+ member_filter, non_member_filter = state_filter.get_member_split()
+
+ # Now we look them up in the member and non-member caches
+ (
+ non_member_state,
+ incomplete_groups_nm,
+ ) = self._get_state_for_groups_using_cache(
+ groups, self._state_group_cache, state_filter=non_member_filter
+ )
+
+ (member_state, incomplete_groups_m,) = self._get_state_for_groups_using_cache(
+ groups, self._state_group_members_cache, state_filter=member_filter
+ )
+
+ state = dict(non_member_state)
+ for group in groups:
+ state[group].update(member_state[group])
+
+ # Now fetch any missing groups from the database
+
+ incomplete_groups = incomplete_groups_m | incomplete_groups_nm
+
+ if not incomplete_groups:
+ return state
+
+ cache_sequence_nm = self._state_group_cache.sequence
+ cache_sequence_m = self._state_group_members_cache.sequence
+
+ # Help the cache hit ratio by expanding the filter a bit
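+ # (`return_expanded()` asks for a superset of the original request, so
+ # the cache entries we create below can satisfy more future lookups)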
+ db_state_filter = state_filter.return_expanded()
+
+ group_to_state_dict = await self._get_state_groups_from_groups(
+ list(incomplete_groups), state_filter=db_state_filter
+ )
+
+ # Now let's update the caches
+ self._insert_into_cache(
+ group_to_state_dict,
+ db_state_filter,
+ cache_seq_num_members=cache_sequence_m,
+ cache_seq_num_non_members=cache_sequence_nm,
+ )
+
+ # And finally update the result dict, by filtering out any extra
+ # stuff we pulled out of the database.
+ for group, group_state_dict in group_to_state_dict.items():
+ # We just replace any existing entries, as we will have loaded
+ # everything we need from the database anyway.
+ state[group] = state_filter.filter_state(group_state_dict)
+
+ return state
+
+ def _get_state_for_groups_using_cache(
+ self, groups: Iterable[int], cache: DictionaryCache, state_filter: StateFilter
+ ) -> Tuple[Dict[int, StateMap[str]], Set[int]]:
+ """Gets the state at each of a list of state groups, optionally
+ filtering by type/state_key, querying from a specific cache.
+
+ Args:
+ groups: list of state groups for which we want to get the state.
+ cache: the cache of group ids to state dicts which
+ we will pass through - either the normal state cache or the
+ specific members state cache.
+ state_filter: The state filter used to fetch state from the
+ database.
+
+ Returns:
+ Tuple of dict of state_group_id to state map of entries in the
+ cache, and the state group ids either missing from the cache or
+ incomplete.
+ """
+ results = {}
+ incomplete_groups = set()
+ for group in set(groups):
+ state_dict_ids, got_all = self._get_state_for_group_using_cache(
+ cache, group, state_filter
+ )
+ results[group] = state_dict_ids
+
+ if not got_all:
+ incomplete_groups.add(group)
+
+ return results, incomplete_groups
+
+ def _insert_into_cache(
+ self,
+ group_to_state_dict,
+ state_filter,
+ cache_seq_num_members,
+ cache_seq_num_non_members,
+ ):
+ """Inserts results from querying the database into the relevant cache.
+
+ Args:
+ group_to_state_dict (dict): The new entries pulled from the database.
+ Map from state group to state dict
+ state_filter (StateFilter): The state filter used to fetch state
+ from the database.
+ cache_seq_num_members (int): Sequence number of the member cache as
+ of the earlier cache lookup
+ cache_seq_num_non_members (int): Sequence number of the non-member
+ cache as of the earlier cache lookup
+ """
+
+ # We need to work out which types we've fetched from the DB for the
+ # member vs non-member caches. This should be as accurate as possible,
+ # but can be an underestimate (e.g. when we have wildcards)
+
+ member_filter, non_member_filter = state_filter.get_member_split()
+ if member_filter.is_full():
+ # We fetched all member events
+ member_types = None
+ else:
+ # `concrete_types()` will only return a subset when there are
+ # wildcards in the filter, but that's fine.
+ member_types = member_filter.concrete_types()
+
+ if non_member_filter.is_full():
+ # We fetched all non-member events
+ non_member_types = None
+ else:
+ non_member_types = non_member_filter.concrete_types()
+
+ for group, group_state_dict in group_to_state_dict.items():
+ state_dict_members = {}
+ state_dict_non_members = {}
+
+ for k, v in group_state_dict.items():
+ if k[0] == EventTypes.Member:
+ state_dict_members[k] = v
+ else:
+ state_dict_non_members[k] = v
+
+ self._state_group_members_cache.update(
+ cache_seq_num_members,
+ key=group,
+ value=state_dict_members,
+ fetched_keys=member_types,
+ )
+
+ self._state_group_cache.update(
+ cache_seq_num_non_members,
+ key=group,
+ value=state_dict_non_members,
+ fetched_keys=non_member_types,
+ )
+
+ def store_state_group(
+ self, event_id, room_id, prev_group, delta_ids, current_state_ids
+ ):
+ """Store a new set of state, returning a newly assigned state group.
+
+ Args:
+ event_id (str): The event ID for which the state was calculated
+ room_id (str)
+ prev_group (int|None): A previous state group for the room, optional.
+ delta_ids (dict|None): The delta between state at `prev_group` and
+ `current_state_ids`, if `prev_group` was given. Same format as
+ `current_state_ids`.
+ current_state_ids (dict): The state to store. Map of (type, state_key)
+ to event_id.
+
+ Returns:
+ Deferred[int]: The state group ID
+ """
+
+ def _store_state_group_txn(txn):
+ if current_state_ids is None:
+ # AFAIK, this can never happen
+ raise Exception("current_state_ids cannot be None")
+
+ state_group = self._state_group_seq_gen.get_next_id_txn(txn)
+
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="state_groups",
+ values={"id": state_group, "room_id": room_id, "event_id": event_id},
+ )
+
+ # We persist as a delta if we can, while also ensuring the chain
+ # of deltas isn't too long, as otherwise read performance degrades.
+ if prev_group:
+ is_in_db = self.db_pool.simple_select_one_onecol_txn(
+ txn,
+ table="state_groups",
+ keyvalues={"id": prev_group},
+ retcol="id",
+ allow_none=True,
+ )
+ if not is_in_db:
+ raise Exception(
+ "Trying to persist state with unpersisted prev_group: %r"
+ % (prev_group,)
+ )
+
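+ # _count_state_group_hops_txn counts how long the chain of deltas
+ # behind prev_group already is (by following state_group_edges); once
+ # it reaches MAX_STATE_DELTA_HOPS we store the full state below rather
+ # than yet another delta.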
+ potential_hops = self._count_state_group_hops_txn(txn, prev_group)
+ if prev_group and potential_hops < MAX_STATE_DELTA_HOPS:
+ self.db_pool.simple_insert_txn(
+ txn,
+ table="state_group_edges",
+ values={"state_group": state_group, "prev_state_group": prev_group},
+ )
+
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="state_groups_state",
+ values=[
+ {
+ "state_group": state_group,
+ "room_id": room_id,
+ "type": key[0],
+ "state_key": key[1],
+ "event_id": state_id,
+ }
+ for key, state_id in delta_ids.items()
+ ],
+ )
+ else:
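+ # No previous group to delta against (or the delta chain is too
+ # long), so store the complete state for this group.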
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="state_groups_state",
+ values=[
+ {
+ "state_group": state_group,
+ "room_id": room_id,
+ "type": key[0],
+ "state_key": key[1],
+ "event_id": state_id,
+ }
+ for key, state_id in current_state_ids.items()
+ ],
+ )
+
+ # Prefill the state group caches with this group.
+ # It's fine to use the sequence like this as the state group map
+ # is immutable. (If the map wasn't immutable then this prefill could
+ # race with another update)
+
+ current_member_state_ids = {
+ s: ev
+ for (s, ev) in current_state_ids.items()
+ if s[0] == EventTypes.Member
+ }
+ txn.call_after(
+ self._state_group_members_cache.update,
+ self._state_group_members_cache.sequence,
+ key=state_group,
+ value=dict(current_member_state_ids),
+ )
+
+ current_non_member_state_ids = {
+ s: ev
+ for (s, ev) in current_state_ids.items()
+ if s[0] != EventTypes.Member
+ }
+ txn.call_after(
+ self._state_group_cache.update,
+ self._state_group_cache.sequence,
+ key=state_group,
+ value=dict(current_non_member_state_ids),
+ )
+
+ return state_group
+
+ return self.db_pool.runInteraction("store_state_group", _store_state_group_txn)
+
+ def purge_unreferenced_state_groups(
+ self, room_id: str, state_groups_to_delete
+ ) -> defer.Deferred:
+ """Deletes no longer referenced state groups and de-deltas any state
+ groups that reference them.
+
+ Args:
+ room_id: The room the state groups belong to (must all be in the
+ same room).
+ state_groups_to_delete (Collection[int]): Set of all state groups
+ to delete.
+ """
+
+ return self.db_pool.runInteraction(
+ "purge_unreferenced_state_groups",
+ self._purge_unreferenced_state_groups,
+ room_id,
+ state_groups_to_delete,
+ )
+
+ def _purge_unreferenced_state_groups(self, txn, room_id, state_groups_to_delete):
+ logger.info(
+ "[purge] found %i state groups to delete", len(state_groups_to_delete)
+ )
+
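+ # Find the state groups whose prev_state_group points at a group we
+ # are about to delete; any of them that are not themselves being
+ # deleted must be converted from a delta into a full snapshot.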
+ rows = self.db_pool.simple_select_many_txn(
+ txn,
+ table="state_group_edges",
+ column="prev_state_group",
+ iterable=state_groups_to_delete,
+ keyvalues={},
+ retcols=("state_group",),
+ )
+
+ remaining_state_groups = {
+ row["state_group"]
+ for row in rows
+ if row["state_group"] not in state_groups_to_delete
+ }
+
+ logger.info(
+ "[purge] de-delta-ing %i remaining state groups",
+ len(remaining_state_groups),
+ )
+
+ # Now we turn the state groups that reference to-be-deleted state
+ # groups into non-delta versions.
+ for sg in remaining_state_groups:
+ logger.info("[purge] de-delta-ing remaining state group %s", sg)
+ curr_state = self._get_state_groups_from_groups_txn(txn, [sg])
+ curr_state = curr_state[sg]
+
+ self.db_pool.simple_delete_txn(
+ txn, table="state_groups_state", keyvalues={"state_group": sg}
+ )
+
+ self.db_pool.simple_delete_txn(
+ txn, table="state_group_edges", keyvalues={"state_group": sg}
+ )
+
+ self.db_pool.simple_insert_many_txn(
+ txn,
+ table="state_groups_state",
+ values=[
+ {
+ "state_group": sg,
+ "room_id": room_id,
+ "type": key[0],
+ "state_key": key[1],
+ "event_id": state_id,
+ }
+ for key, state_id in curr_state.items()
+ ],
+ )
+
+ logger.info("[purge] removing redundant state groups")
+ txn.executemany(
+ "DELETE FROM state_groups_state WHERE state_group = ?",
+ ((sg,) for sg in state_groups_to_delete),
+ )
+ txn.executemany(
+ "DELETE FROM state_groups WHERE id = ?",
+ ((sg,) for sg in state_groups_to_delete),
+ )
+
+ async def get_previous_state_groups(
+ self, state_groups: Iterable[int]
+ ) -> Dict[int, int]:
+ """Fetch the previous groups of the given state groups.
+
+ Args:
+ state_groups: The state groups to return the previous state groups for.
+
+ Returns:
+ A mapping from state group to previous state group.
+ """
+
+ rows = await self.db_pool.simple_select_many_batch(
+ table="state_group_edges",
+ column="prev_state_group",
+ iterable=state_groups,
+ keyvalues={},
+ retcols=("prev_state_group", "state_group"),
+ desc="get_previous_state_groups",
+ )
+
+ return {row["state_group"]: row["prev_state_group"] for row in rows}
+
+ def purge_room_state(self, room_id, state_groups_to_delete):
+ """Deletes all record of a room from state tables
+
+ Args:
+ room_id (str):
+ state_groups_to_delete (list[int]): State groups to delete
+ """
+
+ return self.db_pool.runInteraction(
+ "purge_room_state",
+ self._purge_room_state_txn,
+ room_id,
+ state_groups_to_delete,
+ )
+
+ def _purge_room_state_txn(self, txn, room_id, state_groups_to_delete):
+ # First we have to delete the state groups' state
+ logger.info("[purge] removing %s from state_groups_state", room_id)
+
+ self.db_pool.simple_delete_many_txn(
+ txn,
+ table="state_groups_state",
+ column="state_group",
+ iterable=state_groups_to_delete,
+ keyvalues={},
+ )
+
+ # ... and the state group edges
+ logger.info("[purge] removing %s from state_group_edges", room_id)
+
+ self.db_pool.simple_delete_many_txn(
+ txn,
+ table="state_group_edges",
+ column="state_group",
+ iterable=state_groups_to_delete,
+ keyvalues={},
+ )
+
+ # ... and the state groups
+ logger.info("[purge] removing %s from state_groups", room_id)
+
+ self.db_pool.simple_delete_many_txn(
+ txn,
+ table="state_groups",
+ column="id",
+ iterable=state_groups_to_delete,
+ keyvalues={},
+ )