From 32a59a6495f8d463f82ae52283159359a9961c25 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 23 Nov 2023 12:35:37 +0000 Subject: [PATCH] Keep track of `user_ips` and `monthly_active_users` when delegating auth (#16672) * Describe `insert_client_ip` * Pull out client_ips and MAU tracking to BaseAuth * Define HAS_AUTHLIB once in tests sick of copypasting * Track ips and token usage when delegating auth * Test that we track MAU and user_ips * Don't track `__oidc_admin` --- changelog.d/16672.feature | 1 + synapse/api/auth/base.py | 48 +++++++ synapse/api/auth/internal.py | 39 +----- synapse/api/auth/msc3861_delegated.py | 4 + synapse/storage/databases/main/client_ips.py | 21 +++ tests/config/test_oauth_delegation.py | 10 +- tests/handlers/test_oauth_delegation.py | 129 +++++++++++++++++-- tests/rest/admin/test_jwks.py | 8 +- tests/rest/client/test_keys.py | 8 +- tests/rest/test_well_known.py | 8 +- tests/utils.py | 7 + 11 files changed, 201 insertions(+), 82 deletions(-) create mode 100644 changelog.d/16672.feature diff --git a/changelog.d/16672.feature b/changelog.d/16672.feature new file mode 100644 index 000000000000..05ecf2220733 --- /dev/null +++ b/changelog.d/16672.feature @@ -0,0 +1 @@ +Restore tracking of requests and monthly active users when delegating authentication to an [MSC3861](https://github.com/matrix-org/synapse/pull/16672) OIDC provider. diff --git a/synapse/api/auth/base.py b/synapse/api/auth/base.py index 9321d6f18637..e2e3dc61b470 100644 --- a/synapse/api/auth/base.py +++ b/synapse/api/auth/base.py @@ -27,6 +27,8 @@ UnstableSpecAuthError, ) from synapse.appservice import ApplicationService +from synapse.http import get_request_user_agent +from synapse.http.site import SynapseRequest from synapse.logging.opentracing import trace from synapse.types import Requester, create_requester from synapse.util.cancellation import cancellable @@ -45,6 +47,9 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() + self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips + self._track_puppeted_user_ips = hs.config.api.track_puppeted_user_ips + async def check_user_in_room( self, room_id: str, @@ -349,3 +354,46 @@ async def get_appservice_user( return create_requester( effective_user_id, app_service=app_service, device_id=effective_device_id ) + + async def _record_request( + self, request: SynapseRequest, requester: Requester + ) -> None: + """Record that this request was made. + + This updates the client_ips and monthly_active_user tables. + """ + ip_addr = request.get_client_ip_if_available() + + if ip_addr and (not requester.app_service or self._track_appservice_user_ips): + user_agent = get_request_user_agent(request) + access_token = self.get_access_token_from_request(request) + + # XXX(quenting): I'm 95% confident that we could skip setting the + # device_id to "dummy-device" for appservices, and that the only impact + # would be some rows which whould not deduplicate in the 'user_ips' + # table during the transition + recorded_device_id = ( + "dummy-device" + if requester.device_id is None and requester.app_service is not None + else requester.device_id + ) + await self.store.insert_client_ip( + user_id=requester.authenticated_entity, + access_token=access_token, + ip=ip_addr, + user_agent=user_agent, + device_id=recorded_device_id, + ) + + # Track also the puppeted user client IP if enabled and the user is puppeting + if ( + requester.user.to_string() != requester.authenticated_entity + and self._track_puppeted_user_ips + ): + await self.store.insert_client_ip( + user_id=requester.user.to_string(), + access_token=access_token, + ip=ip_addr, + user_agent=user_agent, + device_id=requester.device_id, + ) diff --git a/synapse/api/auth/internal.py b/synapse/api/auth/internal.py index 36ee9c8b8fc8..985cbb12788c 100644 --- a/synapse/api/auth/internal.py +++ b/synapse/api/auth/internal.py @@ -22,7 +22,6 @@ InvalidClientTokenError, MissingClientTokenError, ) -from synapse.http import get_request_user_agent from synapse.http.site import SynapseRequest from synapse.logging.opentracing import active_span, force_tracing, start_active_span from synapse.types import Requester, create_requester @@ -48,8 +47,6 @@ def __init__(self, hs: "HomeServer"): self._account_validity_handler = hs.get_account_validity_handler() self._macaroon_generator = hs.get_macaroon_generator() - self._track_appservice_user_ips = hs.config.appservice.track_appservice_user_ips - self._track_puppeted_user_ips = hs.config.api.track_puppeted_user_ips self._force_tracing_for_users = hs.config.tracing.force_tracing_for_users @cancellable @@ -115,9 +112,6 @@ async def _wrapped_get_user_by_req( Once get_user_by_req has set up the opentracing span, this does the actual work. """ try: - ip_addr = request.get_client_ip_if_available() - user_agent = get_request_user_agent(request) - access_token = self.get_access_token_from_request(request) # First check if it could be a request from an appservice @@ -154,38 +148,7 @@ async def _wrapped_get_user_by_req( errcode=Codes.EXPIRED_ACCOUNT, ) - if ip_addr and ( - not requester.app_service or self._track_appservice_user_ips - ): - # XXX(quenting): I'm 95% confident that we could skip setting the - # device_id to "dummy-device" for appservices, and that the only impact - # would be some rows which whould not deduplicate in the 'user_ips' - # table during the transition - recorded_device_id = ( - "dummy-device" - if requester.device_id is None and requester.app_service is not None - else requester.device_id - ) - await self.store.insert_client_ip( - user_id=requester.authenticated_entity, - access_token=access_token, - ip=ip_addr, - user_agent=user_agent, - device_id=recorded_device_id, - ) - - # Track also the puppeted user client IP if enabled and the user is puppeting - if ( - requester.user.to_string() != requester.authenticated_entity - and self._track_puppeted_user_ips - ): - await self.store.insert_client_ip( - user_id=requester.user.to_string(), - access_token=access_token, - ip=ip_addr, - user_agent=user_agent, - device_id=requester.device_id, - ) + await self._record_request(request, requester) if requester.is_guest and not allow_guest: raise AuthError( diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py index 31bb035cc846..7373d815343b 100644 --- a/synapse/api/auth/msc3861_delegated.py +++ b/synapse/api/auth/msc3861_delegated.py @@ -227,6 +227,10 @@ async def get_user_by_req( # so that we don't provision the user if they don't have enough permission: requester = await self.get_user_by_access_token(access_token, allow_expired) + # Do not record requests from MAS using the virtual `__oidc_admin` user. + if access_token != self._admin_token: + await self._record_request(request, requester) + if not allow_guest and requester.is_guest: raise OAuthInsufficientScopeError([SCOPE_MATRIX_API]) diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index c006129625c2..d4b14aaebe7b 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -589,6 +589,27 @@ async def insert_client_ip( device_id: Optional[str], now: Optional[int] = None, ) -> None: + """Record that `user_id` used `access_token` from this `ip` address. + + This method does two things. + + 1. It queues up a row to be upserted into the `client_ips` table. These happen + periodically; see _update_client_ips_batch. + 2. It immediately records this user as having taken action for the purposes of + MAU tracking. + + Any DB writes take place on the background tasks worker, falling back to the + main process. If we're not that worker, this method emits a replication payload + to run this logic on that worker. + + Two caveats to note: + + - We only take action once per LAST_SEEN_GRANULARITY, to avoid spamming the + DB with writes. + - Requests using the sliding-sync proxy's user agent are excluded, as its + requests are not directly driven by end-users. This is a hack and we're not + very proud of it. + """ # The sync proxy continuously triggers /sync even if the user is not # present so should be excluded from user_ips entries. if user_agent == "sync-v3-proxy-": diff --git a/tests/config/test_oauth_delegation.py b/tests/config/test_oauth_delegation.py index 5c91031746e4..b1a9db02109b 100644 --- a/tests/config/test_oauth_delegation.py +++ b/tests/config/test_oauth_delegation.py @@ -22,15 +22,7 @@ from tests.server import get_clock, setup_test_homeserver from tests.unittest import TestCase, skip_unless -from tests.utils import default_config - -try: - import authlib # noqa: F401 - - HAS_AUTHLIB = True -except ImportError: - HAS_AUTHLIB = False - +from tests.utils import HAS_AUTHLIB, default_config # These are a few constants that are used as config parameters in the tests. SERVER_NAME = "test" diff --git a/tests/handlers/test_oauth_delegation.py b/tests/handlers/test_oauth_delegation.py index a72ecfdc97a9..a2b8e3d5621a 100644 --- a/tests/handlers/test_oauth_delegation.py +++ b/tests/handlers/test_oauth_delegation.py @@ -13,7 +13,8 @@ # limitations under the License. from http import HTTPStatus -from typing import Any, Dict, Union +from io import BytesIO +from typing import Any, Dict, Optional, Union from unittest.mock import ANY, AsyncMock, Mock from urllib.parse import parse_qs @@ -25,6 +26,8 @@ from signedjson.sign import sign_json from twisted.test.proto_helpers import MemoryReactor +from twisted.web.http_headers import Headers +from twisted.web.iweb import IResponse from synapse.api.errors import ( AuthError, @@ -33,23 +36,17 @@ OAuthInsufficientScopeError, SynapseError, ) +from synapse.http.site import SynapseRequest from synapse.rest import admin from synapse.rest.client import account, devices, keys, login, logout, register from synapse.server import HomeServer -from synapse.types import JsonDict +from synapse.types import JsonDict, UserID from synapse.util import Clock +from tests.server import FakeChannel from tests.test_utils import FakeResponse, get_awaitable_result -from tests.unittest import HomeserverTestCase, skip_unless -from tests.utils import mock_getRawHeaders - -try: - import authlib # noqa: F401 - - HAS_AUTHLIB = True -except ImportError: - HAS_AUTHLIB = False - +from tests.unittest import HomeserverTestCase, override_config, skip_unless +from tests.utils import HAS_AUTHLIB, checked_cast, mock_getRawHeaders # These are a few constants that are used as config parameters in the tests. SERVER_NAME = "test" @@ -75,6 +72,7 @@ SUBJECT = "abc-def-ghi" USERNAME = "test-user" USER_ID = "@" + USERNAME + ":" + SERVER_NAME +OIDC_ADMIN_USERID = f"@__oidc_admin:{SERVER_NAME}" async def get_json(url: str) -> JsonDict: @@ -134,7 +132,10 @@ def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: hs = self.setup_test_homeserver(proxied_http_client=self.http_client) - self.auth = hs.get_auth() + # Import this here so that we've checked that authlib is available. + from synapse.api.auth.msc3861_delegated import MSC3861DelegatedAuth + + self.auth = checked_cast(MSC3861DelegatedAuth, hs.get_auth()) return hs @@ -675,7 +676,8 @@ def test_admin_token(self) -> None: request.requestHeaders.getRawHeaders = mock_getRawHeaders() requester = self.get_success(self.auth.get_user_by_req(request)) self.assertEqual( - requester.user.to_string(), "@%s:%s" % ("__oidc_admin", SERVER_NAME) + requester.user.to_string(), + OIDC_ADMIN_USERID, ) self.assertEqual(requester.is_guest, False) self.assertEqual(requester.device_id, None) @@ -685,3 +687,102 @@ def test_admin_token(self) -> None: # There should be no call to the introspection endpoint self.http_client.request.assert_not_called() + + @override_config({"mau_stats_only": True}) + def test_request_tracking(self) -> None: + """Using an access token should update the client_ips and MAU tables.""" + # To start, there are no MAU users. + store = self.hs.get_datastores().main + mau = self.get_success(store.get_monthly_active_count()) + self.assertEqual(mau, 0) + + known_token = "token-token-GOOD-:)" + + async def mock_http_client_request( + method: str, + uri: str, + data: Optional[bytes] = None, + headers: Optional[Headers] = None, + ) -> IResponse: + """Mocked auth provider response.""" + assert method == "POST" + token = parse_qs(data)[b"token"][0].decode("utf-8") + if token == known_token: + return FakeResponse.json( + code=200, + payload={ + "active": True, + "scope": MATRIX_USER_SCOPE, + "sub": SUBJECT, + "username": USERNAME, + }, + ) + + return FakeResponse.json(code=200, payload={"active": False}) + + self.http_client.request = mock_http_client_request + + EXAMPLE_IPV4_ADDR = "123.123.123.123" + EXAMPLE_USER_AGENT = "httprettygood" + + # First test a known access token + channel = FakeChannel(self.site, self.reactor) + # type-ignore: FakeChannel is a mock of an HTTPChannel, not a proper HTTPChannel + req = SynapseRequest(channel, self.site) # type: ignore[arg-type] + req.client.host = EXAMPLE_IPV4_ADDR + req.requestHeaders.addRawHeader("Authorization", f"Bearer {known_token}") + req.requestHeaders.addRawHeader("User-Agent", EXAMPLE_USER_AGENT) + req.content = BytesIO(b"") + req.requestReceived( + b"GET", + b"/_matrix/client/v3/account/whoami", + b"1.1", + ) + channel.await_result() + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) + self.assertEqual(channel.json_body["user_id"], USER_ID, channel.json_body) + + # Expect to see one MAU entry, from the first request + mau = self.get_success(store.get_monthly_active_count()) + self.assertEqual(mau, 1) + + conn_infos = self.get_success( + store.get_user_ip_and_agents(UserID.from_string(USER_ID)) + ) + self.assertEqual(len(conn_infos), 1, conn_infos) + conn_info = conn_infos[0] + self.assertEqual(conn_info["access_token"], known_token) + self.assertEqual(conn_info["ip"], EXAMPLE_IPV4_ADDR) + self.assertEqual(conn_info["user_agent"], EXAMPLE_USER_AGENT) + + # Now test MAS making a request using the special __oidc_admin token + MAS_IPV4_ADDR = "127.0.0.1" + MAS_USER_AGENT = "masmasmas" + + channel = FakeChannel(self.site, self.reactor) + req = SynapseRequest(channel, self.site) # type: ignore[arg-type] + req.client.host = MAS_IPV4_ADDR + req.requestHeaders.addRawHeader( + "Authorization", f"Bearer {self.auth._admin_token}" + ) + req.requestHeaders.addRawHeader("User-Agent", MAS_USER_AGENT) + req.content = BytesIO(b"") + req.requestReceived( + b"GET", + b"/_matrix/client/v3/account/whoami", + b"1.1", + ) + channel.await_result() + self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) + self.assertEqual( + channel.json_body["user_id"], OIDC_ADMIN_USERID, channel.json_body + ) + + # Still expect to see one MAU entry, from the first request + mau = self.get_success(store.get_monthly_active_count()) + self.assertEqual(mau, 1) + + conn_infos = self.get_success( + store.get_user_ip_and_agents(UserID.from_string(OIDC_ADMIN_USERID)) + ) + self.assertEqual(conn_infos, []) diff --git a/tests/rest/admin/test_jwks.py b/tests/rest/admin/test_jwks.py index a9a6191c7346..842e92c3d0e5 100644 --- a/tests/rest/admin/test_jwks.py +++ b/tests/rest/admin/test_jwks.py @@ -19,13 +19,7 @@ from synapse.rest.synapse.client import build_synapse_client_resource_tree from tests.unittest import HomeserverTestCase, override_config, skip_unless - -try: - import authlib # noqa: F401 - - HAS_AUTHLIB = True -except ImportError: - HAS_AUTHLIB = False +from tests.utils import HAS_AUTHLIB @skip_unless(HAS_AUTHLIB, "requires authlib") diff --git a/tests/rest/client/test_keys.py b/tests/rest/client/test_keys.py index 9f81a695fa80..a6023dff7abf 100644 --- a/tests/rest/client/test_keys.py +++ b/tests/rest/client/test_keys.py @@ -30,13 +30,7 @@ from tests import unittest from tests.http.server._base import make_request_with_cancellation_test from tests.unittest import override_config - -try: - import authlib # noqa: F401 - - HAS_AUTHLIB = True -except ImportError: - HAS_AUTHLIB = False +from tests.utils import HAS_AUTHLIB class KeyQueryTestCase(unittest.HomeserverTestCase): diff --git a/tests/rest/test_well_known.py b/tests/rest/test_well_known.py index 377243a1706d..7931a70abb29 100644 --- a/tests/rest/test_well_known.py +++ b/tests/rest/test_well_known.py @@ -16,13 +16,7 @@ from synapse.rest.well_known import well_known_resource from tests import unittest - -try: - import authlib # noqa: F401 - - HAS_AUTHLIB = True -except ImportError: - HAS_AUTHLIB = False +from tests.utils import HAS_AUTHLIB class WellKnownTests(unittest.HomeserverTestCase): diff --git a/tests/utils.py b/tests/utils.py index a0c87ad628bf..e0066fe15a7b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -30,6 +30,13 @@ from synapse.storage.engines import create_engine from synapse.storage.prepare_database import prepare_database +try: + import authlib # noqa: F401 + + HAS_AUTHLIB = True +except ImportError: + HAS_AUTHLIB = False + # set this to True to run the tests against postgres instead of sqlite. # # When running under postgres, we first create a base database with the name