From 0b99662b564f03f05f0a5c1ee3ab6be792e71621 Mon Sep 17 00:00:00 2001 From: Matt Hammerly Date: Fri, 20 Dec 2024 11:42:19 -0500 Subject: [PATCH] disable populating log_context based on commit_sha/commit_id --- helpers/log_context.py | 55 ++++++-------------------- helpers/tests/unit/test_log_context.py | 16 +++----- 2 files changed, 17 insertions(+), 54 deletions(-) diff --git a/helpers/log_context.py b/helpers/log_context.py index b2699db1c..2a04c5586 100644 --- a/helpers/log_context.py +++ b/helpers/log_context.py @@ -6,7 +6,7 @@ from sentry_sdk import get_current_span from shared.config import get_config -from database.models.core import Commit, Owner, Repository +from database.models.core import Owner, Repository log = logging.getLogger("log_context") @@ -28,6 +28,11 @@ class LogContext: repo_name: str | None = None repo_id: int | None = None commit_sha: str | None = None + + # TODO: begin populating this again or remove it + # we can populate this again if we reduce query load by passing the Commit, + # Owner, and Repository models we use to populate the log context into the + # various task implementations so they don't fetch the same models again commit_id: int | None = None checkpoints_data: dict = field(default_factory=lambda: {}) @@ -58,48 +63,13 @@ def populate_from_sqlalchemy(self, dbsession): return try: - can_identify_commit = self.commit_id is not None or ( - self.commit_sha is not None and self.repo_id is not None - ) - - # commit_id or (commit_sha + repo_id) is enough to get everything else - if can_identify_commit: - query = ( - dbsession.query( - Commit.id_, - Commit.commitid, - Repository.repoid, - Repository.name, - Owner.ownerid, - Owner.username, - Owner.service, - Owner.plan, - ) - .join(Commit.repository) - .join(Repository.owner) - ) - - if self.commit_id is not None: - query = query.filter(Commit.id_ == self.commit_id) - else: - query = query.filter( - Commit.commitid == self.commit_sha, - Commit.repoid == self.repo_id, - ) - - ( - self.commit_id, - self.commit_sha, - self.repo_id, - self.repo_name, - self.owner_id, - self.owner_username, - self.owner_service, - self.owner_plan, - ) = query.first() + # - commit_id (or commit_sha + repo_id) is enough to get everything else + # - however, this slams the commit table and we rarely really need the + # DB PK for a commit, so we don't do this. + # - repo_id is enough to get repo and owner + # - owner_id is just enough to get owner - # repo_id is enough to get repo and owner - elif self.repo_id: + if self.repo_id: query = ( dbsession.query( Repository.name, @@ -120,7 +90,6 @@ def populate_from_sqlalchemy(self, dbsession): self.owner_plan, ) = query.first() - # owner_id is just enough for owner elif self.owner_id: query = dbsession.query( Owner.username, Owner.service, Owner.plan diff --git a/helpers/tests/unit/test_log_context.py b/helpers/tests/unit/test_log_context.py index 3a9f94451..27bc58dd6 100644 --- a/helpers/tests/unit/test_log_context.py +++ b/helpers/tests/unit/test_log_context.py @@ -75,18 +75,11 @@ def test_populate_just_commit_sha(dbsession): def test_populate_just_commit_id(dbsession): - owner, repo, commit = create_db_records(dbsession) + _owner, _repo, commit = create_db_records(dbsession) log_context = LogContext(commit_id=commit.id_) log_context.populate_from_sqlalchemy(dbsession) assert log_context == LogContext( - repo_id=repo.repoid, - repo_name="example-python", - owner_id=owner.ownerid, - owner_username="codecove2e", - owner_service="github", - owner_plan="users-basic", - commit_sha=commit.commitid, commit_id=commit.id_, ) @@ -104,7 +97,6 @@ def test_populate_repo_and_commit_sha(dbsession): owner_service="github", owner_plan="users-basic", commit_sha=commit.commitid, - commit_id=commit.id_, ) @@ -135,7 +127,9 @@ async def check_context_in_coroutine(): def test_as_dict(dbsession, mocker): owner, repo, commit = create_db_records(dbsession) - log_context = LogContext(commit_id=commit.id_, task_name="foo", task_id="bar") + log_context = LogContext( + commit_sha=commit.commitid, repo_id=repo.repoid, task_name="foo", task_id="bar" + ) log_context.populate_from_sqlalchemy(dbsession) mock_span = mocker.Mock() @@ -147,7 +141,7 @@ def test_as_dict(dbsession, mocker): assert log_context.as_dict() == { "task_name": "foo", "task_id": "bar", - "commit_id": commit.id_, + "commit_id": None, "commit_sha": commit.commitid, "repo_id": repo.repoid, "repo_name": repo.name,