diff --git a/database/models/reports.py b/database/models/reports.py
index 249581e49..9a899a9e1 100644
--- a/database/models/reports.py
+++ b/database/models/reports.py
@@ -84,7 +84,7 @@ class Upload(CodecovBaseModel, MixinBaseClass):
     name = Column(types.String(100))
     provider = Column(types.String(50))
     report_id = Column(types.BigInteger, ForeignKey("reports_commitreport.id"))
-    report = relationship(
+    report: CommitReport = relationship(
         "CommitReport", foreign_keys=[report_id], back_populates="uploads"
     )
     state = Column(types.String(100), nullable=False)
diff --git a/rollouts/__init__.py b/rollouts/__init__.py
index d8db69c21..3ad0271f5 100644
--- a/rollouts/__init__.py
+++ b/rollouts/__init__.py
@@ -22,3 +22,11 @@ def repo_slug(repo: Repository) -> str:
         "gitlab/codecov": "enabled",
     },
 )
+
+# Eventually we want all repos to use this
+# This flag will just help us with the rollout process
+USE_LABEL_INDEX_IN_REPORT_PROCESSING_BY_REPO_SLUG = Feature(
+    "use_label_index_in_report_processing",
+    0.0,
+    overrides={"github/giovanni-guidini/sentry": "enabled"},
+)
diff --git a/services/archive.py b/services/archive.py
index e85d94d69..023cb297a 100644
--- a/services/archive.py
+++ b/services/archive.py
@@ -4,11 +4,12 @@
 from datetime import datetime
 from enum import Enum
 from hashlib import md5
-from typing import Any
+from typing import Dict
 from uuid import uuid4
 
 from shared.config import get_config
 from shared.storage.base import BaseStorageService
+from shared.storage.exceptions import FileNotInStorageError
 from shared.utils.ReportEncoder import ReportEncoder
 
 from helpers.metrics import metrics
@@ -19,6 +20,9 @@ class MinioEndpoints(Enum):
     chunks = "{version}/repos/{repo_hash}/commits/{commitid}/{chunks_file_name}.txt"
+    label_index = (
+        "{version}/repos/{repo_hash}/commits/{commitid}/{label_index_file_name}.json"
+    )
     json_data = "{version}/repos/{repo_hash}/commits/{commitid}/json_data/{table}/{field}/{external_id}.json"
     json_data_no_commit = (
         "{version}/repos/{repo_hash}/json_data/{table}/{field}/{external_id}.json"
     )
 
@@ -226,6 +230,20 @@ def write_json_data_to_storage(
         self.write_file(path, stringified_data)
         return path
 
+    def write_label_index(self, commit_sha, json_data, report_code=None) -> str:
+        label_index_file_name = (
+            report_code + "_" if report_code is not None else ""
+        ) + "labels_index"
+        path = MinioEndpoints.label_index.get_path(
+            version="v4",
+            repo_hash=self.storage_hash,
+            commitid=commit_sha,
+            label_index_file_name=label_index_file_name,
+        )
+        string_data = json.dumps(json_data)
+        self.write_file(path, string_data)
+        return path
+
     """
     Convenience method to write a chunks.txt file to storage.
""" @@ -286,6 +304,22 @@ def read_chunks(self, commit_sha, report_code=None) -> str: return self.read_file(path).decode(errors="replace") + def read_label_index(self, commit_sha, report_code=None) -> Dict[str, str]: + label_index_file_name = ( + report_code + "_" if report_code is not None else "" + ) + "labels_index" + path = MinioEndpoints.label_index.get_path( + version="v4", + repo_hash=self.storage_hash, + commitid=commit_sha, + label_index_file_name=label_index_file_name, + ) + + try: + return json.loads(self.read_file(path).decode(errors="replace")) + except FileNotInStorageError: + return dict() + """ Delete a chunk file from the archive """ diff --git a/services/report/__init__.py b/services/report/__init__.py index 5bc59a5c4..405baa031 100644 --- a/services/report/__init__.py +++ b/services/report/__init__.py @@ -786,7 +786,12 @@ def build_report_from_raw_content( try: with metrics.timer(f"{self.metrics_prefix}.process_report") as t: result = process_raw_upload( - self.current_yaml, master, raw_uploaded_report, flags, session + self.current_yaml, + master, + raw_uploaded_report, + flags, + session, + upload=upload, ) report = result.report log.info( diff --git a/services/report/labels_index.py b/services/report/labels_index.py new file mode 100644 index 000000000..8140e3f0d --- /dev/null +++ b/services/report/labels_index.py @@ -0,0 +1,46 @@ +from typing import Dict + +from shared.reports.resources import Report + +from database.models.reports import CommitReport +from services.archive import ArchiveService + + +class LabelsIndexService(object): + _archive_client: ArchiveService + + def __init__(self, commit_report: CommitReport) -> None: + self.commit_report = commit_report + self._archive_client = ArchiveService( + repository=commit_report.commit.repository + ) + self.commit_sha = commit_report.commit.commitid + + def set_label_idx(self, report: Report): + # TODO: Needs shared update. + # if report._labels_index is not None: + # raise Exception( + # "Trying to set labels_index of Report, but it's already set" + # ) + # Load label index from storage + # JSON uses strings are keys, but we are using ints. + map_with_str_keys = self._archive_client.read_label_index( + self.commit_sha, self.commit_report.code + ) + loaded_index = {int(k): v for k, v in map_with_str_keys.items()} + return loaded_index + # TODO: Needs shared update. 
+        # report.set_label_idx(loaded_index)
+
+    def unset_label_idx(self, report: Report, label_index: Dict[str, str]):
+        # Write the updated index back into storage
+        # TODO: Needs shared update
+        # self._archive_client.write_label_index(
+        #     self.commit_sha, report._labels_index, self.commit_report.code
+        # )
+        self._archive_client.write_label_index(
+            self.commit_sha, label_index, self.commit_report.code
+        )
+        # Remove reference to it
+        # TODO: Needs shared update
+        # report.unset_label_idx()
diff --git a/services/report/raw_upload_processor.py b/services/report/raw_upload_processor.py
index 4ac1ccee6..1a81535be 100644
--- a/services/report/raw_upload_processor.py
+++ b/services/report/raw_upload_processor.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+import json
 import logging
 import random
 import typing
@@ -9,10 +10,12 @@
 from shared.reports.resources import Report
 from shared.utils.sessions import Session, SessionType
 
+from database.models.reports import Upload
 from helpers.exceptions import ReportEmptyError
 from helpers.labels import get_all_report_labels, get_labels_per_session
+from rollouts import USE_LABEL_INDEX_IN_REPORT_PROCESSING_BY_REPO_SLUG, repo_slug
 from services.path_fixer import PathFixer
-from services.report.fixes import get_fixes_from_raw
+from services.report.labels_index import LabelsIndexService
 from services.report.parser.types import ParsedRawReport
 from services.report.report_builder import ReportBuilder, SpecialLabelsEnum
 from services.report.report_processor import process_report
@@ -48,7 +51,12 @@ class UploadProcessingResult(object):
 
 @sentry_sdk.trace
 def process_raw_upload(
-    commit_yaml, original_report, reports: ParsedRawReport, flags, session=None
+    commit_yaml,
+    original_report,
+    reports: ParsedRawReport,
+    flags,
+    session=None,
+    upload: Upload = None,
 ) -> UploadProcessingResult:
     toc, env = None, None
 
@@ -129,7 +137,11 @@ def process_raw_upload(
     if not temporary_report:
         raise ReportEmptyError("No files found in report.")
     session_manipulation_result = _adjust_sessions(
-        original_report, temporary_report, session, commit_yaml
+        original_report,
+        temporary_report,
+        to_merge_session=session,
+        current_yaml=commit_yaml,
+        upload=upload,
     )
     original_report.merge(temporary_report, joined=joined)
     session.totals = temporary_report.totals
@@ -147,7 +159,16 @@ class SessionAdjustmentResult(object):
     partially_deleted_sessions: set
 
 
-def _adjust_sessions(original_report, to_merge_report, to_merge_session, current_yaml):
+# RUSTIFYME
+@sentry_sdk.trace
+def _adjust_sessions(
+    original_report: Report,
+    to_merge_report: Report,
+    to_merge_session,
+    current_yaml,
+    *,
+    upload: Upload = None,
+):
     session_ids_to_fully_delete = []
     session_ids_to_partially_delete = []
     to_merge_flags = to_merge_session.flags or []
@@ -164,6 +185,24 @@ def _adjust_sessions(original_report, to_merge_report, to_merge_session, current
         for f in flags_under_carryforward_rules
         if f not in to_partially_overwrite_flags
     ]
+    if upload is None and to_partially_overwrite_flags:
+        log.warning("Upload is None, but there are partial_overwrite_flags present")
+    if (
+        upload
+        and USE_LABEL_INDEX_IN_REPORT_PROCESSING_BY_REPO_SLUG.check_value(
+            repo_slug(upload.report.commit.repository), default=False
+        )
+        and to_partially_overwrite_flags
+    ):
+        label_index_service = LabelsIndexService(upload.report)
+        # TODO: Needs shared update
+        # if original_report._labels_index is None:
+        #     label_index_service.set_label_idx(original_report)
+        # # Make sure that the labels in the reports are in a good state to merge them
+        # make_sure_orginal_report_is_using_label_ids(original_report)
+        # make_sure_label_indexes_match(original_report, to_merge_report)
+        # # After this point we don't need the label index anymore, so we can release it to save memory
+        # label_index_service.unset_label_idx(original_report)
     if to_fully_overwrite_flags or to_partially_overwrite_flags:
         for sess_id, curr_sess in original_report.sessions.items():
             if curr_sess.session_type == SessionType.carriedforward:
diff --git a/services/report/tests/unit/test_labels_index.py b/services/report/tests/unit/test_labels_index.py
new file mode 100644
index 000000000..7bdc3650d
--- /dev/null
+++ b/services/report/tests/unit/test_labels_index.py
@@ -0,0 +1,85 @@
+import pytest
+from shared.reports.resources import Report
+
+from database.tests.factories.core import ReportFactory
+from helpers.labels import SpecialLabelsEnum
+from services.report.labels_index import ArchiveService, LabelsIndexService
+
+
+class TestLabelsIndex(object):
+    def test_init(self, dbsession, mocker):
+        commit_report = ReportFactory()
+        dbsession.add(commit_report)
+        dbsession.flush()
+
+        labels_index_service = LabelsIndexService(commit_report)
+        assert labels_index_service._archive_client is not None
+        assert labels_index_service.commit_sha == commit_report.commit.commitid
+
+    def test_set_label_idx(self, dbsession, mocker):
+        commit_report = ReportFactory()
+        dbsession.add(commit_report)
+        dbsession.flush()
+        # Notice that the keys are strings
+        # because self._archive_client.read_label_index returns the contents of a JSON file,
+        # and JSON can only have string keys.
+        sample_label_index = {
+            "0": SpecialLabelsEnum.CODECOV_ALL_LABELS_PLACEHOLDER.corresponding_label,
+            "1": "some_label",
+            "2": "another_label",
+        }
+        mocker.patch.object(
+            ArchiveService, "read_label_index", return_value=sample_label_index
+        )
+        report = Report()
+        # TODO: Needs shared update
+        # assert report._labels_index == None
+        label_service = LabelsIndexService(commit_report)
+        res = label_service.set_label_idx(report)
+        assert res == {
+            0: SpecialLabelsEnum.CODECOV_ALL_LABELS_PLACEHOLDER.corresponding_label,
+            1: "some_label",
+            2: "another_label",
+        }
+
+    # TODO: Needs shared update
+    # def test_set_label_idx_already_set(self, dbsession, mocker):
+    #     commit_report = ReportFactory()
+    #     dbsession.add(commit_report)
+    #     dbsession.flush()
+    #     mock_read = mocker.patch.object(ArchiveService, "read_label_index")
+    #     sample_label_index = {
+    #         0: SpecialLabelsEnum.CODECOV_ALL_LABELS_PLACEHOLDER.corresponding_label,
+    #         1: "some_label",
+    #         2: "another_label",
+    #     }
+    #     report = Report()
+    #     report._labels_index = sample_label_index
+    #     with pytest.raises(Exception) as exp:
+    #         label_service = LabelsIndexService(commit_report)
+    #         label_service.set_label_idx(report)
+    #     mock_read.assert_not_called()
+    #     assert (
+    #         str(exp.value)
+    #         == "Trying to set labels_index of Report, but it's already set"
+    #     )
+
+    def test_unset_label_idx(self, dbsession, mocker):
+        commit_report = ReportFactory()
+        dbsession.add(commit_report)
+        dbsession.flush()
+        sample_label_index = {
+            0: SpecialLabelsEnum.CODECOV_ALL_LABELS_PLACEHOLDER.corresponding_label,
+            1: "some_label",
+            2: "another_label",
+        }
+        mock_write = mocker.patch.object(ArchiveService, "write_label_index")
+        report = Report()
+        # TODO: Needs shared update
+        # report._labels_index = sample_label_index
+        label_service = LabelsIndexService(commit_report)
+        label_service.unset_label_idx(report, sample_label_index)
+        # assert report._labels_index == None
+        mock_write.assert_called_with(
+            commit_report.commit.commitid, sample_label_index, commit_report.code
+        )
diff --git a/services/report/tests/unit/test_sessions.py b/services/report/tests/unit/test_sessions.py
index 4f4529c2f..f7179fbbd 100644
--- a/services/report/tests/unit/test_sessions.py
+++ b/services/report/tests/unit/test_sessions.py
@@ -1,4 +1,5 @@
 import pytest
+from mock import MagicMock
 from shared.reports.editable import EditableReport, EditableReportFile
 from shared.reports.resources import (
     LineSession,
@@ -11,6 +12,7 @@
 from shared.reports.types import CoverageDatapoint
 from shared.yaml import UserYaml
 
+from database.tests.factories.core import RepositoryFactory
 from helpers.labels import SpecialLabelsEnum
 from services.report.raw_upload_processor import (
     SessionAdjustmentResult,
@@ -415,7 +417,9 @@ def test_adjust_sessions_full_cf_only(self, sample_first_report):
             },
         }
-    def test_adjust_sessions_partial_cf_only_no_changes(self, sample_first_report):
+    def test_adjust_sessions_partial_cf_only_no_changes(
+        self, sample_first_report, mocker
+    ):
         first_to_merge_session = Session(flags=["enterprise"], id=3)
         second_report = Report(
             sessions={first_to_merge_session.id: first_to_merge_session}
         )
@@ -433,12 +437,72 @@ def test_adjust_sessions_partial_cf_only_no_changes(self, sample_first_report):
                 }
             }
         )
+        mock_label_index_service = mocker.patch(
+            "services.report.raw_upload_processor.LabelsIndexService"
+        )
         first_value = self.convert_report_to_better_readable(sample_first_report)
         assert _adjust_sessions(
             sample_first_report, second_report, first_to_merge_session, current_yaml
         ) == SessionAdjustmentResult([], [0])
         after_result = self.convert_report_to_better_readable(sample_first_report)
         assert after_result == first_value
+        mock_label_index_service.assert_not_called()
+
+    def test_adjust_sessions_partial_cf_only_no_changes_encoding_labels(
+        self, sample_first_report, mocker
+    ):
+        first_to_merge_session = Session(flags=["enterprise"], id=3)
+        second_report = Report(
+            sessions={first_to_merge_session.id: first_to_merge_session}
+        )
+        current_yaml = UserYaml(
+            {
+                "flag_management": {
+                    "individual_flags": [
+                        {
+                            "name": "enterprise",
+                            "carryforward_mode": "labels",
+                            "carryforward": True,
+                        }
+                    ]
+                }
+            }
+        )
+        first_value = self.convert_report_to_better_readable(sample_first_report)
+        upload = MagicMock(
+            name="fake_upload",
+            **{
+                "report": MagicMock(
+                    name="fake_commit_report",
+                    **{
+                        "code": None,
+                        "commit": MagicMock(
+                            name="fake_commit",
+                            **{
+                                "repository": RepositoryFactory(
+                                    name="sentry",
+                                    owner__username="giovanni-guidini",
+                                    owner__service="github",
+                                )
+                            }
+                        ),
+                    }
+                )
+            }
+        )
+        mock_label_index_service = mocker.patch(
+            "services.report.raw_upload_processor.LabelsIndexService"
+        )
+        assert _adjust_sessions(
+            sample_first_report,
+            second_report,
+            first_to_merge_session,
+            current_yaml,
+            upload=upload,
+        ) == SessionAdjustmentResult([], [0])
+        after_result = self.convert_report_to_better_readable(sample_first_report)
+        assert after_result == first_value
+        assert mock_label_index_service.call_count == 1
 
     def test_adjust_sessions_partial_cf_only_some_changes(self, sample_first_report):
         first_to_merge_session = Session(flags=["enterprise"], id=3)
diff --git a/services/tests/unit/test_archive_service.py b/services/tests/unit/test_archive_service.py
index 0d34c7645..8cff7d25a 100644
--- a/services/tests/unit/test_archive_service.py
+++ b/services/tests/unit/test_archive_service.py
@@ -1,8 +1,10 @@
 import json
 
 from shared.storage import MinioStorageService
+from shared.storage.exceptions import FileNotInStorageError
 
 from database.tests.factories import RepositoryFactory
+from database.tests.factories.core import CommitFactory
 from services.archive import ArchiveService
 from test_utils.base import BaseTestCase
 
@@ -136,3 +138,80 @@ def test_write_report_details_to_storage_no_commitid(self, mocker, dbsession):
             is_already_gzipped=False,
             reduced_redundancy=False,
         )
+
+
+class TestLabelIndex(object):
+    def test_write_label_index_to_storage(self, mocker, dbsession):
+        commit = CommitFactory()
+        dbsession.add(commit)
+        dbsession.flush()
+        mock_write_file = mocker.patch.object(MinioStorageService, "write_file")
+        archive_service = ArchiveService(repository=commit.repository)
+        data = {1: "some_label", 2: "another_label", 3: "yet_another_label"}
+        path_for_default_code = archive_service.write_label_index(
+            commit.commitid, data, report_code=None
+        )
+        assert (
+            path_for_default_code
+            == f"v4/repos/{archive_service.storage_hash}/commits/{commit.commitid}/labels_index.json"
+        )
+        mock_write_file.assert_called_with(
+            archive_service.root,
+            path_for_default_code,
+            json.dumps(data),
+            is_already_gzipped=False,
+            reduced_redundancy=False,
+        )
+
+        path_for_different_code = archive_service.write_label_index(
+            commit.commitid, data, report_code="local"
+        )
+        assert (
+            path_for_different_code
+            == f"v4/repos/{archive_service.storage_hash}/commits/{commit.commitid}/local_labels_index.json"
+        )
+        mock_write_file.assert_called_with(
+            archive_service.root,
+            path_for_different_code,
+            json.dumps(data),
+            is_already_gzipped=False,
+            reduced_redundancy=False,
+        )
+
+    def test_read_label_index_from_storage(self, mocker, dbsession):
+        commit = CommitFactory()
+        dbsession.add(commit)
+        dbsession.flush()
+        # Notice that the keys are strings. JSON uses strings as keys.
+        # It's not the responsibility of read_label_index to fix this detail
+        data = {"1": "some_label", "2": "another_label", "3": "yet_another_label"}
+        mock_read_file = mocker.patch.object(
+            ArchiveService, "read_file", return_value=json.dumps(data).encode()
+        )
+        archive_service = ArchiveService(repository=commit.repository)
+
+        assert archive_service.read_label_index(commit.commitid) == data
+        mock_read_file.assert_called_with(
+            f"v4/repos/{archive_service.storage_hash}/commits/{commit.commitid}/labels_index.json"
+        )
+
+        assert (
+            archive_service.read_label_index(commit.commitid, report_code="local")
+            == data
+        )
+        mock_read_file.assert_called_with(
+            f"v4/repos/{archive_service.storage_hash}/commits/{commit.commitid}/local_labels_index.json"
+        )
+
+    def test_read_label_index_from_storage_file_not_found(self, mocker, dbsession):
+        commit = CommitFactory()
+        dbsession.add(commit)
+        dbsession.flush()
+        mock_read_file = mocker.patch.object(
+            ArchiveService, "read_file", side_effect=FileNotInStorageError
+        )
+        archive_service = ArchiveService(repository=commit.repository)
+        assert archive_service.read_label_index(commit.commitid) == {}
+        mock_read_file.assert_called_with(
+            f"v4/repos/{archive_service.storage_hash}/commits/{commit.commitid}/labels_index.json"
+        )
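
A minimal sketch of the label-index key round trip that write_label_index, read_label_index, and LabelsIndexService.set_label_idx rely on, assuming only the behavior shown in the hunks above; the label values are placeholders borrowed from the tests, not real data. The in-memory index uses int keys, JSON storage forces them to strings, and set_label_idx converts them back.

import json

# Int-keyed index as built during report processing (values are illustrative only)
label_index = {0: "placeholder_label", 1: "some_label", 2: "another_label"}

stored = json.dumps(label_index)  # what write_label_index persists; keys are serialized as strings
loaded = json.loads(stored)       # what read_label_index hands back, string keys included
assert set(loaded) == {"0", "1", "2"}

restored = {int(k): v for k, v in loaded.items()}  # the conversion set_label_idx performs
assert restored == label_index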