From 707b8d896253566ddc2da1890bbd9026c3d3c1bd Mon Sep 17 00:00:00 2001 From: Arik Gortsunian Date: Thu, 26 Dec 2024 16:35:00 +0200 Subject: [PATCH] [Core] Added logs for JQ misconfigured mappings (#1265) created a test to simulate wrong mapping and validated for single and multiple data rows # Description What - following a bug ticket https://getport.atlassian.net/browse/PORT-12112, I have created a test to simulate a wrong mapping and validated it for single and multiple data rows Why - when the customer misconfigures a mapping we do not display the misconfigured fields and thus the customer can't understand the reason for the non-working pipe How - I've added a misconfiguration dict to map all of the misconfigured mappings. The logs shown prior to this change: ``` 2024-12-24 13:00:54.025 | DEBUG | port_ocean.utils.queue_utils:_start_processor_worker:21 - Processing async task 2024-12-24 13:00:54.036 | DEBUG | port_ocean.core.handlers.entity_processor.jq_entity_processor:_parse_items:232 - Finished parsing raw results into entities with 0 errors. errors: [] ``` after the change: ``` 2024-12-24 13:00:54.024 | INFO | port_ocean.core.handlers.entity_processor.jq_entity_processor:_parse_items:221 - Parsing 2 raw results into entities 2024-12-24 13:00:54.025 | DEBUG | port_ocean.utils.queue_utils:_start_processor_worker:21 - Processing async task 2024-12-24 13:00:54.036 | DEBUG | port_ocean.core.handlers.entity_processor.jq_entity_processor:_parse_items:232 - Finished parsing raw results into entities with 0 errors. errors: [] 2024-12-24 13:00:54.036 | INFO | The mapping resulted with invalid values for identifier, blueprint, properties. 
Mapping result: {'identifier': '.ark', 'foo': '.bazbar', 'desc': '.foobar', 'name': '.bar.baz'} ``` --- CHANGELOG.md | 7 +++ .../entity_processor/jq_entity_processor.py | 54 ++++++++++++++++--- .../core/integrations/mixins/sync_raw.py | 4 +- port_ocean/core/ocean_types.py | 13 ++++- .../test_jq_entity_processor.py | 34 ++++++++++++ pyproject.toml | 2 +- 6 files changed, 102 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bd446d4f8..4f3e579291 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 0.16.1 (2024-12-25) + +### Bug Fixes + +- Added new info log for JQ mapping per batch to notify of misconfigured JQ mappings between a property and the JQ target + + ## 0.16.0 (2024-12-24) diff --git a/port_ocean/core/handlers/entity_processor/jq_entity_processor.py b/port_ocean/core/handlers/entity_processor/jq_entity_processor.py index 3384e32413..408621ebe8 100644 --- a/port_ocean/core/handlers/entity_processor/jq_entity_processor.py +++ b/port_ocean/core/handlers/entity_processor/jq_entity_processor.py @@ -34,6 +34,7 @@ class MappedEntity: entity: dict[str, Any] = field(default_factory=dict) did_entity_pass_selector: bool = False raw_data: Optional[dict[str, Any]] = None + misconfigurations: dict[str, str] = field(default_factory=dict) class JQEntityProcessor(BaseEntityProcessor): @@ -95,21 +96,37 @@ async def _search_as_bool(self, data: dict[str, Any], pattern: str) -> bool: ) async def _search_as_object( - self, data: dict[str, Any], obj: dict[str, Any] + self, + data: dict[str, Any], + obj: dict[str, Any], + misconfigurations: dict[str, str] | None = None, ) -> dict[str, Any | None]: + """ + Identify and extract the relevant value for the chosen key and populate it into the entity + :param data: the property itself that holds the key and the value, it is being 
passed to the task and we get back a task item, + if the data is a dict, we will recursively call this function again. + :param obj: the key that we want its value to be mapped into our entity. + :param misconfigurations: due to the recursive nature of this function, + we aim to have a dict that represents all of the misconfigured properties and when used recursively, + we pass this reference to misfoncigured object to add the relevant misconfigured keys. + :return: Mapped object with found value. + """ + search_tasks: dict[ str, Task[dict[str, Any | None]] | list[Task[dict[str, Any | None]]] ] = {} for key, value in obj.items(): if isinstance(value, list): search_tasks[key] = [ - asyncio.create_task(self._search_as_object(data, obj)) + asyncio.create_task( + self._search_as_object(data, obj, misconfigurations) + ) for obj in value ] elif isinstance(value, dict): search_tasks[key] = asyncio.create_task( - self._search_as_object(data, value) + self._search_as_object(data, value, misconfigurations) ) else: search_tasks[key] = asyncio.create_task(self._search(data, value)) @@ -118,12 +135,20 @@ async def _search_as_object( for key, task in search_tasks.items(): try: if isinstance(task, list): - result[key] = [await task for task in task] + result_list = [] + for task in task: + task_result = await task + if task_result is None and misconfigurations is not None: + misconfigurations[key] = obj[key] + result_list.append(task_result) + result[key] = result_list else: - result[key] = await task + task_result = await task + if task_result is None and misconfigurations is not None: + misconfigurations[key] = obj[key] + result[key] = task_result except Exception: result[key] = None - return result async def _get_mapped_entity( @@ -135,11 +160,15 @@ async def _get_mapped_entity( ) -> MappedEntity: should_run = await self._search_as_bool(data, selector_query) if parse_all or should_run: - mapped_entity = await self._search_as_object(data, raw_entity_mappings) + 
misconfigurations: dict[str, str] = {} + mapped_entity = await self._search_as_object( + data, raw_entity_mappings, misconfigurations + ) return MappedEntity( mapped_entity, did_entity_pass_selector=should_run, raw_data=data if should_run else None, + misconfigurations=misconfigurations, ) return MappedEntity() @@ -221,7 +250,11 @@ async def _parse_items( passed_entities = [] failed_entities = [] examples_to_send: list[dict[str, Any]] = [] + entity_misconfigurations: dict[str, str] = {} + missing_required_fields: bool = False for result in calculated_entities_results: + if len(result.misconfigurations) > 0: + entity_misconfigurations |= result.misconfigurations if result.entity.get("identifier") and result.entity.get("blueprint"): parsed_entity = Entity.parse_obj(result.entity) if result.did_entity_pass_selector: @@ -233,6 +266,12 @@ async def _parse_items( examples_to_send.append(result.raw_data) else: failed_entities.append(parsed_entity) + else: + missing_required_fields = True + if len(entity_misconfigurations) > 0: + logger.info( + f"The mapping resulted with invalid values for{" identifier, blueprint," if missing_required_fields else " "} properties. 
Mapping result: {entity_misconfigurations}" + ) if ( not calculated_entities_results and raw_results @@ -248,4 +287,5 @@ async def _parse_items( return CalculationResult( EntitySelectorDiff(passed=passed_entities, failed=failed_entities), errors, + misonfigured_entity_keys=entity_misconfigurations, ) diff --git a/port_ocean/core/integrations/mixins/sync_raw.py b/port_ocean/core/integrations/mixins/sync_raw.py index ecd94794f2..f46187a56b 100644 --- a/port_ocean/core/integrations/mixins/sync_raw.py +++ b/port_ocean/core/integrations/mixins/sync_raw.py @@ -185,7 +185,7 @@ async def _register_in_batches( send_raw_data_examples_amount = ( SEND_RAW_DATA_EXAMPLES_AMOUNT if ocean.config.send_raw_data_examples else 0 ) - all_entities, register_errors = await self._register_resource_raw( + all_entities, register_errors,_ = await self._register_resource_raw( resource_config, raw_results, user_agent_type, @@ -202,7 +202,7 @@ async def _register_in_batches( 0, send_raw_data_examples_amount - len(passed_entities) ) - entities, register_errors = await self._register_resource_raw( + entities, register_errors,_ = await self._register_resource_raw( resource_config, items, user_agent_type, diff --git a/port_ocean/core/ocean_types.py b/port_ocean/core/ocean_types.py index 493b58093c..c570f910b9 100644 --- a/port_ocean/core/ocean_types.py +++ b/port_ocean/core/ocean_types.py @@ -1,5 +1,13 @@ -from typing import TypedDict, Any, AsyncIterator, Callable, Awaitable, NamedTuple - +from typing import ( + TypedDict, + Any, + AsyncIterator, + Callable, + Awaitable, + NamedTuple, +) + +from dataclasses import field from port_ocean.core.models import Entity RAW_ITEM = dict[Any, Any] @@ -30,6 +38,7 @@ class EntitySelectorDiff(NamedTuple): class CalculationResult(NamedTuple): entity_selector_diff: EntitySelectorDiff errors: list[Exception] + misonfigured_entity_keys: dict[str, str] = field(default_factory=dict) class IntegrationEventsCallbacks(TypedDict): diff --git 
a/port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py b/port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py index 5b19303de8..1e24c8d7c0 100644 --- a/port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py +++ b/port_ocean/tests/core/handlers/entity_processor/test_jq_entity_processor.py @@ -269,3 +269,37 @@ async def test_parse_items_performance_10000( assert len(result.entity_selector_diff.passed) == 1 assert result.entity_selector_diff.passed[0].properties.get("foo") == "bar" assert not result.errors + + async def test_parse_items_wrong_mapping( + self, mocked_processor: JQEntityProcessor + ) -> None: + mapping = Mock() + mapping.port.entity.mappings.dict.return_value = { + "title": ".foo", + "identifier": ".ark", + "blueprint": ".baz", + "properties": { + "description": ".bazbar", + "url": ".foobar", + "defaultBranch": ".bar.baz", + }, + } + mapping.port.items_to_parse = None + mapping.selector.query = "true" + raw_results = [ + { + "foo": "bar", + "baz": "bazbar", + "bar": {"foobar": "barfoo", "baz": "barbaz"}, + }, + {"foo": "bar", "baz": "bazbar", "bar": {"foobar": "foobar"}}, + ] + result = await mocked_processor._parse_items(mapping, raw_results) + assert len(result.misonfigured_entity_keys) > 0 + assert len(result.misonfigured_entity_keys) == 4 + assert result.misonfigured_entity_keys == { + "identifier": ".ark", + "description": ".bazbar", + "url": ".foobar", + "defaultBranch": ".bar.baz", + } diff --git a/pyproject.toml b/pyproject.toml index 26005a99c5..f6c02bd788 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "port-ocean" -version = "0.16.0" +version = "0.16.1" description = "Port Ocean is a CLI tool for managing your Port projects." readme = "README.md" homepage = "https://app.getport.io"