From 0694fe5572e61d3a824ba4861f66536b1eaa7b8d Mon Sep 17 00:00:00 2001 From: Innokentii Konstantinov Date: Wed, 18 Dec 2024 12:11:21 +0800 Subject: [PATCH 1/6] Fix dynamic label template validation (#5363) 1. Fix https://github.com/grafana/irm/issues/530 - applied same validation logic as for multi-label extraction template to dynamic label. Actual fix is here - https://github.com/grafana/oncall/pull/5363/files#diff-58657df0f1ff9a8578a14504f1c6cfd240e45e084171c5bbeb09d975c3ec72ddR74 2. Some minor refactorings over static/dynamic/integration label naming. This work should be continued in separate PR. --- .../alerts/models/alert_receive_channel.py | 17 +- .../api/serializers/alert_receive_channel.py | 5 +- .../api/tests/test_alert_receive_channel.py | 18 +- engine/apps/labels/alert_group_labels.py | 161 +++++++++--------- engine/apps/labels/tests/test_alert_group.py | 117 +++++++++---- engine/apps/labels/tests/test_labels.py | 6 +- engine/apps/labels/tests/test_labels_cache.py | 14 +- engine/conftest.py | 2 +- 8 files changed, 199 insertions(+), 141 deletions(-) diff --git a/engine/apps/alerts/models/alert_receive_channel.py b/engine/apps/alerts/models/alert_receive_channel.py index f4661a3a10..74fc5d237a 100644 --- a/engine/apps/alerts/models/alert_receive_channel.py +++ b/engine/apps/alerts/models/alert_receive_channel.py @@ -301,16 +301,21 @@ class AlertReceiveChannel(IntegrationOptionsMixin, MaintainableObject): rate_limited_in_slack_at = models.DateTimeField(null=True, default=None) rate_limit_message_task_id = models.CharField(max_length=100, null=True, default=None) - AlertGroupCustomLabelsDB = list[tuple[str, str | None, str | None]] | None - alert_group_labels_custom: AlertGroupCustomLabelsDB = models.JSONField(null=True, default=None) + DynamicLabelsEntryDB = tuple[str, str | None, str | None] + DynamicLabelsConfigDB = list[DynamicLabelsEntryDB] | None + alert_group_labels_custom: DynamicLabelsConfigDB = models.JSONField(null=True, default=None) """ - 
Stores "custom labels" for alert group labels. Custom labels can be either "plain" or "templated". - For plain labels, the format is: [, , None] - For templated labels, the format is: [, None, ] + alert_group_labels_custom stores config of dynamic labels. It's stored as a list of tuples. + Format of tuple is: [, None, ]. + The second element is deprecated, so it's always None. It was used for static labels. + // TODO: refactor to use just regular DB fields for dynamic label config. """ alert_group_labels_template: str | None = models.TextField(null=True, default=None) - """Stores a Jinja2 template for "advanced label templating" for alert group labels.""" + """ + alert_group_labels_template is a Jinja2 template for "multi-label extraction template". + It extracts multiple labels from incoming alert payload. + """ additional_settings: dict | None = models.JSONField(null=True, default=None) diff --git a/engine/apps/api/serializers/alert_receive_channel.py b/engine/apps/api/serializers/alert_receive_channel.py index 9065ca6801..41c0e979ef 100644 --- a/engine/apps/api/serializers/alert_receive_channel.py +++ b/engine/apps/api/serializers/alert_receive_channel.py @@ -33,6 +33,7 @@ def _additional_settings_serializer_from_type(integration_type: str) -> serializ return cls +# TODO: refactor this types as w no longer support storing static labels in this field. # AlertGroupCustomLabelValue represents custom alert group label value for API requests # It handles two types of label's value: # 1. 
Just Label Value from a label repo for a static label @@ -206,7 +207,7 @@ def to_representation(cls, instance: AlertReceiveChannel) -> IntegrationAlertGro @staticmethod def _custom_labels_to_internal_value( custom_labels: AlertGroupCustomLabelsAPI, - ) -> AlertReceiveChannel.AlertGroupCustomLabelsDB: + ) -> AlertReceiveChannel.DynamicLabelsConfigDB: """Convert custom labels from API representation to the schema used by the JSONField on the model.""" return [ @@ -216,7 +217,7 @@ def _custom_labels_to_internal_value( @staticmethod def _custom_labels_to_representation( - custom_labels: AlertReceiveChannel.AlertGroupCustomLabelsDB, + custom_labels: AlertReceiveChannel.DynamicLabelsConfigDB, ) -> AlertGroupCustomLabelsAPI: """ Inverse of the _custom_labels_to_internal_value method above. diff --git a/engine/apps/api/tests/test_alert_receive_channel.py b/engine/apps/api/tests/test_alert_receive_channel.py index 460c335e26..fabf319660 100644 --- a/engine/apps/api/tests/test_alert_receive_channel.py +++ b/engine/apps/api/tests/test_alert_receive_channel.py @@ -1486,14 +1486,14 @@ def test_alert_receive_channel_contact_points_wrong_integration( def test_integration_filter_by_labels( make_organization_and_user_with_plugin_token, make_alert_receive_channel, - make_integration_label_association, + make_static_label_config, make_user_auth_headers, ): organization, user, token = make_organization_and_user_with_plugin_token() alert_receive_channel_1 = make_alert_receive_channel(organization) alert_receive_channel_2 = make_alert_receive_channel(organization) - associated_label_1 = make_integration_label_association(organization, alert_receive_channel_1) - associated_label_2 = make_integration_label_association(organization, alert_receive_channel_1) + associated_label_1 = make_static_label_config(organization, alert_receive_channel_1) + associated_label_2 = make_static_label_config(organization, alert_receive_channel_1) alert_receive_channel_2.labels.create( 
key=associated_label_1.key, value=associated_label_1.value, organization=organization ) @@ -1659,7 +1659,7 @@ def test_alert_group_labels_get( make_organization_and_user_with_plugin_token, make_alert_receive_channel, make_label_key_and_value, - make_integration_label_association, + make_static_label_config, make_user_auth_headers, ): organization, user, token = make_organization_and_user_with_plugin_token() @@ -1674,7 +1674,7 @@ def test_alert_group_labels_get( assert response.status_code == status.HTTP_200_OK assert response.json()["alert_group_labels"] == {"inheritable": {}, "custom": [], "template": None} - label = make_integration_label_association(organization, alert_receive_channel) + label = make_static_label_config(organization, alert_receive_channel) template = "{{ payload.labels | tojson }}" alert_receive_channel.alert_group_labels_template = template @@ -1707,14 +1707,14 @@ def test_alert_group_labels_get( def test_alert_group_labels_put( make_organization_and_user_with_plugin_token, make_alert_receive_channel, - make_integration_label_association, + make_static_label_config, make_user_auth_headers, ): organization, user, token = make_organization_and_user_with_plugin_token() alert_receive_channel = make_alert_receive_channel(organization) - label_1 = make_integration_label_association(organization, alert_receive_channel) - label_2 = make_integration_label_association(organization, alert_receive_channel) - label_3 = make_integration_label_association(organization, alert_receive_channel) + label_1 = make_static_label_config(organization, alert_receive_channel) + label_2 = make_static_label_config(organization, alert_receive_channel) + label_3 = make_static_label_config(organization, alert_receive_channel) custom = [ # plain label diff --git a/engine/apps/labels/alert_group_labels.py b/engine/apps/labels/alert_group_labels.py index 60bfad7c18..70dafeada3 100644 --- a/engine/apps/labels/alert_group_labels.py +++ b/engine/apps/labels/alert_group_labels.py @@ 
-10,10 +10,8 @@ if typing.TYPE_CHECKING: from apps.alerts.models import Alert, AlertGroup, AlertReceiveChannel - logger = logging.getLogger(__name__) - # What can be used as a label key/value coming out from the template LABEL_VALUE_TYPES = (str, int, float, bool) @@ -27,16 +25,14 @@ def gather_labels_from_alert_receive_channel_and_raw_request_data( if not is_labels_feature_enabled(alert_receive_channel.organization): return None - # inherit labels from the integration + # apply static labels by inheriting labels from the integration labels = { label.key.name: label.value.name for label in alert_receive_channel.labels.all().select_related("key", "value") } - # apply custom labels - labels.update(_custom_labels(alert_receive_channel, raw_request_data)) + labels.update(_apply_dynamic_labels(alert_receive_channel, raw_request_data)) - # apply template labels - labels.update(_template_labels(alert_receive_channel, raw_request_data)) + labels.update(_apply_multi_label_extraction_template(alert_receive_channel, raw_request_data)) return labels @@ -75,10 +71,10 @@ def assign_labels( AlertGroupAssociatedLabel.objects.bulk_create(alert_group_labels) -def _custom_labels( +def _apply_dynamic_labels( alert_receive_channel: "AlertReceiveChannel", raw_request_data: "Alert.RawRequestData" ) -> types.AlertLabels: - from apps.labels.models import MAX_VALUE_NAME_LENGTH, LabelKeyCache, LabelValueCache + from apps.labels.models import LabelKeyCache if alert_receive_channel.alert_group_labels_custom is None: return {} @@ -91,91 +87,79 @@ def _custom_labels( ).only("id", "name") } - # fetch up-to-date label value names - label_value_names = { - v.id: v.name - for v in LabelValueCache.objects.filter( - id__in=[label[1] for label in alert_receive_channel.alert_group_labels_custom if label[1]] - ).only("id", "name") - } - - rendered_labels = {} + result_labels = {} for label in alert_receive_channel.alert_group_labels_custom: - key_id, value_id, template = label - - if key_id in 
label_key_names: - key = label_key_names[key_id] - else: - logger.warning("Label key cache not found. %s", key_id) - continue - - if value_id: - if value_id in label_value_names: - rendered_labels[key] = label_value_names[value_id] - else: - logger.warning("Label value cache not found. %s", value_id) - continue - else: - try: - rendered_labels[key] = apply_jinja_template(template, raw_request_data) - except (JinjaTemplateError, JinjaTemplateWarning) as e: - logger.warning("Failed to apply template. %s", e.fallback_message) - continue - - labels = {} - for key in rendered_labels: - value = rendered_labels[key] - - # check value length - if len(value) == 0: - logger.warning("Template result value is empty. %s", value) - continue + label = _apply_dynamic_label_entry(label, label_key_names, raw_request_data) + if label: + key, value = label + result_labels[key] = value - if len(value) > MAX_VALUE_NAME_LENGTH: - logger.warning("Template result value is too long. %s", value) - continue + return result_labels - labels[key] = value - return labels +def _apply_dynamic_label_entry( + label: "AlertReceiveChannel.DynamicLabelsEntryDB", keys: dict, payload: "Alert.RawRequestData" +) -> typing.Optional[tuple[str, str]]: + key_id, value_id, template = label + key, value = "", "" + # check if key exists + if key_id in keys: + key = keys[key_id] + else: + logger.warning("Label key cache not found. %s", key_id) + return None -def _template_labels( + if value_id: + # if value_id is present - it's a static k-v pair. Deprecated. + logger.warning( + "value_id is present in dynamic label entry. It's deprecated & should not be there. %s", value_id + ) + elif template: + # otherwise, it's a key-template pair, applying template + try: + value = apply_jinja_template(template, payload) + except (JinjaTemplateError, JinjaTemplateWarning) as e: + logger.warning("Failed to apply template. 
%s", e.fallback_message) + return None + if not _validate_templated_value(value): + return None + else: + logger.warning("Label value is neither a value_id, nor a template. %s", key) + return key, value + + +def _apply_multi_label_extraction_template( alert_receive_channel: "AlertReceiveChannel", raw_request_data: "Alert.RawRequestData" ) -> types.AlertLabels: - from apps.labels.models import MAX_KEY_NAME_LENGTH, MAX_VALUE_NAME_LENGTH + from apps.labels.models import MAX_KEY_NAME_LENGTH if not alert_receive_channel.alert_group_labels_template: return {} + # render template - output will be a string. + # It's expected that it will be a JSON string, to be parsed into a dict. try: - rendered = apply_jinja_template(alert_receive_channel.alert_group_labels_template, raw_request_data) + rendered_labels = apply_jinja_template(alert_receive_channel.alert_group_labels_template, raw_request_data) except (JinjaTemplateError, JinjaTemplateWarning) as e: logger.warning("Failed to apply template. %s", e.fallback_message) return {} + # unmarshal rendered_labels JSON string to dict try: - rendered_labels = json.loads(rendered) + labels_dict = json.loads(rendered_labels) except (TypeError, json.JSONDecodeError): - logger.warning("Failed to parse template result. %s", rendered) + # it's expected, if user misconfigured the template + logger.warning("Failed to parse template result. %s", rendered_labels) return {} - if not isinstance(rendered_labels, dict): - logger.warning("Template result is not a dict. %s", rendered_labels) + if not isinstance(labels_dict, dict): + logger.warning("Template result is not a dict. %s", labels_dict) return {} - labels = {} - for key in rendered_labels: - value = rendered_labels[key] - - # check value type - if not isinstance(value, LABEL_VALUE_TYPES): - logger.warning("Template result value has invalid type. 
%s", value) - continue - - # convert value to string - value = str(value) - + # validate dict of labels, drop invalid keys & values, convert all values to strings + result_labels = {} + for key in labels_dict: # check key length if len(key) == 0: logger.warning("Template result key is empty. %s", key) @@ -185,15 +169,36 @@ def _template_labels( logger.warning("Template result key is too long. %s", key) continue - # check value length - if len(value) == 0: - logger.warning("Template result value is empty. %s", value) + # Checks specific to multi-label extraction template, because we're receiving value from a JSON: + # 1. check type + # 2. convert back to string + if not isinstance(labels_dict[key], LABEL_VALUE_TYPES): + logger.warning("Templated value has invalid type. %s", labels_dict[key]) continue + value = str(labels_dict[key]) - if len(value) > MAX_VALUE_NAME_LENGTH: - logger.warning("Template result value is too long. %s", value) + # apply common value checks + if not _validate_templated_value(value): continue - labels[key] = value + result_labels[key] = labels_dict[key] - return labels + return result_labels + + +def _validate_templated_value(value: str) -> bool: + from apps.labels.models import MAX_VALUE_NAME_LENGTH + + # check value length + if len(value) == 0: + logger.warning("Templated value value is empty. %s", value) + return False + + if len(value) > MAX_VALUE_NAME_LENGTH: + logger.warning("Templated value is too long. %s", value) + return False + + if value.lower().strip() == "none": + logger.warning("Templated value is None. 
%s", value) + return False + return True diff --git a/engine/apps/labels/tests/test_alert_group.py b/engine/apps/labels/tests/test_alert_group.py index ff94fb560f..5e8d4b13a8 100644 --- a/engine/apps/labels/tests/test_alert_group.py +++ b/engine/apps/labels/tests/test_alert_group.py @@ -3,7 +3,7 @@ import pytest from apps.alerts.models import Alert -from apps.labels.models import MAX_KEY_NAME_LENGTH, MAX_VALUE_NAME_LENGTH +from apps.labels.models import MAX_KEY_NAME_LENGTH, MAX_VALUE_NAME_LENGTH, LabelKeyCache, LabelValueCache TOO_LONG_KEY_NAME = "k" * (MAX_KEY_NAME_LENGTH + 1) TOO_LONG_VALUE_NAME = "v" * (MAX_VALUE_NAME_LENGTH + 1) @@ -12,11 +12,11 @@ @mock.patch("apps.labels.alert_group_labels.is_labels_feature_enabled", return_value=False) @pytest.mark.django_db def test_assign_labels_feature_flag_disabled( - _, make_organization, make_alert_receive_channel, make_integration_label_association + _, make_organization, make_alert_receive_channel, make_static_label_config ): organization = make_organization() alert_receive_channel = make_alert_receive_channel(organization) - make_integration_label_association(organization, alert_receive_channel) + make_static_label_config(organization, alert_receive_channel) alert = Alert.create( title="the title", @@ -32,36 +32,20 @@ def test_assign_labels_feature_flag_disabled( @pytest.mark.django_db -def test_assign_labels( +def test_multi_label_extraction_template( make_organization, make_alert_receive_channel, make_label_key_and_value, make_label_key, - make_integration_label_association, + make_static_label_config, ): organization = make_organization() - # create label repo labels - label_key, label_value = make_label_key_and_value(organization, key_name="a", value_name="b") - label_key_1 = make_label_key(organization=organization, key_name="c") - label_key_2 = make_label_key(organization=organization) - label_key_3 = make_label_key(organization=organization) - label_key_4 = make_label_key(organization=organization) - # create 
alert receive channel with all 3 types of labels alert_receive_channel = make_alert_receive_channel( organization, - alert_group_labels_custom=[ - [label_key.id, label_value.id, None], # plain label - ["nonexistent", label_value.id, None], # plain label with nonexistent key ID - [label_key_2.id, "nonexistent", None], # plain label with nonexistent value ID - [label_key_1.id, None, "{{ payload.c }}"], # templated label - [label_key_3.id, None, TOO_LONG_VALUE_NAME], # templated label too long - [label_key_4.id, None, "{{ payload.nonexistent }}"], # templated label with nonexistent key - ], alert_group_labels_template="{{ payload.advanced_template | tojson }}", ) - make_integration_label_association(organization, alert_receive_channel, key_name="e", value_name="f") # create alert group alert = Alert.create( @@ -69,9 +53,9 @@ def test_assign_labels( message="the message", alert_receive_channel=alert_receive_channel, raw_request_data={ - "c": "d", "advanced_template": { - "g": 123, + "cluster_id": 123, + "severity": "critical", "too_long": TOO_LONG_VALUE_NAME, TOO_LONG_KEY_NAME: "too_long", "invalid_type": {"test": "test"}, @@ -85,22 +69,87 @@ def test_assign_labels( # check alert group labels are assigned correctly, in the lexicographical order assert [(label.key_name, label.value_name) for label in alert.group.labels.all()] == [ - ("a", "b"), - ("c", "d"), - ("e", "f"), - ("g", "123"), + ("cluster_id", "123"), + ("severity", "critical"), ] @pytest.mark.django_db -def test_assign_labels_custom_labels_none( +def test_assign_dynamic_labels( make_organization, make_alert_receive_channel, - make_integration_label_association, + make_label_key_and_value, + make_label_key, + make_label_value, +): + organization = make_organization() + + # create label repo labels + label_key_severity = make_label_key(organization=organization, key_name="severity") + label_key_service = make_label_key(organization=organization, key_name="service") + # add values for severity key + _ = 
make_label_value(label_key_severity, value_name="critical") + + # set-up some keys to test invalid templates + label_key_cluster = make_label_key(organization=organization, key_name="cluster") + label_key_region = make_label_key(organization=organization, key_name="region") + label_key_team = make_label_key(organization=organization, key_name="team") + + # create alert receive channel with all 3 types of labels + alert_receive_channel = make_alert_receive_channel( + organization, + alert_group_labels_custom=[ + # valid templated label, parsed value present in label repo. Expected to be attached to group, + [label_key_severity.id, None, "{{ payload.severity }}"], + # valid templated label, parsed value NOT present in label repo Expected to be attached anyway. + [label_key_service.id, None, "{{ payload.service }}"], + # templated label too long. Expected to be ignored + [label_key_cluster.id, None, TOO_LONG_VALUE_NAME], + # templated label with jinja template pointing to nonexistent attribute in alert payload, Expected to be ignored + [label_key_region.id, None, "{{ payload.nonexistent }}"], + # templated label explicitly set to None. Expected to be ignored + [label_key_team.id, None, "{{ payload.nonexistent or None }}"], + # templated label with nonexistent key ID. 
Expected to be ignored + ["nonexistent", None, "{{ payload.severity }}"], + ], + ) + # create alert group + alert = Alert.create( + title="the title", + message="the message", + alert_receive_channel=alert_receive_channel, + raw_request_data={ + "severity": "critical", + "service": "oncall", + "extra": "hi", + }, + integration_unique_data={}, + image_url=None, + link_to_upstream_details=None, + ) + + assert [(label.key_name, label.value_name) for label in alert.group.labels.all()] == [ + ("service", "oncall"), + ("severity", "critical"), + ] + + +@pytest.mark.django_db +def test_assign_static_labels( + make_organization, + make_alert_receive_channel, + make_static_label_config, ): organization = make_organization() alert_receive_channel = make_alert_receive_channel(organization, alert_group_labels_custom=None) - make_integration_label_association(organization, alert_receive_channel, key_name="a", value_name="b") + # Configure a static label - expected to be attached to group. + make_static_label_config(organization, alert_receive_channel, key_name="severity", value_name="critical") + + # Configure a static label & delete key and value caches. Expected to be ignored. 
+ make_static_label_config(organization, alert_receive_channel, key_name="service", value_name="oncall") + key = LabelKeyCache.objects.get(name="service") + LabelValueCache.objects.filter(name="oncall", key=key).delete() + key.delete() alert = Alert.create( title="the title", @@ -112,22 +161,22 @@ def test_assign_labels_custom_labels_none( link_to_upstream_details=None, ) - assert [(label.key_name, label.value_name) for label in alert.group.labels.all()] == [("a", "b")] + assert [(label.key_name, label.value_name) for label in alert.group.labels.all()] == [("severity", "critical")] @pytest.mark.django_db def test_assign_labels_too_many( - make_organization, make_alert_receive_channel, make_integration_label_association, make_label_key_and_value + make_organization, make_alert_receive_channel, make_static_label_config, make_label_key_and_value ): organization = make_organization() label_key, label_value = make_label_key_and_value(organization, key_name="a", value_name="test") alert_receive_channel = make_alert_receive_channel( organization, - alert_group_labels_custom=[[label_key.id, label_value.id, None]], alert_group_labels_template='{{ {"b": payload.b} | tojson }}', ) - make_integration_label_association(organization, alert_receive_channel, key_name="c", value_name="test") + make_static_label_config(organization, alert_receive_channel, key_name="a", value_name="test") + make_static_label_config(organization, alert_receive_channel, key_name="c", value_name="test") with mock.patch("apps.labels.alert_group_labels.MAX_LABELS_PER_ALERT_GROUP", 2): alert = Alert.create( diff --git a/engine/apps/labels/tests/test_labels.py b/engine/apps/labels/tests/test_labels.py index 594d0e9b48..8958612ef5 100644 --- a/engine/apps/labels/tests/test_labels.py +++ b/engine/apps/labels/tests/test_labels.py @@ -92,12 +92,12 @@ def test_label_associate_existing_label(make_label_key_and_value, make_organizat @pytest.mark.django_db def test_label_update_association_by_removing_label( - 
make_integration_label_association, make_organization, make_alert_receive_channel + make_static_label_config, make_organization, make_alert_receive_channel ): organization = make_organization() alert_receive_channel = make_alert_receive_channel(organization) - label_association_1 = make_integration_label_association(organization, alert_receive_channel) - label_association_2 = make_integration_label_association(organization, alert_receive_channel) + label_association_1 = make_static_label_config(organization, alert_receive_channel) + label_association_2 = make_static_label_config(organization, alert_receive_channel) labels_data = [ { "key": {"id": label_association_1.key_id, "name": label_association_1.key.name, "prescribed": False}, diff --git a/engine/apps/labels/tests/test_labels_cache.py b/engine/apps/labels/tests/test_labels_cache.py index c22516b190..870c76b96b 100644 --- a/engine/apps/labels/tests/test_labels_cache.py +++ b/engine/apps/labels/tests/test_labels_cache.py @@ -89,11 +89,11 @@ def test_update_labels_cache(make_organization, make_label_key_and_value, make_l @pytest.mark.django_db def test_update_instances_labels_cache_recently_synced( - make_organization, make_alert_receive_channel, make_integration_label_association + make_organization, make_alert_receive_channel, make_static_label_config ): organization = make_organization() alert_receive_channel = make_alert_receive_channel(organization) - label_association = make_integration_label_association(organization, alert_receive_channel) + label_association = make_static_label_config(organization, alert_receive_channel) assert not label_association.key.is_outdated assert not label_association.value.is_outdated @@ -109,11 +109,11 @@ def test_update_instances_labels_cache_recently_synced( @pytest.mark.django_db def test_update_instances_labels_cache_outdated( - make_organization, make_alert_receive_channel, make_integration_label_association + make_organization, make_alert_receive_channel, 
make_static_label_config ): organization = make_organization() alert_receive_channel = make_alert_receive_channel(organization) - label_association = make_integration_label_association(organization, alert_receive_channel) + label_association = make_static_label_config(organization, alert_receive_channel) outdated_last_synced = timezone.now() - timezone.timedelta(minutes=LABEL_OUTDATED_TIMEOUT_MINUTES + 1) LabelKeyCache.objects.filter(id=label_association.key_id).update(last_synced=outdated_last_synced) @@ -140,12 +140,10 @@ def test_update_instances_labels_cache_outdated( @pytest.mark.django_db -def test_update_instances_labels_cache_error( - make_organization, make_alert_receive_channel, make_integration_label_association -): +def test_update_instances_labels_cache_error(make_organization, make_alert_receive_channel, make_static_label_config): organization = make_organization() alert_receive_channel = make_alert_receive_channel(organization) - label_association = make_integration_label_association(organization, alert_receive_channel) + label_association = make_static_label_config(organization, alert_receive_channel) outdated_last_synced = timezone.now() - timezone.timedelta(minutes=LABEL_OUTDATED_TIMEOUT_MINUTES + 1) LabelKeyCache.objects.filter(id=label_association.key_id).update(last_synced=outdated_last_synced) diff --git a/engine/conftest.py b/engine/conftest.py index f749c92a88..c7e7475cf4 100644 --- a/engine/conftest.py +++ b/engine/conftest.py @@ -1088,7 +1088,7 @@ def _make_label_key_and_value(organization, key_id=None, key_name=None, value_id @pytest.fixture -def make_integration_label_association(make_label_key_and_value): +def make_static_label_config(make_label_key_and_value): def _make_integration_label_association( organization, alert_receive_channel, key_id=None, key_name=None, value_id=None, value_name=None, **kwargs ): From c36761e345bd5351b4f1b224e47f4384e073f87b Mon Sep 17 00:00:00 2001 From: Vadim Stepanov Date: Wed, 18 Dec 2024 16:35:44 +0000 
Subject: [PATCH 2/6] Inbound email: download from S3 + convert HTML to plaintext (#5348) # What this PR does * Make `AmazonSESValidatedInboundWebhookView` able to download emails from S3 by providing AWS credentials via env variables * Convert HTML to plaintext when there's only `text/html` available ## Which issue(s) this PR closes Related to https://github.com/grafana/oncall-private/issues/2905 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes. --- engine/apps/email/inbound.py | 129 ++++++++++++++-- engine/apps/email/tests/test_inbound_email.py | 144 +++++++++++++++--- engine/settings/base.py | 3 + 3 files changed, 245 insertions(+), 31 deletions(-) diff --git a/engine/apps/email/inbound.py b/engine/apps/email/inbound.py index 6456d9fb27..e3863f49c5 100644 --- a/engine/apps/email/inbound.py +++ b/engine/apps/email/inbound.py @@ -7,6 +7,8 @@ from anymail.inbound import AnymailInboundMessage from anymail.signals import AnymailInboundEvent from anymail.webhooks import amazon_ses, mailgun, mailjet, mandrill, postal, postmark, sendgrid, sparkpost +from bs4 import BeautifulSoup +from django.conf import settings from django.http import HttpResponse, HttpResponseNotAllowed from django.utils import timezone from rest_framework import status @@ -25,6 +27,15 @@ class AmazonSESValidatedInboundWebhookView(amazon_ses.AmazonSESInboundWebhookVie # disable "Your Anymail webhooks are insecure and open to anyone on the web." 
warning warn_if_no_basic_auth = False + def __init__(self): + super().__init__( + session_params={ + "aws_access_key_id": settings.INBOUND_EMAIL_AWS_ACCESS_KEY_ID, + "aws_secret_access_key": settings.INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY, + "region_name": settings.INBOUND_EMAIL_AWS_REGION, + }, + ) + def validate_request(self, request): """Add SNS message validation to Amazon SES inbound webhook view, which is not implemented in Anymail.""" if not validate_amazon_sns_message(self._parse_sns_message(request)): @@ -74,11 +85,10 @@ def dispatch(self, request): if request.method.lower() == "head": return HttpResponse(status=status.HTTP_200_OK) - integration_token = self.get_integration_token_from_request(request) - if integration_token is None: + if self.integration_token is None: return HttpResponse(status=status.HTTP_400_BAD_REQUEST) - request.inbound_email_integration_token = integration_token # used in RequestTimeLoggingMiddleware - return super().dispatch(request, alert_channel_key=integration_token) + request.inbound_email_integration_token = self.integration_token # used in RequestTimeLoggingMiddleware + return super().dispatch(request, alert_channel_key=self.integration_token) def post(self, request): payload = self.get_alert_payload_from_email_message(self.message) @@ -94,7 +104,8 @@ def post(self, request): ) return Response("OK", status=status.HTTP_200_OK) - def get_integration_token_from_request(self, request) -> Optional[str]: + @cached_property + def integration_token(self) -> Optional[str]: if not self.message: return None # First try envelope_recipient field. @@ -151,7 +162,8 @@ def message(self) -> AnymailInboundMessage | None: logger.error("Failed to parse inbound email message") return None - def check_inbound_email_settings_set(self): + @staticmethod + def check_inbound_email_settings_set(): """ Guard method to checks if INBOUND_EMAIL settings present. Returns InternalServerError if not. 
@@ -167,16 +179,105 @@ def check_inbound_email_settings_set(self): logger.error("InboundEmailWebhookView: INBOUND_EMAIL_DOMAIN env variable must be set.") return HttpResponse(status=status.HTTP_500_INTERNAL_SERVER_ERROR) - def get_alert_payload_from_email_message(self, email: AnymailInboundMessage) -> EmailAlertPayload: - subject = email.subject or "" - subject = subject.strip() - message = email.text or "" - message = message.strip() - sender = self.get_sender_from_email_message(email) + @classmethod + def get_alert_payload_from_email_message(cls, email: AnymailInboundMessage) -> EmailAlertPayload: + if email.text: + message = email.text.strip() + elif email.html: + message = cls.html_to_plaintext(email.html) + else: + message = "" + + return { + "subject": email.subject.strip() if email.subject else "", + "message": message, + "sender": cls.get_sender_from_email_message(email), + } + + @staticmethod + def html_to_plaintext(html: str) -> str: + """ + Converts HTML to plain text. Renders links as "text (href)" and removes any empty lines. + Converting HTML to plaintext is a non-trivial task, so this method may not work perfectly for all cases. + """ + soup = BeautifulSoup(html, "html.parser") + + # Browsers typically render these elements on their own line. 
+ # There is no single official HTML5 list for this, so we go with HTML tags that render as + # display: block, display: list-item, display: table, display: table-row by default according to the HTML standard: + # https://html.spec.whatwg.org/multipage/rendering.html + newline_tags = [ + "address", + "article", + "aside", + "blockquote", + "body", + "center", + "dd", + "details", + "dialog", + "dir", + "div", + "dl", + "dt", + "fieldset", + "figcaption", + "figure", + "footer", + "form", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "header", + "hgroup", + "hr", + "html", + "legend", + "li", + "listing", + "main", + "menu", + "nav", + "ol", + "p", + "plaintext", + "pre", + "search", + "section", + "summary", + "table", + "tr", + "ul", + "xmp", + ] + # Insert a newline after each block-level element + for tag in soup.find_all(newline_tags): + tag.insert_before("\n") + tag.insert_after("\n") + + #
tags are also typically rendered as newlines + for br in soup.find_all("br"): + br.replace_with("\n") + + # example: "example" -> "example (https://example.com)" + for a in soup.find_all("a"): + if href := a.get("href"): + a.append(f" ({href})") + + for li in soup.find_all("li"): + li.insert_before("* ") + + for hr in soup.find_all("hr"): + hr.replace_with("-" * 32) - return {"subject": subject, "message": message, "sender": sender} + # remove empty lines + return "\n".join(line.strip() for line in soup.get_text().splitlines() if line.strip()) - def get_sender_from_email_message(self, email: AnymailInboundMessage) -> str: + @staticmethod + def get_sender_from_email_message(email: AnymailInboundMessage) -> str: try: if isinstance(email.from_email, list): sender = email.from_email[0].addr_spec diff --git a/engine/apps/email/tests/test_inbound_email.py b/engine/apps/email/tests/test_inbound_email.py index 808fbdfac4..d0b6ce929a 100644 --- a/engine/apps/email/tests/test_inbound_email.py +++ b/engine/apps/email/tests/test_inbound_email.py @@ -6,6 +6,7 @@ from textwrap import dedent from unittest.mock import ANY, Mock, patch +import httpretty import pytest from anymail.inbound import AnymailInboundMessage from cryptography import x509 @@ -54,13 +55,14 @@ MESSAGE = "This is a test email message body." 
-def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, message): +def _sns_inbound_email_setup(sender_email, to_email, subject, message, content_type="text/plain", s3=False): content = ( f"From: Sender Name <{sender_email}>\n" f"To: {to_email}\n" f"Subject: {subject}\n" "Date: Tue, 5 Nov 2024 16:05:39 +0000\n" - "Message-ID: \n\n" + "Message-ID: \n" + f"Content-Type: {content_type}\n\n" f"{message}\r\n" ) @@ -130,7 +132,7 @@ def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, mess {"name": "To", "value": to_email}, { "name": "Content-Type", - "value": 'multipart/alternative; boundary="00000000000036b9f706262c9312"', + "value": f"{content_type}", }, ], "commonHeaders": { @@ -152,12 +154,12 @@ def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, mess "dkimVerdict": {"status": "PASS"}, "dmarcVerdict": {"status": "PASS"}, "action": { - "type": "SNS", + "type": "S3" if s3 else "SNS", "topicArn": "arn:aws:sns:us-east-2:123456789012:test", - "encoding": "BASE64", + **({"bucketName": "test-s3-bucket", "objectKey": "test-object-key"} if s3 else {"encoding": "BASE64"}), }, }, - "content": b64encode(content.encode()).decode(), + **({} if s3 else {"content": b64encode(content.encode()).decode()}), } payload = { @@ -189,7 +191,7 @@ def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, mess "X-Amz-Sns-Message-Type": "Notification", "X-Amz-Sns-Message-Id": "example-message-id-1234", } - return payload, headers + return payload, headers, content def _mailgun_inbound_email_payload(sender_email, to_email, subject, message): @@ -444,7 +446,7 @@ def test_amazon_ses_pass(create_alert_mock, settings, make_organization, make_al token="test-token", ) - sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( + sns_payload, sns_headers, _ = _sns_inbound_email_setup( sender_email=SENDER_EMAIL, to_email=TO_EMAIL, subject=SUBJECT, @@ -476,16 +478,17 @@ def 
test_amazon_ses_pass(create_alert_mock, settings, make_organization, make_al ) -@patch("requests.get", return_value=Mock(content=CERTIFICATE)) @patch.object(create_alert, "delay") +@httpretty.activate(verbose=True, allow_net_connect=True) @pytest.mark.django_db -def test_amazon_ses_validated_pass( - mock_create_alert, mock_requests_get, settings, make_organization, make_alert_receive_channel -): +def test_amazon_ses_validated_s3_pass(mock_create_alert, settings, make_organization, make_alert_receive_channel): settings.INBOUND_EMAIL_ESP = "amazon_ses_validated,mailgun" settings.INBOUND_EMAIL_DOMAIN = "inbound.example.com" settings.INBOUND_EMAIL_WEBHOOK_SECRET = "secret" settings.INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = AMAZON_SNS_TOPIC_ARN + settings.INBOUND_EMAIL_AWS_ACCESS_KEY_ID = "test-access-key-id" + settings.INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY = "test-secret-access-key" + settings.INBOUND_EMAIL_AWS_REGION = "us-east-2" organization = make_organization() alert_receive_channel = make_alert_receive_channel( @@ -494,11 +497,24 @@ def test_amazon_ses_validated_pass( token="test-token", ) - sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( + sns_payload, sns_headers, content = _sns_inbound_email_setup( sender_email=SENDER_EMAIL, to_email=TO_EMAIL, subject=SUBJECT, message=MESSAGE, + s3=True, + ) + + httpretty.register_uri(httpretty.GET, SIGNING_CERT_URL, body=CERTIFICATE) + httpretty.register_uri( + httpretty.HEAD, + "https://test-s3-bucket.s3.us-east-2.amazonaws.com/test-object-key", + responses=[httpretty.Response(body="")], + ) + httpretty.register_uri( + httpretty.GET, + "https://test-s3-bucket.s3.us-east-2.amazonaws.com/test-object-key", + responses=[httpretty.Response(body=content)], ) client = APIClient() @@ -525,6 +541,100 @@ def test_amazon_ses_validated_pass( received_at=ANY, ) + assert len(httpretty.latest_requests()) == 3 + assert (httpretty.latest_requests()[0].method, httpretty.latest_requests()[0].path) == ( + "GET", + 
"/SimpleNotificationService-example.pem", + ) + assert (httpretty.latest_requests()[1].method, httpretty.latest_requests()[1].path) == ("HEAD", "/test-object-key") + assert (httpretty.latest_requests()[2].method, httpretty.latest_requests()[2].path) == ("GET", "/test-object-key") + + +@patch("requests.get", return_value=Mock(content=CERTIFICATE)) +@patch.object(create_alert, "delay") +@pytest.mark.django_db +def test_amazon_ses_validated_pass_html( + mock_create_alert, mock_requests_get, settings, make_organization, make_alert_receive_channel +): + settings.INBOUND_EMAIL_ESP = "amazon_ses_validated,mailgun" + settings.INBOUND_EMAIL_DOMAIN = "inbound.example.com" + settings.INBOUND_EMAIL_WEBHOOK_SECRET = "secret" + settings.INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = AMAZON_SNS_TOPIC_ARN + + organization = make_organization() + alert_receive_channel = make_alert_receive_channel( + organization, + integration=AlertReceiveChannel.INTEGRATION_INBOUND_EMAIL, + token="test-token", + ) + + html_message = """\ + + title + +
+

h1

+


+

pbi span

new line


+ link +
    +
  • li1
  • +
  • li2
  • +
+ + + + + +
td1td2
+
+ + + """ + plaintext_message = ( + "title\n" + "h1\n" + "pbi span\n" + "new line\n" + "--------------------------------\n" + "link (https://example.com)\n" + "* li1\n" + "* li2\n" + "td1\n" + "td2" + ) + sns_payload, sns_headers, _ = _sns_inbound_email_setup( + sender_email=SENDER_EMAIL, + to_email=TO_EMAIL, + subject=SUBJECT, + message=html_message, + content_type="text/html", + ) + + client = APIClient() + response = client.post( + reverse("integrations:inbound_email_webhook"), + data=sns_payload, + headers=sns_headers, + format="json", + ) + + assert response.status_code == status.HTTP_200_OK + mock_create_alert.assert_called_once_with( + title=SUBJECT, + message=plaintext_message, + alert_receive_channel_pk=alert_receive_channel.pk, + image_url=None, + link_to_upstream_details=None, + integration_unique_data=None, + raw_request_data={ + "subject": SUBJECT, + "message": plaintext_message, + "sender": SENDER_EMAIL, + }, + received_at=ANY, + ) + mock_requests_get.assert_called_once_with(SIGNING_CERT_URL, timeout=5) @@ -546,7 +656,7 @@ def test_amazon_ses_validated_fail_wrong_sns_topic_arn( token="test-token", ) - sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( + sns_payload, sns_headers, _ = _sns_inbound_email_setup( sender_email=SENDER_EMAIL, to_email=TO_EMAIL, subject=SUBJECT, @@ -584,7 +694,7 @@ def test_amazon_ses_validated_fail_wrong_signature( token="test-token", ) - sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( + sns_payload, sns_headers, _ = _sns_inbound_email_setup( sender_email=SENDER_EMAIL, to_email=TO_EMAIL, subject=SUBJECT, @@ -622,7 +732,7 @@ def test_amazon_ses_validated_fail_cant_download_certificate( token="test-token", ) - sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( + sns_payload, sns_headers, _ = _sns_inbound_email_setup( sender_email=SENDER_EMAIL, to_email=TO_EMAIL, subject=SUBJECT, @@ -656,7 +766,7 @@ def test_amazon_ses_validated_caches_certificate( token="test-token", ) - 
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( + sns_payload, sns_headers, _ = _sns_inbound_email_setup( sender_email=SENDER_EMAIL, to_email=TO_EMAIL, subject=SUBJECT, diff --git a/engine/settings/base.py b/engine/settings/base.py index 0f73c8d5af..007779f195 100644 --- a/engine/settings/base.py +++ b/engine/settings/base.py @@ -868,6 +868,9 @@ class BrokerTypes: INBOUND_EMAIL_DOMAIN = os.getenv("INBOUND_EMAIL_DOMAIN") INBOUND_EMAIL_WEBHOOK_SECRET = os.getenv("INBOUND_EMAIL_WEBHOOK_SECRET") INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = os.getenv("INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN") +INBOUND_EMAIL_AWS_ACCESS_KEY_ID = os.getenv("INBOUND_EMAIL_AWS_ACCESS_KEY_ID") +INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY = os.getenv("INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY") +INBOUND_EMAIL_AWS_REGION = os.getenv("INBOUND_EMAIL_AWS_REGION") INSTALLED_ONCALL_INTEGRATIONS = [ # Featured From 62c4e86a5971f1913bf69745f1829b2e1506a4cf Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Wed, 18 Dec 2024 17:16:47 -0500 Subject: [PATCH 3/6] feat: allow PagerDuty migrator script to migrate data while ignoring users (#5375) --- tools/migrators/README.md | 75 ++++++++++++++++++++++-- tools/migrators/lib/pagerduty/config.py | 2 + tools/migrators/lib/pagerduty/migrate.py | 27 ++++++--- 3 files changed, 89 insertions(+), 15 deletions(-) diff --git a/tools/migrators/README.md b/tools/migrators/README.md index 61881ebf70..d5f2810c6a 100644 --- a/tools/migrators/README.md +++ b/tools/migrators/README.md @@ -150,6 +150,68 @@ oncall-migrator Consider modifying [alert templates](https://grafana.com/docs/oncall/latest/alert-behavior/alert-templates/) of the created webhook integrations to adjust them for incoming payloads. +### Migrate your PagerDuty data while ignoring Grafana users + +This scenario may be relevant where you are unable to import your list of Grafana users, but would like to experiment +with Grafana OnCall, using your existing PagerDuty setup as a starting point for experimentation. 
+
+If this is relevant to you, you can migrate as such:
+
+```bash
+# Step 1: run a plan of what will be migrated, ignoring users for now
+docker run --rm \
+-e MIGRATING_FROM="pagerduty" \
+-e MODE="plan" \
+-e ONCALL_API_URL="" \
+-e ONCALL_API_TOKEN="" \
+-e PAGERDUTY_API_TOKEN="" \
+-e MIGRATE_USERS="false" \
+oncall-migrator
+
+# Step 2. Actually migrate your PagerDuty data, again ignoring users
+docker run --rm \
+-e MIGRATING_FROM="pagerduty" \
+-e MODE="migrate" \
+-e ONCALL_API_URL="" \
+-e ONCALL_API_TOKEN="" \
+-e PAGERDUTY_API_TOKEN="" \
+-e MIGRATE_USERS="false" \
+oncall-migrator
+
+# Step 3. Optional; import your users from PagerDuty into your Grafana stack using our provided script
+# For more information on our script, see "Migrating Users" section below for some more information on
+# how users are migrated.
+#
+# Alternatively this can be done with other Grafana IAM methods.
+# See Grafana's "Plan your IAM integration strategy" docs for more information on this.
+# https://grafana.com/docs/grafana/latest/setup-grafana/configure-security/planning-iam-strategy/
+docker run --rm \
+-e MIGRATING_FROM="pagerduty" \
+-e GRAFANA_URL="" \
+-e GRAFANA_USERNAME="" \
+-e GRAFANA_PASSWORD="" \
+-e PAGERDUTY_API_TOKEN="" \
+oncall-migrator python /app/add_users_to_grafana.py
+
+# Step 4: When ready, run a plan of what will be migrated, including users this time
+docker run --rm \
+-e MIGRATING_FROM="pagerduty" \
+-e MODE="plan" \
+-e ONCALL_API_URL="" \
+-e ONCALL_API_TOKEN="" \
+-e PAGERDUTY_API_TOKEN="" \
+oncall-migrator
+
+# Step 5: And finally, when ready, actually migrate your PagerDuty data, again including users
+docker run --rm \
+-e MIGRATING_FROM="pagerduty" \
+-e MODE="migrate" \
+-e ONCALL_API_URL="" \
+-e ONCALL_API_TOKEN="" \
+-e PAGERDUTY_API_TOKEN="" \
+oncall-migrator
+```
+
 ### Configuration
 
 Configuration is done via environment variables passed to the docker container.
@@ -165,6 +227,7 @@ Configuration is done via environment variables passed to the docker container.
 | `UNSUPPORTED_INTEGRATION_TO_WEBHOOKS` | When set to `true`, integrations with unsupported type will be migrated to Grafana OnCall integrations with type "webhook". When set to `false`, integrations with unsupported type won't be migrated. | Boolean | `false` |
 | `EXPERIMENTAL_MIGRATE_EVENT_RULES` | Migrate global event rulesets to Grafana OnCall integrations. | Boolean | `false` |
 | `EXPERIMENTAL_MIGRATE_EVENT_RULES_LONG_NAMES` | Include service & integrations names from PD in migrated integrations (only effective when `EXPERIMENTAL_MIGRATE_EVENT_RULES` is `true`). | Boolean | `false` |
+| `MIGRATE_USERS` | If `false`, will allow you to import all objects, while ignoring user references in schedules and escalation policies. In addition, if `false`, will also skip importing User notification rules. This may be helpful in cases where you are unable to import your list of Grafana users, but would like to experiment with OnCall using your existing PagerDuty setup as a starting point for experimentation. | Boolean | `true` |
 
 ### Resources
 
@@ -340,9 +403,9 @@ Grafana users via the Grafana HTTP API.
```bash docker run --rm \ -e MIGRATING_FROM="pagerduty" \ --e GRAFANA_URL="http://localhost:3000" \ --e GRAFANA_USERNAME="admin" \ --e GRAFANA_PASSWORD="admin" \ +-e GRAFANA_URL="" \ +-e GRAFANA_USERNAME="" \ +-e GRAFANA_PASSWORD="" \ -e PAGERDUTY_API_TOKEN="" \ oncall-migrator python /app/add_users_to_grafana.py ``` @@ -352,9 +415,9 @@ oncall-migrator python /app/add_users_to_grafana.py ```bash docker run --rm \ -e MIGRATING_FROM="splunk" \ --e GRAFANA_URL="http://localhost:3000" \ --e GRAFANA_USERNAME="admin" \ --e GRAFANA_PASSWORD="admin" \ +-e GRAFANA_URL="" \ +-e GRAFANA_USERNAME="" \ +-e GRAFANA_PASSWORD="" \ -e SPLUNK_API_ID="" \ -e SPLUNK_API_KEY="" \ oncall-migrator python /app/add_users_to_grafana.py diff --git a/tools/migrators/lib/pagerduty/config.py b/tools/migrators/lib/pagerduty/config.py index 5c117dbd19..eabd0c4ae3 100644 --- a/tools/migrators/lib/pagerduty/config.py +++ b/tools/migrators/lib/pagerduty/config.py @@ -36,3 +36,5 @@ UNSUPPORTED_INTEGRATION_TO_WEBHOOKS = ( os.getenv("UNSUPPORTED_INTEGRATION_TO_WEBHOOKS", "false").lower() == "true" ) + +MIGRATE_USERS = os.getenv("MIGRATE_USERS", "true").lower() == "true" diff --git a/tools/migrators/lib/pagerduty/migrate.py b/tools/migrators/lib/pagerduty/migrate.py index 6079dbff9f..950fa10466 100644 --- a/tools/migrators/lib/pagerduty/migrate.py +++ b/tools/migrators/lib/pagerduty/migrate.py @@ -7,6 +7,7 @@ from lib.oncall.api_client import OnCallAPIClient from lib.pagerduty.config import ( EXPERIMENTAL_MIGRATE_EVENT_RULES, + MIGRATE_USERS, MODE, MODE_PLAN, PAGERDUTY_API_TOKEN, @@ -46,8 +47,12 @@ def migrate() -> None: session = APISession(PAGERDUTY_API_TOKEN) session.timeout = 20 - print("▶ Fetching users...") - users = session.list_all("users", params={"include[]": "notification_rules"}) + if MIGRATE_USERS: + print("▶ Fetching users...") + users = session.list_all("users", params={"include[]": "notification_rules"}) + else: + print("▶ Skipping user migration as MIGRATE_USERS is false...") + users = 
[] oncall_users = OnCallAPIClient.list_users_with_notification_rules() @@ -97,8 +102,9 @@ def migrate() -> None: rules = session.list_all(f"rulesets/{ruleset['id']}/rules") ruleset["rules"] = rules - for user in users: - match_user(user, oncall_users) + if MIGRATE_USERS: + for user in users: + match_user(user, oncall_users) user_id_map = { u["id"]: u["oncall_user"]["id"] if u["oncall_user"] else None for u in users @@ -138,11 +144,14 @@ def migrate() -> None: return - print("▶ Migrating user notification rules...") - for user in users: - if user["oncall_user"]: - migrate_notification_rules(user) - print(TAB + format_user(user)) + if MIGRATE_USERS: + print("▶ Migrating user notification rules...") + for user in users: + if user["oncall_user"]: + migrate_notification_rules(user) + print(TAB + format_user(user)) + else: + print("▶ Skipping migrating user notification rules as MIGRATE_USERS is false...") print("▶ Migrating schedules...") for schedule in schedules: From 54ff63a25dfdb80ba0e3f428efea78a67053bf69 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Wed, 18 Dec 2024 17:27:36 -0500 Subject: [PATCH 4/6] chore: update pagerduty migrator docs --- tools/migrators/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/migrators/README.md b/tools/migrators/README.md index d5f2810c6a..841f80eb15 100644 --- a/tools/migrators/README.md +++ b/tools/migrators/README.md @@ -155,7 +155,14 @@ webhook integrations to adjust them for incoming payloads. This scenario may be relevant where you are unable to import your list of Grafana users, but would like to experiment with Grafana OnCall, using your existing PagerDuty setup as a starting point for experimentation. 
-If this is relevant to you, you can migrate as such: +If this is relevant to you, you can migrate as such 👇 + +> [!IMPORTANT] +> As outlined several times in the documentation below, if you first import data into Grafana OnCall without +> including users, make changes to that data within OnCall, and then later re-import the data with users, Grafana OnCall +> will delete and recreate those objects, as part of the subsequent migration. +> +> As a result, any modifications you made after the initial import will be lost. ```bash # Step 1: run a plan of what will be migrated, ignoring users for now From 2503eafdc6eb15250dd3546be98e9f006b184e40 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Thu, 19 Dec 2024 06:03:54 -0500 Subject: [PATCH 5/6] chore: add pagerduty migrator test + fix linting (#5378) --- tools/migrators/lib/pagerduty/migrate.py | 4 ++- .../lib/tests/pagerduty/test_migrate.py | 27 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 tools/migrators/lib/tests/pagerduty/test_migrate.py diff --git a/tools/migrators/lib/pagerduty/migrate.py b/tools/migrators/lib/pagerduty/migrate.py index 950fa10466..68d5652ec5 100644 --- a/tools/migrators/lib/pagerduty/migrate.py +++ b/tools/migrators/lib/pagerduty/migrate.py @@ -151,7 +151,9 @@ def migrate() -> None: migrate_notification_rules(user) print(TAB + format_user(user)) else: - print("▶ Skipping migrating user notification rules as MIGRATE_USERS is false...") + print( + "▶ Skipping migrating user notification rules as MIGRATE_USERS is false..." 
+ ) print("▶ Migrating schedules...") for schedule in schedules: diff --git a/tools/migrators/lib/tests/pagerduty/test_migrate.py b/tools/migrators/lib/tests/pagerduty/test_migrate.py new file mode 100644 index 0000000000..6a7b42eddc --- /dev/null +++ b/tools/migrators/lib/tests/pagerduty/test_migrate.py @@ -0,0 +1,27 @@ +from unittest.mock import call, patch + +from lib.pagerduty.migrate import migrate + + +@patch("lib.pagerduty.migrate.MIGRATE_USERS", False) +@patch("lib.pagerduty.migrate.APISession") +@patch("lib.pagerduty.migrate.OnCallAPIClient") +def test_users_are_skipped_when_migrate_users_is_false( + MockOnCallAPIClient, MockAPISession +): + mock_session = MockAPISession.return_value + mock_session.list_all.return_value = [] + mock_oncall_client = MockOnCallAPIClient.return_value + + migrate() + + # Assert no user-related fetching or migration occurs + assert mock_session.list_all.call_args_list == [ + call("schedules", params={"include[]": "schedule_layers", "time_zone": "UTC"}), + call("escalation_policies"), + call("services", params={"include[]": "integrations"}), + call("vendors"), + # no user notification rules fetching + ] + + mock_oncall_client.list_users_with_notification_rules.assert_not_called() From cc63ec314159fda82e3d8a33fe2f5fd1e2276c54 Mon Sep 17 00:00:00 2001 From: Matias Bordese Date: Thu, 19 Dec 2024 08:17:08 -0300 Subject: [PATCH 6/6] fix: return a throttled response if org is being synced for the first time during auth (#5374) Related to https://github.com/grafana/oncall-private/issues/2826 When Terraform triggers multiple requests and org needs to be synced in OnCall, the first request will wait for sync to complete but others will get an immediate response, before a 403, with these changes a 429 indicating to retry (Terraform [client](https://github.com/grafana/amixr-api-go-client/blob/main/client.go#L310) will handle the response and perform a retry). 
--- engine/apps/auth_token/auth.py | 3 +++ engine/apps/auth_token/tests/test_grafana_auth.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/engine/apps/auth_token/auth.py b/engine/apps/auth_token/auth.py index 36ea8d82c2..f96e5ef6f2 100644 --- a/engine/apps/auth_token/auth.py +++ b/engine/apps/auth_token/auth.py @@ -381,6 +381,9 @@ def get_organization(self, request, auth): # if organization exists, we are good) setup_organization(url, auth) organization = Organization.objects.filter(grafana_url=url).first() + if organization is None: + # sync may still be in progress, client should retry + raise exceptions.Throttled(detail="Organization being synced, please retry.") return organization if settings.LICENSE == settings.CLOUD_LICENSE_NAME: diff --git a/engine/apps/auth_token/tests/test_grafana_auth.py b/engine/apps/auth_token/tests/test_grafana_auth.py index 8da611a0fb..2283164542 100644 --- a/engine/apps/auth_token/tests/test_grafana_auth.py +++ b/engine/apps/auth_token/tests/test_grafana_auth.py @@ -110,9 +110,9 @@ def test_grafana_authentication_no_org_grafana_url(): request_sync_url = f"{grafana_url}/api/plugins/{PluginID.ONCALL}/resources/plugin/sync?wait=true&force=true" httpretty.register_uri(httpretty.POST, request_sync_url, status=404) - with pytest.raises(exceptions.AuthenticationFailed) as exc: + with pytest.raises(exceptions.Throttled) as exc: GrafanaServiceAccountAuthentication().authenticate(request) - assert exc.value.detail == "Organization not found." + assert exc.value.detail == "Organization being synced, please retry." @pytest.mark.parametrize("grafana_url", ["null;", "foo", ""])