Skip to content

Commit

Permalink
Clean up reverted migration (#5119)
Browse files Browse the repository at this point in the history
Related to #5116

Simplifies the db migration by removing the `DeclaredIncident` model + FK
setup while keeping the other changes (adding the `severity` field for
escalation policy, and the "Declare incident" step, which is disabled).
This way, on deployments where the original migration was already run, this
one won't be applied and they will stay in sync with the migration status
(a manual step may eventually be needed to remove the table and FK,
which won't be used for now).
  • Loading branch information
matiasb authored Oct 3, 2024
1 parent 62ab3f1 commit 4d9846e
Show file tree
Hide file tree
Showing 9 changed files with 57 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class Meta:
"to_time",
"num_alerts_in_window",
"num_minutes_in_window",
"severity",
"custom_webhook",
"notify_schedule",
"notify_to_group",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class EscalationPolicySnapshot:
"notify_schedule",
"notify_to_group",
"notify_to_team_members",
"severity",
"escalation_counter",
"passed_last_time",
"pause_escalation",
Expand Down Expand Up @@ -71,6 +72,7 @@ def __init__(
passed_last_time,
pause_escalation,
notify_to_team_members=None,
severity=None,
):
self.id = id
self.order = order
Expand All @@ -86,6 +88,7 @@ def __init__(
self.notify_schedule = notify_schedule
self.notify_to_group = notify_to_group
self.notify_to_team_members = notify_to_team_members
self.severity = severity
self.escalation_counter = escalation_counter # used for STEP_REPEAT_ESCALATION_N_TIMES
self.passed_last_time = passed_last_time # used for building escalation plan
self.pause_escalation = pause_escalation # used for STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 4.2.15 on 2024-09-25 20:57

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Add ``severity`` to ``escalationpolicy`` and register the "Declare Incident" step.

    Two schema operations are applied, in order:

    1. ``AddField``: a nullable ``severity`` ``CharField`` (max length 512,
       default ``None``) on ``escalationpolicy``.
    2. ``AlterField``: the ``step`` ``IntegerField`` choices are redefined to
       include value ``19`` ("Declare Incident").
    """

    # Migrations that must be applied before this one.
    dependencies = [
        ('user_management', '0022_alter_team_unique_together'),
        ('alerts', '0058_alter_alertgroup_reason_to_skip_escalation'),
    ]

    operations = [
        # Severity column for the escalation policy; nullable, defaults to None.
        migrations.AddField(
            model_name='escalationpolicy',
            name='severity',
            field=models.CharField(default=None, max_length=512, null=True),
        ),
        # Full list of step choices, now ending with 19 = 'Declare Incident'.
        migrations.AlterField(
            model_name='escalationpolicy',
            name='step',
            field=models.IntegerField(
                choices=[
                    (0, 'Wait'),
                    (1, 'Notify User'),
                    (2, 'Notify Whole Channel'),
                    (3, 'Repeat Escalation (5 times max)'),
                    (4, 'Resolve'),
                    (5, 'Notify Group'),
                    (6, 'Notify Schedule'),
                    (7, 'Notify User (Important)'),
                    (8, 'Notify Group (Important)'),
                    (9, 'Notify Schedule (Important)'),
                    (10, 'Trigger Outgoing Webhook'),
                    (11, 'Notify User (next each time)'),
                    (12, 'Continue escalation only if time is from'),
                    (13, 'Notify multiple Users'),
                    (14, 'Notify multiple Users (Important)'),
                    (15, 'Continue escalation if >X alerts per Y minutes'),
                    (16, 'Trigger Webhook'),
                    (17, 'Notify all users in a Team'),
                    (18, 'Notify all users in a Team (Important)'),
                    (19, 'Declare Incident'),
                ],
                default=None,
                null=True,
            ),
        ),
    ]
14 changes: 13 additions & 1 deletion engine/apps/alerts/models/escalation_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ class EscalationPolicy(OrderedModel):
STEP_TRIGGER_CUSTOM_WEBHOOK,
STEP_NOTIFY_TEAM_MEMBERS,
STEP_NOTIFY_TEAM_MEMBERS_IMPORTANT,
) = range(19)
STEP_DECLARE_INCIDENT,
) = range(20)

# Must be the same order as previous
STEP_CHOICES = (
Expand All @@ -70,6 +71,7 @@ class EscalationPolicy(OrderedModel):
(STEP_TRIGGER_CUSTOM_WEBHOOK, "Trigger Webhook"),
(STEP_NOTIFY_TEAM_MEMBERS, "Notify all users in a Team"),
(STEP_NOTIFY_TEAM_MEMBERS_IMPORTANT, "Notify all users in a Team (Important)"),
(STEP_DECLARE_INCIDENT, "Declare Incident"),
)

# Ordered step choices available for internal api.
Expand Down Expand Up @@ -110,6 +112,7 @@ class EscalationPolicy(OrderedModel):
STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT,
STEP_TRIGGER_CUSTOM_WEBHOOK,
STEP_REPEAT_ESCALATION_N_TIMES,
STEP_DECLARE_INCIDENT,
]

# Maps internal api's steps choices to their verbal. First string in tuple is display name for existent step.
Expand Down Expand Up @@ -151,6 +154,10 @@ class EscalationPolicy(OrderedModel):
"Repeat escalation from the beginning (5 times max)",
"Repeat escalations from the beginning (5 times max)",
),
STEP_DECLARE_INCIDENT: (
"Declare Incident with severity {{severity}} (non-default routes only)",
"Declare Incident (non-default routes only)",
),
}

STEPS_WITH_NO_IMPORTANT_VERSION_SET = {
Expand All @@ -161,6 +168,7 @@ class EscalationPolicy(OrderedModel):
STEP_NOTIFY_USERS_QUEUE,
STEP_NOTIFY_IF_TIME,
STEP_REPEAT_ESCALATION_N_TIMES,
STEP_DECLARE_INCIDENT,
}

DEFAULT_TO_IMPORTANT_STEP_MAPPING = {
Expand Down Expand Up @@ -291,6 +299,10 @@ class EscalationPolicy(OrderedModel):
null=True,
)

# Incident severity for declare incident step
SEVERITY_SET_FROM_LABEL, SEVERITY_SET_FROM_LABEL_DISPLAY_VALUE = ("set-from-label", "from 'severity' label")
severity = models.CharField(max_length=512, null=True, default=None)

ONE_MINUTE = datetime.timedelta(minutes=1)
FIVE_MINUTES = datetime.timedelta(minutes=5)
FIFTEEN_MINUTES = datetime.timedelta(minutes=15)
Expand Down
3 changes: 3 additions & 0 deletions engine/apps/alerts/tests/test_escalation_snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def test_raw_escalation_snapshot(escalation_snapshot_test_setup):
"notify_schedule": None,
"notify_to_group": None,
"notify_to_team_members": None,
"severity": None,
"from_time": None,
"to_time": None,
"num_alerts_in_window": None,
Expand All @@ -60,6 +61,7 @@ def test_raw_escalation_snapshot(escalation_snapshot_test_setup):
"notify_schedule": None,
"notify_to_group": None,
"notify_to_team_members": None,
"severity": None,
"from_time": None,
"to_time": None,
"num_alerts_in_window": None,
Expand All @@ -79,6 +81,7 @@ def test_raw_escalation_snapshot(escalation_snapshot_test_setup):
"notify_schedule": None,
"notify_to_group": None,
"notify_to_team_members": None,
"severity": None,
"from_time": notify_if_time_step.from_time.isoformat(),
"to_time": notify_if_time_step.to_time.isoformat(),
"num_alerts_in_window": None,
Expand Down
4 changes: 4 additions & 0 deletions engine/apps/api/tests/test_escalation_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,10 @@ def test_create_escalation_policy_with_no_important_version(
_, token = make_token_for_organization(organization)
escalation_chain = make_escalation_chain(organization)

if step == EscalationPolicy.STEP_DECLARE_INCIDENT:
# declare incident step is disabled
return

client = APIClient()
data_for_creation = {
"escalation_chain": escalation_chain.public_primary_key,
Expand Down
4 changes: 4 additions & 0 deletions engine/apps/api/views/escalation_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ def escalation_options(self, request):
slack_integration_required = step in EscalationPolicy.SLACK_INTEGRATION_REQUIRED_STEPS
if slack_integration_required and not settings.FEATURE_SLACK_INTEGRATION_ENABLED:
continue

if step == EscalationPolicy.STEP_DECLARE_INCIDENT:
continue

choices.append(
{
"value": step,
Expand Down
9 changes: 3 additions & 6 deletions engine/common/incident_api/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __str__(self):


TIMEOUT = 5
DEFAULT_INCIDENT_SEVERITY = "pending"
DEFAULT_INCIDENT_SEVERITY = "Pending"
DEFAULT_INCIDENT_STATUS = "active"
DEFAULT_ACTIVITY_KIND = "userNote"

Expand All @@ -92,18 +92,15 @@ def _request_headers(self):
return {"User-Agent": settings.GRAFANA_COM_USER_AGENT, "Authorization": f"Bearer {self.api_token}"}

def _check_response(self, response: requests.models.Response):
message = None
message = ""

if 400 <= response.status_code < 500:
if response.status_code >= 400:
try:
error_data = response.json()
message = error_data.get("error", response.reason)
except JSONDecodeError:
message = response.reason
elif 500 <= response.status_code < 600:
message = response.reason

if message:
raise IncidentAPIException(
status=response.status_code,
url=response.request.url,
Expand Down
5 changes: 1 addition & 4 deletions engine/common/incident_api/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,6 @@ def test_error_handling(endpoint, client_method_name, args):
client_method = getattr(client, client_method_name)
client_method(*args)
assert excinfo.value.status == error_code
expected_error = (
response_data["error"] if error_code == status.HTTP_400_BAD_REQUEST else "Internal Server Error"
)
assert excinfo.value.msg == expected_error
assert excinfo.value.msg == response_data["error"]
assert excinfo.value.url == url
assert excinfo.value.method == "POST"

0 comments on commit 4d9846e

Please sign in to comment.