From c36761e345bd5351b4f1b224e47f4384e073f87b Mon Sep 17 00:00:00 2001 From: Vadim Stepanov Date: Wed, 18 Dec 2024 16:35:44 +0000 Subject: [PATCH] Inbound email: download from S3 + convert HTML to plaintext (#5348) # What this PR does * Make `AmazonSESValidatedInboundWebhookView` able to download emails from S3 by providing AWS credentials via env variables * Convert HTML to plaintext when there's only `text/html` available ## Which issue(s) this PR closes Related to https://github.com/grafana/oncall-private/issues/2905 ## Checklist - [x] Unit, integration, and e2e (if applicable) tests updated - [x] Documentation added (or `pr:no public docs` PR label added if not required) - [x] Added the relevant release notes label (see labels prefixed w/ `release:`). These labels dictate how your PR will show up in the autogenerated release notes. --- engine/apps/email/inbound.py | 129 ++++++++++++++-- engine/apps/email/tests/test_inbound_email.py | 144 +++++++++++++++--- engine/settings/base.py | 3 + 3 files changed, 245 insertions(+), 31 deletions(-) diff --git a/engine/apps/email/inbound.py b/engine/apps/email/inbound.py index 6456d9fb2..e3863f49c 100644 --- a/engine/apps/email/inbound.py +++ b/engine/apps/email/inbound.py @@ -7,6 +7,8 @@ from anymail.inbound import AnymailInboundMessage from anymail.signals import AnymailInboundEvent from anymail.webhooks import amazon_ses, mailgun, mailjet, mandrill, postal, postmark, sendgrid, sparkpost +from bs4 import BeautifulSoup +from django.conf import settings from django.http import HttpResponse, HttpResponseNotAllowed from django.utils import timezone from rest_framework import status @@ -25,6 +27,15 @@ class AmazonSESValidatedInboundWebhookView(amazon_ses.AmazonSESInboundWebhookVie # disable "Your Anymail webhooks are insecure and open to anyone on the web." 
warning warn_if_no_basic_auth = False + def __init__(self): + super().__init__( + session_params={ + "aws_access_key_id": settings.INBOUND_EMAIL_AWS_ACCESS_KEY_ID, + "aws_secret_access_key": settings.INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY, + "region_name": settings.INBOUND_EMAIL_AWS_REGION, + }, + ) + def validate_request(self, request): """Add SNS message validation to Amazon SES inbound webhook view, which is not implemented in Anymail.""" if not validate_amazon_sns_message(self._parse_sns_message(request)): @@ -74,11 +85,10 @@ def dispatch(self, request): if request.method.lower() == "head": return HttpResponse(status=status.HTTP_200_OK) - integration_token = self.get_integration_token_from_request(request) - if integration_token is None: + if self.integration_token is None: return HttpResponse(status=status.HTTP_400_BAD_REQUEST) - request.inbound_email_integration_token = integration_token # used in RequestTimeLoggingMiddleware - return super().dispatch(request, alert_channel_key=integration_token) + request.inbound_email_integration_token = self.integration_token # used in RequestTimeLoggingMiddleware + return super().dispatch(request, alert_channel_key=self.integration_token) def post(self, request): payload = self.get_alert_payload_from_email_message(self.message) @@ -94,7 +104,8 @@ def post(self, request): ) return Response("OK", status=status.HTTP_200_OK) - def get_integration_token_from_request(self, request) -> Optional[str]: + @cached_property + def integration_token(self) -> Optional[str]: if not self.message: return None # First try envelope_recipient field. @@ -151,7 +162,8 @@ def message(self) -> AnymailInboundMessage | None: logger.error("Failed to parse inbound email message") return None - def check_inbound_email_settings_set(self): + @staticmethod + def check_inbound_email_settings_set(): """ Guard method to checks if INBOUND_EMAIL settings present. Returns InternalServerError if not. 
@@ -167,16 +179,105 @@ def check_inbound_email_settings_set(self): logger.error("InboundEmailWebhookView: INBOUND_EMAIL_DOMAIN env variable must be set.") return HttpResponse(status=status.HTTP_500_INTERNAL_SERVER_ERROR) - def get_alert_payload_from_email_message(self, email: AnymailInboundMessage) -> EmailAlertPayload: - subject = email.subject or "" - subject = subject.strip() - message = email.text or "" - message = message.strip() - sender = self.get_sender_from_email_message(email) + @classmethod + def get_alert_payload_from_email_message(cls, email: AnymailInboundMessage) -> EmailAlertPayload: + if email.text: + message = email.text.strip() + elif email.html: + message = cls.html_to_plaintext(email.html) + else: + message = "" + + return { + "subject": email.subject.strip() if email.subject else "", + "message": message, + "sender": cls.get_sender_from_email_message(email), + } + + @staticmethod + def html_to_plaintext(html: str) -> str: + """ + Converts HTML to plain text. Renders links as "text (href)" and removes any empty lines. + Converting HTML to plaintext is a non-trivial task, so this method may not work perfectly for all cases. + """ + soup = BeautifulSoup(html, "html.parser") + + # Browsers typically render these elements on their own line. 
+ # There is no single official HTML5 list for this, so we go with HTML tags that render as + # display: block, display: list-item, display: table, display: table-row by default according to the HTML standard: + # https://html.spec.whatwg.org/multipage/rendering.html + newline_tags = [ + "address", + "article", + "aside", + "blockquote", + "body", + "center", + "dd", + "details", + "dialog", + "dir", + "div", + "dl", + "dt", + "fieldset", + "figcaption", + "figure", + "footer", + "form", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "header", + "hgroup", + "hr", + "html", + "legend", + "li", + "listing", + "main", + "menu", + "nav", + "ol", + "p", + "plaintext", + "pre", + "search", + "section", + "summary", + "table", + "tr", + "ul", + "xmp", + ] + # Insert a newline after each block-level element + for tag in soup.find_all(newline_tags): + tag.insert_before("\n") + tag.insert_after("\n") + + #
<br> tags are also typically rendered as newlines + for br in soup.find_all("br"): + br.replace_with("\n") + + # example: "<a href="https://example.com">example</a>" -> "example (https://example.com)" + for a in soup.find_all("a"): + if href := a.get("href"): + a.append(f" ({href})") + + for li in soup.find_all("li"): + li.insert_before("* ") + + for hr in soup.find_all("hr"): + hr.replace_with("-" * 32) - return {"subject": subject, "message": message, "sender": sender} + # remove empty lines + return "\n".join(line.strip() for line in soup.get_text().splitlines() if line.strip()) - def get_sender_from_email_message(self, email: AnymailInboundMessage) -> str: + @staticmethod + def get_sender_from_email_message(email: AnymailInboundMessage) -> str: try: if isinstance(email.from_email, list): sender = email.from_email[0].addr_spec diff --git a/engine/apps/email/tests/test_inbound_email.py b/engine/apps/email/tests/test_inbound_email.py index 808fbdfac..d0b6ce929 100644 --- a/engine/apps/email/tests/test_inbound_email.py +++ b/engine/apps/email/tests/test_inbound_email.py @@ -6,6 +6,7 @@ from textwrap import dedent from unittest.mock import ANY, Mock, patch +import httpretty import pytest from anymail.inbound import AnymailInboundMessage from cryptography import x509 @@ -54,13 +55,14 @@ MESSAGE = "This is a test email message body."
-def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, message): +def _sns_inbound_email_setup(sender_email, to_email, subject, message, content_type="text/plain", s3=False): content = ( f"From: Sender Name <{sender_email}>\n" f"To: {to_email}\n" f"Subject: {subject}\n" "Date: Tue, 5 Nov 2024 16:05:39 +0000\n" - "Message-ID: \n\n" + "Message-ID: \n" + f"Content-Type: {content_type}\n\n" f"{message}\r\n" ) @@ -130,7 +132,7 @@ def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, mess {"name": "To", "value": to_email}, { "name": "Content-Type", - "value": 'multipart/alternative; boundary="00000000000036b9f706262c9312"', + "value": f"{content_type}", }, ], "commonHeaders": { @@ -152,12 +154,12 @@ def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, mess "dkimVerdict": {"status": "PASS"}, "dmarcVerdict": {"status": "PASS"}, "action": { - "type": "SNS", + "type": "S3" if s3 else "SNS", "topicArn": "arn:aws:sns:us-east-2:123456789012:test", - "encoding": "BASE64", + **({"bucketName": "test-s3-bucket", "objectKey": "test-object-key"} if s3 else {"encoding": "BASE64"}), }, }, - "content": b64encode(content.encode()).decode(), + **({} if s3 else {"content": b64encode(content.encode()).decode()}), } payload = { @@ -189,7 +191,7 @@ def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, mess "X-Amz-Sns-Message-Type": "Notification", "X-Amz-Sns-Message-Id": "example-message-id-1234", } - return payload, headers + return payload, headers, content def _mailgun_inbound_email_payload(sender_email, to_email, subject, message): @@ -444,7 +446,7 @@ def test_amazon_ses_pass(create_alert_mock, settings, make_organization, make_al token="test-token", ) - sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( + sns_payload, sns_headers, _ = _sns_inbound_email_setup( sender_email=SENDER_EMAIL, to_email=TO_EMAIL, subject=SUBJECT, @@ -476,16 +478,17 @@ def 
test_amazon_ses_pass(create_alert_mock, settings, make_organization, make_al ) -@patch("requests.get", return_value=Mock(content=CERTIFICATE)) @patch.object(create_alert, "delay") +@httpretty.activate(verbose=True, allow_net_connect=True) @pytest.mark.django_db -def test_amazon_ses_validated_pass( - mock_create_alert, mock_requests_get, settings, make_organization, make_alert_receive_channel -): +def test_amazon_ses_validated_s3_pass(mock_create_alert, settings, make_organization, make_alert_receive_channel): settings.INBOUND_EMAIL_ESP = "amazon_ses_validated,mailgun" settings.INBOUND_EMAIL_DOMAIN = "inbound.example.com" settings.INBOUND_EMAIL_WEBHOOK_SECRET = "secret" settings.INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = AMAZON_SNS_TOPIC_ARN + settings.INBOUND_EMAIL_AWS_ACCESS_KEY_ID = "test-access-key-id" + settings.INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY = "test-secret-access-key" + settings.INBOUND_EMAIL_AWS_REGION = "us-east-2" organization = make_organization() alert_receive_channel = make_alert_receive_channel( @@ -494,11 +497,24 @@ def test_amazon_ses_validated_pass( token="test-token", ) - sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( + sns_payload, sns_headers, content = _sns_inbound_email_setup( sender_email=SENDER_EMAIL, to_email=TO_EMAIL, subject=SUBJECT, message=MESSAGE, + s3=True, + ) + + httpretty.register_uri(httpretty.GET, SIGNING_CERT_URL, body=CERTIFICATE) + httpretty.register_uri( + httpretty.HEAD, + "https://test-s3-bucket.s3.us-east-2.amazonaws.com/test-object-key", + responses=[httpretty.Response(body="")], + ) + httpretty.register_uri( + httpretty.GET, + "https://test-s3-bucket.s3.us-east-2.amazonaws.com/test-object-key", + responses=[httpretty.Response(body=content)], ) client = APIClient() @@ -525,6 +541,100 @@ def test_amazon_ses_validated_pass( received_at=ANY, ) + assert len(httpretty.latest_requests()) == 3 + assert (httpretty.latest_requests()[0].method, httpretty.latest_requests()[0].path) == ( + "GET", + 
"/SimpleNotificationService-example.pem", + ) + assert (httpretty.latest_requests()[1].method, httpretty.latest_requests()[1].path) == ("HEAD", "/test-object-key") + assert (httpretty.latest_requests()[2].method, httpretty.latest_requests()[2].path) == ("GET", "/test-object-key") + + +@patch("requests.get", return_value=Mock(content=CERTIFICATE)) +@patch.object(create_alert, "delay") +@pytest.mark.django_db +def test_amazon_ses_validated_pass_html( + mock_create_alert, mock_requests_get, settings, make_organization, make_alert_receive_channel +): + settings.INBOUND_EMAIL_ESP = "amazon_ses_validated,mailgun" + settings.INBOUND_EMAIL_DOMAIN = "inbound.example.com" + settings.INBOUND_EMAIL_WEBHOOK_SECRET = "secret" + settings.INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = AMAZON_SNS_TOPIC_ARN + + organization = make_organization() + alert_receive_channel = make_alert_receive_channel( + organization, + integration=AlertReceiveChannel.INTEGRATION_INBOUND_EMAIL, + token="test-token", + ) + + html_message = """\ + + title + +
+

h1

+


+

pbi span

new line


+ link +
    +
  • li1
  • +
  • li2
  • +
+ + + + + +
td1td2
+
+ + + """ + plaintext_message = ( + "title\n" + "h1\n" + "pbi span\n" + "new line\n" + "--------------------------------\n" + "link (https://example.com)\n" + "* li1\n" + "* li2\n" + "td1\n" + "td2" + ) + sns_payload, sns_headers, _ = _sns_inbound_email_setup( + sender_email=SENDER_EMAIL, + to_email=TO_EMAIL, + subject=SUBJECT, + message=html_message, + content_type="text/html", + ) + + client = APIClient() + response = client.post( + reverse("integrations:inbound_email_webhook"), + data=sns_payload, + headers=sns_headers, + format="json", + ) + + assert response.status_code == status.HTTP_200_OK + mock_create_alert.assert_called_once_with( + title=SUBJECT, + message=plaintext_message, + alert_receive_channel_pk=alert_receive_channel.pk, + image_url=None, + link_to_upstream_details=None, + integration_unique_data=None, + raw_request_data={ + "subject": SUBJECT, + "message": plaintext_message, + "sender": SENDER_EMAIL, + }, + received_at=ANY, + ) + mock_requests_get.assert_called_once_with(SIGNING_CERT_URL, timeout=5) @@ -546,7 +656,7 @@ def test_amazon_ses_validated_fail_wrong_sns_topic_arn( token="test-token", ) - sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( + sns_payload, sns_headers, _ = _sns_inbound_email_setup( sender_email=SENDER_EMAIL, to_email=TO_EMAIL, subject=SUBJECT, @@ -584,7 +694,7 @@ def test_amazon_ses_validated_fail_wrong_signature( token="test-token", ) - sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( + sns_payload, sns_headers, _ = _sns_inbound_email_setup( sender_email=SENDER_EMAIL, to_email=TO_EMAIL, subject=SUBJECT, @@ -622,7 +732,7 @@ def test_amazon_ses_validated_fail_cant_download_certificate( token="test-token", ) - sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( + sns_payload, sns_headers, _ = _sns_inbound_email_setup( sender_email=SENDER_EMAIL, to_email=TO_EMAIL, subject=SUBJECT, @@ -656,7 +766,7 @@ def test_amazon_ses_validated_caches_certificate( token="test-token", ) - 
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( + sns_payload, sns_headers, _ = _sns_inbound_email_setup( sender_email=SENDER_EMAIL, to_email=TO_EMAIL, subject=SUBJECT, diff --git a/engine/settings/base.py b/engine/settings/base.py index 0f73c8d5a..007779f19 100644 --- a/engine/settings/base.py +++ b/engine/settings/base.py @@ -868,6 +868,9 @@ class BrokerTypes: INBOUND_EMAIL_DOMAIN = os.getenv("INBOUND_EMAIL_DOMAIN") INBOUND_EMAIL_WEBHOOK_SECRET = os.getenv("INBOUND_EMAIL_WEBHOOK_SECRET") INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = os.getenv("INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN") +INBOUND_EMAIL_AWS_ACCESS_KEY_ID = os.getenv("INBOUND_EMAIL_AWS_ACCESS_KEY_ID") +INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY = os.getenv("INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY") +INBOUND_EMAIL_AWS_REGION = os.getenv("INBOUND_EMAIL_AWS_REGION") INSTALLED_ONCALL_INTEGRATIONS = [ # Featured