From 6db73848dfe6fc130cc1d39c400e239b5cc68510 Mon Sep 17 00:00:00 2001 From: James Biggs <62654785+jamesbiggs@users.noreply.github.com> Date: Tue, 9 Apr 2024 09:58:18 +0100 Subject: [PATCH 1/8] EDEV-102: Increase Platform.sh DB size (#1603) --- .platform/services.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.platform/services.yaml b/.platform/services.yaml index b0471fe69..5bace5561 100644 --- a/.platform/services.yaml +++ b/.platform/services.yaml @@ -1,6 +1,6 @@ db: type: postgresql:12 - disk: 256 + disk: 512 redis: type: redis:6.0 From db03529e382d51aa4f91373cee0b16a3a53fc87a Mon Sep 17 00:00:00 2001 From: Andrew Hosgood Date: Wed, 10 Apr 2024 11:01:31 +0100 Subject: [PATCH 2/8] Don't delete untagged images --- .github/workflows/remove-untagged.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/remove-untagged.yml b/.github/workflows/remove-untagged.yml index 1d3332f03..afcd500ba 100644 --- a/.github/workflows/remove-untagged.yml +++ b/.github/workflows/remove-untagged.yml @@ -2,8 +2,8 @@ name: Remove untagged container images on: workflow_dispatch: - schedule: - - cron: "0 3 * * 1" + # schedule: + # - cron: "0 3 * * 1" jobs: remove-untagged: @@ -23,7 +23,7 @@ jobs: if (version.metadata.container.tags.length == 0 && version.name !== "latest" && version.name !== "preview") { console.log("Delete " + version.id) - const deleteResponse = await github.request("DELETE /orgs/${{ github.repository_owner }}/packages/container/${{ vars.DOCKER_IMAGE_NAME }}/versions/" + version.id, { }); - console.log("status " + deleteResponse.status) + // const deleteResponse = await github.request("DELETE /orgs/${{ github.repository_owner }}/packages/container/${{ vars.DOCKER_IMAGE_NAME }}/versions/" + version.id, { }); + // console.log("status " + deleteResponse.status) } } \ No newline at end of file From 97bd225ff66ca2eba3fde813d800876f36c09ab3 Mon Sep 17 00:00:00 2001 From: TNA-Allan 
<96120886+TNA-Allan@users.noreply.github.com> Date: Wed, 10 Apr 2024 15:24:02 +0100 Subject: [PATCH 3/8] Feature/EDEV-98: updates for strip html (#1604) --- etna/ciim/tests/test_utils.py | 43 +++++++++++++++++++++++++++++++++++ etna/ciim/utils.py | 39 +++++++++++++++++++++++-------- 2 files changed, 72 insertions(+), 10 deletions(-) diff --git a/etna/ciim/tests/test_utils.py b/etna/ciim/tests/test_utils.py index 6ed7251cc..66e5de33b 100644 --- a/etna/ciim/tests/test_utils.py +++ b/etna/ciim/tests/test_utils.py @@ -10,6 +10,7 @@ find_all, format_description_markup, pluck, + strip_html, ) @@ -370,3 +371,45 @@ def test_index_is_zero_for_non_int_sort_key(self): index = convert_sort_key_to_index(sort) self.assertEqual(index, 0) + + +class TestStripHtml(SimpleTestCase): + + def test_ensure_spaces_preserve_marks(self): + + test_data = ( + ( + "test for span tag", + "This is atest example", + "This is a test example", + ), + ( + "test for p tag", + "This is a

test example

", + "This is a test example", + ), + ( + "test for unknown tag", + "This is atest example", + "This is atest example", + ), + ( + "D7376859", + 'PatmanClifford DouglasArmament Quarter Master Serjeant1865334Royal Army Ordnance Corps, 8 Hussars now Royal Electrical and Mechanical EngineersEscape and EvasionMentions in Despatches', + "Patman Clifford Douglas Armament Quarter Master Serjeant 1865334 Royal Army Ordnance Corps, 8 Hussars now Royal Electrical and Mechanical Engineers Escape and Evasion Mentions in Despatches", + ), + ) + + for label, value, expected in test_data: + with self.subTest(label): + result = strip_html(value, preserve_marks=True, ensure_spaces=True) + self.assertEqual(result, expected) + + def test_allow_tags(self): + value = """this is a test""" + expected = ( + """this is a test""" + ) + allow_tags = {"a", "br", "p"} + result = strip_html(value, allow_tags=allow_tags) + self.assertEqual(result, expected) diff --git a/etna/ciim/utils.py b/etna/ciim/utils.py index 017e2881b..8458799aa 100644 --- a/etna/ciim/utils.py +++ b/etna/ciim/utils.py @@ -257,20 +257,39 @@ def format_link(link_html: str) -> Dict[str, str]: return {"href": href, "id": id, "text": document.text()} -def strip_html(value: str, *, preserve_marks, ensure_spaces): +def strip_html( + value: str, + *, + preserve_marks: bool = False, + ensure_spaces: bool = False, + allow_tags: Optional[set] = None, +) -> str: """ Temporary HTML sanitiser to remove unwanted tags from data. - K-int will eventually sanitise this at API level. - preserve_marks=True will keep tags in the output, otherwise they are removed. - - Replacing and

tags is necessary to prevent "bunched" data, - "This is atestexample" will return as "This is atestexample" - without the placement of the space. + TODO: this will eventually be sanitised at API level. + + value: + the value to be sanitised + preserve_marks: + allow pre-defined tags for styling + ensure_spaces: + allow pre-defined tags and replaces them with whitespace + allow_tags: + sets the tags that are allowed """ clean_tags = {"span", "p"} if ensure_spaces else set() - clean_html = nh3.clean( - value, tags={*clean_tags, "mark"} if preserve_marks else clean_tags - ) + + if allow_tags is None: + allow_tags = set() + + tags = set() + if preserve_marks: + tags.add("mark") + tags.update(clean_tags) + tags.update(allow_tags) + + clean_html = nh3.clean(value, tags=tags) + for tag in clean_tags: opening_regex = rf"<{tag}[^>]*>" closing_regex = rf"</{tag}>" From 34fddbacf1962ae597dbe496abbf9b70557de58b Mon Sep 17 00:00:00 2001 From: jamesbiggs Date: Thu, 11 Apr 2024 14:16:14 +0100 Subject: [PATCH 4/8] Added CSRF_TRUSTED_ORIGINS for AWS CSRF validation --- config/settings/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/config/settings/base.py b/config/settings/base.py index 477a87b07..4d4d3e05f 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -290,6 +290,8 @@ "WAGTAILADMIN_BASE_URL", "https://nationalarchives.gov.uk" ) +CSRF_TRUSTED_ORIGINS = [WAGTAILADMIN_BASE_URL] + # For search results within Wagtail itself WAGTAILSEARCH_BACKENDS = { "default": { @@ -432,4 +434,4 @@ ) FEATURE_DISABLE_JS_WHATS_ON_LISTING = strtobool( os.getenv("FEATURE_DISABLE_JS_WHATS_ON_LISTING", "False") -) +) \ No newline at end of file From 761c1814d17e0439e4cfeadae68a6e8915179715 Mon Sep 17 00:00:00 2001 From: jamesbiggs Date: Thu, 11 Apr 2024 14:18:57 +0100 Subject: [PATCH 5/8] base.py blankline... 
--- config/settings/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/settings/base.py b/config/settings/base.py index 4d4d3e05f..e162bf35e 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -434,4 +434,4 @@ ) FEATURE_DISABLE_JS_WHATS_ON_LISTING = strtobool( os.getenv("FEATURE_DISABLE_JS_WHATS_ON_LISTING", "False") -) \ No newline at end of file +) From 1d6f67b701cfbfc7074918203a9c1bb23e453802 Mon Sep 17 00:00:00 2001 From: jamesbiggs Date: Thu, 11 Apr 2024 14:28:10 +0100 Subject: [PATCH 6/8] getenv CSRF_TRUSTED_ORIGIN --- config/settings/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/settings/base.py b/config/settings/base.py index e162bf35e..68bd71fc8 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -290,7 +290,7 @@ "WAGTAILADMIN_BASE_URL", "https://nationalarchives.gov.uk" ) -CSRF_TRUSTED_ORIGINS = [WAGTAILADMIN_BASE_URL] +CSRF_TRUSTED_ORIGINS = [os.getenv("CSRF_TRUSTED_ORIGIN", "nationalarchives.gov.uk")] # For search results within Wagtail itself WAGTAILSEARCH_BACKENDS = { From 8439f3af41963ef0d20d8e26fb6efc3c0a230149 Mon Sep 17 00:00:00 2001 From: jamesbiggs Date: Thu, 11 Apr 2024 14:37:09 +0100 Subject: [PATCH 7/8] Formatting CSRF getenv --- config/settings/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/config/settings/base.py b/config/settings/base.py index 68bd71fc8..95d4ac20d 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -290,7 +290,9 @@ "WAGTAILADMIN_BASE_URL", "https://nationalarchives.gov.uk" ) -CSRF_TRUSTED_ORIGINS = [os.getenv("CSRF_TRUSTED_ORIGIN", "nationalarchives.gov.uk")] +CSRF_TRUSTED_ORIGINS = [ + os.getenv("CSRF_TRUSTED_ORIGIN", "https://nationalarchives.gov.uk") +] # For search results within Wagtail itself WAGTAILSEARCH_BACKENDS = { From 8c610e3a9bbe04cb98c7350abae8237f84b93169 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Tue, 16 Apr 2024 08:54:41 +0000 Subject: [PATCH 8/8] 
Release 24.04.16.32 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dfd94c562..b28956cd1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "Etna" -version = "24.04.03.31" +version = "24.04.16.32" description = "" authors = ["James Biggs "]