diff --git a/.github/workflows/remove-untagged.yml b/.github/workflows/remove-untagged.yml index 1d3332f03..afcd500ba 100644 --- a/.github/workflows/remove-untagged.yml +++ b/.github/workflows/remove-untagged.yml @@ -2,8 +2,8 @@ name: Remove untagged container images on: workflow_dispatch: - schedule: - - cron: "0 3 * * 1" + # schedule: + # - cron: "0 3 * * 1" jobs: remove-untagged: @@ -23,7 +23,7 @@ jobs: if (version.metadata.container.tags.length == 0 && version.name !== "latest" && version.name !== "preview") { console.log("Delete " + version.id) - const deleteResponse = await github.request("DELETE /orgs/${{ github.repository_owner }}/packages/container/${{ vars.DOCKER_IMAGE_NAME }}/versions/" + version.id, { }); - console.log("status " + deleteResponse.status) + // const deleteResponse = await github.request("DELETE /orgs/${{ github.repository_owner }}/packages/container/${{ vars.DOCKER_IMAGE_NAME }}/versions/" + version.id, { }); + // console.log("status " + deleteResponse.status) } } \ No newline at end of file diff --git a/.platform/services.yaml b/.platform/services.yaml index b0471fe69..5bace5561 100644 --- a/.platform/services.yaml +++ b/.platform/services.yaml @@ -1,6 +1,6 @@ db: type: postgresql:12 - disk: 256 + disk: 512 redis: type: redis:6.0 diff --git a/config/settings/base.py b/config/settings/base.py index 477a87b07..95d4ac20d 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -290,6 +290,10 @@ "WAGTAILADMIN_BASE_URL", "https://nationalarchives.gov.uk" ) +CSRF_TRUSTED_ORIGINS = [ + os.getenv("CSRF_TRUSTED_ORIGIN", "https://nationalarchives.gov.uk") +] + # For search results within Wagtail itself WAGTAILSEARCH_BACKENDS = { "default": { diff --git a/etna/ciim/tests/test_utils.py b/etna/ciim/tests/test_utils.py index 6ed7251cc..66e5de33b 100644 --- a/etna/ciim/tests/test_utils.py +++ b/etna/ciim/tests/test_utils.py @@ -10,6 +10,7 @@ find_all, format_description_markup, pluck, + strip_html, ) @@ -370,3 +371,45 @@ def 
test_index_is_zero_for_non_int_sort_key(self): index = convert_sort_key_to_index(sort) self.assertEqual(index, 0) + + +class TestStripHtml(SimpleTestCase): + + def test_ensure_spaces_preserve_marks(self): + + test_data = ( + ( + "test for span tag", + "This is atest example", + "This is a test example", + ), + ( + "test for p tag", + "This is a

test example

", + "This is a test example", + ), + ( + "test for unknown tag", + "This is atest example", + "This is atest example", + ), + ( + "D7376859", + 'PatmanClifford DouglasArmament Quarter Master Serjeant1865334Royal Army Ordnance Corps, 8 Hussars now Royal Electrical and Mechanical EngineersEscape and EvasionMentions in Despatches', + "Patman Clifford Douglas Armament Quarter Master Serjeant 1865334 Royal Army Ordnance Corps, 8 Hussars now Royal Electrical and Mechanical Engineers Escape and Evasion Mentions in Despatches", + ), + ) + + for label, value, expected in test_data: + with self.subTest(label): + result = strip_html(value, preserve_marks=True, ensure_spaces=True) + self.assertEqual(result, expected) + + def test_allow_tags(self): + value = """this is a test""" + expected = ( + """this is a test""" + ) + allow_tags = {"a", "br", "p"} + result = strip_html(value, allow_tags=allow_tags) + self.assertEqual(result, expected) diff --git a/etna/ciim/utils.py b/etna/ciim/utils.py index 017e2881b..8458799aa 100644 --- a/etna/ciim/utils.py +++ b/etna/ciim/utils.py @@ -257,20 +257,39 @@ def format_link(link_html: str) -> Dict[str, str]: return {"href": href, "id": id, "text": document.text()} -def strip_html(value: str, *, preserve_marks, ensure_spaces): +def strip_html( + value: str, + *, + preserve_marks: bool = False, + ensure_spaces: bool = False, + allow_tags: Optional[set] = None, +) -> str: """ Temporary HTML sanitiser to remove unwanted tags from data. - K-int will eventually sanitise this at API level. - preserve_marks=True will keep tags in the output, otherwise they are removed. - - Replacing and

tags is necessary to prevent "bunched" data, - "This is atestexample" will return as "This is atestexample" - without the placement of the space. + TODO: this will eventually be sanitised at API level. + + value: + the value to be sanitised + preserve_marks: + allow pre-defined tags for styling + ensure_spaces: + allow pre-defined tags and replaces them with whitespace + allow_tags: + sets the tags that are allowed """ clean_tags = {"span", "p"} if ensure_spaces else set() - clean_html = nh3.clean( - value, tags={*clean_tags, "mark"} if preserve_marks else clean_tags - ) + + if allow_tags is None: + allow_tags = set() + + tags = set() + if preserve_marks: + tags.add("mark") + tags.update(clean_tags) + tags.update(allow_tags) + + clean_html = nh3.clean(value, tags=tags) + for tag in clean_tags: opening_regex = rf"<{tag}[^>]*>" closing_regex = rf"" diff --git a/pyproject.toml b/pyproject.toml index dfd94c562..b28956cd1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "Etna" -version = "24.04.03.31" +version = "24.04.16.32" description = "" authors = ["James Biggs "]