Skip to content

Commit

Permalink
Merge branch 'develop' into get-api-data-without-errors
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesbiggs committed Apr 16, 2024
2 parents 792fe32 + 645b8fc commit 34dd3a0
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 16 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/remove-untagged.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ name: Remove untagged container images

on:
workflow_dispatch:
schedule:
- cron: "0 3 * * 1"
# schedule:
# - cron: "0 3 * * 1"

jobs:
remove-untagged:
Expand All @@ -23,7 +23,7 @@ jobs:
if (version.metadata.container.tags.length == 0 && version.name !== "latest" && version.name !== "preview") {
console.log("Delete " + version.id)
const deleteResponse = await github.request("DELETE /orgs/${{ github.repository_owner }}/packages/container/${{ vars.DOCKER_IMAGE_NAME }}/versions/" + version.id, { });
console.log("status " + deleteResponse.status)
// const deleteResponse = await github.request("DELETE /orgs/${{ github.repository_owner }}/packages/container/${{ vars.DOCKER_IMAGE_NAME }}/versions/" + version.id, { });
// console.log("status " + deleteResponse.status)
}
}
2 changes: 1 addition & 1 deletion .platform/services.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
db:
type: postgresql:12
disk: 256
disk: 512

redis:
type: redis:6.0
4 changes: 4 additions & 0 deletions config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,10 @@
"WAGTAILADMIN_BASE_URL", "https://nationalarchives.gov.uk"
)

# Django's CSRF_TRUSTED_ORIGINS setting: a one-element list whose value is
# read from the CSRF_TRUSTED_ORIGIN environment variable, falling back to
# "https://nationalarchives.gov.uk" when that variable is not set.
CSRF_TRUSTED_ORIGINS = [
os.getenv("CSRF_TRUSTED_ORIGIN", "https://nationalarchives.gov.uk")
]

# For search results within Wagtail itself
WAGTAILSEARCH_BACKENDS = {
"default": {
Expand Down
43 changes: 43 additions & 0 deletions etna/ciim/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
find_all,
format_description_markup,
pluck,
strip_html,
)


Expand Down Expand Up @@ -370,3 +371,45 @@ def test_index_is_zero_for_non_int_sort_key(self):
index = convert_sort_key_to_index(sort)

self.assertEqual(index, 0)


class TestStripHtml(SimpleTestCase):
    """Tests for ``strip_html``: spacing around removed tags and the tag allow-list."""

    def test_ensure_spaces_preserve_marks(self):
        """
        Run ``strip_html`` with ``preserve_marks=True`` and ``ensure_spaces=True``.

        Each case is a ``(subtest label, raw markup, expected text)`` triple.
        The <span> and <p> cases expect a space where the tag was; the
        unknown-tag case expects the surrounding text to be joined directly.
        """
        cases = [
            (
                "test for span tag",
                "This is a<span>test example</span>",
                "This is a test example",
            ),
            (
                "test for p tag",
                "This is a<p>test example</p>",
                "This is a test example",
            ),
            (
                "test for unknown tag",
                "This is a<unknown>test example</unknown>",
                "This is atest example",
            ),
            (
                # Nested-span markup; the label looks like a record reference.
                "D7376859",
                '<span class="wrapper"><span altrender="doctype" class="emph"></span><span class="persname"><span altrender="surname" class="emph">Patman</span><span altrender="forenames" class="emph">Clifford Douglas</span></span><span altrender="rank" class="emph">Armament Quarter Master Serjeant</span><span altrender="regno" class="emph">1865334</span><span class="corpname">Royal Army Ordnance Corps, 8 Hussars now Royal Electrical and Mechanical Engineers</span><span class="geogname">Escape and Evasion</span><span altrender="award" class="emph">Mentions in Despatches</span></span>',
                "Patman Clifford Douglas Armament Quarter Master Serjeant 1865334 Royal Army Ordnance Corps, 8 Hussars now Royal Electrical and Mechanical Engineers Escape and Evasion Mentions in Despatches",
            ),
        ]

        for description, markup, wanted in cases:
            with self.subTest(description):
                self.assertEqual(
                    strip_html(markup, preserve_marks=True, ensure_spaces=True),
                    wanted,
                )

    def test_allow_tags(self):
        """An <a> tag named in ``allow_tags`` is kept; the expected output carries a ``rel`` attribute."""
        markup = '<a href="http://test.com">this is a test</a>'
        wanted = '<a href="http://test.com" rel="noopener noreferrer">this is a test</a>'

        cleaned = strip_html(markup, allow_tags={"a", "br", "p"})

        self.assertEqual(cleaned, wanted)
39 changes: 29 additions & 10 deletions etna/ciim/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,20 +257,39 @@ def format_link(link_html: str) -> Dict[str, str]:
return {"href": href, "id": id, "text": document.text()}


def strip_html(value: str, *, preserve_marks, ensure_spaces):
def strip_html(
value: str,
*,
preserve_marks: bool = False,
ensure_spaces: bool = False,
allow_tags: Optional[set] = None,
) -> str:
"""
Temporary HTML sanitiser to remove unwanted tags from data.
K-int will eventually sanitise this at API level.
preserve_marks=True will keep <mark> tags in the output, otherwise they are removed.
Replacing <span> and <p> tags is necessary to prevent "bunched" data,
"This is a<span>test</span>example" will return as "This is atestexample"
without the placement of the space.
TODO: this will eventually be sanitised at API level.
value:
the value to be sanitised
preserve_marks:
keep <mark> tags in the output instead of stripping them
ensure_spaces:
replace <span> and <p> tags with whitespace so adjacent text is not bunched together
allow_tags:
sets the tags that are allowed
"""
clean_tags = {"span", "p"} if ensure_spaces else set()
clean_html = nh3.clean(
value, tags={*clean_tags, "mark"} if preserve_marks else clean_tags
)

if allow_tags is None:
allow_tags = set()

tags = set()
if preserve_marks:
tags.add("mark")
tags.update(clean_tags)
tags.update(allow_tags)

clean_html = nh3.clean(value, tags=tags)

for tag in clean_tags:
opening_regex = rf"<{tag}[^>]*>"
closing_regex = rf"</{tag}>"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "Etna"
version = "24.04.03.31"
version = "24.04.16.32"
description = ""
authors = ["James Biggs <[email protected]>"]

Expand Down

0 comments on commit 34dd3a0

Please sign in to comment.