From a12119b394873eab1e235ac3aeeee35787bf99e6 Mon Sep 17 00:00:00 2001 From: Anne van Kesteren Date: Sat, 7 Mar 2020 12:20:33 +0100 Subject: [PATCH 1/6] Use CommonMark in convert-policy.py This fixes a few subtle issues and is a pre-requisite for fixing more. --- .travis.yml | 2 +- convert-policy.py | 35 ++++++++++++++++++----------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5d1c16bae..4b036aab7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ env: - SERVER_PUBLIC_KEY="ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBDt6Igtp73aTOYXuFb8qLtgs80wWF6cNi3/AItpWAMpX3PymUw7stU7Pi+IoBJz21nfgmxaKp3gfSe2DPNt06l8=" install: - - pip install markdown==2.6.11 py-gfm==0.1.4 + - pip install commonmark==0.9.1 - npm install script: - shellcheck deploy.sh diff --git a/convert-policy.py b/convert-policy.py index d02b6c7d8..e4cc4f546 100755 --- a/convert-policy.py +++ b/convert-policy.py @@ -1,8 +1,8 @@ #!/usr/bin/env python +# -*- coding: UTF-8 -*- import codecs -import markdown -from mdx_partial_gfm import PartialGithubFlavoredMarkdownExtension +import commonmark import re @@ -35,13 +35,12 @@ def header_text_to_id(header_text): def add_one_header_anchor(line): - header_text = line.lstrip('#') - header_level = len(line) - len(header_text) - - if header_level <= 2: + search = re.search(r'(.+)', line) + if not search: return line - header_text = header_text.lstrip(' ') + header_level = search.group(1) + header_text = search.group(2) header_id = header_text_to_id(header_text) return '{2}'.format(header_level, header_id, header_text) @@ -55,10 +54,6 @@ def rewrite_defs(policy_markdown): return re.sub(r']*)>[*][*]([^*]*)[*][*]', '\\2', policy_markdown) -def fix_nested_lists(policy_markdown): - return re.sub(r'^ 1[.]', ' 1.', policy_markdown, flags=re.MULTILINE) - - def avoid_link_false_positives(policy_markdown): return re.sub(r'[]] [(]', '] \\(', policy_markdown) @@ -66,14 +61,21 @@ def 
avoid_link_false_positives(policy_markdown): def preprocess_markdown(policy_markdown, mapping_pairs): result = lower_headers(policy_markdown) result = apply_link_mapping(result, mapping_pairs) - result = add_header_anchors(result) result = rewrite_defs(result) - result = fix_nested_lists(result) result = avoid_link_false_positives(result) return result +def postprocess_html(policy_html, template, title): + result = policy_html.replace("&quot;", "\"") + result = template.replace("@POLICY_GOES_HERE@", result) + result = result.replace("@TITLE_GOES_HERE@", title) + result = add_header_anchors(result) + + return result + + def markdown_title(policy_markdown): for line in policy_markdown.split('\n'): if line.startswith("# WHATWG "): @@ -97,11 +99,10 @@ def main(): (title, policy_markdown) = markdown_title(policy_markdown) preprocessed_policy_markdown = preprocess_markdown(policy_markdown, link_mapping_pairs) - policy_html = markdown.markdown(preprocessed_policy_markdown, extensions=[PartialGithubFlavoredMarkdownExtension()]) + policy_html = commonmark.commonmark(preprocessed_policy_markdown) - final_policy_html = template.replace("@POLICY_GOES_HERE@", policy_html) - final_policy_html = final_policy_html.replace("@TITLE_GOES_HERE@", title) + postprocessed_policy_html = postprocess_html(policy_html, template, title) - codecs.open("whatwg.org/" + link, "w", encoding="utf-8").write(final_policy_html) + codecs.open("whatwg.org/" + link, "w", encoding="utf-8").write(postprocessed_policy_html) main() From cd9c675f677bdc450342ef1dc76b8654e9c31345 Mon Sep 17 00:00:00 2001 From: Anne van Kesteren Date: Sat, 7 Mar 2020 12:40:06 +0100 Subject: [PATCH 2/6] test python3 --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4b036aab7..64785bff2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,4 @@ language: python -python: - - "2.7" env: global: From 5c28a9a56bdc2d554ec1528fcdbb654b0b1ed0c0 Mon Sep 17 00:00:00 2001 From: Anne van Kesteren 
Date: Mon, 9 Mar 2020 18:06:52 +0100 Subject: [PATCH 3/6] remove avoid_link_false_positives too --- convert-policy.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/convert-policy.py b/convert-policy.py index e4cc4f546..671cfcddc 100755 --- a/convert-policy.py +++ b/convert-policy.py @@ -54,15 +54,10 @@ def rewrite_defs(policy_markdown): return re.sub(r']*)>[*][*]([^*]*)[*][*]', '\\2', policy_markdown) -def avoid_link_false_positives(policy_markdown): - return re.sub(r'[]] [(]', '] \\(', policy_markdown) - - def preprocess_markdown(policy_markdown, mapping_pairs): result = lower_headers(policy_markdown) result = apply_link_mapping(result, mapping_pairs) result = rewrite_defs(result) - result = avoid_link_false_positives(result) return result From f413867bce100dcacce1e5a5d37fdd64880a0545 Mon Sep 17 00:00:00 2001 From: Anne van Kesteren Date: Mon, 9 Mar 2020 20:35:44 +0100 Subject: [PATCH 4/6] review nits --- convert-policy.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/convert-policy.py b/convert-policy.py index 671cfcddc..59910236a 100755 --- a/convert-policy.py +++ b/convert-policy.py @@ -1,7 +1,5 @@ #!/usr/bin/env python -# -*- coding: UTF-8 -*- -import codecs import commonmark import re @@ -26,6 +24,7 @@ def ascii_lower(str): def header_text_to_id(header_text): + header_text = header_text.replace("&quot;", "") punctuation_regexp = r'[^\w\- ]' header_id = ascii_lower(header_text) header_id = re.sub(punctuation_regexp, '', header_id) @@ -63,8 +62,7 @@ def preprocess_markdown(policy_markdown, mapping_pairs): def postprocess_html(policy_html, template, title): - result = policy_html.replace("&quot;", "\"") - result = template.replace("@POLICY_GOES_HERE@", result) + result = template.replace("@POLICY_GOES_HERE@", policy_html) result = result.replace("@TITLE_GOES_HERE@", title) result = add_header_anchors(result) @@ -83,13 +81,13 @@ def markdown_title(policy_markdown): def main(): - link_mapping_pairs = 
parse_link_mapping(codecs.open("sg/policy-link-mapping.txt", "r", encoding="utf-8").read()) - template = codecs.open("policy-template.html", "r", encoding="utf-8").read() + link_mapping_pairs = parse_link_mapping(open("sg/policy-link-mapping.txt", "r", encoding="utf-8").read()) + template = open("policy-template.html", "r", encoding="utf-8").read() for resource, link in link_mapping_pairs: if link.startswith("https:"): continue - policy_markdown = codecs.open("sg" + resource[1:].replace("%20", " "), "r", encoding="utf-8").read() + policy_markdown = open("sg" + resource[1:].replace("%20", " "), "r", encoding="utf-8").read() (title, policy_markdown) = markdown_title(policy_markdown) preprocessed_policy_markdown = preprocess_markdown(policy_markdown, link_mapping_pairs) @@ -98,6 +96,6 @@ def main(): postprocessed_policy_html = postprocess_html(policy_html, template, title) - codecs.open("whatwg.org/" + link, "w", encoding="utf-8").write(postprocessed_policy_html) + open("whatwg.org/" + link, "w", encoding="utf-8").write(postprocessed_policy_html) main() From e64ccec61bf21870f071b18028f97010d26cdb3e Mon Sep 17 00:00:00 2001 From: Anne van Kesteren Date: Mon, 9 Mar 2020 20:39:47 +0100 Subject: [PATCH 5/6] merge header adjusting see https://github.com/whatwg/whatwg.org/pull/294/commits/50164262ff87cf3c17e66fb739664104dc8d6662 --- convert-policy.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/convert-policy.py b/convert-policy.py index 59910236a..20eac4c2e 100755 --- a/convert-policy.py +++ b/convert-policy.py @@ -4,10 +4,6 @@ import re -def lower_headers(policy_markdown): - return re.sub(r'^#', '##', policy_markdown, flags=re.MULTILINE) - - def parse_link_mapping(link_mapping): return [line.split('=',1) for line in link_mapping.split("\n") if len(line) > 0] @@ -33,20 +29,20 @@ def header_text_to_id(header_text): return header_id -def add_one_header_anchor(line): - search = re.search(r'(.+)', line) +def adjust_header(line): + 
search = re.search(r'(.+)', line) if not search: return line - header_level = search.group(1) + header_level = str(int(search.group(1) + 1)) header_text = search.group(2) header_id = header_text_to_id(header_text) return '{2}'.format(header_level, header_id, header_text) -def add_header_anchors(policy_markdown): - return str.join('\n', [add_one_header_anchor(line) for line in policy_markdown.split('\n')]) +def adjust_headers(policy_markdown): + return str.join('\n', [adjust_header(line) for line in policy_markdown.split('\n')]) def rewrite_defs(policy_markdown): @@ -54,17 +50,16 @@ def rewrite_defs(policy_markdown): def preprocess_markdown(policy_markdown, mapping_pairs): - result = lower_headers(policy_markdown) - result = apply_link_mapping(result, mapping_pairs) + result = apply_link_mapping(policy_markdown, mapping_pairs) result = rewrite_defs(result) return result def postprocess_html(policy_html, template, title): - result = template.replace("@POLICY_GOES_HERE@", policy_html) + result = adjust_headers(policy_html) + result = template.replace("@POLICY_GOES_HERE@", result) result = result.replace("@TITLE_GOES_HERE@", title) - result = add_header_anchors(result) return result From dfb2b6d031b23b76bca460b58b0b0666eaed0a47 Mon Sep 17 00:00:00 2001 From: Anne van Kesteren Date: Mon, 9 Mar 2020 20:43:11 +0100 Subject: [PATCH 6/6] oops --- convert-policy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert-policy.py b/convert-policy.py index 20eac4c2e..174d506b4 100755 --- a/convert-policy.py +++ b/convert-policy.py @@ -34,7 +34,7 @@ def adjust_header(line): if not search: return line - header_level = str(int(search.group(1) + 1)) + header_level = str(int(search.group(1)) + 1) header_text = search.group(2) header_id = header_text_to_id(header_text)