add script to create json and update requirements

WDscholia · Sep 14, 2021 · 0b99ac7 · 0b99ac7
1 parent 92f1a24
commit 0b99ac7
Show file tree

Hide file tree

Showing 2 changed files with 273 additions and 1 deletion.
diff --git a/i18n_create_json.py b/i18n_create_json.py
@@ -0,0 +1,268 @@
+"""
+Create json file for internationalization process.
+
+Script to create a json file using the data-i18n attributes
+found inside html files for the internationalization process.
+"""
+
+import json
+import click
+import re
+import glob
+from pathlib import Path
+from bs4 import BeautifulSoup
+from tabulate import tabulate
+from colorama import Fore, Style
+from collections import OrderedDict, Counter
+
+
+def read_file(filename):
+    """Read a text file and return its content as a string.
+
+    The file is decoded as UTF-8 instead of the platform default encoding,
+    so html files containing non-ASCII text are read the same way on every
+    system.
+    """
+    with open(filename, "r", encoding="utf-8") as fstream:
+        return fstream.read()
+
+
+def write_file(filename, content):
+    """Write the string *content* into *filename*, replacing the file.
+
+    The text is encoded as UTF-8 instead of the platform default encoding,
+    so non-ASCII characters are stored the same way on every system.
+    """
+    with open(filename, "w", encoding="utf-8") as fstream:
+        fstream.write(content)
+
+
+def read_json(filename):
+    """Read a UTF-8 encoded json file and return its parsed content.
+
+    The file is decoded as UTF-8 instead of the platform default encoding,
+    so translations containing non-ASCII text are loaded correctly on every
+    system.
+    """
+    with open(filename, "r", encoding="utf-8") as fstream:
+        return json.load(fstream)
+
+
+def write_json(filename, data):
+    """Write *data* as a json file indented with four spaces.
+
+    The file is written as UTF-8 and non-ASCII characters are kept as they
+    are (no ``\\uXXXX`` escapes), so the translations stay readable and an
+    existing message file is not rewritten into escape sequences.
+    """
+    with open(filename, "w", encoding="utf-8") as fstream:
+        fstream.write(json.dumps(data, indent=4, ensure_ascii=False))
+
+
+def show_table(datadict, color, fmt="pretty"):
+    """Print *datadict* as a colored table.
+
+    The dict keys are used as column headers and *fmt* is handed to
+    tabulate as the table format. *color* is printed before the table and
+    the style is reset after it.
+    """
+    print(color)
+    table = tabulate(datadict, headers="keys", tablefmt=fmt)
+    print(table, Style.RESET_ALL, sep="\n")
+
+
+def print_info(datadict, color, title=''):
+    """Print *datadict* as indented json inside a colored frame.
+
+    *color* is printed first, then a 60 character wide rule with *title*
+    centered in it, the json dump of *datadict*, a closing rule and finally
+    the style reset.
+    """
+    print(color)
+    header = f"+{title:-^60s}+"
+    print(header)
+    body = json.dumps(datadict, indent=4)
+    footer = f"+{'':-^60s}+"
+    print(body, footer, Style.RESET_ALL, sep="\n")
+
+
+def create_json_i18n(filename, json_content, verbose=False):
+    """Add the data-i18n keys found in a html file to a message dict.
+
+    Every tag carrying a data-i18n attribute is looked up in
+    *json_content* (message key -> translation, following the banana
+    format). Keys that are missing, or whose current value is empty, are
+    set to an empty string; keys that already have a translation are left
+    untouched. *json_content* is updated in place and nothing is returned.
+
+    Tags whose data-i18n attribute is empty are skipped, because an empty
+    message key cannot be referenced or translated.
+
+    With *verbose* set, a table with the file name, the keys that were
+    already present and the keys added by this file is printed.
+    """
+    content = read_file(filename)
+    soup = BeautifulSoup(content, 'html.parser')
+    oldfields = set(json_content)
+
+    for tag in soup.find_all(attrs={"data-i18n": True}):
+        key = tag.get("data-i18n")
+        if not key:
+            continue
+        if not json_content.get(key):
+            json_content[key] = ''
+
+    if verbose:
+        newfields = [key for key in json_content if key not in oldfields]
+        # Sorted lists keep the table identical between runs; iterating a
+        # set directly would list the keys in arbitrary order.
+        show_table({
+            "file": [Path(filename).name],
+            "existing fields": sorted(oldfields),
+            "new fields": sorted(newfields)
+        }, Fore.YELLOW)
+
+
+def normalize(name):
+    """Return *name* with every underscore turned into a dash.
+
+    Used as click token normalizer so commands can be typed with
+    underscores.
+    """
+    pieces = name.split("_")
+    return "-".join(pieces)
+
+
+# The token normalizer lets commands be typed with underscores.
+@click.group(context_settings={"token_normalize_func": normalize})
+def cli():
+    """Create/update json for internationalization.
+
+    This program allows you to create or update a <lang>.json file
+    for an internationalization process, using the banana format. You
+    can look for the data-i18n attribute in one or several html files at
+    a time and extract them to create/update the json file. You can also
+    check whether there are duplicated attributes in the html files before
+    putting them in the json file.
+
+    To show help for a specific command, you can run:
+
+    python i18n_create_json.py COMMAND --help
+    """
+
+
+@cli.command()
+@click.option('-f', "--file", required=True, help="to pass the html file which it will \
+be scanned")
+@click.option('--output', '-o', required=True, help="To give the name of the output json")
+@click.option('-i', "--inplace", is_flag=True, help="To create/update the file. Without \
+this option, the command is executed in a dry-run mode")
+@click.option('-v', "--verbose", is_flag=True, help="To show more detailed information \
+about the process")
+def onefile(**kwargs):
+    """To search all data-i18n attributes inside one html file.
+
+    This command allows you to look for all data-i18n attributes inside one
+    html file passed by command line with the option -f/--file and create or
+    update a json file with these attributes following the banana format.
+
+    The name given with -o/--output is resolved inside the static/i18n
+    directory that sits beside the directory of the html file, that is
+    <html directory>/../static/i18n/<output>.
+
+
+    How to use:
+
+    1. To execute in dry-run mode
+
+       $ python i18n_create_json.py onefile --file="path/to/file.html"
+        -o lang.json
+
+
+    2. To execute and replace in-place
+
+       $ python i18n_create_json.py onefile --file="path/to/file.html"
+        -o lang.json -i/--inplace
+
+    """
+    filename = kwargs['file']
+    verbose = kwargs["verbose"]
+    outfile = Path(filename).parent.parent / "static/i18n" / kwargs['output']
+
+    trfile_content = read_json(outfile) if outfile.exists() else {}
+
+    # "@metadata" is taken out so it is neither sorted with the messages
+    # nor treated as a message key; it is put back as the first entry.
+    has_metadata = "@metadata" in trfile_content
+    metadata = trfile_content.pop("@metadata", None)
+    create_json_i18n(filename, trfile_content, verbose)
+
+    result = {}
+    if has_metadata:
+        # Only keep metadata that really existed, so a brand new file does
+        # not get a meaningless '"@metadata": null' entry.
+        result["@metadata"] = metadata
+    result.update(sorted(trfile_content.items()))
+
+    if kwargs["inplace"]:
+        write_json(outfile, result)
+    else:
+        print_info(
+            result,
+            Fore.LIGHTGREEN_EX,
+            title=f"New content for {kwargs['output']}"
+        )
+
+@cli.command()
+@click.option('-p', "--pattern", required=True, help="To pass the html \
+files using unix wildcards")
+@click.option('--output', '-o', required=True, help="To give the name of the output json")
+@click.option('-i', "--inplace", is_flag=True, help="To create/update the file. \
+Without this option, the command is executed in a dry-run mode")
+@click.option('-v', "--verbose", is_flag=True, help="To show more detailed information \
+about the process")
+def severalfiles(**kwargs):
+    """To search all data-i18n attributes inside several html files.
+
+    This command allows you to look for all data-i18n attributes inside
+    several html files passed by command line with the option -p/--pattern
+    as a pattern. You can use the bash wildcards. With this pattern, you can
+    create or update the json file with these attributes following the
+    banana format.
+
+    The name given with -o/--output is resolved inside the static/i18n
+    directory that sits beside the directory named in the pattern, that is
+    <pattern directory>/../static/i18n/<output>.
+
+
+    How to use:
+
+    1. To execute in dry-run mode
+
+       $ python i18n_create_json.py severalfiles --pattern="path/to/file*.html"
+        -o lang.json
+
+
+    2. To execute and replace in-place
+
+       $ python i18n_create_json.py severalfiles --pattern="path/to/file*.html"
+       -o lang.json -i/--inplace
+
+    """
+    pattern = kwargs['pattern']
+    verbose = kwargs["verbose"]
+    outfile = Path(pattern).parent.parent / "static/i18n" / kwargs['output']
+
+    trfile_content = read_json(outfile) if outfile.exists() else {}
+
+    # "@metadata" is taken out so it is neither sorted with the messages
+    # nor treated as a message key; it is put back as the first entry.
+    has_metadata = "@metadata" in trfile_content
+    metadata = trfile_content.pop("@metadata", None)
+
+    # glob returns the matches in arbitrary order; sorting keeps the
+    # verbose report identical between runs.
+    for html_file in sorted(glob.glob(pattern)):
+        create_json_i18n(html_file, trfile_content, verbose)
+
+    result = {}
+    if has_metadata:
+        # Only keep metadata that really existed, so a brand new file does
+        # not get a meaningless '"@metadata": null' entry.
+        result["@metadata"] = metadata
+    result.update(sorted(trfile_content.items()))
+
+    if kwargs["inplace"]:
+        write_json(outfile, result)
+    else:
+        print_info(
+            result,
+            Fore.LIGHTGREEN_EX,
+            title=f"New content for {kwargs['output']}"
+        )
+
+
+@cli.command()
+@click.option('--path', required=True, help="To pass the html files using \
+unix wildcards")
+def check_duplicates(**kwargs):
+    """To look for data-i18n attributes duplicated.
+
+    This command allows you to look for all duplicated data-i18n attributes
+    inside several html files passed by command line with the option
+    --path as a pattern. A key is reported when it appears more than once
+    across all the matched files.
+
+    How to use:
+
+    1. To show duplicated data-i18n attributes
+
+       $ python i18n_create_json.py check_duplicates
+       --path="path/to/file*.html"
+
+    """
+    path = kwargs["path"]
+    occurrences = Counter()
+    for html_file in sorted(glob.glob(path)):
+        # The html parser is used here as well, so that this check sees the
+        # same attributes as the commands that build the json file (single
+        # quoted values and spaces around "=" included).
+        soup = BeautifulSoup(read_file(html_file), 'html.parser')
+        occurrences.update(
+            tag.get("data-i18n")
+            for tag in soup.find_all(attrs={"data-i18n": True})
+        )
+
+    duplicates = [key for key, val in occurrences.items() if val > 1]
+
+    show_table(
+        {
+            "KEY DUPLICATES": duplicates if duplicates
+            else ["There are not duplicated keys"]
+        },
+        color=Fore.LIGHTRED_EX,
+        fmt="simple"
+    )
+
+
+# Run the click command group only when the file is executed as a script.
+if __name__ == '__main__':
+    cli()
diff --git a/requirements.txt b/requirements.txt
@@ -8,4 +8,8 @@ requests
 simplejson
 werkzeug>=0.9
 urllib3>=1.25.1
-feedparser
+feedparser
+colorama
+bs4
+tabulate
+click