forked from olahol/iso-3166-2.js
-
Notifications
You must be signed in to change notification settings - Fork 1
/
parse.py
58 lines (48 loc) · 1.84 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import csv
import json
import re
from functools import reduce
def unicode_csv_reader(utf8_data, **kwargs):
    """Yield each record of a CSV source as a list of strings.

    Args:
        utf8_data: Any iterable of text lines that ``csv.reader`` accepts,
            such as a file opened in text mode.
        **kwargs: Forwarded to ``csv.reader``. The Excel dialect is used
            unless the caller passes its own ``dialect``.

    Yields:
        One list of cell strings per CSV record.
    """
    # Excel stays the default, but an explicit ``dialect=`` from the caller
    # now wins instead of raising "multiple values for keyword argument".
    kwargs.setdefault("dialect", csv.excel)
    # No decoding or copying needed: in Python 3 csv.reader already yields a
    # fresh list of str for every record.
    yield from csv.reader(utf8_data, **kwargs)
# Countries and their subdivisions: read data.csv, build the nested
# ``countries`` mapping and write it to data.js as ``var data = {...};``.
#
# Columns read from each row:
#   0 country name, 1 subdivision code, 2 subdivision name,
#   3 subdivision type, 4 country code.
bracketed_text = re.compile(r"\[.*\]")  # removed from subdivision names below

with open("data.csv", "r", encoding="utf-8") as csv_file:
    countries = {}
    for row in unicode_csv_reader(csv_file):
        if not row:
            # csv.reader yields [] for a blank line; skip it instead of
            # failing with IndexError on row[0].
            continue
        country_name = row[0]
        subdivision_code = row[1]
        subdivision_name = bracketed_text.sub("", row[2])
        # Named subdivision_type so the builtin ``type`` is not shadowed.
        subdivision_type = row[3]
        country_code = row[4]
        if country_code not in countries:
            # The first row seen for a country code supplies the country name.
            countries[country_code] = {"name": country_name.strip(), "sub": {}}
        countries[country_code]["sub"][subdivision_code] = {
            "name": subdivision_name.strip(),
            "type": subdivision_type.strip(),
        }

# Total number of subdivision entries across all countries.
subdivisions = sum(len(country["sub"]) for country in countries.values())
print("Countries: %d, Subdivisions: %d" % (
    len(countries), subdivisions
))

# "w" truncates data.js so it starts with the ``data`` variable.
with open("data.js", "w", encoding="utf-8") as json_file:
    print("Dumping subdivisions to data.js")
    json_file.write("var data = ")
    # ensure_ascii=False keeps non-ASCII names as literal UTF-8 characters.
    json.dump(countries, json_file, ensure_ascii=False)
    json_file.write(";")
# alpha-3 to alpha-2 country code conversions: read codes.csv, build the
# ``codes`` mapping and append it to data.js as ``var codes = {...};``.
#
# Columns read from each row: 0 alpha-2 code, 1 alpha-3 code.
with open("codes.csv", "r", encoding="utf-8") as csv_file:
    codes = {
        row[1]: row[0]  # alpha-3 -> alpha-2; a later duplicate overwrites
        for row in unicode_csv_reader(csv_file)
        if row  # csv.reader yields [] for a blank line; skip it
    }

print("Country codes: %d" % len(codes))

# "a" appends, so the statement lands after whatever data.js already holds.
with open("data.js", "a", encoding="utf-8") as json_file:
    print("Dumping codes to data.js")
    json_file.write("var codes = ")
    json.dump(codes, json_file, ensure_ascii=False)
    json_file.write(";")