-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
fotos.py
58 lines (48 loc) · 1.97 KB
/
fotos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from urllib.parse import urljoin
from pathlib import Path
from zipfile import ZipFile
from rows.utils import download_file, import_from_uri
from tqdm import tqdm
# Working directories, all relative to the current working directory:
#   fotos/           root for everything this script writes
#   fotos/download/  files fetched by download_photos()
#   fotos/output/    pictures extracted from the downloaded archives
data_path = Path("fotos")
download_path = data_path / "download"
output_path = data_path / "output"

# Create the directory tree at import time. `exist_ok=True` replaces the
# separate exists() check, so a directory that appears between the check and
# the creation can no longer make mkdir() raise FileExistsError.
for path in (data_path, download_path, output_path):
    path.mkdir(parents=True, exist_ok=True)
def _export_photos(zip_filename, photo_path):
    """Extract every file of the archive `zip_filename` below `photo_path`.

    Each member is written to ``photo_path / <state> / <sequence>.<ext>``,
    where all three parts are derived from the member's base name (see the
    inline comments). Existing destination files are overwritten.
    """
    # The context manager closes the archive even if extraction fails midway.
    with ZipFile(zip_filename) as zf:
        for file_info in tqdm(zf.filelist, desc="Exporting pictures"):
            if file_info.is_dir():
                # Directory entries carry no picture data; writing them out
                # would only produce bogus empty files.
                continue

            internal_name = file_info.filename
            internal_path = Path(internal_name)
            # Extension: text after the last dot, lower-cased.
            extension = internal_path.name.split(".")[-1].lower()
            # Identifier: text before the first dot, cut at the first "_".
            info = internal_path.name.split(".")[0].split("_")[0]
            # The first character is discarded, the next two select the
            # sub-directory and the remainder becomes the file stem.
            # NOTE(review): presumably names look like
            # "<letter><2-letter state><sequence number>..." - confirm
            # against the real archives.
            state, sequence_number = info[1:3], info[3:]
            new_filename = photo_path / state / f"{sequence_number}.{extension}"
            new_filename.parent.mkdir(parents=True, exist_ok=True)

            # Close the member stream as well as the output file.
            with zf.open(internal_name) as zfobj, open(new_filename, mode="wb") as fobj:
                fobj.write(zfobj.read())


def download_photos(year):
    """Download and unpack the photo archives listed for `year`.

    The listing is read with ``import_from_uri`` from the TSE URL built from
    `year`. Every row except "Parent Directory" is saved to
    ``download_path / <year> / <row.name>`` (unless that file already exists)
    and then extracted into ``output_path / <year>``.

    Args:
        year: The year to fetch; it is converted to ``str`` before use, so
            both ``2018`` and ``"2018"`` are accepted.
    """
    year = str(year)
    url = f"http://agencia.tse.jus.br/estatistica/sead/eleicoes/eleicoes{year}/fotos/"
    table = import_from_uri(url)
    for row in table:
        if row.name == "Parent Directory":
            continue

        filename = download_path / year / row.name
        print(f"Downloading {filename.name}", end="")
        if filename.exists():
            print(" - downloaded already, skipping.")
        else:
            filename.parent.mkdir(parents=True, exist_ok=True)
            print()
            download_file(urljoin(url, row.name), progress=True, filename=filename)
            print(f" saved: {filename}")

        # NOTE(review): extraction also runs for archives that were already
        # downloaded - confirm this is intended.
        photo_path = output_path / year
        photo_path.mkdir(parents=True, exist_ok=True)
        print(f" Exporting to: {photo_path}")
        _export_photos(filename, photo_path)
if __name__ == "__main__":
    # When run as a script, process 2012 through 2018 in two-year steps.
    for election_year in (2012, 2014, 2016, 2018):
        download_photos(election_year)