Skip to content

Commit

Permalink
Fix downloading of ISIN file (#12)
Browse files Browse the repository at this point in the history
* Fix downloading of ISIN file

* Fix actions

* Bump actions version

* Specific ubuntu version

* Remove python 3.5

* Stringify
  • Loading branch information
jdvala authored Oct 6, 2024
1 parent b85a0d0 commit b455d83
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 102 deletions.
26 changes: 16 additions & 10 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,24 @@ on:
tags: ["*"]
jobs:
CI:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"] # Specify Python versions here
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
uses: actions/setup-python@v2
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: 3.7
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements/ci.txt
pip install -e .
- name: Download Data
run: python -c "from python_lei.utils import Download; Download(_is_actions=True)"

Expand All @@ -38,12 +44,12 @@ jobs:
if: startsWith(github.ref, 'refs/tags/')
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: Set up Python 3.7
uses: actions/setup-python@v2
- name: Set up Python 3.10
uses: actions/setup-python@v5
with:
python-version: 3.7
python-version: 3.10

- name: PyPi Deploy preparation
run: |
Expand All @@ -53,4 +59,4 @@ jobs:
uses: pypa/[email protected]
with:
user: ${{ secrets.PYPI_USER }}
password: ${{ secrets.PYPI_PASSWORD }}
password: ${{ secrets.PYPI_PASSWORD }}
35 changes: 17 additions & 18 deletions requirements/ci.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
black>=19.10b0 ; python_version >= "3.6"
isort~=4.3.21
flake8~=3.7.9
flake8-absolute-import~=1.0 ; python_version >= "3.6"
flake8-black~=0.1.1 ; python_version >= "3.6"
flake8-blind-except~=0.1.1 ; python_version >= "3.6"
flake8-builtins~=1.5.2 ; python_version >= "3.6"
flake8-comprehensions~=3.2.2 ; python_version >= "3.6"
flake8-docstrings~=1.5.0 ; python_version >= "3.6"
flake8-mutable~=1.2.0 ; python_version >= "3.6"
flake8-print~=3.1.4 ; python_version >= "3.6"
flake8-quotes~=3.0.0 ; python_version >= "3.6"
flake8-tuple~=0.4.1 ; python_version >= "3.6"
pytest~=4.6 # pytest 5 requires py3
pytest-cov~=2.8.1
pytest-env~=0.6.2
pytest-sugar~=0.9.2
testfixtures~=6.14.0
black
isort
flake8
flake8-absolute-import
flake8-black
flake8-blind-except
flake8-comprehensions
flake8-docstrings
flake8-mutable
flake8-print
flake8-quotes
flake8-tuple
pytest
pytest-cov
pytest-env
pytest-sugar
testfixtures
3 changes: 2 additions & 1 deletion requirements/prod.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pandas
texttable
requests
requests
bs4
2 changes: 0 additions & 2 deletions src/python_lei/isin_lei.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import pandas as pd
import requests
from python_lei.exceptions import InvalidISIN, InvalidLEI
from python_lei.utils import load_data

Expand Down
35 changes: 26 additions & 9 deletions src/python_lei/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import pandas as pd
import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
Expand All @@ -30,8 +31,7 @@ def __init__(self, _is_actions=False):
Args:
_is_actions (bool): For setting path of downloaded resources on Github Actions
"""
self.data_url = f"https://isinmapping.gleif.org/file-by-date/{TODAY}"

self.data_url = "https://www.gleif.org/en/lei-data/lei-mapping/download-isin-to-lei-relationship-files"
self._download(_is_actions)

def _download(self, _is_actions):
Expand All @@ -42,8 +42,13 @@ def _download(self, _is_actions):
logger.info(f"No resources directory found, creating resources directory.")
os.mkdir(RESOURCE_DIR)

download_link = self._scrape_isin_file()

if not download_link:
raise ValueError("Downloading of isin file not available.")

try:
response = requests.get(self.data_url)
response = requests.get(download_link)
except requests.exceptions as err:
logger.error(
"Connection Error, Unable to download data at this time. Please check you have working internet connection or try again later."
Expand All @@ -52,19 +57,31 @@ def _download(self, _is_actions):
logger.error("No response from GLEIF server.")

logger.info("The file could be over 50 Mb.")
# TODO: Add progress bar
zipped_content = zipfile.ZipFile(io.BytesIO(response.content))
# TODO: Remove this
if _is_actions:
zipped_content.extractall(
"/home/runner/work/python-lei/python-lei/resources"
)
else:
zipped_content.extractall(RESOURCE_DIR)
logger.info(f"Extraction complete in {RESOURCE_DIR}")

def _scrape_isin_file(self):
    """Scrape the download page at ``self.data_url`` for the ISIN file link.

    The page is fetched and parsed, and the ``href`` of the first anchor in
    the second cell of the second table row is taken as the download link.
    (Presumably the first row is the table header and the second row is the
    most recent file -- confirm against the live page.)

    Returns:
        str or None: Absolute URL of the ISIN-to-LEI file, or ``None`` when
        the page could not be fetched or the expected table layout was not
        found. The caller treats a falsy result as "download not available".
    """
    # Local import keeps this method self-contained; urljoin leaves an
    # already-absolute href untouched and resolves a relative one.
    from urllib.parse import urljoin

    try:
        # A timeout prevents hanging forever on an unresponsive server.
        response = requests.get(self.data_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as err:
        # RequestException covers connection errors, timeouts and the
        # HTTPError raised by raise_for_status().
        logger.error(f"Error connecting to {self.data_url}: {err}")
        return None

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed (and to avoid GuessedAtParserWarning).
    soup = BeautifulSoup(response.text, "html.parser")

    try:
        # Find all the tr and td elements and get to the href.
        href = soup.find_all("tr")[1].find_all("td")[1].find("a")["href"]
    except (IndexError, TypeError, KeyError):
        # IndexError: fewer rows/cells than expected; TypeError: no <a> in
        # the cell (find returned None); KeyError: <a> without an href.
        logger.error(f"Could not locate ISIN download link on {self.data_url}")
        return None

    # TODO: Convert the dataframe to parquet and use it.
    return urljoin(self.data_url, href)


class Update:
"""
Expand All @@ -83,12 +100,12 @@ def __init__(self):
logger.info(
"Resource directory not found or LEI ISIN mappings not found. Downloading now."
)
download = Download()
Download()

if os.listdir(RESOURCE_DIR) != []:
shutil.rmtree(RESOURCE_DIR)
logger.info(f"Downloading Data in {RESOURCE_DIR}")
download = Download()
Download()


def load_data():
Expand Down
55 changes: 0 additions & 55 deletions tests/test_isin_lei.py

This file was deleted.

5 changes: 1 addition & 4 deletions tests/test_lei_search.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from pathlib import Path
from unittest.mock import Mock, patch
from unittest.mock import patch

import pandas as pd
import pytest
from python_lei.exceptions import NotFound
from python_lei.lei_search import SearchLEI


Expand Down
5 changes: 2 additions & 3 deletions tests/test_pylei.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from pathlib import Path
from unittest.mock import Mock, patch
from unittest.mock import patch

import pandas as pd
import pytest
from python_lei.exceptions import InvalidISIN, InvalidLEI
from python_lei.exceptions import InvalidLEI
from python_lei.pylei import pyLEI
from python_lei.utils import PROJECT_ROOT

Expand Down

0 comments on commit b455d83

Please sign in to comment.