From 5f92d78936cbe8434e656af0bf51a469970e0f5c Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 21 Mar 2024 18:34:36 +0530 Subject: [PATCH] Move GitHub GraphQL methods to utils.py Signed-off-by: Keshav Priyadarshi --- setup.cfg | 1 + src/fetchcode/package_versions.py | 127 +------------------------- src/fetchcode/utils.py | 146 ++++++++++++++++++++++++++++++ tests/test_package_versions.py | 2 +- 4 files changed, 152 insertions(+), 124 deletions(-) create mode 100644 src/fetchcode/utils.py diff --git a/setup.cfg b/setup.cfg index da69a761..200c39cb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -58,6 +58,7 @@ install_requires = packageurl-python requests python-dateutil + python-dotenv [options.packages.find] diff --git a/src/fetchcode/package_versions.py b/src/fetchcode/package_versions.py index b80994a0..0a565d04 100644 --- a/src/fetchcode/package_versions.py +++ b/src/fetchcode/package_versions.py @@ -16,7 +16,6 @@ import dataclasses import logging -import os import traceback import xml.etree.ElementTree as ET from datetime import datetime @@ -31,6 +30,8 @@ from packageurl.contrib.route import NoRouteAvailable from packageurl.contrib.route import Router +from fetchcode.utils import fetch_github_tags_gql + logger = logging.getLogger(__name__) router = Router() @@ -287,7 +288,8 @@ def get_github_versions_from_purl(purl): """Fetch versions of ``github`` packages using GitHub REST API.""" purl = PackageURL.from_string(purl) - yield from fetch_github_tags_gql(purl) + for version, date in fetch_github_tags_gql(purl): + yield PackageVersion(value=version, release_date=date) @router.route("pkg:golang/.*") @@ -549,124 +551,3 @@ def remove_debian_default_epoch(version): '' """ return version and version.replace("0:", "") - - -def fetch_github_tags_gql(purl): - """ - Yield PackageVersion for given github ``purl`` using the GitHub GQL API. - """ - for node in fetch_github_tag_nodes(purl): - name = node["name"] - target = node["target"] - - # in case the tag is a signed tag, then the commit info is in target['target'] - if "committedDate" not in target: - target = target["target"] - - committed_date = target.get("committedDate") - release_date = None - if committed_date: - release_date = dateparser.parse(committed_date) - - yield PackageVersion(value=name, release_date=release_date) - - -GQL_QUERY = """ -query getTags($name: String!, $owner: String!, $after: String) -{ - repository(name: $name, owner: $owner) { - refs(refPrefix: "refs/tags/", first: 100, after: $after) { - totalCount - pageInfo { - endCursor - hasNextPage - } - nodes { - name - target { - ... on Commit { - committedDate - } - ... on Tag { - target { - ... on Commit { - committedDate - } - } - } - } - } - } - } -}""" - - -def fetch_github_tag_nodes(purl): - """ - Yield node name/target mappings for Git tags of the ``purl``. - - Each node has this shape: - { - "name": "v2.6.24-rc5", - "target": { - "target": { - "committedDate": "2007-12-11T03:48:43Z" - } - } - }, - """ - variables = { - "owner": purl.namespace, - "name": purl.name, - } - graphql_query = { - "query": GQL_QUERY, - "variables": variables, - } - - while True: - response = github_response(graphql_query) - refs = response["data"]["repository"]["refs"] - for node in refs["nodes"]: - yield node - - page_info = refs["pageInfo"] - if not page_info["hasNextPage"]: - break - - # to fetch next page, we just set the after variable to endCursor - variables["after"] = page_info["endCursor"] - - -class GitHubTokenError(Exception): - pass - - -class GraphQLError(Exception): - pass - - -def github_response(graphql_query): - gh_token = os.environ.get("GH_TOKEN", None) - - if not gh_token: - msg = ( - "GitHub API Token Not Set\n" - "Set your GitHub token in the GH_TOKEN environment variable." - ) - raise GitHubTokenError(msg) - - headers = {"Authorization": f"bearer {gh_token}"} - - endpoint = "https://api.github.com/graphql" - response = requests.post(endpoint, headers=headers, json=graphql_query).json() - - message = response.get("message") - if message and message == "Bad credentials": - raise GitHubTokenError(f"Invalid GitHub token: {message}") - - errors = response.get("errors") - if errors: - raise GraphQLError(errors) - - return response diff --git a/src/fetchcode/utils.py b/src/fetchcode/utils.py new file mode 100644 index 00000000..0f59077a --- /dev/null +++ b/src/fetchcode/utils.py @@ -0,0 +1,146 @@ +# fetchcode is a free software tool from nexB Inc. and others. +# Visit https://github.com/nexB/fetchcode for support and download. +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and http://aboutcode.org +# +# This software is licensed under the Apache License version 2.0. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: +# http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + + +import os +from dateutil import parser as dateparser +import requests + + +def fetch_github_tags_gql(purl): + """ + Yield PackageVersion for given github ``purl`` using the GitHub GQL API. + """ + for node in fetch_github_tag_nodes(purl): + name = node["name"] + target = node["target"] + + # in case the tag is a signed tag, then the commit info is in target['target'] + if "committedDate" not in target: + target = target["target"] + + committed_date = target.get("committedDate") + release_date = None + if committed_date: + release_date = dateparser.parse(committed_date) + + yield name, release_date + + +GQL_QUERY = """ +query getTags($name: String!, $owner: String!, $after: String) +{ + repository(name: $name, owner: $owner) { + refs(refPrefix: "refs/tags/", first: 100, after: $after) { + totalCount + pageInfo { + endCursor + hasNextPage + } + nodes { + name + target { + ... on Commit { + committedDate + } + ... on Tag { + target { + ... on Commit { + committedDate + } + } + } + } + } + } + } +}""" + + +def fetch_github_tag_nodes(purl): + """ + Yield node name/target mappings for Git tags of the ``purl``. + + Each node has this shape: + { + "name": "v2.6.24-rc5", + "target": { + "target": { + "committedDate": "2007-12-11T03:48:43Z" + } + } + }, + """ + variables = { + "owner": purl.namespace, + "name": purl.name, + } + graphql_query = { + "query": GQL_QUERY, + "variables": variables, + } + + while True: + response = github_response(graphql_query) + refs = response["data"]["repository"]["refs"] + for node in refs["nodes"]: + yield node + + page_info = refs["pageInfo"] + if not page_info["hasNextPage"]: + break + + # to fetch next page, we just set the after variable to endCursor + variables["after"] = page_info["endCursor"] + + +class GitHubTokenError(Exception): + pass + + +class GraphQLError(Exception): + pass + + +def github_response(graphql_query): + gh_token = os.environ.get("GH_TOKEN", None) + if not gh_token: + from dotenv import load_dotenv + + load_dotenv() + gh_token = os.environ.get("GH_TOKEN", None) + + if not gh_token: + msg = ( + "GitHub API Token Not Set\n" + "Set your GitHub token in the GH_TOKEN environment variable." + ) + raise GitHubTokenError(msg) + + headers = {"Authorization": f"bearer {gh_token}"} + + endpoint = "https://api.github.com/graphql" + response = requests.post(endpoint, headers=headers, json=graphql_query).json() + + message = response.get("message") + if message and message == "Bad credentials": + raise GitHubTokenError(f"Invalid GitHub token: {message}") + + errors = response.get("errors") + if errors: + raise GraphQLError(errors) + + return response diff --git a/tests/test_package_versions.py b/tests/test_package_versions.py index 53991d1a..1b5ad8d4 100644 --- a/tests/test_package_versions.py +++ b/tests/test_package_versions.py @@ -164,7 +164,7 @@ def test_get_conan_versions_from_purl(mock_get_response): check_results_against_json(result, expected_file) -@mock.patch("fetchcode.package_versions.github_response") +@mock.patch("fetchcode.utils.github_response") def test_get_github_versions_from_purl(mock_github_response): github_mock_directory = data_location / "github" side_effect = []