From 41b4a21252f8cb2d06624f42bb2bbb84a3052119 Mon Sep 17 00:00:00 2001 From: Zach Johnson Date: Thu, 16 Nov 2023 16:24:08 -0700 Subject: [PATCH 1/4] fix license diffs to match new data format --- src/deltacode/__init__.py | 12 +++---- src/deltacode/utils.py | 70 +++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 42 deletions(-) diff --git a/src/deltacode/__init__.py b/src/deltacode/__init__.py index 2500b373..c1a91781 100644 --- a/src/deltacode/__init__.py +++ b/src/deltacode/__init__.py @@ -133,7 +133,7 @@ def create_deltas( def determine_delta(self): """ Create Delta objects and append them to the list. Top Down BFS Traversal is used - to visit the codebase structures of the old and new Codebase Directiries. + to visit the codebase structures of the old and new Codebase Directories. """ old_resource_considered = set() @@ -380,15 +380,13 @@ def licenses_to_dict(self, file): """ licenses = [] try: - for license in file.licenses: + for license in file.license_detections: licenses.append( OrderedDict( [ - ("key", license.get("key", None)), - ("score", license.get("score", None)), - ("short_name", license.get("short_name", None)), - ("category", license.get("category", None)), - ("owner", license.get("owner", None)), + ("identifier", license.get("identifier", None)), + ("license_expression", license.get("license_expression", None)), + ("matches", license.get("matches", None)), ] ) ) diff --git a/src/deltacode/utils.py b/src/deltacode/utils.py index 0fceb5ee..692d852a 100644 --- a/src/deltacode/utils.py +++ b/src/deltacode/utils.py @@ -56,19 +56,19 @@ def update_added_from_license_info(delta, unique_categories): been a license change. """ new_licenses = ( - delta.new_file.licenses if hasattr(delta.new_file, "licenses") else [] + delta.new_file.license_detections if hasattr(delta.new_file, "license_detections") else [] ) - new_categories = set(license["category"] for license in new_licenses) - if hasattr(delta.new_file, "licenses"): + #new_categories = set(license["category"] for license in new_licenses) + if hasattr(delta.new_file, "license_detections"): delta.update(20, "license info added") - for category in new_categories: - # no license ==> 'Copyleft Limited'or higher - if category in unique_categories: - delta.update(20, category.lower() + " added") - # no license ==> 'Permissive' or 'Public Domain' - else: - delta.update(0, category.lower() + " added") +# for category in new_categories: +# # no license ==> 'Copyleft Limited'or higher +# if category in unique_categories: +# delta.update(20, category.lower() + " added") +# # no license ==> 'Permissive' or 'Public Domain' +# else: +# delta.update(0, category.lower() + " added") return @@ -80,48 +80,48 @@ def update_modified_from_license_info(delta, unique_categories): """ new_licenses = ( - delta.new_file.licenses if hasattr(delta.new_file, "licenses") else [] + delta.new_file.license_detections if hasattr(delta.new_file, "license_detections") else [] ) old_licenses = ( - delta.old_file.licenses if hasattr(delta.old_file, "licenses") else [] + delta.old_file.license_detections if hasattr(delta.old_file, "license_detections") else [] ) if not new_licenses and old_licenses: delta.update(15, "license info removed") return - new_categories = set(license.get("category", "") for license in new_licenses) - old_categories = set(license.get("category", "") for license in old_licenses) +# new_categories = set(license.get("category", "") for license in new_licenses) +# old_categories = set(license.get("category", "") for license in old_licenses) if new_licenses and not old_licenses: delta.update(20, "license info added") - for category in new_categories: - # no license ==> 'Copyleft Limited'or higher - if category in unique_categories: - delta.update(20, category.lower() + " added") - # no license ==> 'Permissive' or 'Public Domain' - else: - delta.update(0, category.lower() + " added") +# for category in new_categories: +# # no license ==> 'Copyleft Limited'or higher +# if category in unique_categories: +# delta.update(20, category.lower() + " added") +# # no license ==> 'Permissive' or 'Public Domain' +# else: +# delta.update(0, category.lower() + " added") return - new_keys = set(license.get("key", "") for license in new_licenses) - old_keys = set(license.get("key", "") for license in old_licenses) + new_ids = set(license.get("identifier", "") for license in new_licenses) + old_ids = set(license.get("identifier", "") for license in old_licenses) - if new_keys != old_keys: + if new_ids != old_ids: delta.update(10, "license change") - for category in new_categories - old_categories: - unique_categories_in_old_file = len(old_categories & unique_categories) - # 'Permissive' or 'Public Domain' ==> 'Copyleft Limited' or higher - if unique_categories_in_old_file == 0 and category in unique_categories: - delta.update(20, category.lower() + " added") - # at least 1 category in the old file was 'Copyleft Limited' or higher ==> 'Copyleft Limited' or higher - elif unique_categories_in_old_file != 0 and category in unique_categories: - delta.update(10, category.lower() + " added") - # 'Permissive' or 'Public Domain' ==> 'Permissive' or 'Public Domain' if not in old_categories - elif category not in unique_categories: - delta.update(0, category.lower() + " added") +# for category in new_categories - old_categories: +# unique_categories_in_old_file = len(old_categories & unique_categories) +# # 'Permissive' or 'Public Domain' ==> 'Copyleft Limited' or higher +# if unique_categories_in_old_file == 0 and category in unique_categories: +# delta.update(20, category.lower() + " added") +# # at least 1 category in the old file was 'Copyleft Limited' or higher ==> 'Copyleft Limited' or higher +# elif unique_categories_in_old_file != 0 and category in unique_categories: +# delta.update(10, category.lower() + " added") +# # 'Permissive' or 'Public Domain' ==> 'Permissive' or 'Public Domain' if not in old_categories +# elif category not in unique_categories: +# delta.update(0, category.lower() + " added") def update_from_copyright_info(delta): From 778334c844a060eeb4b1d9f58217da2753baedc4 Mon Sep 17 00:00:00 2001 From: Zach Johnson Date: Fri, 17 Nov 2023 15:19:27 -0700 Subject: [PATCH 2/4] fix categories --- src/deltacode/__init__.py | 8 ++++- src/deltacode/utils.py | 67 +++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/src/deltacode/__init__.py b/src/deltacode/__init__.py index c1a91781..1bbf3843 100644 --- a/src/deltacode/__init__.py +++ b/src/deltacode/__init__.py @@ -271,8 +271,14 @@ def license_diff(self): ] ) + license_refs = {} + for license_reference in self.codebase1.attributes.license_references: + license_refs[license_reference["key"]] = license_reference["category"] + for license_reference in self.codebase2.attributes.license_references: + license_refs[license_reference["key"]] = license_reference["category"] + for delta in self.deltas: - utils.update_from_license_info(delta, unique_categories) + utils.update_from_license_info(delta, unique_categories, license_refs) def copyright_diff(self): """ diff --git a/src/deltacode/utils.py b/src/deltacode/utils.py index 692d852a..a1c9252a 100644 --- a/src/deltacode/utils.py +++ b/src/deltacode/utils.py @@ -36,20 +36,20 @@ from collections import OrderedDict -def update_from_license_info(delta, unique_categories): +def update_from_license_info(delta, unique_categories, license_refs): """ Increase an 'added' or 'modified' Delta object's 'score' attribute and add one or more appropriate categories to its 'factors' attribute if there has been a license change and depending on the nature of that change. """ if delta.is_added(): - update_added_from_license_info(delta, unique_categories) + update_added_from_license_info(delta, unique_categories, license_refs) if delta.is_modified(): - update_modified_from_license_info(delta, unique_categories) + update_modified_from_license_info(delta, unique_categories, license_refs) -def update_added_from_license_info(delta, unique_categories): +def update_added_from_license_info(delta, unique_categories, license_refs): """ Increase an 'added' Delta object's 'score' attribute and add one or more categories to its 'factors' attribute if there has @@ -59,20 +59,20 @@ def update_added_from_license_info(delta, unique_categories): delta.new_file.license_detections if hasattr(delta.new_file, "license_detections") else [] ) - #new_categories = set(license["category"] for license in new_licenses) + new_categories = set(license_refs[license["license_expression"]] for license in new_licenses) if hasattr(delta.new_file, "license_detections"): delta.update(20, "license info added") -# for category in new_categories: -# # no license ==> 'Copyleft Limited'or higher -# if category in unique_categories: -# delta.update(20, category.lower() + " added") -# # no license ==> 'Permissive' or 'Public Domain' -# else: -# delta.update(0, category.lower() + " added") + for category in new_categories: + # no license ==> 'Copyleft Limited'or higher + if category in unique_categories: + delta.update(20, category.lower() + " added") + # no license ==> 'Permissive' or 'Public Domain' + else: + delta.update(0, category.lower() + " added") return -def update_modified_from_license_info(delta, unique_categories): +def update_modified_from_license_info(delta, unique_categories, license_refs): """ Increase a 'modified' Delta object's 'score' attribute and add one or more categories to its 'factors' attribute if there has @@ -90,38 +90,37 @@ def update_modified_from_license_info(delta, unique_categories): delta.update(15, "license info removed") return -# new_categories = set(license.get("category", "") for license in new_licenses) -# old_categories = set(license.get("category", "") for license in old_licenses) + new_categories = set(license_refs[license["license_expression"]] for license in new_licenses) + old_categories = set(license_refs[license["license_expression"]] for license in old_licenses) if new_licenses and not old_licenses: delta.update(20, "license info added") -# for category in new_categories: -# # no license ==> 'Copyleft Limited'or higher -# if category in unique_categories: -# delta.update(20, category.lower() + " added") -# # no license ==> 'Permissive' or 'Public Domain' -# else: -# delta.update(0, category.lower() + " added") + for category in new_categories: + # no license ==> 'Copyleft Limited'or higher + if category in unique_categories: + delta.update(20, category.lower() + " added") + # no license ==> 'Permissive' or 'Public Domain' + else: + delta.update(0, category.lower() + " added") return new_ids = set(license.get("identifier", "") for license in new_licenses) old_ids = set(license.get("identifier", "") for license in old_licenses) if new_ids != old_ids: - delta.update(10, "license change") -# for category in new_categories - old_categories: -# unique_categories_in_old_file = len(old_categories & unique_categories) -# # 'Permissive' or 'Public Domain' ==> 'Copyleft Limited' or higher -# if unique_categories_in_old_file == 0 and category in unique_categories: -# delta.update(20, category.lower() + " added") -# # at least 1 category in the old file was 'Copyleft Limited' or higher ==> 'Copyleft Limited' or higher -# elif unique_categories_in_old_file != 0 and category in unique_categories: -# delta.update(10, category.lower() + " added") -# # 'Permissive' or 'Public Domain' ==> 'Permissive' or 'Public Domain' if not in old_categories -# elif category not in unique_categories: -# delta.update(0, category.lower() + " added") + for category in new_categories - old_categories: + unique_categories_in_old_file = len(old_categories & unique_categories) + # 'Permissive' or 'Public Domain' ==> 'Copyleft Limited' or higher + if unique_categories_in_old_file == 0 and category in unique_categories: + delta.update(20, category.lower() + " added") + # at least 1 category in the old file was 'Copyleft Limited' or higher ==> 'Copyleft Limited' or higher + elif unique_categories_in_old_file != 0 and category in unique_categories: + delta.update(10, category.lower() + " added") + # 'Permissive' or 'Public Domain' ==> 'Permissive' or 'Public Domain' if not in old_categories + elif category not in unique_categories: + delta.update(0, category.lower() + " added") def update_from_copyright_info(delta): From ec97552a727bd1a745b46f9a0ca8e6ca32fd4943 Mon Sep 17 00:00:00 2001 From: Zach Johnson Date: Mon, 20 Nov 2023 15:21:50 -0700 Subject: [PATCH 3/4] update models with new fields --- src/deltacode/cli.py | 3 +++ src/deltacode/models.py | 58 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/src/deltacode/cli.py b/src/deltacode/cli.py index 79d7bdc5..a9d8eb73 100644 --- a/src/deltacode/cli.py +++ b/src/deltacode/cli.py @@ -92,3 +92,6 @@ def cli(new, old, json_file, all_delta_types): deltacode = DeltaCode(new, old, options) # generate JSON output write_json(deltacode, json_file, all_delta_types) + +if __name__ == "__main__": + cli() \ No newline at end of file diff --git a/src/deltacode/models.py b/src/deltacode/models.py index ed7bee88..df23a490 100644 --- a/src/deltacode/models.py +++ b/src/deltacode/models.py @@ -248,10 +248,36 @@ class License(object): """ def __init__(self, dictionary={}): self.key = dictionary.get('key') - self.score = dictionary.get('score') + self.language = dictionary.get('language') self.short_name = dictionary.get('short_name') + self.name = dictionary.get('name') self.category = dictionary.get('category') self.owner = dictionary.get('owner') + self.homepage_url = dictionary.get('homepage_url') + self.notes = dictionary.get('notes') + self.is_builtin = dictionary.get('is_builtin') + self.is_exception = dictionary.get('is_exception') + self.is_unknown = dictionary.get('is_unknown') + self.is_generic = dictionary.get('is_generic') + self.spdx_license_key = dictionary.get('spdx_license_key') + self.other_spdx_license_keys = dictionary.get('other_spdx_license_keys') + self.osi_license_key = dictionary.get('osi_license_key') + self.text_urls = dictionary.get('text_urls') + self.osi_url = dictionary.get('osi_url') + self.faq_url = dictionary.get('faq_url') + self.other_urls = dictionary.get('other_urls') + self.key_aliases = dictionary.get('key_aliases') + self.minimum_coverage = dictionary.get('minimum_coverage') + self.standard_notice = dictionary.get('standard_notice') + self.ignorable_copyrights = dictionary.get('ignorable_copyrights') + self.ignorable_holders = dictionary.get('ignorable_holders') + self.ignorable_authors = dictionary.get('ignorable_authors') + self.ignorable_urls = dictionary.get('ignorable_urls') + self.ignorable_emails = dictionary.get('ignorable_emails') + self.text = dictionary.get('text') + self.scancode_url = dictionary.get('scancode_url') + self.licensedb_url = dictionary.get('licensedb_url') + self.spdx_url = dictionary.get('spdx_url') def to_dict(self): """ @@ -260,10 +286,36 @@ def to_dict(self): """ d = OrderedDict([ ('key', self.key), - ('score', self.score), + ('language', self.language), ('short_name', self.short_name), + ('name', self.name), ('category', self.category), - ('owner', self.owner) + ('owner', self.owner), + ('homepage_url', self.homepage_url), + ('notes', self.notes), + ('is_builtin', self.is_builtin), + ('is_exception', self.is_exception), + ('is_unknown', self.is_unknown), + ('is_generic', self.is_generic), + ('spdx_license_key', self.spdx_license_key), + ('other_spdx_license_keys', self.other_spdx_license_keys), + ('osi_license_key', self.osi_license_key), + ('text_urls', self.text_urls), + ('osi_url', self.osi_url), + ('faq_url', self.faq_url), + ('other_urls', self.other_urls), + ('key_aliases', self.key_aliases), + ('minimum_coverage', self.minimum_coverage), + ('standard_notice', self.standard_notice), + ('ignorable_copyrights', self.ignorable_copyrights), + ('ignorable_holders', self.ignorable_holders), + ('ignorable_authors', self.ignorable_authors), + ('ignorable_urls', self.ignorable_urls), + ('ignorable_emails', self.ignorable_emails), + ('text', self.text), + ('scancode_url', self.scancode_url), + ('licensedb_url', self.licensedb_url), + ('spdx_url', self.spdx_url) ]) return d From b97093a23f0f2edbd1a2fe250b88cfad08fcabef Mon Sep 17 00:00:00 2001 From: Zach Johnson Date: Wed, 29 Nov 2023 11:43:21 -0700 Subject: [PATCH 4/4] handle multi license categories by specifying na for the category --- src/deltacode/utils.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/deltacode/utils.py b/src/deltacode/utils.py index a1c9252a..10734b94 100644 --- a/src/deltacode/utils.py +++ b/src/deltacode/utils.py @@ -59,7 +59,9 @@ def update_added_from_license_info(delta, unique_categories, license_refs): delta.new_file.license_detections if hasattr(delta.new_file, "license_detections") else [] ) - new_categories = set(license_refs[license["license_expression"]] for license in new_licenses) + new_categories = set() + for license in new_licenses: + new_categories.add(license_refs.get(license["license_expression"], "N/A")) if hasattr(delta.new_file, "license_detections"): delta.update(20, "license info added") for category in new_categories: @@ -90,8 +92,12 @@ def update_modified_from_license_info(delta, unique_categories, license_refs): delta.update(15, "license info removed") return - new_categories = set(license_refs[license["license_expression"]] for license in new_licenses) - old_categories = set(license_refs[license["license_expression"]] for license in old_licenses) + new_categories = set() + for license in new_licenses: + new_categories.add(license_refs.get(license["license_expression"], "N/A")) + old_categories = set() + for license in old_licenses: + old_categories.add(license_refs.get(license["license_expression"], "N/A")) if new_licenses and not old_licenses: delta.update(20, "license info added")