Skip to content

Commit

Permalink
Add computation of previous and next certificate versions based on ID.
Browse files Browse the repository at this point in the history
Fixes #457.
  • Loading branch information
J08nY committed Nov 20, 2024
1 parent e7e69ba commit f5ce1a5
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 62 deletions.
12 changes: 12 additions & 0 deletions src/sec_certs/dataset/cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -885,10 +885,22 @@ def _compute_sars(self) -> None:
for cert in self:
cert.heuristics.extracted_sars = transformer.transform_single_cert(cert)

@staged(logger, "Computing heuristics: certificate versions")
def _compute_cert_versions(self) -> None:
    """
    Computes the previous/next certificate versions of every certificate in the dataset.

    First collects the parsed cert ID of each certificate, keyed by the certificate digest
    (``None`` for certificates that have no ``heuristics.cert_id``). The complete mapping is
    then handed to each certificate's ``compute_heuristics_cert_versions``.
    """
    parsed_ids: dict[str, CertificateId | None] = {}
    for cert in self:
        raw_id = cert.heuristics.cert_id
        parsed_ids[cert.dgst] = None if raw_id is None else CertificateId(cert.scheme, raw_id)

    # Every certificate needs the full mapping, hence the second pass.
    for cert in self:
        cert.compute_heuristics_cert_versions(parsed_ids)

def _compute_heuristics(self) -> None:
    """
    Runs all heuristic computations of the dataset, one after another, in a fixed order.
    """
    # The steps run strictly in the listed order. The certificate-version step reads
    # ``heuristics.cert_id`` (presumably populated by the cert ID normalization and/or the
    # base-class heuristics -- verify before reordering), so keep it after those steps.
    pipeline = (
        self._compute_normalized_cert_ids,
        super()._compute_heuristics,
        self._compute_scheme_data,
        self._compute_cert_versions,
        self._compute_cert_labs,
        self._compute_sars,
    )
    for step in pipeline:
        step()

Expand Down
5 changes: 3 additions & 2 deletions src/sec_certs/rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ cc_cert_id:
# Rapport de certification 2001/02v2
# Certification Report 2003/20
NL:
# Both patterns accept the prefixes "NSCIB-", "CC-" and "NSCIB-CC-".
# Named groups: "core" is the run of digits and hyphens (optionally starting with a two-digit "year"),
# "doc" is the optional CR/MA/MR suffix including any digits that follow it.
- "(?:NSCIB-|CC-|NSCIB-CC-)(?P<core>((?P<year>[0-9]{2})-)?(?:-?[0-9]+)+)(?:-?(?P<doc>(?:CR|MA|MR)[0-9]*))?"
# Same as above, but the digits after CR/MA/MR are additionally captured as "version".
# NOTE(review): "doc" encloses "version", so "doc" itself differs between e.g. CR2 and CR3 -- confirm that
# code comparing the parsed fields of two cert IDs across versions accounts for this.
- "(?:NSCIB-|CC-|NSCIB-CC-)(?P<core>((?P<year>[0-9]{2})-)?(?:-?[0-9]+)+)(?:-?(?P<doc>(?:CR|MA|MR)(?P<version>[0-9]*)))?"
# Examples:
# NSCIB-CC-22-0428888-CR2 (with year=22 and CR2)
# NSCIB-CC-228723-CR (no year)
Expand Down Expand Up @@ -57,11 +57,12 @@ cc_cert_id:
# CRP208
# CERTIFICATION REPORT No. P123A
ES:
# The separator in front of the version is one or two characters out of: hyphen, Unicode hyphen (U+2010), space.
# The hyphen is listed first in the class on purpose: written as "[ -‐]" it would form a range from the space
# up to U+2010 and match letters and digits as well.
- "(?P<year>[0-9]{4})[-‐](?P<project>[0-9]+)[-‐]INF[-‐](?P<counter>[0-9]+)[-‐ ]{1,2}[vV](?P<version>[0-9])"
# Same ID, but the whole version suffix is optional.
- "(?P<year>[0-9]{4})[-‐](?P<project>[0-9]+)[-‐]INF[-‐](?P<counter>[0-9]+)(?:[-‐ ]{1,2}[vV](?P<version>[0-9]))?"
# Examples:
# 2006-4-INF-98 v2
# 2020-34-INF-3784- v1
# 2019-20-INF-3379-v1
# 2011-14-INF-1095 (also without the version)
KR:
- "KECS[-‐](?P<word>ISIS|NISS|CISS)[-‐](?P<counter>[0-9]{2,4})[-‐](?P<year>[0-9]{4})"
# XXX: Do not use KECS-CR as those refer to the certificate report and do not represent the certificate id.
Expand Down
51 changes: 50 additions & 1 deletion src/sec_certs/sample/cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import copy
import re
from bisect import insort
from collections import Counter, defaultdict
from dataclasses import dataclass, field
from datetime import date, datetime
Expand All @@ -18,7 +19,7 @@
from sec_certs import constants
from sec_certs.cert_rules import SARS_IMPLIED_FROM_EAL, cc_rules, rules, security_level_csv_scan
from sec_certs.configuration import config
from sec_certs.sample.cc_certificate_id import canonicalize, schemes
from sec_certs.sample.cc_certificate_id import CertificateId, canonicalize, schemes
from sec_certs.sample.certificate import Certificate, References, logger
from sec_certs.sample.certificate import Heuristics as BaseHeuristics
from sec_certs.sample.certificate import PdfData as BasePdfData
Expand Down Expand Up @@ -345,6 +346,8 @@ class Heuristics(BaseHeuristics, ComplexSerializableType):
related_cves: set[str] | None = field(default=None)
cert_lab: list[str] | None = field(default=None)
cert_id: str | None = field(default=None)
prev_certificates: list[str] | None = field(default=None)
next_certificates: list[str] | None = field(default=None)
st_references: References = field(default_factory=References)
report_references: References = field(default_factory=References)

Expand Down Expand Up @@ -1000,6 +1003,52 @@ def extract_cert_pdf_keywords(cert: CCCertificate) -> CCCertificate:
cert.pdf_data.cert_keywords = cert_keywords
return cert

def compute_heuristics_cert_versions(self, cert_ids: dict[str, CertificateId | None]) -> None:  # noqa: C901
    """
    Fills in the previous and next certificate versions based on the cert ID.

    Another certificate counts as a version of this one when it has the same scheme and all
    parsed cert ID fields of this certificate, except ``version`` (and ``year`` for German
    certificates), are equal in the other cert ID. Matches are stored as cert ID strings, kept
    sorted, in ``heuristics.prev_certificates`` and ``heuristics.next_certificates``. Both
    lists are reset to empty lists on every call, also when nothing can be computed.

    Versions are compared in natural order (digit runs numerically, the rest as
    case-insensitive text), so that e.g. ``V10`` comes after ``V2``. A cert ID without a
    version precedes every versioned one. Cert IDs whose versions compare equal are
    duplicates of each other and are not listed in either direction.

    :param cert_ids: Mapping from certificate digest to the parsed cert ID of that certificate,
        or ``None`` if it has no cert ID. Has to contain an entry for ``self.dgst``.
    """

    def version_key(raw: str) -> list[tuple[int, int, str]]:
        # Each chunk is tagged (0 = number, 1 = text) so that numbers and text are never
        # compared with each other directly; numbers sort before text at the same position.
        return [
            (0, int(digits), "") if digits else (1, 0, text)
            for digits, text in re.findall(r"([0-9]+)|([^0-9]+)", str(raw).casefold())
        ]

    self.heuristics.prev_certificates = []
    self.heuristics.next_certificates = []
    own = cert_ids[self.dgst]
    if own is None:
        return
    if self.scheme not in ("DE", "FR", "ES", "NL", "MY"):
        # There is no version in the cert_id, so skip it
        return

    # For German certs the year is ignored as well: it changes between versions.
    ignored_keys = {"version", "year"} if self.scheme == "DE" else {"version"}
    version = own.meta.get("version")
    own_key = None if version is None else version_key(version)

    for other_dgst, other in cert_ids.items():
        if other_dgst == self.dgst:
            # Skip ourselves
            continue
        if other is None or other.scheme != own.scheme:
            # The other one does not have a cert ID or belongs to a different scheme.
            continue
        # All own fields apart from the ignored ones have to agree, otherwise it is a
        # different certificate and not another version of this one.
        if any(value != other.meta.get(key) for key, value in own.meta.items() if key not in ignored_keys):
            continue

        other_version = other.meta.get("version")
        other_key = None if other_version is None else version_key(other_version)
        if other_key == own_key:
            # A duplicate ID is present (same version, or no version on either side).
            # Just pass silently.
            continue
        if own_key is None or (other_key is not None and other_key > own_key):
            insort(self.heuristics.next_certificates, str(other))
        else:
            insort(self.heuristics.prev_certificates, str(other))

def compute_heuristics_version(self) -> None:
"""
Fills in the heuristically obtained version of certified product into attribute in heuristics class.
Expand Down
144 changes: 85 additions & 59 deletions tests/cc/test_cc_misc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
import pytest

from sec_certs.sample.cc_certificate_id import CertificateId, canonicalize


def canonicalize_n(n, cert_id_str, scheme):
    """Feed ``cert_id_str`` through ``canonicalize`` ``n`` times in a row and return the final result."""
    current = cert_id_str
    for _round in range(n):
        # Each round canonicalizes the output of the previous one.
        current = canonicalize(current, scheme)
    return current


def test_meta_parse():
i = CertificateId("FR", "Rapport de certification 2001/02v2")
assert "year" in i.meta
Expand All @@ -9,97 +18,114 @@ def test_meta_parse():
assert i.meta["version"] == "2"


def test_canonicalize_fr():
assert canonicalize("Rapport de certification 2001/02v2", "FR") == "ANSSI-CC-2001/02v2"
assert canonicalize("ANSSI-CC 2001/02-R01", "FR") == "ANSSI-CC-2001/02-R01"
assert canonicalize("ANSSI-CC 2001_02-M01", "FR") == "ANSSI-CC-2001/02-M01"
assert canonicalize("ANSSI-CC-PP-2013/58", "FR") == "ANSSI-CC-PP-2013/58"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_fr(n):
    """Canonicalizing FR cert IDs ``n`` times in a row yields the expected canonical form."""
    cases = (
        ("Rapport de certification 2001/02v2", "ANSSI-CC-2001/02v2"),
        ("ANSSI-CC 2001/02-R01", "ANSSI-CC-2001/02-R01"),
        ("ANSSI-CC 2001_02-M01", "ANSSI-CC-2001/02-M01"),
        ("ANSSI-CC-PP-2013/58", "ANSSI-CC-PP-2013/58"),
    )
    for raw, expected in cases:
        assert canonicalize_n(n, raw, "FR") == expected


def test_canonicalize_de():
assert canonicalize("BSI-DSZ-CC-0420-2007", "DE") == "BSI-DSZ-CC-0420-2007"
assert canonicalize("BSI-DSZ-CC-1004", "DE") == "BSI-DSZ-CC-1004"
assert canonicalize("BSI-DSZ-CC-0831-V4-2021", "DE") == "BSI-DSZ-CC-0831-V4-2021"
assert canonicalize("BSI-DSZ-CC-0837-V2-2014-MA-01", "DE") == "BSI-DSZ-CC-0837-V2-2014-MA-01"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_de(n):
    """DE cert IDs that are already canonical come back unchanged after ``n`` canonicalizations."""
    already_canonical = (
        "BSI-DSZ-CC-0420-2007",
        "BSI-DSZ-CC-1004",
        "BSI-DSZ-CC-0831-V4-2021",
        "BSI-DSZ-CC-0837-V2-2014-MA-01",
    )
    for cert_id in already_canonical:
        assert canonicalize_n(n, cert_id, "DE") == cert_id


def test_canonicalize_us():
assert canonicalize("CCEVS-VR-VID10015", "US") == "CCEVS-VR-VID-10015"
assert canonicalize("CCEVS-VR-VID10015-2008", "US") == "CCEVS-VR-VID-10015-2008"
assert canonicalize("CCEVS-VR-10880-2018", "US") == "CCEVS-VR-10880-2018"
assert canonicalize("CCEVS-VR-04-0082", "US") == "CCEVS-VR-0082-2004"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_us(n):
    """Canonicalizing US cert IDs ``n`` times in a row yields the expected canonical form."""
    cases = (
        ("CCEVS-VR-VID10015", "CCEVS-VR-VID-10015"),
        ("CCEVS-VR-VID10015-2008", "CCEVS-VR-VID-10015-2008"),
        ("CCEVS-VR-10880-2018", "CCEVS-VR-10880-2018"),
        ("CCEVS-VR-04-0082", "CCEVS-VR-0082-2004"),
    )
    for raw, expected in cases:
        assert canonicalize_n(n, raw, "US") == expected


def test_canonicalize_my():
assert canonicalize("ISCB-5-RPT-C075-CR-v2", "MY") == "ISCB-5-RPT-C075-CR-v2"
assert canonicalize("ISCB-5-RPT-C046-CR-V1a", "MY") == "ISCB-5-RPT-C046-CR-v1a"
assert canonicalize("ISCB-3-RPT-C068-CR-1-v1", "MY") == "ISCB-3-RPT-C068-CR-v1"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_my(n):
    """Canonicalizing MY cert IDs ``n`` times in a row yields the expected canonical form."""
    cases = (
        ("ISCB-5-RPT-C075-CR-v2", "ISCB-5-RPT-C075-CR-v2"),
        ("ISCB-5-RPT-C046-CR-V1a", "ISCB-5-RPT-C046-CR-v1a"),
        ("ISCB-3-RPT-C068-CR-1-v1", "ISCB-3-RPT-C068-CR-v1"),
    )
    for raw, expected in cases:
        assert canonicalize_n(n, raw, "MY") == expected


def test_canonicalize_es():
assert canonicalize("2011-14-INF-1095-v1", "ES") == "2011-14-INF-1095"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_es(n):
    """The ES cert ID loses its ``-v1`` suffix, also when canonicalized ``n`` times in a row."""
    canonical = canonicalize_n(n, "2011-14-INF-1095-v1", "ES")
    assert canonical == "2011-14-INF-1095"


def test_canonicalize_sg():
assert canonicalize("CSA_CC_21005", "SG") == "CSA_CC_21005"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_sg(n):
    """The SG cert ID comes back unchanged after ``n`` canonicalizations."""
    canonical = canonicalize_n(n, "CSA_CC_21005", "SG")
    assert canonical == "CSA_CC_21005"


def test_canonicalize_in():
assert canonicalize("IC3S/KOL01/ADVA/EAL2/0520/0021 /CR", "IN") == "IC3S/KOL01/ADVA/EAL2/0520/0021"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_in(n):
    """The trailing `` /CR`` of the IN cert ID is dropped, also after ``n`` canonicalizations."""
    canonical = canonicalize_n(n, "IC3S/KOL01/ADVA/EAL2/0520/0021 /CR", "IN")
    assert canonical == "IC3S/KOL01/ADVA/EAL2/0520/0021"


def test_canonicalize_it():
assert canonicalize("OCSI/CERT/TEC/02/2009/RC", "IT") == "OCSI/CERT/TEC/02/2009/RC"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_it(n):
    """The IT cert ID comes back unchanged after ``n`` canonicalizations."""
    canonical = canonicalize_n(n, "OCSI/CERT/TEC/02/2009/RC", "IT")
    assert canonical == "OCSI/CERT/TEC/02/2009/RC"


def test_canonicalize_se():
assert canonicalize("CSEC2017020", "SE") == "CSEC2017020"
assert canonicalize("CSEC 2017020", "SE") == "CSEC2017020"
assert canonicalize("CSEC201003", "SE") == "CSEC2010003"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_se(n):
    """Canonicalizing SE cert IDs ``n`` times in a row yields the expected canonical form."""
    cases = (
        ("CSEC2017020", "CSEC2017020"),
        ("CSEC 2017020", "CSEC2017020"),
        ("CSEC201003", "CSEC2010003"),
    )
    for raw, expected in cases:
        assert canonicalize_n(n, raw, "SE") == expected


def test_canonicalize_uk():
assert canonicalize("CERTIFICATION REPORT No. P123", "UK") == "CRP123"
assert canonicalize("CRP123A", "UK") == "CRP123A"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_uk(n):
    """Canonicalizing UK cert IDs ``n`` times in a row yields the expected canonical form."""
    cases = (
        ("CERTIFICATION REPORT No. P123", "CRP123"),
        ("CRP123A", "CRP123A"),
    )
    for raw, expected in cases:
        assert canonicalize_n(n, raw, "UK") == expected


def test_canonicalize_au():
assert canonicalize("Certification Report 2007/02", "AU") == "Certificate Number: 2007/02"
assert canonicalize("Certificate Number: 37/2006", "AU") == "Certificate Number: 2006/37"
assert canonicalize("Certificate Number: 2011/73", "AU") == "Certificate Number: 2011/73"
assert canonicalize("Certification Report 97/76", "AU") == "Certificate Number: 1997/76"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_au(n):
    """Canonicalizing AU cert IDs ``n`` times in a row yields the expected canonical form."""
    cases = (
        ("Certification Report 2007/02", "Certificate Number: 2007/02"),
        ("Certificate Number: 37/2006", "Certificate Number: 2006/37"),
        ("Certificate Number: 2011/73", "Certificate Number: 2011/73"),
        ("Certification Report 97/76", "Certificate Number: 1997/76"),
    )
    for raw, expected in cases:
        assert canonicalize_n(n, raw, "AU") == expected


def test_canonicalize_ca():
assert canonicalize("383-4-123-CR", "CA") == "383-4-123"
assert canonicalize("383-4-123P", "CA") == "383-4-123"
assert canonicalize("522 EWA 2020", "CA") == "522-EWA-2020"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_ca(n):
    """Canonicalizing CA cert IDs ``n`` times in a row yields the expected canonical form."""
    cases = (
        ("383-4-123-CR", "383-4-123"),
        ("383-4-123P", "383-4-123"),
        ("522 EWA 2020", "522-EWA-2020"),
    )
    for raw, expected in cases:
        assert canonicalize_n(n, raw, "CA") == expected


def test_canonicalize_jp():
assert canonicalize("Certification No. C01234", "JP") == "JISEC-CC-CRP-C01234"
assert canonicalize("CRP-C01234-01", "JP") == "JISEC-CC-CRP-C01234-01"
assert canonicalize("JISEC-CC-CRP-C0689-01-2020", "JP") == "JISEC-CC-CRP-C0689-01-2020"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_jp(n):
    """Canonicalizing JP cert IDs ``n`` times in a row yields the expected canonical form."""
    cases = (
        ("Certification No. C01234", "JISEC-CC-CRP-C01234"),
        ("CRP-C01234-01", "JISEC-CC-CRP-C01234-01"),
        ("JISEC-CC-CRP-C0689-01-2020", "JISEC-CC-CRP-C0689-01-2020"),
    )
    for raw, expected in cases:
        assert canonicalize_n(n, raw, "JP") == expected


def test_canonicalize_kr():
assert canonicalize("KECS-ISIS-0579-2015", "KR") == "KECS-ISIS-0579-2015"
assert canonicalize("KECS-CISS-10-2023", "KR") == "KECS-CISS-0010-2023"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_kr(n):
    """Canonicalizing KR cert IDs ``n`` times in a row yields the expected canonical form."""
    cases = (
        ("KECS-ISIS-0579-2015", "KECS-ISIS-0579-2015"),
        ("KECS-CISS-10-2023", "KECS-CISS-0010-2023"),
    )
    for raw, expected in cases:
        assert canonicalize_n(n, raw, "KR") == expected


def test_canonicalize_no():
assert canonicalize("SERTIT-12", "NO") == "SERTIT-012"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_no(n):
    """The NO cert ID number is zero-padded to three digits, also after ``n`` canonicalizations."""
    canonical = canonicalize_n(n, "SERTIT-12", "NO")
    assert canonical == "SERTIT-012"


def test_canonicalize_tr():
assert canonicalize("21.0.03.0.00.00/TSE-CCCS-85", "TR") == "21.0.03.0.00.00/TSE-CCCS-85"
assert canonicalize("21.0.03/TSE-CCCS-33", "TR") == "21.0.03/TSE-CCCS-33"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_tr(n):
    """TR cert IDs that are already canonical come back unchanged after ``n`` canonicalizations."""
    already_canonical = (
        "21.0.03.0.00.00/TSE-CCCS-85",
        "21.0.03/TSE-CCCS-33",
    )
    for cert_id in already_canonical:
        assert canonicalize_n(n, cert_id, "TR") == cert_id


def test_canonicalize_nl():
assert canonicalize("NSCIB-CC-22-0428888-CR2", "NL") == "NSCIB-CC-22-0428888-CR2"
assert canonicalize("NSCIB-CC-22-0428888", "NL") == "NSCIB-CC-22-0428888-CR"
assert canonicalize("CC-22-0428888", "NL") == "NSCIB-CC-22-0428888-CR"
@pytest.mark.parametrize("n", [1, 2])
def test_canonicalize_nl(n):
    """Canonicalizing NL cert IDs ``n`` times in a row yields the expected canonical form."""
    cases = (
        ("NSCIB-CC-22-0428888-CR2", "NSCIB-CC-22-0428888-CR2"),
        ("NSCIB-CC-22-0428888", "NSCIB-CC-22-0428888-CR"),
        ("CC-22-0428888", "NSCIB-CC-22-0428888-CR"),
    )
    for raw, expected in cases:
        assert canonicalize_n(n, raw, "NL") == expected


def test_certid_compare():
Expand Down

0 comments on commit f5ce1a5

Please sign in to comment.