# Source code for ntia_conformance_checker.spdx3_utils

# SPDX-FileCopyrightText: 2025 SPDX contributors
# SPDX-FileType: SOURCE
# SPDX-License-Identifier: Apache-2.0

"""Helpers for SPDX 3."""

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Union

from spdx_python_model.bindings import v3_0_1 as spdx3
from spdx_tools.spdx.validation.validation_message import (
    ValidationContext,
    ValidationMessage,
)

if TYPE_CHECKING:
    from collections.abc import Iterator


# Relationship type entry names (without IRI prefix) that
# has_package_dependency_relationship() looks for when deciding whether
# a dependency relationship is present.
SPDX3_PACKAGE_DEPENDENCY_RELATIONSHIP_TYPES = (
    "contains",
    "dependsOn",
    "hasDynamicLink",
    "hasStaticLink",
)


def validate_spdx3_data(
    object_set: spdx3.SHACLObjectSet,
) -> tuple[spdx3.SpdxDocument | None, list[ValidationMessage]]:
    """
    Find the SpdxDocument in an SHACLObjectSet and check its constraints.

    The SPDX 3.0 specification states that "Any instance of serialization of
    SPDX data shall not contain more than one SpdxDocument element definition."
    See: https://spdx.github.io/spdx-spec/latest/model/Core/Classes/SpdxDocument/

    Checks performed, in order:

    1. Exactly one SpdxDocument exists in the object set. If there is none,
       or more than one, a single message is reported and the function
       returns immediately with no document.
    2. If the document lists any ``element``, it also lists at least one
       ``rootElement``.
    3. No item of ``element`` is itself an SpdxDocument.
    4. No item of ``rootElement`` is itself an SpdxDocument.

    Failures of checks 2-4 are reported but the document is still returned.

    Args:
        object_set (spdx3.SHACLObjectSet): The SHACLObjectSet containing
            the SPDX 3 document.

    Returns:
        spdx3.SpdxDocument | None: An SpdxDocument if exactly one was found,
            otherwise None.
        list[ValidationMessage]: A list of validation messages.
            Empty if no errors.
    """
    # ValidationMessage/ValidationContext come from the SPDX 2 tooling
    # (spdx_tools.spdx.validation.validation_message). They are reused for
    # SPDX 3 so the print/HTML/JSON output functions can be shared.
    problems: list[ValidationMessage] = []

    found_docs: list[spdx3.SpdxDocument] = list(
        object_set.foreach_type(spdx3.SpdxDocument)
    )
    doc_count = len(found_docs)

    # == SPDX 3 JSON serialization constraint =====
    # Collections of SPDX 3 Elements shall be inside SpdxDocument
    if doc_count == 0:
        problems.append(
            ValidationMessage(
                "No SpdxDocument object found in the SPDX 3 JSON file. "
                "Expected exactly one.",
                ValidationContext(),
            )
        )
        return (None, problems)

    if doc_count > 1:
        problems.append(
            ValidationMessage(
                "Multiple SpdxDocument objects found in the SPDX 3 JSON file. "
                "Allows no more than one. "
                "Ref: https://spdx.github.io/spdx-spec/latest/model/Core/Classes/SpdxDocument/",
                ValidationContext(),
            )
        )
        return (None, problems)

    # == ElementCollection constraint =====
    # SpdxDocument is an ElementCollection.
    # Ref: https://spdx.github.io/spdx-spec/latest/model/Core/Classes/ElementCollection/
    document = found_docs[0]
    parent_id = getattr(document, "spdxId", None)
    members: spdx3.ListProxy[Union[str, spdx3.Element]] = document.element
    roots: spdx3.ListProxy[Union[str, spdx3.Element]] = document.rootElement

    # If there is at least one element, there shall also be at least one
    # rootElement.
    if members and not roots:
        problems.append(
            ValidationMessage(
                "The SpdxDocument has elements but no rootElement. "
                "An SpdxDocument with at least one element shall also have "
                "at least one rootElement. "
                "Ref: https://spdx.github.io/spdx-spec/latest/model/Core/Classes/ElementCollection/",
                ValidationContext(parent_id=parent_id),
            )
        )

    # Neither element nor rootElement may contain an SpdxDocument. The same
    # scan is applied to both collections, element first, each with its own
    # message.
    nesting_checks = (
        (
            members,
            "An SpdxDocument element shall not be of type SpdxDocument. "
            "Ref: https://spdx.github.io/spdx-spec/latest/model/Core/Classes/ElementCollection/",
        ),
        (
            roots,
            "An SpdxDocument rootElement shall not be of type SpdxDocument. "
            "Ref: https://spdx.github.io/spdx-spec/latest/model/Core/Classes/ElementCollection/",
        ),
    )
    for collection, message in nesting_checks:
        for item in collection:
            if not isinstance(item, spdx3.SpdxDocument):
                continue
            where = ValidationContext(
                parent_id=parent_id,
                spdx_id=getattr(item, "spdxId", None),
            )
            problems.append(ValidationMessage(message, where))

    return (document, problems)


def get_boms_from_spdx_document(
    spdx_doc: spdx3.SpdxDocument | None,
) -> list[spdx3.Bom] | None:
    """
    Return the rootElement entries of an SPDX 3 SpdxDocument as its BOMs.

    The ``rootElement`` value is handed back as-is; no check is made here
    that each entry is actually a Bom.

    Args:
        spdx_doc (spdx3.SpdxDocument | None): The SPDX 3 SpdxDocument.

    Returns:
        list[spdx3.Bom] | None: The document's root elements, or None when
            the document is missing/falsy or has no root elements.
    """
    roots: list[spdx3.Bom] = (
        getattr(spdx_doc, "rootElement", []) if spdx_doc else []
    )
    # Collapse "no document", "no attribute" and "empty" into None.
    return roots or None


def get_packages_from_bom(
    bom: spdx3.Bom | None,
) -> list[spdx3.software_Package] | None:
    """
    Return the single rootElement of an SPDX 3 BOM as its package list.

    A result is produced only when the BOM has exactly one root element;
    zero or several root elements both yield None. The ``rootElement``
    value is handed back as-is; no check is made here that the entry is
    actually a /Software/Package.

    Args:
        bom (spdx3.Bom | None): The SPDX 3 Bom.

    Returns:
        list[spdx3.software_Package] | None: A one-item list holding the
            root element, otherwise None.
    """
    if not bom:
        return None

    roots: list[spdx3.software_Package] = getattr(bom, "rootElement", [])
    # Truthiness is tested first so a None/empty value never reaches len().
    has_single_root = bool(roots) and len(roots) == 1
    return roots if has_single_root else None


def iter_objects_with_property(
    object_set: spdx3.SHACLObjectSet,
    typ: type[spdx3.SHACLObject] = spdx3.Artifact,
    property_name: str = "spdxId",
) -> Iterator[tuple[str, str, Any]]:
    """
    Yield (name, spdxId, property) for each SPDX 3 object of a given type.

    ``name`` and ``spdxId`` are whitespace-stripped and fall back to an
    empty string when the attribute is missing or falsy. The requested
    property is passed through untouched, or None when the object does
    not have it.

    Args:
        object_set (spdx3.SHACLObjectSet): The SHACLObjectSet to iterate over.
        typ (type[spdx3.SHACLObject]): The type of SPDX3 object
        property_name (str): The property name to retrieve.

    Yields:
        Iterator[tuple[str, str, Any]]: A tuple containing the name, SPDX ID,
            and the specified property of the object.
    """

    def _stripped(source: Any, attribute: str) -> str:
        # Missing and falsy values both normalise to "".
        return (getattr(source, attribute, "") or "").strip()

    for item in object_set.foreach_type(typ):
        yield (
            _stripped(item, "name"),
            _stripped(item, "spdxId"),
            getattr(item, property_name, None),
        )


def iter_relationships_by_type(
    object_set: spdx3.SHACLObjectSet,
    rel_type: str,
) -> Iterator[tuple[str, list[str]]]:
    """
    Yield (from_id, to_ids_list) for each relationship of the specified
    relationship type.

    Relationship endpoints may be either plain ID strings or objects; for
    objects the ``spdxId`` attribute is used. Relationships whose type does
    not match, that lack a ``from_`` or ``to`` value, or that end up with no
    usable from ID or no usable to IDs are skipped.

    Args:
        object_set (spdx3.SHACLObjectSet): The SHACLObjectSet to iterate over.
        rel_type (str): The relationship type entry name, without IRI prefix.

    Yields:
        Iterator[tuple[str, list[str]]]: The from ID and the non-empty list
            of to IDs.
    """

    def _as_id(ref: Union[str, spdx3.Element]) -> str:
        # A string is already an ID; anything else is asked for its spdxId.
        return ref if isinstance(ref, str) else getattr(ref, "spdxId", "")

    for relationship in object_set.foreach_type(spdx3.Relationship):
        type_iri = getattr(relationship, "relationshipType", "")
        # Only the last path segment of the type IRI is the entry name.
        if not type_iri or type_iri.rsplit("/", 1)[-1] != rel_type:
            continue

        source: str | spdx3.Element | None = relationship.from_
        targets: spdx3.ListProxy[Union[str, spdx3.Element]] = relationship.to
        if not (source and targets):
            continue

        source_id = _as_id(source)
        target_ids = [
            target_id
            for target_id in (_as_id(target) for target in targets)
            if target_id
        ]

        if source_id and target_ids:
            yield source_id, target_ids


def get_all_packages(object_set: spdx3.SHACLObjectSet) -> set[spdx3.software_Package]:
    """
    Collect every /Software/Package object in an SHACLObjectSet.

    Args:
        object_set (spdx3.SHACLObjectSet): The SHACLObjectSet to search.

    Returns:
        set[spdx3.software_Package]: All package objects found.
    """
    return set(object_set.foreach_type(spdx3.software_Package))


def get_all_package_ids(object_set: spdx3.SHACLObjectSet) -> set[str]:
    """
    Collect the spdxId of every /Software/Package in an SHACLObjectSet.

    Packages without a usable (non-empty) spdxId are left out.

    Args:
        object_set (spdx3.SHACLObjectSet): The SHACLObjectSet to search.

    Returns:
        set[str]: The non-empty package IDs.
    """
    package_ids: set[str] = set()
    for _name, package_id, _prop in iter_objects_with_property(
        object_set, spdx3.software_Package, "spdxId"
    ):
        if package_id:
            package_ids.add(package_id)
    return package_ids


def get_all_element_ids(object_set: spdx3.SHACLObjectSet) -> set[str]:
    """
    Collect the spdxId of every SPDX 3 Element in an SHACLObjectSet.

    Elements without a usable (non-empty) spdxId are left out.

    Args:
        object_set (spdx3.SHACLObjectSet): The SHACLObjectSet to search.

    Returns:
        set[str]: The non-empty element IDs.
    """
    element_ids: set[str] = set()
    for _name, element_id, _prop in iter_objects_with_property(
        object_set, spdx3.Element, "spdxId"
    ):
        if element_id:
            element_ids.add(element_id)
    return element_ids


def has_package_dependency_relationship(object_set: spdx3.SHACLObjectSet) -> bool:
    """
    Return True if a dependency relationship connects SPDX 3 Elements.

    A relationship counts when its type is one of
    SPDX3_PACKAGE_DEPENDENCY_RELATIONSHIP_TYPES, its from ID is a known
    element ID, and at least one of its to IDs is a known element ID.

    Args:
        object_set (spdx3.SHACLObjectSet): The SHACLObjectSet to inspect.

    Returns:
        bool: True if such a relationship exists; False otherwise, including
            when the object set holds fewer than two element IDs.
    """
    known_ids = get_all_element_ids(object_set)
    # With fewer than two IDs the function answers False without looking
    # at any relationship.
    if len(known_ids) < 2:
        return False

    return any(
        source_id in known_ids and not known_ids.isdisjoint(target_ids)
        for rel_type in SPDX3_PACKAGE_DEPENDENCY_RELATIONSHIP_TYPES
        for source_id, target_ids in iter_relationships_by_type(object_set, rel_type)
    )