Source code for ntia_conformance_checker.spdx3_utils

# SPDX-FileCopyrightText: 2025 SPDX contributors
# SPDX-FileType: SOURCE
# SPDX-License-Identifier: Apache-2.0

"""Helpers for SPDX 3."""

from __future__ import annotations

from typing import Any, Iterator, List, Optional, Set, Tuple, Type, cast

from spdx_python_model import v3_0_1 as spdx3  # type: ignore # import-untyped
from spdx_tools.spdx.validation.validation_message import (
    ValidationContext,
    ValidationMessage,
)


def validate_spdx3_data(
    object_set: spdx3.SHACLObjectSet,
) -> Tuple[Optional[spdx3.SpdxDocument], List[ValidationMessage]]:
    """
    Check that an SHACLObjectSet holds exactly one usable SpdxDocument.

    The SPDX 3.0 specification states that "Any instance of serialization of
    SPDX data MUST NOT contain more than one SpdxDocument element definition."
    See: https://spdx.github.io/spdx-spec/v3.0/model/Core/Classes/SpdxDocument/

    For BOM/SBOM use, the SpdxDocument is additionally expected to have a
    single rootElement that is a Bom or a Software/Sbom.
    See: https://github.com/spdx/ntia-conformance-checker/issues/268

    Args:
        object_set (spdx3.SHACLObjectSet): The SHACLObjectSet containing the
            SPDX 3 document.

    Returns:
        Tuple[Optional[spdx3.SpdxDocument], List[ValidationMessage]]:
            The SpdxDocument when exactly one is present (None when there
            is none or more than one), followed by the validation messages
            collected along the way. The list is empty if no problem was
            found. Problems with the rootElement are reported but the
            document is still returned.
    """
    # The ValidationMessage/ValidationContext classes come from
    # spdx_tools.spdx.validation.validation_message, which targets SPDX 2.
    # They are reused for SPDX 3 so that the existing print/HTML/JSON
    # output functions keep working.
    messages: List[ValidationMessage] = []

    def report(text: str, context: ValidationContext) -> None:
        """Record one validation problem."""
        messages.append(ValidationMessage(text, context))

    documents: List[spdx3.SpdxDocument] = []
    for candidate in object_set.foreach_type("SpdxDocument"):
        documents.append(cast("spdx3.SpdxDocument", candidate))

    document_count = len(documents)
    if document_count == 0:
        report(
            "No SpdxDocument object found in the SPDX 3 JSON file. "
            "Expected exactly one.",
            ValidationContext(),
        )
        return (None, messages)
    if document_count > 1:
        report(
            "Multiple SpdxDocument objects found. Allows exactly one.",
            ValidationContext(),
        )
        return (None, messages)

    doc = documents[0]
    doc_id = getattr(doc, "spdxId", None)
    roots = getattr(doc, "rootElement", None)

    if not roots:
        report(
            "No rootElement found in the SpdxDocument. Expected exactly one.",
            ValidationContext(parent_id=doc_id),
        )
        return (doc, messages)
    if len(roots) != 1:
        report(
            "Multiple root elements found in SpdxDocument. Allows exactly one.",
            ValidationContext(parent_id=doc_id),
        )
        return (doc, messages)

    root = roots[0]
    if isinstance(root, (spdx3.Bom, spdx3.software_Sbom)):
        return (doc, messages)

    wrong_type_msg = (
        "The root element must be of type Bom or software_Sbom. "
        f"Found: {type(root)}"
    )
    root_id = getattr(root, "spdxId", None)
    report(wrong_type_msg, ValidationContext(parent_id=doc_id, spdx_id=root_id))
    return (doc, messages)
def get_boms_from_spdx_document(
    spdx_doc: Optional[spdx3.SpdxDocument],
) -> Optional[List[spdx3.Bom]]:
    """
    Return the rootElement entries of an SPDX 3 SpdxDocument.

    Args:
        spdx_doc (Optional[spdx3.SpdxDocument]): The SPDX 3 SpdxDocument,
            or None.

    Returns:
        Optional[List[spdx3.Bom]]: The document's rootElement value, or None
            when the document is missing/falsy or has no root elements.
    """
    if spdx_doc:
        boms: List[spdx3.Bom] = getattr(spdx_doc, "rootElement", [])
        if boms:
            return boms
    return None
def get_packages_from_bom(
    bom: Optional[spdx3.Bom],
) -> Optional[List[spdx3.software_Package]]:
    """
    Return the rootElement entries of an SPDX 3 BOM.

    Args:
        bom (Optional[spdx3.Bom]): The SPDX 3 Bom, or None.

    Returns:
        Optional[List[spdx3.software_Package]]: The Bom's rootElement value
            when it holds exactly one entry. None when the Bom is
            missing/falsy, or when it has zero or more than one root element.
    """
    if not bom:
        return None

    candidates: List[spdx3.software_Package] = getattr(bom, "rootElement", [])
    # Only a single, unambiguous root element is accepted.
    if candidates and len(candidates) == 1:
        return candidates
    return None
def iter_objects_with_property(
    object_set: spdx3.SHACLObjectSet,
    typ: Type[spdx3.SHACLObject] = spdx3.Artifact,
    property_name: str = "spdxId",
) -> Iterator[Tuple[str, str, Any]]:
    """
    Yield (name, spdxId, property) for each SPDX 3 object of a given type.

    Args:
        object_set (spdx3.SHACLObjectSet): The SHACLObjectSet to iterate over.
        typ (Type[spdx3.SHACLObject]): The class of SPDX 3 object to visit;
            its ``__name__`` is what gets passed to ``foreach_type``.
        property_name (str): The name of the attribute to read from each
            object.

    Yields:
        Iterator[Tuple[str, str, Any]]: The object's name and SPDX ID, both
            stripped of surrounding whitespace (empty string when missing or
            falsy), and the value of the requested attribute (None when the
            object does not have it).
    """
    type_name = typ.__name__
    for element in object_set.foreach_type(type_name):
        raw_name = getattr(element, "name", "") or ""
        clean_name = raw_name.strip()
        raw_id = getattr(element, "spdxId", "") or ""
        clean_id = raw_id.strip()
        yield clean_name, clean_id, getattr(element, property_name, None)
def iter_relationships_by_type(
    object_set: spdx3.SHACLObjectSet,
    rel_type: str,
) -> Iterator[Tuple[str, str]]:
    """
    Yield (from_id, to_id) for each relationship of the specified relationship type.

    In the SPDX 3 model a Relationship has a single "from" element, while
    its "to" property is multi-valued. One (from_id, to_id) pair is
    therefore yielded per target element. A "to" value that is a single
    element rather than a collection is also accepted.

    Args:
        object_set (spdx3.SHACLObjectSet): The SHACLObjectSet to search.
        rel_type (str): The relationship type entry name without its IRI
            prefix, i.e. the part after the last "/".

    Yields:
        Iterator[Tuple[str, str]]: The spdxId of the "from" element and the
            spdxId of one "to" element. An ID is an empty string when the
            corresponding object has no spdxId attribute. Relationships
            with a missing/empty "from" or "to" are skipped.
    """
    for obj in object_set.foreach_type("Relationship"):
        _rel_type = getattr(obj, "relationshipType", "")
        # Remove the IRI prefix of entry name before compare
        if not _rel_type or _rel_type.split("/")[-1] != rel_type:
            continue

        from_: Any = getattr(obj, "from_", None)
        to: Any = getattr(obj, "to", None)
        if not from_ or not to:
            continue

        # Normalise "to" into a list of targets. A bare element (anything
        # with an spdxId) or a plain string is wrapped as-is so that it is
        # never iterated item by item / character by character.
        if isinstance(to, str) or hasattr(to, "spdxId"):
            targets = [to]
        else:
            try:
                # Snapshot the collection so it cannot change mid-iteration
                # while this generator is suspended.
                targets = list(to)
            except TypeError:
                targets = [to]

        from_id = getattr(from_, "spdxId", "")
        for target in targets:
            yield from_id, getattr(target, "spdxId", "")
def get_all_packages(object_set: spdx3.SHACLObjectSet) -> Set[spdx3.software_Package]:
    """
    Collect every /Software/Package object held in an SHACLObjectSet.

    Args:
        object_set (spdx3.SHACLObjectSet): The SHACLObjectSet to search.

    Returns:
        Set[spdx3.software_Package]: All objects reported by the object set
            for the "software_Package" type. Empty if there are none.
    """
    found: Set[spdx3.software_Package] = set()
    for candidate in object_set.foreach_type("software_Package"):
        # cast() only informs the type checker; the object is stored as-is.
        found.add(cast("spdx3.software_Package", candidate))
    return found