spdx_tools.spdx.parser.rdf.creation_info_parser

  1# SPDX-FileCopyrightText: 2023 spdx contributors
  2#
  3# SPDX-License-Identifier: Apache-2.0
import logging
import sys
from urllib.parse import urldefrag

from beartype.typing import Optional, Tuple
from rdflib import RDF, RDFS, Graph, Namespace
from rdflib.exceptions import UniquenessError
from rdflib.term import URIRef

from spdx_tools.spdx.constants import DOCUMENT_SPDX_ID
from spdx_tools.spdx.datetime_conversions import datetime_from_str
from spdx_tools.spdx.model import CreationInfo, ExternalDocumentRef, Version
from spdx_tools.spdx.parser.actor_parser import ActorParser
from spdx_tools.spdx.parser.error import SPDXParsingError
from spdx_tools.spdx.parser.logger import Logger
from spdx_tools.spdx.parser.parsing_functions import (
    construct_or_raise_parsing_error,
    raise_parsing_error_if_logger_has_messages,
)
from spdx_tools.spdx.parser.rdf.checksum_parser import parse_checksum
from spdx_tools.spdx.parser.rdf.graph_parsing_functions import (
    get_correctly_typed_triples,
    parse_literal,
    parse_spdx_id,
    remove_prefix,
)
from spdx_tools.spdx.rdfschema.namespace import LICENSE_NAMESPACE, SPDX_NAMESPACE
 31
 32
 33def parse_creation_info(graph: Graph) -> Tuple[CreationInfo, URIRef]:
 34    logger = Logger()
 35    namespace, spdx_id, doc_node = parse_namespace_and_spdx_id(graph)
 36    spec_version = parse_literal(logger, graph, doc_node, SPDX_NAMESPACE.specVersion)
 37    data_license = parse_literal(
 38        logger,
 39        graph,
 40        doc_node,
 41        SPDX_NAMESPACE.dataLicense,
 42        parsing_method=lambda x: remove_prefix(x, LICENSE_NAMESPACE),
 43    )
 44    comment = parse_literal(logger, graph, doc_node, RDFS.comment)
 45    name = parse_literal(logger, graph, doc_node, SPDX_NAMESPACE.name)
 46
 47    creation_info_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.CreationInfo)
 48    if not creation_info_node:
 49        logger.append("CreationInfo does not exist.")
 50        raise SPDXParsingError([f"Error while parsing document {name}: {logger.get_messages()}"])
 51
 52    created = parse_literal(
 53        logger, graph, creation_info_node, SPDX_NAMESPACE.created, parsing_method=datetime_from_str
 54    )
 55    license_list_version = parse_literal(
 56        logger, graph, creation_info_node, SPDX_NAMESPACE.licenseListVersion, parsing_method=Version.from_string
 57    )
 58    creator_comment = parse_literal(logger, graph, creation_info_node, RDFS.comment)
 59    creators = []
 60    for _, _, creator_literal in get_correctly_typed_triples(
 61        logger, graph, creation_info_node, SPDX_NAMESPACE.creator
 62    ):
 63        creators.append(ActorParser.parse_actor(creator_literal.toPython()))
 64    if not creators:
 65        logger.append("No creators provided.")
 66    external_document_refs = []
 67    for _, _, external_document_node in get_correctly_typed_triples(
 68        logger, graph, doc_node, SPDX_NAMESPACE.externalDocumentRef
 69    ):
 70        external_document_refs.append(parse_external_document_refs(external_document_node, graph, namespace))
 71
 72    raise_parsing_error_if_logger_has_messages(logger, "CreationInfo")
 73    creation_info = construct_or_raise_parsing_error(
 74        CreationInfo,
 75        dict(
 76            spdx_id=spdx_id,
 77            document_namespace=namespace,
 78            spdx_version=spec_version,
 79            name=name,
 80            data_license=data_license,
 81            document_comment=comment,
 82            created=created,
 83            license_list_version=license_list_version,
 84            creator_comment=creator_comment,
 85            creators=creators,
 86            external_document_refs=external_document_refs,
 87        ),
 88    )
 89    return creation_info, doc_node
 90
 91
 92def parse_namespace_and_spdx_id(graph: Graph) -> (str, str):
 93    try:
 94        subject = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.SpdxDocument, any=False)
 95    except UniquenessError:
 96        logging.error("Multiple SpdxDocuments found, can't parse rdf file.")
 97        sys.exit(1)
 98
 99    if not subject:
100        logging.error("No SpdxDocument found, can't parse rdf file.")
101        sys.exit(1)
102    if "#" not in subject:
103        logging.error(
104            "No '#' found in the URI of SpdxDocument, "
105            f"the URI for the SpdxDocument should be the namespace appended by '#{DOCUMENT_SPDX_ID}."
106        )
107        sys.exit(1)
108
109    namespace, spdx_id = urldefrag(str(subject))
110
111    if not namespace:
112        logging.error(
113            f"No namespace found, the URI for the SpdxDocument should be the namespace appended by "
114            f"'#{DOCUMENT_SPDX_ID}."
115        )
116        sys.exit(1)
117
118    if not spdx_id:
119        spdx_id = None
120
121    return namespace, spdx_id, subject
122
123
def parse_external_document_refs(
    external_document_node: URIRef, graph: Graph, doc_namespace: str
) -> ExternalDocumentRef:
    """Parse one external document reference node and bind its namespace on the graph.

    Args:
        external_document_node: Node of the external document reference in ``graph``.
        graph: Graph being parsed. A prefix binding for the referenced document is
            added to it as a side effect.
        doc_namespace: Namespace of the document being parsed; handed to
            ``parse_spdx_id`` together with the node.

    Returns:
        The constructed ``ExternalDocumentRef``.

    Raises:
        SPDXParsingError: presumably raised by the helpers when parsing problems
            were logged or the object cannot be constructed -- confirm against
            ``spdx_tools.spdx.parser.parsing_functions``.
    """
    logger = Logger()
    document_ref_id = parse_spdx_id(external_document_node, doc_namespace, graph)
    document_uri = parse_literal(logger, graph, external_document_node, SPDX_NAMESPACE.spdxDocument)
    checksum = parse_literal(
        logger,
        graph,
        external_document_node,
        SPDX_NAMESPACE.checksum,
        parsing_method=lambda x: parse_checksum(x, graph),
    )

    # parse_literal reports problems through the logger; surface them here instead of
    # dropping them, mirroring the check parse_creation_info performs before construction.
    raise_parsing_error_if_logger_has_messages(logger, "ExternalDocumentRef")
    external_document_ref = construct_or_raise_parsing_error(
        ExternalDocumentRef, dict(document_ref_id=document_ref_id, document_uri=document_uri, checksum=checksum)
    )

    # To replace the external doc namespaces by the ref id in spdx ids later (e.g. in a relationship), we need to bind
    # the namespace to the graph.
    graph.bind(external_document_ref.document_ref_id, Namespace(external_document_ref.document_uri + "#"))

    return external_document_ref
def parse_creation_info( graph: rdflib.graph.Graph) -> tuple[spdx_tools.spdx.model.document.CreationInfo, rdflib.term.URIRef]:
def parse_creation_info(graph: Graph) -> Tuple[CreationInfo, URIRef]:
    """Assemble the ``CreationInfo`` of the SPDX document contained in ``graph``.

    Document-level values (spec version, data license, comment, name and external
    document references) are read from the SpdxDocument node. Creation-level values
    (created, license list version, creator comment and creators) are read from the
    node typed ``CreationInfo``.

    Returns:
        The constructed ``CreationInfo`` together with the SpdxDocument node.

    Raises:
        SPDXParsingError: if the graph has no node of type ``CreationInfo``. The
            helpers used for the final logger check and for construction presumably
            raise it as well when problems were logged (e.g. no creators) or the
            object cannot be built -- confirm against their implementation.
    """
    logger = Logger()
    namespace, spdx_id, doc_node = parse_namespace_and_spdx_id(graph)

    def strip_license_namespace(license_uri):
        return remove_prefix(license_uri, LICENSE_NAMESPACE)

    # Properties attached directly to the SpdxDocument node.
    spec_version = parse_literal(logger, graph, doc_node, SPDX_NAMESPACE.specVersion)
    data_license = parse_literal(
        logger, graph, doc_node, SPDX_NAMESPACE.dataLicense, parsing_method=strip_license_namespace
    )
    comment = parse_literal(logger, graph, doc_node, RDFS.comment)
    name = parse_literal(logger, graph, doc_node, SPDX_NAMESPACE.name)

    creation_info_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.CreationInfo)
    if not creation_info_node:
        # Nothing else can be parsed without this node, so fail right away.
        logger.append("CreationInfo does not exist.")
        raise SPDXParsingError([f"Error while parsing document {name}: {logger.get_messages()}"])

    # Properties attached to the CreationInfo node.
    created = parse_literal(
        logger, graph, creation_info_node, SPDX_NAMESPACE.created, parsing_method=datetime_from_str
    )
    license_list_version = parse_literal(
        logger, graph, creation_info_node, SPDX_NAMESPACE.licenseListVersion, parsing_method=Version.from_string
    )
    creator_comment = parse_literal(logger, graph, creation_info_node, RDFS.comment)

    creators = [
        ActorParser.parse_actor(creator_literal.toPython())
        for _, _, creator_literal in get_correctly_typed_triples(
            logger, graph, creation_info_node, SPDX_NAMESPACE.creator
        )
    ]
    if not creators:
        logger.append("No creators provided.")

    external_document_refs = [
        parse_external_document_refs(external_document_node, graph, namespace)
        for _, _, external_document_node in get_correctly_typed_triples(
            logger, graph, doc_node, SPDX_NAMESPACE.externalDocumentRef
        )
    ]

    # Report everything collected so far before attempting construction.
    raise_parsing_error_if_logger_has_messages(logger, "CreationInfo")

    constructor_arguments = {
        "spdx_id": spdx_id,
        "document_namespace": namespace,
        "spdx_version": spec_version,
        "name": name,
        "data_license": data_license,
        "document_comment": comment,
        "created": created,
        "license_list_version": license_list_version,
        "creator_comment": creator_comment,
        "creators": creators,
        "external_document_refs": external_document_refs,
    }
    creation_info = construct_or_raise_parsing_error(CreationInfo, constructor_arguments)
    return creation_info, doc_node
def parse_namespace_and_spdx_id(graph: rdflib.graph.Graph) -> (<class 'str'>, <class 'str'>):
 93def parse_namespace_and_spdx_id(graph: Graph) -> (str, str):
 94    try:
 95        subject = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.SpdxDocument, any=False)
 96    except UniquenessError:
 97        logging.error("Multiple SpdxDocuments found, can't parse rdf file.")
 98        sys.exit(1)
 99
100    if not subject:
101        logging.error("No SpdxDocument found, can't parse rdf file.")
102        sys.exit(1)
103    if "#" not in subject:
104        logging.error(
105            "No '#' found in the URI of SpdxDocument, "
106            f"the URI for the SpdxDocument should be the namespace appended by '#{DOCUMENT_SPDX_ID}."
107        )
108        sys.exit(1)
109
110    namespace, spdx_id = urldefrag(str(subject))
111
112    if not namespace:
113        logging.error(
114            f"No namespace found, the URI for the SpdxDocument should be the namespace appended by "
115            f"'#{DOCUMENT_SPDX_ID}."
116        )
117        sys.exit(1)
118
119    if not spdx_id:
120        spdx_id = None
121
122    return namespace, spdx_id, subject
def parse_external_document_refs( external_document_node: rdflib.term.URIRef, graph: rdflib.graph.Graph, doc_namespace: str) -> spdx_tools.spdx.model.external_document_ref.ExternalDocumentRef:
def parse_external_document_refs(
    external_document_node: URIRef, graph: Graph, doc_namespace: str
) -> ExternalDocumentRef:
    """Parse one external document reference node and bind its namespace on the graph.

    Args:
        external_document_node: Node of the external document reference in ``graph``.
        graph: Graph being parsed. A prefix binding for the referenced document is
            added to it as a side effect.
        doc_namespace: Namespace of the document being parsed; handed to
            ``parse_spdx_id`` together with the node.

    Returns:
        The constructed ``ExternalDocumentRef``.

    Raises:
        SPDXParsingError: presumably raised by the helpers when parsing problems
            were logged or the object cannot be constructed -- confirm against
            ``spdx_tools.spdx.parser.parsing_functions``.
    """
    logger = Logger()
    document_ref_id = parse_spdx_id(external_document_node, doc_namespace, graph)
    document_uri = parse_literal(logger, graph, external_document_node, SPDX_NAMESPACE.spdxDocument)
    checksum = parse_literal(
        logger,
        graph,
        external_document_node,
        SPDX_NAMESPACE.checksum,
        parsing_method=lambda x: parse_checksum(x, graph),
    )

    # parse_literal reports problems through the logger; surface them here instead of
    # dropping them, mirroring the check parse_creation_info performs before construction.
    raise_parsing_error_if_logger_has_messages(logger, "ExternalDocumentRef")
    external_document_ref = construct_or_raise_parsing_error(
        ExternalDocumentRef, dict(document_ref_id=document_ref_id, document_uri=document_uri, checksum=checksum)
    )

    # To replace the external doc namespaces by the ref id in spdx ids later (e.g. in a relationship), we need to bind
    # the namespace to the graph.
    graph.bind(external_document_ref.document_ref_id, Namespace(external_document_ref.document_uri + "#"))

    return external_document_ref