spdx_tools.spdx.parser.rdf.creation_info_parser
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
import logging
import sys
from urllib.parse import urldefrag

from beartype.typing import Optional, Tuple
from rdflib import RDF, RDFS, Graph, Namespace
from rdflib.exceptions import UniquenessError
from rdflib.term import URIRef

from spdx_tools.spdx.constants import DOCUMENT_SPDX_ID
from spdx_tools.spdx.datetime_conversions import datetime_from_str
from spdx_tools.spdx.model import CreationInfo, ExternalDocumentRef, Version
from spdx_tools.spdx.parser.actor_parser import ActorParser
from spdx_tools.spdx.parser.error import SPDXParsingError
from spdx_tools.spdx.parser.logger import Logger
from spdx_tools.spdx.parser.parsing_functions import (
    construct_or_raise_parsing_error,
    raise_parsing_error_if_logger_has_messages,
)
from spdx_tools.spdx.parser.rdf.checksum_parser import parse_checksum
from spdx_tools.spdx.parser.rdf.graph_parsing_functions import (
    get_correctly_typed_triples,
    parse_literal,
    parse_spdx_id,
    remove_prefix,
)
from spdx_tools.spdx.rdfschema.namespace import LICENSE_NAMESPACE, SPDX_NAMESPACE


def parse_creation_info(graph: Graph) -> Tuple[CreationInfo, URIRef]:
    """Build the document's ``CreationInfo`` from an RDF graph.

    Spec version, data license, document comment, name and external document references are read from the
    SpdxDocument node; created, license list version, creator comment and creators are read from the node
    typed as ``SPDX_NAMESPACE.CreationInfo``.

    Args:
        graph: The RDF graph of the SPDX document.

    Returns:
        The constructed ``CreationInfo`` and the SpdxDocument node.

    Raises:
        SPDXParsingError: If the graph has no CreationInfo node, or raised by
            ``raise_parsing_error_if_logger_has_messages`` / ``construct_or_raise_parsing_error``.
    """
    logger = Logger()
    namespace, spdx_id, doc_node = parse_namespace_and_spdx_id(graph)
    spec_version = parse_literal(logger, graph, doc_node, SPDX_NAMESPACE.specVersion)
    data_license = parse_literal(
        logger,
        graph,
        doc_node,
        SPDX_NAMESPACE.dataLicense,
        parsing_method=lambda x: remove_prefix(x, LICENSE_NAMESPACE),
    )
    comment = parse_literal(logger, graph, doc_node, RDFS.comment)
    name = parse_literal(logger, graph, doc_node, SPDX_NAMESPACE.name)

    creation_info_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.CreationInfo)
    if not creation_info_node:
        # Nothing below can be parsed without this node, so fail right away with what was collected so far.
        logger.append("CreationInfo does not exist.")
        raise SPDXParsingError([f"Error while parsing document {name}: {logger.get_messages()}"])

    created = parse_literal(
        logger, graph, creation_info_node, SPDX_NAMESPACE.created, parsing_method=datetime_from_str
    )
    license_list_version = parse_literal(
        logger, graph, creation_info_node, SPDX_NAMESPACE.licenseListVersion, parsing_method=Version.from_string
    )
    creator_comment = parse_literal(logger, graph, creation_info_node, RDFS.comment)
    creators = [
        ActorParser.parse_actor(creator_literal.toPython())
        for _, _, creator_literal in get_correctly_typed_triples(
            logger, graph, creation_info_node, SPDX_NAMESPACE.creator
        )
    ]
    if not creators:
        logger.append("No creators provided.")
    external_document_refs = [
        parse_external_document_refs(external_document_node, graph, namespace)
        for _, _, external_document_node in get_correctly_typed_triples(
            logger, graph, doc_node, SPDX_NAMESPACE.externalDocumentRef
        )
    ]

    raise_parsing_error_if_logger_has_messages(logger, "CreationInfo")
    creation_info = construct_or_raise_parsing_error(
        CreationInfo,
        dict(
            spdx_id=spdx_id,
            document_namespace=namespace,
            spdx_version=spec_version,
            name=name,
            data_license=data_license,
            document_comment=comment,
            created=created,
            license_list_version=license_list_version,
            creator_comment=creator_comment,
            creators=creators,
            external_document_refs=external_document_refs,
        ),
    )
    return creation_info, doc_node


def parse_namespace_and_spdx_id(graph: Graph) -> Tuple[str, Optional[str], URIRef]:
    """Locate the SpdxDocument node and split its URI into namespace and SPDX id.

    Args:
        graph: The RDF graph of the SPDX document.

    Returns:
        A ``(namespace, spdx_id, subject)`` triple: the document URI without its fragment, the fragment
        (``None`` when the fragment is empty) and the SpdxDocument node itself.

    Note:
        This function does not raise on malformed input. It logs an error and calls ``sys.exit(1)`` when the
        graph contains no or more than one SpdxDocument, when the document URI contains no ``#``, or when the
        part before the ``#`` is empty.
    """
    try:
        # any=False makes rdflib raise UniquenessError instead of silently picking one of several documents.
        subject = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.SpdxDocument, any=False)
    except UniquenessError:
        logging.error("Multiple SpdxDocuments found, can't parse rdf file.")
        sys.exit(1)

    if not subject:
        logging.error("No SpdxDocument found, can't parse rdf file.")
        sys.exit(1)
    if "#" not in subject:
        logging.error(
            "No '#' found in the URI of SpdxDocument, "
            f"the URI for the SpdxDocument should be the namespace appended by '#{DOCUMENT_SPDX_ID}."
        )
        sys.exit(1)

    namespace, spdx_id = urldefrag(str(subject))

    if not namespace:
        logging.error(
            f"No namespace found, the URI for the SpdxDocument should be the namespace appended by "
            f"'#{DOCUMENT_SPDX_ID}."
        )
        sys.exit(1)

    if not spdx_id:
        # An empty fragment is reported as a missing id rather than as an empty string.
        spdx_id = None

    return namespace, spdx_id, subject


def parse_external_document_refs(
    external_document_node: URIRef, graph: Graph, doc_namespace: str
) -> ExternalDocumentRef:
    """Parse one external document reference node and bind its namespace on the graph.

    Args:
        external_document_node: The node referenced by ``SPDX_NAMESPACE.externalDocumentRef``.
        graph: The RDF graph of the SPDX document; a prefix binding is added to it as a side effect.
        doc_namespace: Namespace of the document being parsed, passed on to ``parse_spdx_id``.

    Returns:
        The constructed ``ExternalDocumentRef``.

    Raises:
        SPDXParsingError: Raised by ``raise_parsing_error_if_logger_has_messages`` when the logger collected
            messages while reading the fields, or by ``construct_or_raise_parsing_error``.
    """
    logger = Logger()
    document_ref_id = parse_spdx_id(external_document_node, doc_namespace, graph)
    document_uri = parse_literal(logger, graph, external_document_node, SPDX_NAMESPACE.spdxDocument)
    checksum = parse_literal(
        logger,
        graph,
        external_document_node,
        SPDX_NAMESPACE.checksum,
        parsing_method=lambda x: parse_checksum(x, graph),
    )
    # parse_literal is handed the logger to report problems (presumably instead of raising - confirm against
    # graph_parsing_functions). Check it here, as parse_creation_info does, so those messages are not lost.
    raise_parsing_error_if_logger_has_messages(logger, "ExternalDocumentRef")
    external_document_ref = construct_or_raise_parsing_error(
        ExternalDocumentRef, dict(document_ref_id=document_ref_id, document_uri=document_uri, checksum=checksum)
    )

    # To replace the external doc namespaces by the ref id in spdx ids later (e.g. in a relationship), we need to bind
    # the namespace to the graph.
    graph.bind(external_document_ref.document_ref_id, Namespace(external_document_ref.document_uri + "#"))

    return external_document_ref
def
parse_creation_info( graph: rdflib.graph.Graph) -> tuple[spdx_tools.spdx.model.document.CreationInfo, rdflib.term.URIRef]:
def parse_creation_info(graph: Graph) -> Tuple[CreationInfo, URIRef]:
    """Build the document's ``CreationInfo`` from an RDF graph.

    Spec version, data license, document comment, name and external document references are read from the
    SpdxDocument node; created, license list version, creator comment and creators are read from the node
    typed as ``SPDX_NAMESPACE.CreationInfo``.

    Args:
        graph: The RDF graph of the SPDX document.

    Returns:
        The constructed ``CreationInfo`` and the SpdxDocument node.

    Raises:
        SPDXParsingError: If the graph has no CreationInfo node, or raised by
            ``raise_parsing_error_if_logger_has_messages`` / ``construct_or_raise_parsing_error``.
    """
    log = Logger()
    namespace, spdx_id, doc_node = parse_namespace_and_spdx_id(graph)

    # Fields stored directly on the SpdxDocument node.
    spec_version = parse_literal(log, graph, doc_node, SPDX_NAMESPACE.specVersion)
    data_license = parse_literal(
        log,
        graph,
        doc_node,
        SPDX_NAMESPACE.dataLicense,
        parsing_method=lambda license_ref: remove_prefix(license_ref, LICENSE_NAMESPACE),
    )
    document_comment = parse_literal(log, graph, doc_node, RDFS.comment)
    name = parse_literal(log, graph, doc_node, SPDX_NAMESPACE.name)

    creation_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.CreationInfo)
    if not creation_node:
        # Nothing below can be parsed without this node, so fail right away with what was collected so far.
        log.append("CreationInfo does not exist.")
        raise SPDXParsingError([f"Error while parsing document {name}: {log.get_messages()}"])

    # Fields stored on the CreationInfo node.
    created = parse_literal(log, graph, creation_node, SPDX_NAMESPACE.created, parsing_method=datetime_from_str)
    license_list_version = parse_literal(
        log, graph, creation_node, SPDX_NAMESPACE.licenseListVersion, parsing_method=Version.from_string
    )
    creator_comment = parse_literal(log, graph, creation_node, RDFS.comment)

    creator_triples = get_correctly_typed_triples(log, graph, creation_node, SPDX_NAMESPACE.creator)
    creators = [ActorParser.parse_actor(creator.toPython()) for _, _, creator in creator_triples]
    if not creators:
        log.append("No creators provided.")

    ref_triples = get_correctly_typed_triples(log, graph, doc_node, SPDX_NAMESPACE.externalDocumentRef)
    external_document_refs = [
        parse_external_document_refs(ref_node, graph, namespace) for _, _, ref_node in ref_triples
    ]

    raise_parsing_error_if_logger_has_messages(log, "CreationInfo")
    constructor_arguments = {
        "spdx_id": spdx_id,
        "document_namespace": namespace,
        "spdx_version": spec_version,
        "name": name,
        "data_license": data_license,
        "document_comment": document_comment,
        "created": created,
        "license_list_version": license_list_version,
        "creator_comment": creator_comment,
        "creators": creators,
        "external_document_refs": external_document_refs,
    }
    return construct_or_raise_parsing_error(CreationInfo, constructor_arguments), doc_node
def
parse_namespace_and_spdx_id(graph: rdflib.graph.Graph) -> (<class 'str'>, <class 'str'>):
# Optional is required by the corrected return annotation below.
from typing import Optional


def parse_namespace_and_spdx_id(graph: Graph) -> Tuple[str, Optional[str], URIRef]:
    """Locate the SpdxDocument node and split its URI into namespace and SPDX id.

    Args:
        graph: The RDF graph of the SPDX document.

    Returns:
        A ``(namespace, spdx_id, subject)`` triple: the document URI without its fragment, the fragment
        (``None`` when the fragment is empty) and the SpdxDocument node itself.

    Note:
        This function does not raise on malformed input. It logs an error and calls ``sys.exit(1)`` when the
        graph contains no or more than one SpdxDocument, when the document URI contains no ``#``, or when the
        part before the ``#`` is empty.
    """
    try:
        # any=False makes rdflib raise UniquenessError instead of silently picking one of several documents.
        subject = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.SpdxDocument, any=False)
    except UniquenessError:
        logging.error("Multiple SpdxDocuments found, can't parse rdf file.")
        sys.exit(1)

    if not subject:
        logging.error("No SpdxDocument found, can't parse rdf file.")
        sys.exit(1)
    if "#" not in subject:
        logging.error(
            "No '#' found in the URI of SpdxDocument, "
            f"the URI for the SpdxDocument should be the namespace appended by '#{DOCUMENT_SPDX_ID}."
        )
        sys.exit(1)

    namespace, spdx_id = urldefrag(str(subject))

    if not namespace:
        logging.error(
            f"No namespace found, the URI for the SpdxDocument should be the namespace appended by "
            f"'#{DOCUMENT_SPDX_ID}."
        )
        sys.exit(1)

    if not spdx_id:
        # An empty fragment is reported as a missing id rather than as an empty string.
        spdx_id = None

    return namespace, spdx_id, subject
def
parse_external_document_refs( external_document_node: rdflib.term.URIRef, graph: rdflib.graph.Graph, doc_namespace: str) -> spdx_tools.spdx.model.external_document_ref.ExternalDocumentRef:
def parse_external_document_refs(
    external_document_node: URIRef, graph: Graph, doc_namespace: str
) -> ExternalDocumentRef:
    """Parse one external document reference node and bind its namespace on the graph.

    Args:
        external_document_node: The node referenced by ``SPDX_NAMESPACE.externalDocumentRef``.
        graph: The RDF graph of the SPDX document; a prefix binding is added to it as a side effect.
        doc_namespace: Namespace of the document being parsed, passed on to ``parse_spdx_id``.

    Returns:
        The constructed ``ExternalDocumentRef``.

    Raises:
        SPDXParsingError: Raised by ``raise_parsing_error_if_logger_has_messages`` when the logger collected
            messages while reading the fields, or by ``construct_or_raise_parsing_error``.
    """
    logger = Logger()
    document_ref_id = parse_spdx_id(external_document_node, doc_namespace, graph)
    document_uri = parse_literal(logger, graph, external_document_node, SPDX_NAMESPACE.spdxDocument)
    checksum = parse_literal(
        logger,
        graph,
        external_document_node,
        SPDX_NAMESPACE.checksum,
        parsing_method=lambda x: parse_checksum(x, graph),
    )
    # parse_literal is handed the logger to report problems (presumably instead of raising - confirm against
    # graph_parsing_functions). Check it here, as parse_creation_info does, so those messages are not lost.
    raise_parsing_error_if_logger_has_messages(logger, "ExternalDocumentRef")
    external_document_ref = construct_or_raise_parsing_error(
        ExternalDocumentRef, dict(document_ref_id=document_ref_id, document_uri=document_uri, checksum=checksum)
    )

    # To replace the external doc namespaces by the ref id in spdx ids later (e.g. in a relationship), we need to bind
    # the namespace to the graph.
    graph.bind(external_document_ref.document_ref_id, Namespace(external_document_ref.document_uri + "#"))

    return external_document_ref