spdx_tools.spdx.parser.rdf.package_parser

  1# SPDX-FileCopyrightText: 2023 spdx contributors
  2#
  3# SPDX-License-Identifier: Apache-2.0
  4from beartype.typing import Optional, Union
  5from rdflib import DOAP, RDFS, Graph, URIRef
  6from rdflib.term import BNode
  7
  8from spdx_tools.spdx.datetime_conversions import datetime_from_str
  9from spdx_tools.spdx.model import (
 10    ExternalPackageRef,
 11    ExternalPackageRefCategory,
 12    Package,
 13    PackagePurpose,
 14    PackageVerificationCode,
 15)
 16from spdx_tools.spdx.parser.actor_parser import ActorParser
 17from spdx_tools.spdx.parser.logger import Logger
 18from spdx_tools.spdx.parser.parsing_functions import (
 19    construct_or_raise_parsing_error,
 20    raise_parsing_error_if_logger_has_messages,
 21)
 22from spdx_tools.spdx.parser.rdf.checksum_parser import parse_checksum
 23from spdx_tools.spdx.parser.rdf.graph_parsing_functions import (
 24    get_correctly_typed_triples,
 25    get_correctly_typed_value,
 26    get_value_from_graph,
 27    parse_enum_value,
 28    parse_literal,
 29    parse_literal_or_no_assertion_or_none,
 30    parse_spdx_id,
 31)
 32from spdx_tools.spdx.parser.rdf.license_expression_parser import parse_license_expression
 33from spdx_tools.spdx.rdfschema.namespace import REFERENCE_NAMESPACE, SPDX_NAMESPACE
 34
 35
 36def parse_package(package_node: Union[URIRef, BNode], graph: Graph, doc_namespace: str) -> Package:
 37    logger = Logger()
 38    spdx_id = parse_spdx_id(package_node, doc_namespace, graph)
 39    name = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.name)
 40    download_location = parse_literal_or_no_assertion_or_none(
 41        logger, graph, package_node, SPDX_NAMESPACE.downloadLocation
 42    )
 43    checksums = []
 44    for _, _, checksum_node in get_correctly_typed_triples(logger, graph, package_node, SPDX_NAMESPACE.checksum):
 45        checksums.append(parse_checksum(checksum_node, graph))
 46
 47    version_info = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.versionInfo)
 48    package_file_name = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.packageFileName)
 49
 50    supplier = parse_literal_or_no_assertion_or_none(
 51        logger, graph, package_node, SPDX_NAMESPACE.supplier, parsing_method=ActorParser.parse_actor
 52    )
 53    originator = parse_literal_or_no_assertion_or_none(
 54        logger, graph, package_node, SPDX_NAMESPACE.originator, parsing_method=ActorParser.parse_actor
 55    )
 56    verification_code = parse_literal(
 57        logger,
 58        graph,
 59        package_node,
 60        SPDX_NAMESPACE.packageVerificationCode,
 61        parsing_method=lambda x: parse_package_verification_code(x, graph),
 62    )
 63
 64    external_package_refs = []
 65    for _, _, external_package_ref_node in get_correctly_typed_triples(
 66        logger, graph, package_node, SPDX_NAMESPACE.externalRef
 67    ):
 68        external_package_refs.append(parse_external_package_ref(external_package_ref_node, graph, doc_namespace))
 69    files_analyzed = bool(
 70        get_value_from_graph(logger, graph, package_node, SPDX_NAMESPACE.filesAnalyzed, default=True)
 71    )
 72    license_concluded = parse_literal_or_no_assertion_or_none(
 73        logger,
 74        graph,
 75        package_node,
 76        SPDX_NAMESPACE.licenseConcluded,
 77        parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
 78    )
 79    license_declared = parse_literal_or_no_assertion_or_none(
 80        logger,
 81        graph,
 82        package_node,
 83        SPDX_NAMESPACE.licenseDeclared,
 84        parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
 85    )
 86    license_info_from_files = []
 87    for _, _, license_info_from_files_node in graph.triples((package_node, SPDX_NAMESPACE.licenseInfoFromFiles, None)):
 88        license_info_from_files.append(
 89            get_correctly_typed_value(
 90                logger,
 91                license_info_from_files_node,
 92                lambda x: parse_license_expression(x, graph, doc_namespace, logger),
 93            )
 94        )
 95    license_comment = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.licenseComments)
 96    comment = parse_literal(logger, graph, package_node, RDFS.comment)
 97    summary = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.summary)
 98    description = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.description)
 99    copyright_text = parse_literal_or_no_assertion_or_none(logger, graph, package_node, SPDX_NAMESPACE.copyrightText)
100    source_info = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.sourceInfo)
101    primary_package_purpose = parse_literal(
102        logger,
103        graph,
104        package_node,
105        SPDX_NAMESPACE.primaryPackagePurpose,
106        parsing_method=lambda x: parse_enum_value(x, PackagePurpose, SPDX_NAMESPACE.purpose_),
107    )
108    homepage = parse_literal(logger, graph, package_node, DOAP.homepage)
109    attribution_texts = []
110    for _, _, attribution_text_literal in get_correctly_typed_triples(
111        logger, graph, package_node, SPDX_NAMESPACE.attributionText, None
112    ):
113        attribution_texts.append(attribution_text_literal.toPython())
114
115    release_date = parse_literal(
116        logger, graph, package_node, SPDX_NAMESPACE.releaseDate, parsing_method=datetime_from_str
117    )
118    built_date = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.builtDate, parsing_method=datetime_from_str)
119    valid_until_date = parse_literal(
120        logger, graph, package_node, SPDX_NAMESPACE.validUntilDate, parsing_method=datetime_from_str
121    )
122    raise_parsing_error_if_logger_has_messages(logger, "Package")
123    package = construct_or_raise_parsing_error(
124        Package,
125        dict(
126            name=name,
127            spdx_id=spdx_id,
128            download_location=download_location,
129            version=version_info,
130            file_name=package_file_name,
131            supplier=supplier,
132            originator=originator,
133            files_analyzed=files_analyzed,
134            verification_code=verification_code,
135            checksums=checksums,
136            homepage=homepage,
137            source_info=source_info,
138            license_concluded=license_concluded,
139            license_info_from_files=license_info_from_files,
140            license_declared=license_declared,
141            license_comment=license_comment,
142            copyright_text=copyright_text,
143            summary=summary,
144            description=description,
145            comment=comment,
146            external_references=external_package_refs,
147            attribution_texts=attribution_texts,
148            primary_package_purpose=primary_package_purpose,
149            release_date=release_date,
150            built_date=built_date,
151            valid_until_date=valid_until_date,
152        ),
153    )
154
155    return package
156
157
def parse_package_verification_code(
    package_verification_code_node: URIRef, graph: Graph
) -> Optional[PackageVerificationCode]:
    """Assemble a ``PackageVerificationCode`` from the triples of ``package_verification_code_node``.

    The code value is read with ``parse_literal``; every object of a
    ``packageVerificationCodeExcludedFile`` triple is converted with ``toPython()``
    and collected as an excluded file.
    """
    logger = Logger()
    code_value = parse_literal(
        logger, graph, package_verification_code_node, SPDX_NAMESPACE.packageVerificationCodeValue
    )

    excluded_file_pattern = (package_verification_code_node, SPDX_NAMESPACE.packageVerificationCodeExcludedFile, None)
    excluded_files = [file_literal.toPython() for _, _, file_literal in graph.triples(excluded_file_pattern)]

    raise_parsing_error_if_logger_has_messages(logger, "PackageVerificationCode")
    return construct_or_raise_parsing_error(
        PackageVerificationCode, {"value": code_value, "excluded_files": excluded_files}
    )
174
175
def parse_external_package_ref(external_package_ref_node: BNode, graph: Graph, doc_namespace) -> ExternalPackageRef:
    """Assemble an ``ExternalPackageRef`` from the triples of ``external_package_ref_node``.

    Locator, category, type and comment are read with ``parse_literal`` using one
    shared ``Logger``; that logger is checked before the object is constructed.
    """
    logger = Logger()
    # Leading arguments shared by every parse_literal call below.
    reader_args = (logger, graph, external_package_ref_node)

    locator = parse_literal(*reader_args, SPDX_NAMESPACE.referenceLocator)
    category = parse_literal(
        *reader_args,
        SPDX_NAMESPACE.referenceCategory,
        parsing_method=lambda node: parse_enum_value(
            node, ExternalPackageRefCategory, SPDX_NAMESPACE.referenceCategory_
        ),
    )
    reference_type = parse_literal(
        *reader_args,
        SPDX_NAMESPACE.referenceType,
        parsing_method=lambda node: parse_external_package_ref_type(node, doc_namespace),
    )
    comment = parse_literal(*reader_args, RDFS.comment)

    raise_parsing_error_if_logger_has_messages(logger, "ExternalPackageRef")
    ref_kwargs = {
        "category": category,
        "reference_type": reference_type,
        "locator": locator,
        "comment": comment,
    }
    return construct_or_raise_parsing_error(ExternalPackageRef, ref_kwargs)
200
201
def parse_external_package_ref_type(external_package_ref_type_resource: URIRef, doc_namespace: str) -> str:
    """Reduce an external reference type URI to a plain string.

    Three cases are distinguished by the URI's prefix:

    * it starts with ``doc_namespace``: its ``fragment`` attribute is returned;
    * it starts with ``REFERENCE_NAMESPACE``: the URI without that leading
      namespace is returned;
    * otherwise: the result of ``toPython()`` on the URI is returned.
    """
    if external_package_ref_type_resource.startswith(doc_namespace):
        return external_package_ref_type_resource.fragment
    if external_package_ref_type_resource.startswith(REFERENCE_NAMESPACE):
        # Slice off the verified prefix only. ``str.replace`` would also delete any
        # later occurrence of the namespace string inside the rest of the URI.
        return external_package_ref_type_resource[len(REFERENCE_NAMESPACE) :]
    return external_package_ref_type_resource.toPython()
def parse_package( package_node: Union[rdflib.term.URIRef, rdflib.term.BNode], graph: rdflib.graph.Graph, doc_namespace: str) -> spdx_tools.spdx.model.package.Package:
def parse_package(package_node: Union[URIRef, BNode], graph: Graph, doc_namespace: str) -> Package:
    """Parse the node ``package_node`` of ``graph`` into a ``Package``.

    Args:
        package_node: Subject node whose triples describe the package.
        graph: Graph the triples are read from.
        doc_namespace: Passed on to ``parse_spdx_id``, ``parse_license_expression``
            and ``parse_external_package_ref``.

    Returns:
        The value returned by ``construct_or_raise_parsing_error(Package, ...)``.

    NOTE(review): ``raise_parsing_error_if_logger_has_messages`` presumably raises
    when any reader logged a message -- confirm in ``parsing_functions``.
    """
    # One logger is shared by all property readers of this package.
    logger = Logger()
    spdx_id = parse_spdx_id(package_node, doc_namespace, graph)
    name = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.name)
    download_location = parse_literal_or_no_assertion_or_none(
        logger, graph, package_node, SPDX_NAMESPACE.downloadLocation
    )
    # Multi-valued property: one parsed checksum per checksum triple.
    checksums = []
    for _, _, checksum_node in get_correctly_typed_triples(logger, graph, package_node, SPDX_NAMESPACE.checksum):
        checksums.append(parse_checksum(checksum_node, graph))

    version_info = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.versionInfo)
    package_file_name = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.packageFileName)

    # Supplier and originator are both converted with ActorParser.parse_actor.
    supplier = parse_literal_or_no_assertion_or_none(
        logger, graph, package_node, SPDX_NAMESPACE.supplier, parsing_method=ActorParser.parse_actor
    )
    originator = parse_literal_or_no_assertion_or_none(
        logger, graph, package_node, SPDX_NAMESPACE.originator, parsing_method=ActorParser.parse_actor
    )
    # The verification code is a node of its own and is parsed by a dedicated function.
    verification_code = parse_literal(
        logger,
        graph,
        package_node,
        SPDX_NAMESPACE.packageVerificationCode,
        parsing_method=lambda x: parse_package_verification_code(x, graph),
    )

    # Multi-valued property: one ExternalPackageRef per externalRef triple.
    external_package_refs = []
    for _, _, external_package_ref_node in get_correctly_typed_triples(
        logger, graph, package_node, SPDX_NAMESPACE.externalRef
    ):
        external_package_refs.append(parse_external_package_ref(external_package_ref_node, graph, doc_namespace))
    # ``True`` is passed as the default for filesAnalyzed; the result is coerced with bool().
    files_analyzed = bool(
        get_value_from_graph(logger, graph, package_node, SPDX_NAMESPACE.filesAnalyzed, default=True)
    )
    license_concluded = parse_literal_or_no_assertion_or_none(
        logger,
        graph,
        package_node,
        SPDX_NAMESPACE.licenseConcluded,
        parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
    )
    license_declared = parse_literal_or_no_assertion_or_none(
        logger,
        graph,
        package_node,
        SPDX_NAMESPACE.licenseDeclared,
        parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
    )
    # Unlike the other multi-valued properties, this one iterates graph.triples directly
    # and converts each object through get_correctly_typed_value.
    license_info_from_files = []
    for _, _, license_info_from_files_node in graph.triples((package_node, SPDX_NAMESPACE.licenseInfoFromFiles, None)):
        license_info_from_files.append(
            get_correctly_typed_value(
                logger,
                license_info_from_files_node,
                lambda x: parse_license_expression(x, graph, doc_namespace, logger),
            )
        )
    license_comment = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.licenseComments)
    # The comment is read from RDFS.comment, not from SPDX_NAMESPACE.
    comment = parse_literal(logger, graph, package_node, RDFS.comment)
    summary = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.summary)
    description = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.description)
    copyright_text = parse_literal_or_no_assertion_or_none(logger, graph, package_node, SPDX_NAMESPACE.copyrightText)
    source_info = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.sourceInfo)
    primary_package_purpose = parse_literal(
        logger,
        graph,
        package_node,
        SPDX_NAMESPACE.primaryPackagePurpose,
        parsing_method=lambda x: parse_enum_value(x, PackagePurpose, SPDX_NAMESPACE.purpose_),
    )
    # The homepage is read from DOAP.homepage, not from SPDX_NAMESPACE.
    homepage = parse_literal(logger, graph, package_node, DOAP.homepage)
    attribution_texts = []
    for _, _, attribution_text_literal in get_correctly_typed_triples(
        logger, graph, package_node, SPDX_NAMESPACE.attributionText, None
    ):
        attribution_texts.append(attribution_text_literal.toPython())

    # All three dates are converted with datetime_from_str.
    release_date = parse_literal(
        logger, graph, package_node, SPDX_NAMESPACE.releaseDate, parsing_method=datetime_from_str
    )
    built_date = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.builtDate, parsing_method=datetime_from_str)
    valid_until_date = parse_literal(
        logger, graph, package_node, SPDX_NAMESPACE.validUntilDate, parsing_method=datetime_from_str
    )
    # The logger is checked once, after every property has been read and before construction.
    raise_parsing_error_if_logger_has_messages(logger, "Package")
    package = construct_or_raise_parsing_error(
        Package,
        dict(
            name=name,
            spdx_id=spdx_id,
            download_location=download_location,
            version=version_info,
            file_name=package_file_name,
            supplier=supplier,
            originator=originator,
            files_analyzed=files_analyzed,
            verification_code=verification_code,
            checksums=checksums,
            homepage=homepage,
            source_info=source_info,
            license_concluded=license_concluded,
            license_info_from_files=license_info_from_files,
            license_declared=license_declared,
            license_comment=license_comment,
            copyright_text=copyright_text,
            summary=summary,
            description=description,
            comment=comment,
            external_references=external_package_refs,
            attribution_texts=attribution_texts,
            primary_package_purpose=primary_package_purpose,
            release_date=release_date,
            built_date=built_date,
            valid_until_date=valid_until_date,
        ),
    )

    return package
def parse_package_verification_code( package_verification_code_node: rdflib.term.URIRef, graph: rdflib.graph.Graph) -> Optional[spdx_tools.spdx.model.package.PackageVerificationCode]:
def parse_package_verification_code(
    package_verification_code_node: URIRef, graph: Graph
) -> Optional[PackageVerificationCode]:
    """Parse the node ``package_verification_code_node`` into a ``PackageVerificationCode``.

    Args:
        package_verification_code_node: Subject node holding the verification code triples.
        graph: Graph the triples are read from.

    Returns:
        The value returned by ``construct_or_raise_parsing_error(PackageVerificationCode, ...)``.
    """
    # A separate logger, so messages are reported under "PackageVerificationCode".
    logger = Logger()
    value = parse_literal(logger, graph, package_verification_code_node, SPDX_NAMESPACE.packageVerificationCodeValue)
    # Excluded files are read from the raw triples; each object is converted with toPython().
    excluded_files = []
    for _, _, excluded_file_literal in graph.triples(
        (package_verification_code_node, SPDX_NAMESPACE.packageVerificationCodeExcludedFile, None)
    ):
        excluded_files.append(excluded_file_literal.toPython())

    raise_parsing_error_if_logger_has_messages(logger, "PackageVerificationCode")
    package_verification_code = construct_or_raise_parsing_error(
        PackageVerificationCode, dict(value=value, excluded_files=excluded_files)
    )
    return package_verification_code
def parse_external_package_ref( external_package_ref_node: rdflib.term.BNode, graph: rdflib.graph.Graph, doc_namespace) -> spdx_tools.spdx.model.package.ExternalPackageRef:
def parse_external_package_ref(
    external_package_ref_node: BNode, graph: Graph, doc_namespace: str
) -> ExternalPackageRef:
    """Parse the node ``external_package_ref_node`` into an ``ExternalPackageRef``.

    Args:
        external_package_ref_node: Subject node holding the external reference triples.
        graph: Graph the triples are read from.
        doc_namespace: Passed on to ``parse_external_package_ref_type``.

    Returns:
        The value returned by ``construct_or_raise_parsing_error(ExternalPackageRef, ...)``.
    """
    # A separate logger, so messages are reported under "ExternalPackageRef".
    logger = Logger()
    ref_locator = parse_literal(logger, graph, external_package_ref_node, SPDX_NAMESPACE.referenceLocator)
    ref_category = parse_literal(
        logger,
        graph,
        external_package_ref_node,
        SPDX_NAMESPACE.referenceCategory,
        parsing_method=lambda x: parse_enum_value(x, ExternalPackageRefCategory, SPDX_NAMESPACE.referenceCategory_),
    )
    # The reference type URI is reduced to a string by parse_external_package_ref_type.
    ref_type = parse_literal(
        logger,
        graph,
        external_package_ref_node,
        SPDX_NAMESPACE.referenceType,
        parsing_method=lambda x: parse_external_package_ref_type(x, doc_namespace),
    )
    # The comment is read from RDFS.comment, not from SPDX_NAMESPACE.
    comment = parse_literal(logger, graph, external_package_ref_node, RDFS.comment)

    raise_parsing_error_if_logger_has_messages(logger, "ExternalPackageRef")
    external_package_ref = construct_or_raise_parsing_error(
        ExternalPackageRef, dict(category=ref_category, reference_type=ref_type, locator=ref_locator, comment=comment)
    )
    return external_package_ref
def parse_external_package_ref_type( external_package_ref_type_resource: rdflib.term.URIRef, doc_namespace: str) -> str:
def parse_external_package_ref_type(external_package_ref_type_resource: URIRef, doc_namespace: str) -> str:
    """Reduce an external reference type URI to a plain string.

    Three cases are distinguished by the URI's prefix:

    * it starts with ``doc_namespace``: its ``fragment`` attribute is returned;
    * it starts with ``REFERENCE_NAMESPACE``: the URI without that leading
      namespace is returned;
    * otherwise: the result of ``toPython()`` on the URI is returned.
    """
    if external_package_ref_type_resource.startswith(doc_namespace):
        return external_package_ref_type_resource.fragment
    if external_package_ref_type_resource.startswith(REFERENCE_NAMESPACE):
        # Slice off the verified prefix only. ``str.replace`` would also delete any
        # later occurrence of the namespace string inside the rest of the URI.
        return external_package_ref_type_resource[len(REFERENCE_NAMESPACE) :]
    return external_package_ref_type_resource.toPython()