spdx_tools.spdx.parser.rdf.rdf_parser

 1# SPDX-FileCopyrightText: 2023 spdx contributors
 2#
 3# SPDX-License-Identifier: Apache-2.0
 4from beartype.typing import Any, Dict
 5from rdflib import RDF, Graph
 6
 7from spdx_tools.spdx.model import Document, RelationshipType
 8from spdx_tools.spdx.parser.error import SPDXParsingError
 9from spdx_tools.spdx.parser.logger import Logger
10from spdx_tools.spdx.parser.parsing_functions import (
11    construct_or_raise_parsing_error,
12    raise_parsing_error_if_logger_has_messages,
13)
14from spdx_tools.spdx.parser.rdf.annotation_parser import parse_annotation
15from spdx_tools.spdx.parser.rdf.creation_info_parser import parse_creation_info
16from spdx_tools.spdx.parser.rdf.extracted_licensing_info_parser import parse_extracted_licensing_info
17from spdx_tools.spdx.parser.rdf.file_parser import parse_file
18from spdx_tools.spdx.parser.rdf.graph_parsing_functions import get_correctly_typed_triples
19from spdx_tools.spdx.parser.rdf.package_parser import parse_package
20from spdx_tools.spdx.parser.rdf.relationship_parser import parse_implicit_relationship, parse_relationship
21from spdx_tools.spdx.parser.rdf.snippet_parser import parse_snippet
22from spdx_tools.spdx.rdfschema.namespace import SPDX_NAMESPACE
23
24
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """Load an RDF/XML file and turn its content into an SPDX ``Document``.

    The file is opened in text mode with ``encoding``, parsed into a fresh
    rdflib ``Graph`` using the ``"xml"`` format, and the resulting graph is
    passed on to ``translate_graph_to_document``.

    :param file_name: path of the file to read
    :param encoding: text encoding used to open the file
    :return: whatever ``translate_graph_to_document`` builds from the graph
    """
    rdf_graph = Graph()
    with open(file_name, encoding=encoding) as rdf_file:
        rdf_graph.parse(rdf_file, format="xml")

    return translate_graph_to_document(rdf_graph)

32
33
def translate_graph_to_document(graph: Graph) -> Document:
    """Build an SPDX ``Document`` from an already-populated RDF graph.

    The graph is walked section by section: creation info, then packages,
    files and snippets, then annotations and explicit relationships, then the
    relationships implied by ``hasFile`` / ``describesPackage``, and finally
    the extracted licensing infos. Every ``SPDXParsingError`` raised while
    parsing a single element is caught and its messages are added to a shared
    ``Logger`` so the remaining elements are still processed. After all
    sections are done the logger is handed to
    ``raise_parsing_error_if_logger_has_messages`` before the document is
    constructed.

    :param graph: the RDF graph to translate
    :return: the object produced by ``construct_or_raise_parsing_error``
    """
    messages = Logger()
    document_fields: Dict[str, Any] = {}

    # The second value returned alongside the creation info is not needed here.
    creation_info, _ = parse_creation_info(graph)
    document_fields["creation_info"] = creation_info

    # Elements identified by their rdf:type.
    typed_element_parsers = (
        ("packages", SPDX_NAMESPACE.Package, parse_package),
        ("files", SPDX_NAMESPACE.File, parse_file),
        ("snippets", SPDX_NAMESPACE.Snippet, parse_snippet),
    )
    for field_name, rdf_class, parse_element in typed_element_parsers:
        parsed = []
        for node, _, _ in get_correctly_typed_triples(messages, graph, None, RDF.type, rdf_class):
            try:
                parsed.append(parse_element(node, graph, creation_info.document_namespace))
            except SPDXParsingError as error:
                messages.extend(error.get_messages())
        document_fields[field_name] = parsed

    # Elements hanging off a parent node through a dedicated predicate.
    child_element_parsers = (
        ("annotations", SPDX_NAMESPACE.annotation, parse_annotation),
        ("relationships", SPDX_NAMESPACE.relationship, parse_relationship),
    )
    for field_name, predicate, parse_element in child_element_parsers:
        parsed = []
        for owner, _, node in graph.triples((None, predicate, None)):
            try:
                parsed.append(parse_element(node, graph, owner, creation_info.document_namespace))
            except SPDXParsingError as error:
                messages.extend(error.get_messages())
        document_fields[field_name] = parsed

    # Relationships expressed implicitly by a predicate; only added when an
    # equal relationship is not already in the list.
    implicit_relationship_types = (
        (SPDX_NAMESPACE.hasFile, RelationshipType.CONTAINS),
        (SPDX_NAMESPACE.describesPackage, RelationshipType.DESCRIBES),
    )
    for predicate, relationship_type in implicit_relationship_types:
        for owner, _, node in get_correctly_typed_triples(messages, graph, None, predicate, None):
            try:
                implied = parse_implicit_relationship(
                    owner, relationship_type, node, graph, creation_info.document_namespace
                )
                known_relationships = document_fields["relationships"]
                if implied not in known_relationships:
                    known_relationships.append(implied)
            except SPDXParsingError as error:
                messages.extend(error.get_messages())

    licensing_infos = []
    for _, _, licensing_node in get_correctly_typed_triples(
        messages, graph, None, SPDX_NAMESPACE.hasExtractedLicensingInfo
    ):
        try:
            licensing_info = parse_extracted_licensing_info(
                licensing_node, graph, creation_info.document_namespace
            )
            licensing_infos.append(licensing_info)
        except SPDXParsingError as error:
            messages.extend(error.get_messages())
    document_fields["extracted_licensing_info"] = licensing_infos

    raise_parsing_error_if_logger_has_messages(messages)
    return construct_or_raise_parsing_error(Document, document_fields)
def parse_from_file(file_name: str, encoding: str = 'utf-8') -> spdx_tools.spdx.model.document.Document:
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """Load an RDF/XML file and turn its content into an SPDX ``Document``.

    The file is opened in text mode with ``encoding``, parsed into a fresh
    rdflib ``Graph`` using the ``"xml"`` format, and the resulting graph is
    passed on to ``translate_graph_to_document``.

    :param file_name: path of the file to read
    :param encoding: text encoding used to open the file
    :return: whatever ``translate_graph_to_document`` builds from the graph
    """
    rdf_graph = Graph()
    with open(file_name, encoding=encoding) as rdf_file:
        rdf_graph.parse(rdf_file, format="xml")

    return translate_graph_to_document(rdf_graph)
def translate_graph_to_document(graph: rdflib.graph.Graph) -> spdx_tools.spdx.model.document.Document:
def translate_graph_to_document(graph: Graph) -> Document:
    """Build an SPDX ``Document`` from an already-populated RDF graph.

    The graph is walked section by section: creation info, then packages,
    files and snippets, then annotations and explicit relationships, then the
    relationships implied by ``hasFile`` / ``describesPackage``, and finally
    the extracted licensing infos. Every ``SPDXParsingError`` raised while
    parsing a single element is caught and its messages are added to a shared
    ``Logger`` so the remaining elements are still processed. After all
    sections are done the logger is handed to
    ``raise_parsing_error_if_logger_has_messages`` before the document is
    constructed.

    :param graph: the RDF graph to translate
    :return: the object produced by ``construct_or_raise_parsing_error``
    """
    messages = Logger()
    document_fields: Dict[str, Any] = {}

    # The second value returned alongside the creation info is not needed here.
    creation_info, _ = parse_creation_info(graph)
    document_fields["creation_info"] = creation_info

    # Elements identified by their rdf:type.
    typed_element_parsers = (
        ("packages", SPDX_NAMESPACE.Package, parse_package),
        ("files", SPDX_NAMESPACE.File, parse_file),
        ("snippets", SPDX_NAMESPACE.Snippet, parse_snippet),
    )
    for field_name, rdf_class, parse_element in typed_element_parsers:
        parsed = []
        for node, _, _ in get_correctly_typed_triples(messages, graph, None, RDF.type, rdf_class):
            try:
                parsed.append(parse_element(node, graph, creation_info.document_namespace))
            except SPDXParsingError as error:
                messages.extend(error.get_messages())
        document_fields[field_name] = parsed

    # Elements hanging off a parent node through a dedicated predicate.
    child_element_parsers = (
        ("annotations", SPDX_NAMESPACE.annotation, parse_annotation),
        ("relationships", SPDX_NAMESPACE.relationship, parse_relationship),
    )
    for field_name, predicate, parse_element in child_element_parsers:
        parsed = []
        for owner, _, node in graph.triples((None, predicate, None)):
            try:
                parsed.append(parse_element(node, graph, owner, creation_info.document_namespace))
            except SPDXParsingError as error:
                messages.extend(error.get_messages())
        document_fields[field_name] = parsed

    # Relationships expressed implicitly by a predicate; only added when an
    # equal relationship is not already in the list.
    implicit_relationship_types = (
        (SPDX_NAMESPACE.hasFile, RelationshipType.CONTAINS),
        (SPDX_NAMESPACE.describesPackage, RelationshipType.DESCRIBES),
    )
    for predicate, relationship_type in implicit_relationship_types:
        for owner, _, node in get_correctly_typed_triples(messages, graph, None, predicate, None):
            try:
                implied = parse_implicit_relationship(
                    owner, relationship_type, node, graph, creation_info.document_namespace
                )
                known_relationships = document_fields["relationships"]
                if implied not in known_relationships:
                    known_relationships.append(implied)
            except SPDXParsingError as error:
                messages.extend(error.get_messages())

    licensing_infos = []
    for _, _, licensing_node in get_correctly_typed_triples(
        messages, graph, None, SPDX_NAMESPACE.hasExtractedLicensingInfo
    ):
        try:
            licensing_info = parse_extracted_licensing_info(
                licensing_node, graph, creation_info.document_namespace
            )
            licensing_infos.append(licensing_info)
        except SPDXParsingError as error:
            messages.extend(error.get_messages())
    document_fields["extracted_licensing_info"] = licensing_infos

    raise_parsing_error_if_logger_has_messages(messages)
    return construct_or_raise_parsing_error(Document, document_fields)