spdx_tools.spdx.parser.rdf.snippet_parser

  1# SPDX-FileCopyrightText: 2023 spdx contributors
  2#
  3# SPDX-License-Identifier: Apache-2.0
  4from beartype.typing import Dict, Optional, Tuple, Union
  5from rdflib import RDF, RDFS, Graph
  6from rdflib.exceptions import UniquenessError
  7from rdflib.term import BNode, Node, URIRef
  8
  9from spdx_tools.spdx.model import Snippet
 10from spdx_tools.spdx.parser.error import SPDXParsingError
 11from spdx_tools.spdx.parser.logger import Logger
 12from spdx_tools.spdx.parser.parsing_functions import (
 13    construct_or_raise_parsing_error,
 14    raise_parsing_error_if_logger_has_messages,
 15)
 16from spdx_tools.spdx.parser.rdf.graph_parsing_functions import (
 17    apply_parsing_method_or_log_error,
 18    get_correctly_typed_triples,
 19    get_correctly_typed_value,
 20    get_value_from_graph,
 21    parse_literal,
 22    parse_literal_or_no_assertion_or_none,
 23    parse_spdx_id,
 24)
 25from spdx_tools.spdx.parser.rdf.license_expression_parser import parse_license_expression
 26from spdx_tools.spdx.rdfschema.namespace import POINTER_NAMESPACE, SPDX_NAMESPACE
 27
 28
 29def parse_snippet(snippet_node: Union[URIRef, BNode], graph: Graph, doc_namespace: str) -> Snippet:
 30    logger = Logger()
 31    spdx_id = parse_spdx_id(snippet_node, doc_namespace, graph)
 32    file_spdx_id_uri = get_value_from_graph(
 33        logger, graph, subject=snippet_node, predicate=SPDX_NAMESPACE.snippetFromFile
 34    )
 35    file_spdx_id = parse_spdx_id(file_spdx_id_uri, doc_namespace, graph)
 36    byte_range = None
 37    line_range = None
 38    for _, _, start_end_pointer in graph.triples((snippet_node, SPDX_NAMESPACE.range, None)):
 39        parsed_range = apply_parsing_method_or_log_error(
 40            logger, start_end_pointer, parsing_method=lambda x: parse_ranges(x, graph)
 41        )
 42        byte_range, line_range = set_range_or_log_error(byte_range, line_range, logger, parsed_range)
 43
 44    license_concluded = parse_literal_or_no_assertion_or_none(
 45        logger,
 46        graph,
 47        snippet_node,
 48        SPDX_NAMESPACE.licenseConcluded,
 49        parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
 50    )
 51    license_info_in_snippet = []
 52    for _, _, license_info_in_snippet_node in graph.triples((snippet_node, SPDX_NAMESPACE.licenseInfoInSnippet, None)):
 53        license_info_in_snippet.append(
 54            get_correctly_typed_value(
 55                logger,
 56                license_info_in_snippet_node,
 57                lambda x: parse_license_expression(x, graph, doc_namespace, logger),
 58            )
 59        )
 60    license_comment = parse_literal(logger, graph, snippet_node, SPDX_NAMESPACE.licenseComments)
 61    copyright_text = parse_literal_or_no_assertion_or_none(logger, graph, snippet_node, SPDX_NAMESPACE.copyrightText)
 62    comment = parse_literal(logger, graph, snippet_node, RDFS.comment)
 63    name = parse_literal(logger, graph, snippet_node, SPDX_NAMESPACE.name)
 64    attribution_texts = []
 65    for _, _, attribution_text_literal in get_correctly_typed_triples(
 66        logger, graph, snippet_node, SPDX_NAMESPACE.attributionText, None
 67    ):
 68        attribution_texts.append(attribution_text_literal.toPython())
 69
 70    raise_parsing_error_if_logger_has_messages(logger, "Snippet")
 71    snippet = construct_or_raise_parsing_error(
 72        Snippet,
 73        dict(
 74            spdx_id=spdx_id,
 75            file_spdx_id=file_spdx_id,
 76            byte_range=byte_range,
 77            line_range=line_range,
 78            license_concluded=license_concluded,
 79            license_info_in_snippet=license_info_in_snippet,
 80            license_comment=license_comment,
 81            copyright_text=copyright_text,
 82            comment=comment,
 83            name=name,
 84            attribution_texts=attribution_texts,
 85        ),
 86    )
 87    return snippet
 88
 89
 90def set_range_or_log_error(
 91    byte_range: Optional[Tuple[int, int]],
 92    line_range: Optional[Tuple[int, int]],
 93    logger: Logger,
 94    parsed_range: Dict[str, Tuple[int, int]],
 95) -> Tuple[Optional[Tuple[int, int]], Optional[Tuple[int, int]]]:
 96    if not parsed_range:
 97        return byte_range, line_range
 98    if "ByteOffsetPointer" in parsed_range.keys() and not byte_range:
 99        byte_range = parsed_range["ByteOffsetPointer"]
100    elif "ByteOffsetPointer" in parsed_range.keys() and byte_range:
101        logger.append("Multiple ByteOffsetPointer found.")
102    elif "LineCharPointer" in parsed_range.keys() and not line_range:
103        line_range = parsed_range["LineCharPointer"]
104    elif "LineCharPointer" in parsed_range.keys() and line_range:
105        logger.append("Multiple LineCharPointer found.")
106    return byte_range, line_range
107
108
def parse_ranges(start_end_pointer: BNode, graph: Graph) -> Dict[str, Tuple[int, int]]:
    """Parse one start/end pointer node into a single-entry range mapping.

    The key of the returned mapping is the fragment of the pointer type shared
    by the start and end pointer; the value is the ``(start, end)`` pair read
    via the predicate registered for that type in ``POINTER_MATCHING``.

    Args:
        start_end_pointer: Node that carries the ``startPointer`` and ``endPointer`` properties.
        graph: Graph that contains the pointer triples.

    Raises:
        SPDXParsingError: If the start and end pointer types differ or the type
            has no entry in ``POINTER_MATCHING``. Errors raised by
            ``get_pointer_type`` and ``parse_range_value`` propagate unchanged.
    """
    start_pointer_type, start_pointer_node = get_pointer_type(graph, POINTER_NAMESPACE.startPointer, start_end_pointer)
    end_pointer_type, end_pointer_node = get_pointer_type(graph, POINTER_NAMESPACE.endPointer, start_end_pointer)

    if start_pointer_type != end_pointer_type:
        raise SPDXParsingError(["Types of startPointer and endPointer don't match"])

    # The pointer type is read from the document, so an unknown (or missing) type
    # must surface as a parsing error rather than as a KeyError from a plain lookup.
    value_predicate = POINTER_MATCHING.get(start_pointer_type)
    if value_predicate is None:
        raise SPDXParsingError([f"Unsupported pointer type {start_pointer_type}."])

    start_value = parse_range_value(graph, start_pointer_node, value_predicate)
    end_value = parse_range_value(graph, end_pointer_node, value_predicate)
    return {str(start_pointer_type.fragment): (start_value, end_value)}
121
122
def get_pointer_type(graph: Graph, pointer: URIRef, start_end_pointer: BNode) -> Tuple[URIRef, Node]:
    """Look up the node referenced by ``pointer`` together with its ``rdf:type``.

    Args:
        graph: Graph that contains the pointer triples.
        pointer: Predicate selecting the pointer on ``start_end_pointer``.
        start_end_pointer: Node that carries the pointer property.

    Returns:
        The pair ``(pointer_type, pointer_node)``.

    Raises:
        SPDXParsingError: If ``start_end_pointer`` has several values for
            ``pointer`` or no (truthy) value at all.
    """
    try:
        # any=False: ambiguity is reported through UniquenessError instead of being resolved silently.
        target = graph.value(start_end_pointer, pointer, any=False)
    except UniquenessError:
        raise SPDXParsingError([f"Multiple values for {pointer.fragment}"])

    if target:
        return get_value_from_graph(Logger(), graph, target, RDF.type), target
    raise SPDXParsingError([f"Couldn't find pointer of type {pointer.fragment}."])
132
133
# Predicate that carries the numeric position for each supported pointer type;
# parse_ranges uses it to choose which property to read from a pointer node.
POINTER_MATCHING: Dict[URIRef, URIRef] = {
    POINTER_NAMESPACE.ByteOffsetPointer: POINTER_NAMESPACE.offset,  # byte ranges
    POINTER_NAMESPACE.LineCharPointer: POINTER_NAMESPACE.lineNumber,  # line ranges
}
138
139
def parse_range_value(graph: Graph, pointer_node: Node, predicate: URIRef) -> Optional[int]:
    """Read the integer stored under ``predicate`` on ``pointer_node``.

    Args:
        graph: Graph that contains the pointer triples.
        pointer_node: Node of the start or end pointer.
        predicate: Property holding the numeric position.

    Returns:
        The value converted to ``int``, or ``None`` when ``get_value_from_graph``
        yields ``None``.

    Raises:
        SPDXParsingError: If a ``UniquenessError`` signals several values for ``predicate``.
    """
    try:
        value = get_value_from_graph(Logger(), graph, pointer_node, predicate, _any=False)
    except UniquenessError:
        raise SPDXParsingError([f"Multiple values for {predicate.fragment} found."])
    # Compare against None instead of relying on truthiness: a literal whose value
    # is 0 evaluates as false and would otherwise be returned unconverted.
    if value is None:
        return None
    return int(value.toPython())
def parse_snippet(snippet_node: Union[URIRef, BNode], graph: Graph, doc_namespace: str) -> Snippet:
    """Build a ``Snippet`` from the triples attached to ``snippet_node``.

    Problems found while reading individual properties are collected in a local
    ``Logger`` and handed to ``raise_parsing_error_if_logger_has_messages``
    before the model object is constructed.

    Args:
        snippet_node: Graph node describing the snippet.
        graph: Graph that contains the snippet's triples.
        doc_namespace: Document namespace passed on to the SPDX id and license parsers.

    Returns:
        The ``Snippet`` produced by ``construct_or_raise_parsing_error``.
    """
    logger = Logger()

    def to_license_expression(node):
        # Shared by licenseConcluded and every licenseInfoInSnippet entry.
        return parse_license_expression(node, graph, doc_namespace, logger)

    spdx_id = parse_spdx_id(snippet_node, doc_namespace, graph)
    snippet_from_file = get_value_from_graph(
        logger, graph, subject=snippet_node, predicate=SPDX_NAMESPACE.snippetFromFile
    )
    file_spdx_id = parse_spdx_id(snippet_from_file, doc_namespace, graph)

    # Every range node contributes either a byte range or a line range.
    byte_range, line_range = None, None
    for _, _, range_node in graph.triples((snippet_node, SPDX_NAMESPACE.range, None)):
        parsed = apply_parsing_method_or_log_error(
            logger, range_node, parsing_method=lambda node: parse_ranges(node, graph)
        )
        byte_range, line_range = set_range_or_log_error(byte_range, line_range, logger, parsed)

    license_concluded = parse_literal_or_no_assertion_or_none(
        logger,
        graph,
        snippet_node,
        SPDX_NAMESPACE.licenseConcluded,
        parsing_method=to_license_expression,
    )
    license_info_in_snippet = [
        get_correctly_typed_value(logger, license_node, to_license_expression)
        for _, _, license_node in graph.triples((snippet_node, SPDX_NAMESPACE.licenseInfoInSnippet, None))
    ]
    license_comment = parse_literal(logger, graph, snippet_node, SPDX_NAMESPACE.licenseComments)
    copyright_text = parse_literal_or_no_assertion_or_none(logger, graph, snippet_node, SPDX_NAMESPACE.copyrightText)
    comment = parse_literal(logger, graph, snippet_node, RDFS.comment)
    name = parse_literal(logger, graph, snippet_node, SPDX_NAMESPACE.name)
    attribution_texts = [
        text_literal.toPython()
        for _, _, text_literal in get_correctly_typed_triples(
            logger, graph, snippet_node, SPDX_NAMESPACE.attributionText, None
        )
    ]

    raise_parsing_error_if_logger_has_messages(logger, "Snippet")
    snippet_arguments = {
        "spdx_id": spdx_id,
        "file_spdx_id": file_spdx_id,
        "byte_range": byte_range,
        "line_range": line_range,
        "license_concluded": license_concluded,
        "license_info_in_snippet": license_info_in_snippet,
        "license_comment": license_comment,
        "copyright_text": copyright_text,
        "comment": comment,
        "name": name,
        "attribution_texts": attribution_texts,
    }
    return construct_or_raise_parsing_error(Snippet, snippet_arguments)
def set_range_or_log_error(
    byte_range: Optional[Tuple[int, int]],
    line_range: Optional[Tuple[int, int]],
    logger: Logger,
    parsed_range: Dict[str, Tuple[int, int]],
) -> Tuple[Optional[Tuple[int, int]], Optional[Tuple[int, int]]]:
    """Merge a freshly parsed range into the byte/line ranges collected so far.

    ``parsed_range`` is keyed by pointer type name (``"ByteOffsetPointer"`` or
    ``"LineCharPointer"``). A range is stored only if no range of that kind has
    been set yet; otherwise a message is appended to ``logger`` and the existing
    range is kept.

    Args:
        byte_range: Byte range found so far, if any.
        line_range: Line range found so far, if any.
        logger: Receives a message for every duplicate range.
        parsed_range: Mapping from pointer type name to ``(start, end)``; a falsy
            value (``None`` or empty) leaves both ranges untouched.

    Returns:
        The possibly updated ``(byte_range, line_range)`` pair.
    """
    if not parsed_range:
        return byte_range, line_range

    # The two pointer kinds are handled independently, so a mapping that carries
    # both keys cannot cause its line range to be skipped.
    if "ByteOffsetPointer" in parsed_range:
        if byte_range:
            logger.append("Multiple ByteOffsetPointer found.")
        else:
            byte_range = parsed_range["ByteOffsetPointer"]
    if "LineCharPointer" in parsed_range:
        if line_range:
            logger.append("Multiple LineCharPointer found.")
        else:
            line_range = parsed_range["LineCharPointer"]
    return byte_range, line_range
def parse_ranges(start_end_pointer: BNode, graph: Graph) -> Dict[str, Tuple[int, int]]:
    """Parse one start/end pointer node into a single-entry range mapping.

    The key of the returned mapping is the fragment of the pointer type shared
    by the start and end pointer; the value is the ``(start, end)`` pair read
    via the predicate registered for that type in ``POINTER_MATCHING``.

    Args:
        start_end_pointer: Node that carries the ``startPointer`` and ``endPointer`` properties.
        graph: Graph that contains the pointer triples.

    Raises:
        SPDXParsingError: If the start and end pointer types differ or the type
            has no entry in ``POINTER_MATCHING``. Errors raised by
            ``get_pointer_type`` and ``parse_range_value`` propagate unchanged.
    """
    start_pointer_type, start_pointer_node = get_pointer_type(graph, POINTER_NAMESPACE.startPointer, start_end_pointer)
    end_pointer_type, end_pointer_node = get_pointer_type(graph, POINTER_NAMESPACE.endPointer, start_end_pointer)

    if start_pointer_type != end_pointer_type:
        raise SPDXParsingError(["Types of startPointer and endPointer don't match"])

    # The pointer type is read from the document, so an unknown (or missing) type
    # must surface as a parsing error rather than as a KeyError from a plain lookup.
    value_predicate = POINTER_MATCHING.get(start_pointer_type)
    if value_predicate is None:
        raise SPDXParsingError([f"Unsupported pointer type {start_pointer_type}."])

    start_value = parse_range_value(graph, start_pointer_node, value_predicate)
    end_value = parse_range_value(graph, end_pointer_node, value_predicate)
    return {str(start_pointer_type.fragment): (start_value, end_value)}
def get_pointer_type(graph: Graph, pointer: URIRef, start_end_pointer: BNode) -> Tuple[URIRef, Node]:
    """Look up the node referenced by ``pointer`` together with its ``rdf:type``.

    Args:
        graph: Graph that contains the pointer triples.
        pointer: Predicate selecting the pointer on ``start_end_pointer``.
        start_end_pointer: Node that carries the pointer property.

    Returns:
        The pair ``(pointer_type, pointer_node)``.

    Raises:
        SPDXParsingError: If ``start_end_pointer`` has several values for
            ``pointer`` or no (truthy) value at all.
    """
    try:
        # any=False: ambiguity is reported through UniquenessError instead of being resolved silently.
        target = graph.value(start_end_pointer, pointer, any=False)
    except UniquenessError:
        raise SPDXParsingError([f"Multiple values for {pointer.fragment}"])

    if target:
        return get_value_from_graph(Logger(), graph, target, RDF.type), target
    raise SPDXParsingError([f"Couldn't find pointer of type {pointer.fragment}."])
# Maps each supported pointer type URI to the predicate that parse_ranges reads the range value from.
POINTER_MATCHING = {rdflib.term.URIRef('http://www.w3.org/2009/pointers#ByteOffsetPointer'): rdflib.term.URIRef('http://www.w3.org/2009/pointers#offset'), rdflib.term.URIRef('http://www.w3.org/2009/pointers#LineCharPointer'): rdflib.term.URIRef('http://www.w3.org/2009/pointers#lineNumber')}
def parse_range_value(graph: Graph, pointer_node: Node, predicate: URIRef) -> Optional[int]:
    """Read the integer stored under ``predicate`` on ``pointer_node``.

    Args:
        graph: Graph that contains the pointer triples.
        pointer_node: Node of the start or end pointer.
        predicate: Property holding the numeric position.

    Returns:
        The value converted to ``int``, or ``None`` when ``get_value_from_graph``
        yields ``None``.

    Raises:
        SPDXParsingError: If a ``UniquenessError`` signals several values for ``predicate``.
    """
    try:
        value = get_value_from_graph(Logger(), graph, pointer_node, predicate, _any=False)
    except UniquenessError:
        raise SPDXParsingError([f"Multiple values for {predicate.fragment} found."])
    # Compare against None instead of relying on truthiness: a literal whose value
    # is 0 evaluates as false and would otherwise be returned unconverted.
    if value is None:
        return None
    return int(value.toPython())