spdx_tools.spdx.parser.rdf.snippet_parser
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from beartype.typing import Dict, Optional, Tuple, Union
from rdflib import RDF, RDFS, Graph
from rdflib.exceptions import UniquenessError
from rdflib.term import BNode, Node, URIRef

from spdx_tools.spdx.model import Snippet
from spdx_tools.spdx.parser.error import SPDXParsingError
from spdx_tools.spdx.parser.logger import Logger
from spdx_tools.spdx.parser.parsing_functions import (
    construct_or_raise_parsing_error,
    raise_parsing_error_if_logger_has_messages,
)
from spdx_tools.spdx.parser.rdf.graph_parsing_functions import (
    apply_parsing_method_or_log_error,
    get_correctly_typed_triples,
    get_correctly_typed_value,
    get_value_from_graph,
    parse_literal,
    parse_literal_or_no_assertion_or_none,
    parse_spdx_id,
)
from spdx_tools.spdx.parser.rdf.license_expression_parser import parse_license_expression
from spdx_tools.spdx.rdfschema.namespace import POINTER_NAMESPACE, SPDX_NAMESPACE


def parse_snippet(snippet_node: Union[URIRef, BNode], graph: Graph, doc_namespace: str) -> Snippet:
    """Build a ``Snippet`` from the triples attached to ``snippet_node``.

    Every helper call shares one local ``Logger``; after all properties have been
    read, ``raise_parsing_error_if_logger_has_messages`` is invoked with that logger
    before the ``Snippet`` is constructed.

    :param snippet_node: graph node that represents the snippet
    :param graph: RDF graph containing the snippet's triples
    :param doc_namespace: document namespace forwarded to the SPDX id and license parsers
    :return: the object produced by ``construct_or_raise_parsing_error(Snippet, ...)``
    """
    logger = Logger()

    def to_license_expression(node):
        # Shared by licenseConcluded and licenseInfoInSnippet.
        return parse_license_expression(node, graph, doc_namespace, logger)

    spdx_id = parse_spdx_id(snippet_node, doc_namespace, graph)
    source_file_uri = get_value_from_graph(
        logger, graph, subject=snippet_node, predicate=SPDX_NAMESPACE.snippetFromFile
    )
    file_spdx_id = parse_spdx_id(source_file_uri, doc_namespace, graph)

    # A snippet may carry several spdx:range nodes; each one fills either the byte
    # range or the line range, and a second range of the same kind is logged.
    byte_range, line_range = None, None
    for _, _, range_node in graph.triples((snippet_node, SPDX_NAMESPACE.range, None)):
        parsed_range = apply_parsing_method_or_log_error(
            logger, range_node, parsing_method=lambda x: parse_ranges(x, graph)
        )
        byte_range, line_range = set_range_or_log_error(byte_range, line_range, logger, parsed_range)

    license_concluded = parse_literal_or_no_assertion_or_none(
        logger,
        graph,
        snippet_node,
        SPDX_NAMESPACE.licenseConcluded,
        parsing_method=to_license_expression,
    )
    license_info_in_snippet = [
        get_correctly_typed_value(logger, license_node, to_license_expression)
        for _, _, license_node in graph.triples((snippet_node, SPDX_NAMESPACE.licenseInfoInSnippet, None))
    ]
    license_comment = parse_literal(logger, graph, snippet_node, SPDX_NAMESPACE.licenseComments)
    copyright_text = parse_literal_or_no_assertion_or_none(logger, graph, snippet_node, SPDX_NAMESPACE.copyrightText)
    comment = parse_literal(logger, graph, snippet_node, RDFS.comment)
    name = parse_literal(logger, graph, snippet_node, SPDX_NAMESPACE.name)
    attribution_texts = [
        text_literal.toPython()
        for _, _, text_literal in get_correctly_typed_triples(
            logger, graph, snippet_node, SPDX_NAMESPACE.attributionText, None
        )
    ]

    raise_parsing_error_if_logger_has_messages(logger, "Snippet")
    return construct_or_raise_parsing_error(
        Snippet,
        {
            "spdx_id": spdx_id,
            "file_spdx_id": file_spdx_id,
            "byte_range": byte_range,
            "line_range": line_range,
            "license_concluded": license_concluded,
            "license_info_in_snippet": license_info_in_snippet,
            "license_comment": license_comment,
            "copyright_text": copyright_text,
            "comment": comment,
            "name": name,
            "attribution_texts": attribution_texts,
        },
    )


def set_range_or_log_error(
    byte_range: Optional[Tuple[int, int]],
    line_range: Optional[Tuple[int, int]],
    logger: Logger,
    parsed_range: Dict[str, Tuple[int, int]],
) -> Tuple[Optional[Tuple[int, int]], Optional[Tuple[int, int]]]:
    """Merge one parsed range into the byte/line ranges collected so far.

    :param byte_range: byte range found so far, or ``None``
    :param line_range: line range found so far, or ``None``
    :param logger: receives a message (via ``append``) when a range kind occurs twice
    :param parsed_range: mapping keyed by ``"ByteOffsetPointer"`` and/or
        ``"LineCharPointer"``; a falsy value (``None`` or empty) leaves both ranges as is
    :return: the (possibly updated) ``(byte_range, line_range)`` pair
    """
    if not parsed_range:
        return byte_range, line_range

    # The two pointer kinds are handled independently so that a mapping holding
    # both entries updates both ranges instead of only the byte range.
    if "ByteOffsetPointer" in parsed_range:
        if byte_range:
            logger.append("Multiple ByteOffsetPointer found.")
        else:
            byte_range = parsed_range["ByteOffsetPointer"]

    if "LineCharPointer" in parsed_range:
        if line_range:
            logger.append("Multiple LineCharPointer found.")
        else:
            line_range = parsed_range["LineCharPointer"]

    return byte_range, line_range


def parse_ranges(start_end_pointer: BNode, graph: Graph) -> Dict[str, Tuple[int, int]]:
    """Parse a start/end pointer node into a single-entry range mapping.

    :param start_end_pointer: node carrying the ``startPointer`` and ``endPointer`` properties
    :param graph: RDF graph containing the pointer triples
    :return: ``{<pointer type fragment>: (start, end)}``; either value is ``None`` when
        ``parse_range_value`` finds nothing for it
    :raises SPDXParsingError: if the two pointers have different types or the type is not a
        key of ``POINTER_MATCHING``; errors from ``get_pointer_type`` and
        ``parse_range_value`` propagate unchanged
    """
    start_pointer_type, start_pointer_node = get_pointer_type(graph, POINTER_NAMESPACE.startPointer, start_end_pointer)
    end_pointer_type, end_pointer_node = get_pointer_type(graph, POINTER_NAMESPACE.endPointer, start_end_pointer)

    if start_pointer_type != end_pointer_type:
        raise SPDXParsingError(["Types of startPointer and endPointer don't match"])

    # Both types are equal here, so one lookup suffices. A missing or unknown type is
    # reported as a parsing error rather than escaping as a bare KeyError.
    value_predicate = POINTER_MATCHING.get(start_pointer_type)
    if value_predicate is None:
        raise SPDXParsingError([f"Unsupported pointer type for startPointer and endPointer: {start_pointer_type}"])

    start_value = parse_range_value(graph, start_pointer_node, value_predicate)
    end_value = parse_range_value(graph, end_pointer_node, value_predicate)
    return {str(start_pointer_type.fragment): (start_value, end_value)}


def get_pointer_type(graph: Graph, pointer: URIRef, start_end_pointer: BNode) -> Tuple[URIRef, Node]:
    """Look up the node referenced by ``pointer`` and that node's ``rdf:type``.

    :param graph: RDF graph to query
    :param pointer: predicate selecting the pointer (callers pass startPointer / endPointer)
    :param start_end_pointer: subject node that owns the pointer
    :return: ``(pointer_type, pointer_node)``
    :raises SPDXParsingError: if the pointer is missing or has more than one value
    """
    try:
        # any=False makes rdflib raise UniquenessError when several values exist.
        node = graph.value(start_end_pointer, pointer, any=False)
    except UniquenessError:
        raise SPDXParsingError([f"Multiple values for {pointer.fragment}"])
    if node:
        # NOTE(review): the Logger handed to the helper is created inline and discarded,
        # so anything the helper appends to it is never surfaced - confirm this is intended.
        return get_value_from_graph(Logger(), graph, node, RDF.type), node
    raise SPDXParsingError([f"Couldn't find pointer of type {pointer.fragment}."])


# Maps the RDF class of a range pointer to the predicate under which parse_range_value
# reads that pointer's position (ByteOffsetPointer -> offset, LineCharPointer -> lineNumber).
POINTER_MATCHING = {
    POINTER_NAMESPACE.ByteOffsetPointer: POINTER_NAMESPACE.offset,
    POINTER_NAMESPACE.LineCharPointer: POINTER_NAMESPACE.lineNumber,
}


def parse_range_value(graph: Graph, pointer_node: Node, predicate: URIRef) -> Optional[int]:
    """Read the integer stored under ``predicate`` on ``pointer_node``.

    :param graph: RDF graph to query
    :param pointer_node: pointer node whose position is wanted
    :param predicate: property holding the position (a value of ``POINTER_MATCHING``)
    :return: the position as ``int``, or ``None`` when the helper returns ``None``
    :raises SPDXParsingError: if several values exist or the value is not an integer
    """
    try:
        # NOTE(review): the throwaway Logger is never inspected; if get_value_from_graph
        # reports problems through the logger instead of raising, they are lost - confirm.
        value = get_value_from_graph(Logger(), graph, pointer_node, predicate, _any=False)
    except UniquenessError:
        raise SPDXParsingError([f"Multiple values for {predicate.fragment} found."])
    if value is None:
        return None
    # Compare against None instead of relying on truthiness: a zero-valued rdflib
    # Literal is falsy and would otherwise be returned without conversion.
    try:
        return int(value.toPython())
    except (TypeError, ValueError) as err:
        raise SPDXParsingError([f"Value for {predicate.fragment} is not an integer: {value}"]) from err
def
parse_snippet( snippet_node: Union[rdflib.term.URIRef, rdflib.term.BNode], graph: rdflib.graph.Graph, doc_namespace: str) -> spdx_tools.spdx.model.snippet.Snippet:
def parse_snippet(snippet_node: Union[URIRef, BNode], graph: Graph, doc_namespace: str) -> Snippet:
    """Build a ``Snippet`` from the triples attached to ``snippet_node``.

    Every helper call shares one local ``Logger``; after all properties have been
    read, ``raise_parsing_error_if_logger_has_messages`` is invoked with that logger
    before the ``Snippet`` is constructed.

    :param snippet_node: graph node that represents the snippet
    :param graph: RDF graph containing the snippet's triples
    :param doc_namespace: document namespace forwarded to the SPDX id and license parsers
    :return: the object produced by ``construct_or_raise_parsing_error(Snippet, ...)``
    """
    logger = Logger()

    def to_license_expression(node):
        # Shared by licenseConcluded and licenseInfoInSnippet.
        return parse_license_expression(node, graph, doc_namespace, logger)

    spdx_id = parse_spdx_id(snippet_node, doc_namespace, graph)
    source_file_uri = get_value_from_graph(
        logger, graph, subject=snippet_node, predicate=SPDX_NAMESPACE.snippetFromFile
    )
    file_spdx_id = parse_spdx_id(source_file_uri, doc_namespace, graph)

    # A snippet may carry several spdx:range nodes; each one fills either the byte
    # range or the line range, and a second range of the same kind is logged.
    byte_range, line_range = None, None
    for _, _, range_node in graph.triples((snippet_node, SPDX_NAMESPACE.range, None)):
        parsed_range = apply_parsing_method_or_log_error(
            logger, range_node, parsing_method=lambda x: parse_ranges(x, graph)
        )
        byte_range, line_range = set_range_or_log_error(byte_range, line_range, logger, parsed_range)

    license_concluded = parse_literal_or_no_assertion_or_none(
        logger,
        graph,
        snippet_node,
        SPDX_NAMESPACE.licenseConcluded,
        parsing_method=to_license_expression,
    )
    license_info_in_snippet = [
        get_correctly_typed_value(logger, license_node, to_license_expression)
        for _, _, license_node in graph.triples((snippet_node, SPDX_NAMESPACE.licenseInfoInSnippet, None))
    ]
    license_comment = parse_literal(logger, graph, snippet_node, SPDX_NAMESPACE.licenseComments)
    copyright_text = parse_literal_or_no_assertion_or_none(logger, graph, snippet_node, SPDX_NAMESPACE.copyrightText)
    comment = parse_literal(logger, graph, snippet_node, RDFS.comment)
    name = parse_literal(logger, graph, snippet_node, SPDX_NAMESPACE.name)
    attribution_texts = [
        text_literal.toPython()
        for _, _, text_literal in get_correctly_typed_triples(
            logger, graph, snippet_node, SPDX_NAMESPACE.attributionText, None
        )
    ]

    raise_parsing_error_if_logger_has_messages(logger, "Snippet")
    return construct_or_raise_parsing_error(
        Snippet,
        {
            "spdx_id": spdx_id,
            "file_spdx_id": file_spdx_id,
            "byte_range": byte_range,
            "line_range": line_range,
            "license_concluded": license_concluded,
            "license_info_in_snippet": license_info_in_snippet,
            "license_comment": license_comment,
            "copyright_text": copyright_text,
            "comment": comment,
            "name": name,
            "attribution_texts": attribution_texts,
        },
    )
def
set_range_or_log_error( byte_range: Optional[tuple[int, int]], line_range: Optional[tuple[int, int]], logger: spdx_tools.spdx.parser.logger.Logger, parsed_range: dict[str, tuple[int, int]]) -> tuple[typing.Optional[tuple[int, int]], typing.Optional[tuple[int, int]]]:
def set_range_or_log_error(
    byte_range: Optional[Tuple[int, int]],
    line_range: Optional[Tuple[int, int]],
    logger: Logger,
    parsed_range: Dict[str, Tuple[int, int]],
) -> Tuple[Optional[Tuple[int, int]], Optional[Tuple[int, int]]]:
    """Merge one parsed range into the byte/line ranges collected so far.

    :param byte_range: byte range found so far, or ``None``
    :param line_range: line range found so far, or ``None``
    :param logger: receives a message (via ``append``) when a range kind occurs twice
    :param parsed_range: mapping keyed by ``"ByteOffsetPointer"`` and/or
        ``"LineCharPointer"``; a falsy value (``None`` or empty) leaves both ranges as is
    :return: the (possibly updated) ``(byte_range, line_range)`` pair
    """
    if not parsed_range:
        return byte_range, line_range

    # The two pointer kinds are handled independently so that a mapping holding
    # both entries updates both ranges instead of only the byte range.
    if "ByteOffsetPointer" in parsed_range:
        if byte_range:
            logger.append("Multiple ByteOffsetPointer found.")
        else:
            byte_range = parsed_range["ByteOffsetPointer"]

    if "LineCharPointer" in parsed_range:
        if line_range:
            logger.append("Multiple LineCharPointer found.")
        else:
            line_range = parsed_range["LineCharPointer"]

    return byte_range, line_range
def
parse_ranges( start_end_pointer: rdflib.term.BNode, graph: rdflib.graph.Graph) -> dict[str, tuple[int, int]]:
def parse_ranges(start_end_pointer: BNode, graph: Graph) -> Dict[str, Tuple[int, int]]:
    """Parse a start/end pointer node into a single-entry range mapping.

    :param start_end_pointer: node carrying the ``startPointer`` and ``endPointer`` properties
    :param graph: RDF graph containing the pointer triples
    :return: ``{<pointer type fragment>: (start, end)}``; either value is ``None`` when
        ``parse_range_value`` finds nothing for it
    :raises SPDXParsingError: if the two pointers have different types or the type is not a
        key of ``POINTER_MATCHING``; errors from ``get_pointer_type`` and
        ``parse_range_value`` propagate unchanged
    """
    start_pointer_type, start_pointer_node = get_pointer_type(graph, POINTER_NAMESPACE.startPointer, start_end_pointer)
    end_pointer_type, end_pointer_node = get_pointer_type(graph, POINTER_NAMESPACE.endPointer, start_end_pointer)

    if start_pointer_type != end_pointer_type:
        raise SPDXParsingError(["Types of startPointer and endPointer don't match"])

    # Both types are equal here, so one lookup suffices. A missing or unknown type is
    # reported as a parsing error rather than escaping as a bare KeyError.
    value_predicate = POINTER_MATCHING.get(start_pointer_type)
    if value_predicate is None:
        raise SPDXParsingError([f"Unsupported pointer type for startPointer and endPointer: {start_pointer_type}"])

    start_value = parse_range_value(graph, start_pointer_node, value_predicate)
    end_value = parse_range_value(graph, end_pointer_node, value_predicate)
    return {str(start_pointer_type.fragment): (start_value, end_value)}
def
get_pointer_type( graph: rdflib.graph.Graph, pointer: rdflib.term.URIRef, start_end_pointer: rdflib.term.BNode) -> tuple[rdflib.term.URIRef, rdflib.term.Node]:
def get_pointer_type(graph: Graph, pointer: URIRef, start_end_pointer: BNode) -> Tuple[URIRef, Node]:
    """Look up the node referenced by ``pointer`` and that node's ``rdf:type``.

    :param graph: RDF graph to query
    :param pointer: predicate selecting the pointer (callers pass startPointer / endPointer)
    :param start_end_pointer: subject node that owns the pointer
    :return: ``(pointer_type, pointer_node)``
    :raises SPDXParsingError: if the pointer is missing or has more than one value
    """
    try:
        # any=False makes rdflib raise UniquenessError when several values exist.
        node = graph.value(start_end_pointer, pointer, any=False)
    except UniquenessError:
        raise SPDXParsingError([f"Multiple values for {pointer.fragment}"])
    if node:
        # NOTE(review): the Logger handed to the helper is created inline and discarded,
        # so anything the helper appends to it is never surfaced - confirm this is intended.
        return get_value_from_graph(Logger(), graph, node, RDF.type), node
    raise SPDXParsingError([f"Couldn't find pointer of type {pointer.fragment}."])
# Lookup table used by parse_ranges: maps the RDF class of a range pointer to the
# predicate under which parse_range_value reads that pointer's position
# (ByteOffsetPointer -> offset, LineCharPointer -> lineNumber).
POINTER_MATCHING =
{rdflib.term.URIRef('http://www.w3.org/2009/pointers#ByteOffsetPointer'): rdflib.term.URIRef('http://www.w3.org/2009/pointers#offset'), rdflib.term.URIRef('http://www.w3.org/2009/pointers#LineCharPointer'): rdflib.term.URIRef('http://www.w3.org/2009/pointers#lineNumber')}
def
parse_range_value( graph: rdflib.graph.Graph, pointer_node: rdflib.term.Node, predicate: rdflib.term.URIRef) -> Optional[int]:
def parse_range_value(graph: Graph, pointer_node: Node, predicate: URIRef) -> Optional[int]:
    """Read the integer stored under ``predicate`` on ``pointer_node``.

    :param graph: RDF graph to query
    :param pointer_node: pointer node whose position is wanted
    :param predicate: property holding the position (a value of ``POINTER_MATCHING``)
    :return: the position as ``int``, or ``None`` when the helper returns ``None``
    :raises SPDXParsingError: if several values exist or the value is not an integer
    """
    try:
        # NOTE(review): the throwaway Logger is never inspected; if get_value_from_graph
        # reports problems through the logger instead of raising, they are lost - confirm.
        value = get_value_from_graph(Logger(), graph, pointer_node, predicate, _any=False)
    except UniquenessError:
        raise SPDXParsingError([f"Multiple values for {predicate.fragment} found."])
    if value is None:
        return None
    # Compare against None instead of relying on truthiness: a zero-valued rdflib
    # Literal is falsy and would otherwise be returned without conversion.
    try:
        return int(value.toPython())
    except (TypeError, ValueError) as err:
        raise SPDXParsingError([f"Value for {predicate.fragment} is not an integer: {value}"]) from err