spdx_tools.spdx.parser.jsonlikedict.snippet_parser
# SPDX-FileCopyrightText: 2022 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from enum import Enum, auto

from beartype.typing import Dict, List, Optional, Tuple, Union
from license_expression import LicenseExpression

from spdx_tools.spdx.model import Snippet, SpdxNoAssertion, SpdxNone
from spdx_tools.spdx.parser.error import SPDXParsingError
from spdx_tools.spdx.parser.jsonlikedict.dict_parsing_functions import (
    parse_field_or_log_error,
    parse_field_or_no_assertion_or_none,
)
from spdx_tools.spdx.parser.jsonlikedict.license_expression_parser import LicenseExpressionParser
from spdx_tools.spdx.parser.logger import Logger
from spdx_tools.spdx.parser.parsing_functions import construct_or_raise_parsing_error


class RangeType(Enum):
    """Kind of a snippet range: pointers carry either an ``offset`` (BYTE) or a ``lineNumber`` (LINE)."""

    BYTE = auto()
    LINE = auto()


class SnippetParser:
    """Parse snippet entries of a JSON-like SPDX document into ``Snippet`` objects."""

    logger: Logger
    # NOTE(review): this binds the parser *class* as a class attribute (it is an assignment, not an
    # annotation); ``__init__`` shadows it with an instance on every object.
    license_expression_parser = LicenseExpressionParser

    def __init__(self):
        self.logger = Logger()
        self.license_expression_parser = LicenseExpressionParser()

    def parse_snippet(self, snippet_dict: Dict) -> Snippet:
        """Build a ``Snippet`` from its dictionary representation.

        Reads the keys ``SPDXID``, ``snippetFromFile``, ``name``, ``ranges``, ``attributionTexts``,
        ``comment``, ``copyrightText``, ``licenseComments``, ``licenseConcluded`` and
        ``licenseInfoInSnippets`` from *snippet_dict*; missing keys yield ``None`` (or an empty
        list for ``ranges`` and ``attributionTexts``).

        Raises:
            SPDXParsingError: if the local logger collected any message while parsing the fields.
                Construction itself is delegated to ``construct_or_raise_parsing_error``
                (presumably raising the same error type for invalid arguments; confirm in that helper).
        """
        # A fresh logger per call, so messages of different snippets never mix.
        logger = Logger()
        spdx_id: Optional[str] = snippet_dict.get("SPDXID")
        file_spdx_id: Optional[str] = snippet_dict.get("snippetFromFile")
        name: Optional[str] = snippet_dict.get("name")

        # NOTE(review): presumably logs a parsing failure to ``logger`` and returns ``default``;
        # confirm against parse_field_or_log_error.
        ranges: Dict = parse_field_or_log_error(logger, snippet_dict.get("ranges", []), self.parse_ranges, default={})
        byte_range: Optional[Tuple[Union[int, str], Union[int, str]]] = ranges.get(RangeType.BYTE)
        line_range: Optional[Tuple[Union[int, str], Union[int, str]]] = ranges.get(RangeType.LINE)
        byte_range = self.convert_range_from_str(byte_range)
        line_range = self.convert_range_from_str(line_range)

        attribution_texts: List[str] = snippet_dict.get("attributionTexts", [])
        comment: Optional[str] = snippet_dict.get("comment")
        copyright_text: Optional[Union[str, SpdxNoAssertion, SpdxNone]] = parse_field_or_no_assertion_or_none(
            snippet_dict.get("copyrightText")
        )
        license_comment: Optional[str] = snippet_dict.get("licenseComments")
        license_concluded: Optional[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]] = parse_field_or_log_error(
            logger, snippet_dict.get("licenseConcluded"), self.license_expression_parser.parse_license_expression
        )

        # Fixed annotation: the Union must wrap all three alternatives inside the List.
        license_info: List[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]] = parse_field_or_log_error(
            logger,
            snippet_dict.get("licenseInfoInSnippets"),
            self.license_expression_parser.parse_license_expression,
            field_is_list=True,
        )
        if logger.has_messages():
            raise SPDXParsingError([f"Error while parsing snippet: {logger.get_messages()}"])

        snippet = construct_or_raise_parsing_error(
            Snippet,
            dict(
                spdx_id=spdx_id,
                name=name,
                byte_range=byte_range,
                file_spdx_id=file_spdx_id,
                line_range=line_range,
                attribution_texts=attribution_texts,
                comment=comment,
                copyright_text=copyright_text,
                license_comment=license_comment,
                license_concluded=license_concluded,
                license_info_in_snippet=license_info,
            ),
        )

        return snippet

    def parse_ranges(self, ranges_from_snippet: List[Dict]) -> Dict:
        """Map each range dictionary to ``{RangeType: (start, end)}``.

        A later range of the same type overwrites an earlier one.

        Raises:
            SPDXParsingError: if validation of at least one range raised a ``ValueError``.
        """
        logger = Logger()
        ranges = {}
        for range_dict in ranges_from_snippet:
            try:
                range_type: RangeType = self.validate_range_and_get_type(range_dict)
                start_end_tuple: Tuple[int, int] = SnippetParser.get_start_end_tuple(range_dict, range_type)
                ranges[range_type] = start_end_tuple
            except ValueError as error:
                # Collect every problem before failing so all of them are reported at once.
                logger.append(error.args[0])
        if logger.has_messages():
            raise SPDXParsingError([f"Error while parsing snippet ranges: {logger.get_messages()}"])
        return ranges

    @staticmethod
    def get_start_end_tuple(range_dict: Dict, range_type: RangeType) -> Tuple[int, int]:
        """Return ``(start, end)`` read from the ``offset`` (BYTE) or ``lineNumber`` (other) pointer key."""
        end_pointer: Dict = range_dict["endPointer"]
        start_pointer: Dict = range_dict["startPointer"]
        if range_type == RangeType.BYTE:
            start: int = start_pointer["offset"]
            end: int = end_pointer["offset"]
        else:
            start: int = start_pointer["lineNumber"]
            end: int = end_pointer["lineNumber"]
        return start, end

    def validate_range_and_get_type(self, range_dict: Dict) -> RangeType:
        """Return the common type of both pointers of *range_dict*.

        Raises:
            ValueError: if a pointer is missing, a pointer's type cannot be determined, or the
                two pointer types differ.
        """
        if "startPointer" not in range_dict:
            raise ValueError("Startpointer missing in snippet ranges.")
        if "endPointer" not in range_dict:
            raise ValueError("Endpointer missing in snippet ranges.")
        start_pointer_type: RangeType = self.validate_pointer_and_get_type(range_dict["startPointer"])
        end_pointer_type: RangeType = self.validate_pointer_and_get_type(range_dict["endPointer"])
        if start_pointer_type != end_pointer_type:
            raise ValueError("Type of startpointer is not the same as type of endpointer.")
        return start_pointer_type

    @staticmethod
    def validate_pointer_and_get_type(pointer: Dict) -> RangeType:
        """Return BYTE for a pointer with ``offset``, LINE for one with ``lineNumber``.

        Raises:
            ValueError: if *pointer* contains both keys or neither of them.
        """
        if "offset" in pointer and "lineNumber" in pointer:
            raise ValueError('Couldn\'t determine type of pointer: "offset" and "lineNumber" provided as key.')
        if "offset" not in pointer and "lineNumber" not in pointer:
            raise ValueError('Couldn\'t determine type of pointer: neither "offset" nor "lineNumber" provided as key.')
        return RangeType.BYTE if "offset" in pointer else RangeType.LINE

    @staticmethod
    def convert_range_from_str(
        _range: Optional[Tuple[Union[int, str], Union[int, str]]]
    ) -> Optional[Tuple[Union[int, str], Union[int, str]]]:
        """Convert decimal strings in a ``(start, end)`` tuple to ``int``.

        A falsy *_range* (``None`` or an empty tuple) is returned unchanged, as is any element
        that is not a string of decimal digits.
        """
        # XML does not support integers, so we have to convert from string (if possible)
        if not _range:
            return _range

        # isdecimal() rather than isdigit(): isdigit() also accepts characters such as "²",
        # for which int() raises ValueError.
        if isinstance(_range[0], str) and _range[0].isdecimal():
            _range = int(_range[0]), _range[1]
        if isinstance(_range[1], str) and _range[1].isdecimal():
            _range = _range[0], int(_range[1])
        return _range
class
RangeType(enum.Enum):
BYTE =
<RangeType.BYTE: 1>
LINE =
<RangeType.LINE: 2>
Inherited Members
- enum.Enum
- name
- value
class
SnippetParser:
class SnippetParser:
    """Parse snippet entries of a JSON-like SPDX document into ``Snippet`` objects."""

    logger: Logger
    # NOTE(review): this binds the parser *class* as a class attribute (it is an assignment, not an
    # annotation); ``__init__`` shadows it with an instance on every object.
    license_expression_parser = LicenseExpressionParser

    def __init__(self):
        self.logger = Logger()
        self.license_expression_parser = LicenseExpressionParser()

    def parse_snippet(self, snippet_dict: Dict) -> Snippet:
        """Build a ``Snippet`` from its dictionary representation.

        Reads the keys ``SPDXID``, ``snippetFromFile``, ``name``, ``ranges``, ``attributionTexts``,
        ``comment``, ``copyrightText``, ``licenseComments``, ``licenseConcluded`` and
        ``licenseInfoInSnippets`` from *snippet_dict*; missing keys yield ``None`` (or an empty
        list for ``ranges`` and ``attributionTexts``).

        Raises:
            SPDXParsingError: if the local logger collected any message while parsing the fields.
                Construction itself is delegated to ``construct_or_raise_parsing_error``
                (presumably raising the same error type for invalid arguments; confirm in that helper).
        """
        # A fresh logger per call, so messages of different snippets never mix.
        logger = Logger()
        spdx_id: Optional[str] = snippet_dict.get("SPDXID")
        file_spdx_id: Optional[str] = snippet_dict.get("snippetFromFile")
        name: Optional[str] = snippet_dict.get("name")

        # NOTE(review): presumably logs a parsing failure to ``logger`` and returns ``default``;
        # confirm against parse_field_or_log_error.
        ranges: Dict = parse_field_or_log_error(logger, snippet_dict.get("ranges", []), self.parse_ranges, default={})
        byte_range: Optional[Tuple[Union[int, str], Union[int, str]]] = ranges.get(RangeType.BYTE)
        line_range: Optional[Tuple[Union[int, str], Union[int, str]]] = ranges.get(RangeType.LINE)
        byte_range = self.convert_range_from_str(byte_range)
        line_range = self.convert_range_from_str(line_range)

        attribution_texts: List[str] = snippet_dict.get("attributionTexts", [])
        comment: Optional[str] = snippet_dict.get("comment")
        copyright_text: Optional[Union[str, SpdxNoAssertion, SpdxNone]] = parse_field_or_no_assertion_or_none(
            snippet_dict.get("copyrightText")
        )
        license_comment: Optional[str] = snippet_dict.get("licenseComments")
        license_concluded: Optional[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]] = parse_field_or_log_error(
            logger, snippet_dict.get("licenseConcluded"), self.license_expression_parser.parse_license_expression
        )

        # Fixed annotation: the Union must wrap all three alternatives inside the List.
        license_info: List[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]] = parse_field_or_log_error(
            logger,
            snippet_dict.get("licenseInfoInSnippets"),
            self.license_expression_parser.parse_license_expression,
            field_is_list=True,
        )
        if logger.has_messages():
            raise SPDXParsingError([f"Error while parsing snippet: {logger.get_messages()}"])

        snippet = construct_or_raise_parsing_error(
            Snippet,
            dict(
                spdx_id=spdx_id,
                name=name,
                byte_range=byte_range,
                file_spdx_id=file_spdx_id,
                line_range=line_range,
                attribution_texts=attribution_texts,
                comment=comment,
                copyright_text=copyright_text,
                license_comment=license_comment,
                license_concluded=license_concluded,
                license_info_in_snippet=license_info,
            ),
        )

        return snippet

    def parse_ranges(self, ranges_from_snippet: List[Dict]) -> Dict:
        """Map each range dictionary to ``{RangeType: (start, end)}``.

        A later range of the same type overwrites an earlier one.

        Raises:
            SPDXParsingError: if validation of at least one range raised a ``ValueError``.
        """
        logger = Logger()
        ranges = {}
        for range_dict in ranges_from_snippet:
            try:
                range_type: RangeType = self.validate_range_and_get_type(range_dict)
                start_end_tuple: Tuple[int, int] = SnippetParser.get_start_end_tuple(range_dict, range_type)
                ranges[range_type] = start_end_tuple
            except ValueError as error:
                # Collect every problem before failing so all of them are reported at once.
                logger.append(error.args[0])
        if logger.has_messages():
            raise SPDXParsingError([f"Error while parsing snippet ranges: {logger.get_messages()}"])
        return ranges

    @staticmethod
    def get_start_end_tuple(range_dict: Dict, range_type: RangeType) -> Tuple[int, int]:
        """Return ``(start, end)`` read from the ``offset`` (BYTE) or ``lineNumber`` (other) pointer key."""
        end_pointer: Dict = range_dict["endPointer"]
        start_pointer: Dict = range_dict["startPointer"]
        if range_type == RangeType.BYTE:
            start: int = start_pointer["offset"]
            end: int = end_pointer["offset"]
        else:
            start: int = start_pointer["lineNumber"]
            end: int = end_pointer["lineNumber"]
        return start, end

    def validate_range_and_get_type(self, range_dict: Dict) -> RangeType:
        """Return the common type of both pointers of *range_dict*.

        Raises:
            ValueError: if a pointer is missing, a pointer's type cannot be determined, or the
                two pointer types differ.
        """
        if "startPointer" not in range_dict:
            raise ValueError("Startpointer missing in snippet ranges.")
        if "endPointer" not in range_dict:
            raise ValueError("Endpointer missing in snippet ranges.")
        start_pointer_type: RangeType = self.validate_pointer_and_get_type(range_dict["startPointer"])
        end_pointer_type: RangeType = self.validate_pointer_and_get_type(range_dict["endPointer"])
        if start_pointer_type != end_pointer_type:
            raise ValueError("Type of startpointer is not the same as type of endpointer.")
        return start_pointer_type

    @staticmethod
    def validate_pointer_and_get_type(pointer: Dict) -> RangeType:
        """Return BYTE for a pointer with ``offset``, LINE for one with ``lineNumber``.

        Raises:
            ValueError: if *pointer* contains both keys or neither of them.
        """
        if "offset" in pointer and "lineNumber" in pointer:
            raise ValueError('Couldn\'t determine type of pointer: "offset" and "lineNumber" provided as key.')
        if "offset" not in pointer and "lineNumber" not in pointer:
            raise ValueError('Couldn\'t determine type of pointer: neither "offset" nor "lineNumber" provided as key.')
        return RangeType.BYTE if "offset" in pointer else RangeType.LINE

    @staticmethod
    def convert_range_from_str(
        _range: Optional[Tuple[Union[int, str], Union[int, str]]]
    ) -> Optional[Tuple[Union[int, str], Union[int, str]]]:
        """Convert decimal strings in a ``(start, end)`` tuple to ``int``.

        A falsy *_range* (``None`` or an empty tuple) is returned unchanged, as is any element
        that is not a string of decimal digits.
        """
        # XML does not support integers, so we have to convert from string (if possible)
        if not _range:
            return _range

        # isdecimal() rather than isdigit(): isdigit() also accepts characters such as "²",
        # for which int() raises ValueError.
        if isinstance(_range[0], str) and _range[0].isdecimal():
            _range = int(_range[0]), _range[1]
        if isinstance(_range[1], str) and _range[1].isdecimal():
            _range = _range[0], int(_range[1])
        return _range
license_expression_parser =
<class 'spdx_tools.spdx.parser.jsonlikedict.license_expression_parser.LicenseExpressionParser'>
def parse_snippet(self, snippet_dict: Dict) -> Snippet:
    """Build a ``Snippet`` from its dictionary representation.

    Reads the keys ``SPDXID``, ``snippetFromFile``, ``name``, ``ranges``, ``attributionTexts``,
    ``comment``, ``copyrightText``, ``licenseComments``, ``licenseConcluded`` and
    ``licenseInfoInSnippets`` from *snippet_dict*; missing keys yield ``None`` (or an empty
    list for ``ranges`` and ``attributionTexts``).

    Raises:
        SPDXParsingError: if the local logger collected any message while parsing the fields.
            Construction itself is delegated to ``construct_or_raise_parsing_error``
            (presumably raising the same error type for invalid arguments; confirm in that helper).
    """
    # A fresh logger per call, so messages of different snippets never mix.
    logger = Logger()
    spdx_id: Optional[str] = snippet_dict.get("SPDXID")
    file_spdx_id: Optional[str] = snippet_dict.get("snippetFromFile")
    name: Optional[str] = snippet_dict.get("name")

    # NOTE(review): presumably logs a parsing failure to ``logger`` and returns ``default``;
    # confirm against parse_field_or_log_error.
    ranges: Dict = parse_field_or_log_error(logger, snippet_dict.get("ranges", []), self.parse_ranges, default={})
    byte_range: Optional[Tuple[Union[int, str], Union[int, str]]] = ranges.get(RangeType.BYTE)
    line_range: Optional[Tuple[Union[int, str], Union[int, str]]] = ranges.get(RangeType.LINE)
    # Ranges may arrive as strings (see convert_range_from_str); normalise them to int where possible.
    byte_range = self.convert_range_from_str(byte_range)
    line_range = self.convert_range_from_str(line_range)

    attribution_texts: List[str] = snippet_dict.get("attributionTexts", [])
    comment: Optional[str] = snippet_dict.get("comment")
    copyright_text: Optional[Union[str, SpdxNoAssertion, SpdxNone]] = parse_field_or_no_assertion_or_none(
        snippet_dict.get("copyrightText")
    )
    license_comment: Optional[str] = snippet_dict.get("licenseComments")
    license_concluded: Optional[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]] = parse_field_or_log_error(
        logger, snippet_dict.get("licenseConcluded"), self.license_expression_parser.parse_license_expression
    )

    # Fixed annotation: the Union must wrap all three alternatives inside the List.
    license_info: List[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]] = parse_field_or_log_error(
        logger,
        snippet_dict.get("licenseInfoInSnippets"),
        self.license_expression_parser.parse_license_expression,
        field_is_list=True,
    )
    if logger.has_messages():
        raise SPDXParsingError([f"Error while parsing snippet: {logger.get_messages()}"])

    snippet = construct_or_raise_parsing_error(
        Snippet,
        dict(
            spdx_id=spdx_id,
            name=name,
            byte_range=byte_range,
            file_spdx_id=file_spdx_id,
            line_range=line_range,
            attribution_texts=attribution_texts,
            comment=comment,
            copyright_text=copyright_text,
            license_comment=license_comment,
            license_concluded=license_concluded,
            license_info_in_snippet=license_info,
        ),
    )

    return snippet
def
parse_ranges(self, ranges_from_snippet: list[dict]) -> dict:
def parse_ranges(self, ranges_from_snippet: List[Dict]) -> Dict:
    """Turn a list of range dictionaries into ``{RangeType: (start, end)}``.

    Every entry is validated; the message of each ``ValueError`` raised during validation is
    collected. A later entry of the same range type replaces an earlier one.

    Raises:
        SPDXParsingError: if at least one entry failed validation; the error carries all
            collected messages.
    """
    error_log = Logger()
    parsed = {}
    for entry in ranges_from_snippet:
        try:
            kind = self.validate_range_and_get_type(entry)
            parsed[kind] = SnippetParser.get_start_end_tuple(entry, kind)
        except ValueError as exc:
            # Keep going so that all broken ranges are reported together.
            error_log.append(exc.args[0])
    if not error_log.has_messages():
        return parsed
    raise SPDXParsingError([f"Error while parsing snippet ranges: {error_log.get_messages()}"])
@staticmethod
def get_start_end_tuple(range_dict: Dict, range_type: RangeType) -> Tuple[int, int]:
    """Extract ``(start, end)`` from the two pointers of *range_dict*.

    For ``RangeType.BYTE`` the ``offset`` entries are read, for any other type the
    ``lineNumber`` entries.
    """
    # Look the pointers up in the same order as before (end first) so a missing key
    # surfaces with the same KeyError.
    end_pointer = range_dict["endPointer"]
    start_pointer = range_dict["startPointer"]
    key = "offset" if range_type == RangeType.BYTE else "lineNumber"
    return start_pointer[key], end_pointer[key]
def validate_range_and_get_type(self, range_dict: Dict) -> RangeType:
    """Check that *range_dict* has two pointers of the same type and return that type.

    Raises:
        ValueError: if ``startPointer`` or ``endPointer`` is missing, if a pointer's type
            cannot be determined, or if the two pointer types differ.
    """
    if "startPointer" not in range_dict:
        raise ValueError("Startpointer missing in snippet ranges.")
    if "endPointer" not in range_dict:
        raise ValueError("Endpointer missing in snippet ranges.")

    # Start pointer is validated first, then the end pointer.
    start_type, end_type = (
        self.validate_pointer_and_get_type(range_dict[key]) for key in ("startPointer", "endPointer")
    )
    if start_type == end_type:
        return start_type
    raise ValueError("Type of startpointer is not the same as type of endpointer.")
@staticmethod
def validate_pointer_and_get_type(pointer: Dict) -> RangeType:
    """Classify *pointer* by the key it carries.

    Returns ``RangeType.BYTE`` when only ``offset`` is present and ``RangeType.LINE`` when only
    ``lineNumber`` is present.

    Raises:
        ValueError: if both keys or neither key is present.
    """
    has_offset = "offset" in pointer
    has_line_number = "lineNumber" in pointer
    if has_offset and has_line_number:
        raise ValueError('Couldn\'t determine type of pointer: "offset" and "lineNumber" provided as key.')
    if not (has_offset or has_line_number):
        raise ValueError('Couldn\'t determine type of pointer: neither "offset" nor "lineNumber" provided as key.')
    if has_offset:
        return RangeType.BYTE
    return RangeType.LINE
@staticmethod
def
convert_range_from_str( _range: tuple[typing.Union[int, str], typing.Union[int, str]]) -> tuple[typing.Union[int, str], typing.Union[int, str]]:
129 @staticmethod 130 def convert_range_from_str( 131 _range: Tuple[Union[int, str], Union[int, str]] 132 ) -> Tuple[Union[int, str], Union[int, str]]: 133 # XML does not support integers, so we have to convert from string (if possible) 134 if not _range: 135 return _range 136 137 if isinstance(_range[0], str) and _range[0].isdigit(): 138 _range = int(_range[0]), _range[1] 139 if isinstance(_range[1], str) and _range[1].isdigit(): 140 _range = _range[0], int(_range[1]) 141 return _range