spdx_tools.spdx.parser.jsonlikedict.snippet_parser

  1# SPDX-FileCopyrightText: 2022 spdx contributors
  2#
  3# SPDX-License-Identifier: Apache-2.0
  4from enum import Enum, auto
  5
  6from beartype.typing import Dict, List, Optional, Tuple, Union
  7from license_expression import LicenseExpression
  8
  9from spdx_tools.spdx.model import Snippet, SpdxNoAssertion, SpdxNone
 10from spdx_tools.spdx.parser.error import SPDXParsingError
 11from spdx_tools.spdx.parser.jsonlikedict.dict_parsing_functions import (
 12    parse_field_or_log_error,
 13    parse_field_or_no_assertion_or_none,
 14)
 15from spdx_tools.spdx.parser.jsonlikedict.license_expression_parser import LicenseExpressionParser
 16from spdx_tools.spdx.parser.logger import Logger
 17from spdx_tools.spdx.parser.parsing_functions import construct_or_raise_parsing_error
 18
 19
class RangeType(Enum):
    """Kind of position a snippet range is expressed in.

    The members are used as the keys of the dictionary returned by ``SnippetParser.parse_ranges``.
    """

    # Selected when a range pointer carries the "offset" key.
    BYTE = auto()
    # Selected when a range pointer carries the "lineNumber" key.
    LINE = auto()
 23
 24
 25class SnippetParser:
 26    logger: Logger
 27    license_expression_parser = LicenseExpressionParser
 28
 29    def __init__(self):
 30        self.logger = Logger()
 31        self.license_expression_parser = LicenseExpressionParser()
 32
 33    def parse_snippet(self, snippet_dict: Dict) -> Snippet:
 34        logger = Logger()
 35        spdx_id: Optional[str] = snippet_dict.get("SPDXID")
 36        file_spdx_id: Optional[str] = snippet_dict.get("snippetFromFile")
 37        name: Optional[str] = snippet_dict.get("name")
 38
 39        ranges: Dict = parse_field_or_log_error(logger, snippet_dict.get("ranges", []), self.parse_ranges, default={})
 40        byte_range: Optional[Tuple[Union[int, str], Union[int, str]]] = ranges.get(RangeType.BYTE)
 41        line_range: Optional[Tuple[Union[int, str], Union[int, str]]] = ranges.get(RangeType.LINE)
 42        byte_range = self.convert_range_from_str(byte_range)
 43        line_range = self.convert_range_from_str(line_range)
 44
 45        attribution_texts: List[str] = snippet_dict.get("attributionTexts", [])
 46        comment: Optional[str] = snippet_dict.get("comment")
 47        copyright_text: Optional[Union[str, SpdxNoAssertion, SpdxNone]] = parse_field_or_no_assertion_or_none(
 48            snippet_dict.get("copyrightText")
 49        )
 50        license_comment: Optional[str] = snippet_dict.get("licenseComments")
 51        license_concluded: Optional[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]] = parse_field_or_log_error(
 52            logger, snippet_dict.get("licenseConcluded"), self.license_expression_parser.parse_license_expression
 53        )
 54
 55        license_info: List[Union[LicenseExpression], SpdxNoAssertion, SpdxNone] = parse_field_or_log_error(
 56            logger,
 57            snippet_dict.get("licenseInfoInSnippets"),
 58            self.license_expression_parser.parse_license_expression,
 59            field_is_list=True,
 60        )
 61        if logger.has_messages():
 62            raise SPDXParsingError([f"Error while parsing snippet: {logger.get_messages()}"])
 63
 64        snippet = construct_or_raise_parsing_error(
 65            Snippet,
 66            dict(
 67                spdx_id=spdx_id,
 68                name=name,
 69                byte_range=byte_range,
 70                file_spdx_id=file_spdx_id,
 71                line_range=line_range,
 72                attribution_texts=attribution_texts,
 73                comment=comment,
 74                copyright_text=copyright_text,
 75                license_comment=license_comment,
 76                license_concluded=license_concluded,
 77                license_info_in_snippet=license_info,
 78            ),
 79        )
 80
 81        return snippet
 82
 83    def parse_ranges(self, ranges_from_snippet: List[Dict]) -> Dict:
 84        logger = Logger()
 85        ranges = {}
 86        for range_dict in ranges_from_snippet:
 87            try:
 88                range_type: RangeType = self.validate_range_and_get_type(range_dict)
 89                start_end_tuple: Tuple[int, int] = SnippetParser.get_start_end_tuple(range_dict, range_type)
 90                ranges[range_type] = start_end_tuple
 91            except ValueError as error:
 92                logger.append(error.args[0])
 93        if logger.has_messages():
 94            raise SPDXParsingError([f"Error while parsing snippet ranges: {logger.get_messages()}"])
 95        return ranges
 96
 97    @staticmethod
 98    def get_start_end_tuple(range_dict: Dict, range_type: RangeType) -> Tuple[int, int]:
 99        end_pointer: Dict = range_dict["endPointer"]
100        start_pointer: Dict = range_dict["startPointer"]
101        if range_type == RangeType.BYTE:
102            start: int = start_pointer["offset"]
103            end: int = end_pointer["offset"]
104        else:
105            start: int = start_pointer["lineNumber"]
106            end: int = end_pointer["lineNumber"]
107        return start, end
108
109    def validate_range_and_get_type(self, range_dict: Dict) -> RangeType:
110        if "startPointer" not in range_dict:
111            raise ValueError("Startpointer missing in snippet ranges.")
112        if "endPointer" not in range_dict:
113            raise ValueError("Endpointer missing in snippet ranges.")
114        start_pointer_type: RangeType = self.validate_pointer_and_get_type(range_dict["startPointer"])
115        end_pointer_type: RangeType = self.validate_pointer_and_get_type(range_dict["endPointer"])
116        if start_pointer_type != end_pointer_type:
117            raise ValueError("Type of startpointer is not the same as type of endpointer.")
118        return start_pointer_type
119
120    @staticmethod
121    def validate_pointer_and_get_type(pointer: Dict) -> RangeType:
122        if "offset" in pointer and "lineNumber" in pointer:
123            raise ValueError('Couldn\'t determine type of pointer: "offset" and "lineNumber" provided as key.')
124        if "offset" not in pointer and "lineNumber" not in pointer:
125            raise ValueError('Couldn\'t determine type of pointer: neither "offset" nor "lineNumber" provided as key.')
126        return RangeType.BYTE if "offset" in pointer else RangeType.LINE
127
128    @staticmethod
129    def convert_range_from_str(
130        _range: Tuple[Union[int, str], Union[int, str]]
131    ) -> Tuple[Union[int, str], Union[int, str]]:
132        # XML does not support integers, so we have to convert from string (if possible)
133        if not _range:
134            return _range
135
136        if isinstance(_range[0], str) and _range[0].isdigit():
137            _range = int(_range[0]), _range[1]
138        if isinstance(_range[1], str) and _range[1].isdigit():
139            _range = _range[0], int(_range[1])
140        return _range
class RangeType(enum.Enum):
class RangeType(Enum):
    """Kind of position a snippet range is expressed in.

    The members are used as the keys of the dictionary returned by ``SnippetParser.parse_ranges``.
    """

    # Selected when a range pointer carries the "offset" key.
    BYTE = auto()
    # Selected when a range pointer carries the "lineNumber" key.
    LINE = auto()

Create a collection of name/value pairs.

Example enumeration:

>>> class Color(Enum):
...     RED = 1
...     BLUE = 2
...     GREEN = 3

Access them by:

  • attribute access:
>>> Color.RED
<Color.RED: 1>
  • value lookup:
>>> Color(1)
<Color.RED: 1>
  • name lookup:
>>> Color['RED']
<Color.RED: 1>

Enumerations can be iterated over, and know how many members they have:

>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]

Methods can be added to enumerations, and members can have their own attributes -- see the documentation for details.

BYTE = <RangeType.BYTE: 1>
LINE = <RangeType.LINE: 2>
Inherited Members
enum.Enum
name
value
class SnippetParser:
 26class SnippetParser:
 27    logger: Logger
 28    license_expression_parser = LicenseExpressionParser
 29
 30    def __init__(self):
 31        self.logger = Logger()
 32        self.license_expression_parser = LicenseExpressionParser()
 33
 34    def parse_snippet(self, snippet_dict: Dict) -> Snippet:
 35        logger = Logger()
 36        spdx_id: Optional[str] = snippet_dict.get("SPDXID")
 37        file_spdx_id: Optional[str] = snippet_dict.get("snippetFromFile")
 38        name: Optional[str] = snippet_dict.get("name")
 39
 40        ranges: Dict = parse_field_or_log_error(logger, snippet_dict.get("ranges", []), self.parse_ranges, default={})
 41        byte_range: Optional[Tuple[Union[int, str], Union[int, str]]] = ranges.get(RangeType.BYTE)
 42        line_range: Optional[Tuple[Union[int, str], Union[int, str]]] = ranges.get(RangeType.LINE)
 43        byte_range = self.convert_range_from_str(byte_range)
 44        line_range = self.convert_range_from_str(line_range)
 45
 46        attribution_texts: List[str] = snippet_dict.get("attributionTexts", [])
 47        comment: Optional[str] = snippet_dict.get("comment")
 48        copyright_text: Optional[Union[str, SpdxNoAssertion, SpdxNone]] = parse_field_or_no_assertion_or_none(
 49            snippet_dict.get("copyrightText")
 50        )
 51        license_comment: Optional[str] = snippet_dict.get("licenseComments")
 52        license_concluded: Optional[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]] = parse_field_or_log_error(
 53            logger, snippet_dict.get("licenseConcluded"), self.license_expression_parser.parse_license_expression
 54        )
 55
 56        license_info: List[Union[LicenseExpression], SpdxNoAssertion, SpdxNone] = parse_field_or_log_error(
 57            logger,
 58            snippet_dict.get("licenseInfoInSnippets"),
 59            self.license_expression_parser.parse_license_expression,
 60            field_is_list=True,
 61        )
 62        if logger.has_messages():
 63            raise SPDXParsingError([f"Error while parsing snippet: {logger.get_messages()}"])
 64
 65        snippet = construct_or_raise_parsing_error(
 66            Snippet,
 67            dict(
 68                spdx_id=spdx_id,
 69                name=name,
 70                byte_range=byte_range,
 71                file_spdx_id=file_spdx_id,
 72                line_range=line_range,
 73                attribution_texts=attribution_texts,
 74                comment=comment,
 75                copyright_text=copyright_text,
 76                license_comment=license_comment,
 77                license_concluded=license_concluded,
 78                license_info_in_snippet=license_info,
 79            ),
 80        )
 81
 82        return snippet
 83
 84    def parse_ranges(self, ranges_from_snippet: List[Dict]) -> Dict:
 85        logger = Logger()
 86        ranges = {}
 87        for range_dict in ranges_from_snippet:
 88            try:
 89                range_type: RangeType = self.validate_range_and_get_type(range_dict)
 90                start_end_tuple: Tuple[int, int] = SnippetParser.get_start_end_tuple(range_dict, range_type)
 91                ranges[range_type] = start_end_tuple
 92            except ValueError as error:
 93                logger.append(error.args[0])
 94        if logger.has_messages():
 95            raise SPDXParsingError([f"Error while parsing snippet ranges: {logger.get_messages()}"])
 96        return ranges
 97
 98    @staticmethod
 99    def get_start_end_tuple(range_dict: Dict, range_type: RangeType) -> Tuple[int, int]:
100        end_pointer: Dict = range_dict["endPointer"]
101        start_pointer: Dict = range_dict["startPointer"]
102        if range_type == RangeType.BYTE:
103            start: int = start_pointer["offset"]
104            end: int = end_pointer["offset"]
105        else:
106            start: int = start_pointer["lineNumber"]
107            end: int = end_pointer["lineNumber"]
108        return start, end
109
110    def validate_range_and_get_type(self, range_dict: Dict) -> RangeType:
111        if "startPointer" not in range_dict:
112            raise ValueError("Startpointer missing in snippet ranges.")
113        if "endPointer" not in range_dict:
114            raise ValueError("Endpointer missing in snippet ranges.")
115        start_pointer_type: RangeType = self.validate_pointer_and_get_type(range_dict["startPointer"])
116        end_pointer_type: RangeType = self.validate_pointer_and_get_type(range_dict["endPointer"])
117        if start_pointer_type != end_pointer_type:
118            raise ValueError("Type of startpointer is not the same as type of endpointer.")
119        return start_pointer_type
120
121    @staticmethod
122    def validate_pointer_and_get_type(pointer: Dict) -> RangeType:
123        if "offset" in pointer and "lineNumber" in pointer:
124            raise ValueError('Couldn\'t determine type of pointer: "offset" and "lineNumber" provided as key.')
125        if "offset" not in pointer and "lineNumber" not in pointer:
126            raise ValueError('Couldn\'t determine type of pointer: neither "offset" nor "lineNumber" provided as key.')
127        return RangeType.BYTE if "offset" in pointer else RangeType.LINE
128
129    @staticmethod
130    def convert_range_from_str(
131        _range: Tuple[Union[int, str], Union[int, str]]
132    ) -> Tuple[Union[int, str], Union[int, str]]:
133        # XML does not support integers, so we have to convert from string (if possible)
134        if not _range:
135            return _range
136
137        if isinstance(_range[0], str) and _range[0].isdigit():
138            _range = int(_range[0]), _range[1]
139        if isinstance(_range[1], str) and _range[1].isdigit():
140            _range = _range[0], int(_range[1])
141        return _range
def parse_snippet(self, snippet_dict: dict) -> spdx_tools.spdx.model.snippet.Snippet:
34    def parse_snippet(self, snippet_dict: Dict) -> Snippet:
35        logger = Logger()
36        spdx_id: Optional[str] = snippet_dict.get("SPDXID")
37        file_spdx_id: Optional[str] = snippet_dict.get("snippetFromFile")
38        name: Optional[str] = snippet_dict.get("name")
39
40        ranges: Dict = parse_field_or_log_error(logger, snippet_dict.get("ranges", []), self.parse_ranges, default={})
41        byte_range: Optional[Tuple[Union[int, str], Union[int, str]]] = ranges.get(RangeType.BYTE)
42        line_range: Optional[Tuple[Union[int, str], Union[int, str]]] = ranges.get(RangeType.LINE)
43        byte_range = self.convert_range_from_str(byte_range)
44        line_range = self.convert_range_from_str(line_range)
45
46        attribution_texts: List[str] = snippet_dict.get("attributionTexts", [])
47        comment: Optional[str] = snippet_dict.get("comment")
48        copyright_text: Optional[Union[str, SpdxNoAssertion, SpdxNone]] = parse_field_or_no_assertion_or_none(
49            snippet_dict.get("copyrightText")
50        )
51        license_comment: Optional[str] = snippet_dict.get("licenseComments")
52        license_concluded: Optional[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]] = parse_field_or_log_error(
53            logger, snippet_dict.get("licenseConcluded"), self.license_expression_parser.parse_license_expression
54        )
55
56        license_info: List[Union[LicenseExpression], SpdxNoAssertion, SpdxNone] = parse_field_or_log_error(
57            logger,
58            snippet_dict.get("licenseInfoInSnippets"),
59            self.license_expression_parser.parse_license_expression,
60            field_is_list=True,
61        )
62        if logger.has_messages():
63            raise SPDXParsingError([f"Error while parsing snippet: {logger.get_messages()}"])
64
65        snippet = construct_or_raise_parsing_error(
66            Snippet,
67            dict(
68                spdx_id=spdx_id,
69                name=name,
70                byte_range=byte_range,
71                file_spdx_id=file_spdx_id,
72                line_range=line_range,
73                attribution_texts=attribution_texts,
74                comment=comment,
75                copyright_text=copyright_text,
76                license_comment=license_comment,
77                license_concluded=license_concluded,
78                license_info_in_snippet=license_info,
79            ),
80        )
81
82        return snippet
def parse_ranges(self, ranges_from_snippet: list[dict]) -> dict:
84    def parse_ranges(self, ranges_from_snippet: List[Dict]) -> Dict:
85        logger = Logger()
86        ranges = {}
87        for range_dict in ranges_from_snippet:
88            try:
89                range_type: RangeType = self.validate_range_and_get_type(range_dict)
90                start_end_tuple: Tuple[int, int] = SnippetParser.get_start_end_tuple(range_dict, range_type)
91                ranges[range_type] = start_end_tuple
92            except ValueError as error:
93                logger.append(error.args[0])
94        if logger.has_messages():
95            raise SPDXParsingError([f"Error while parsing snippet ranges: {logger.get_messages()}"])
96        return ranges
@staticmethod
def get_start_end_tuple( range_dict: dict, range_type: RangeType) -> tuple[int, int]:
 98    @staticmethod
 99    def get_start_end_tuple(range_dict: Dict, range_type: RangeType) -> Tuple[int, int]:
100        end_pointer: Dict = range_dict["endPointer"]
101        start_pointer: Dict = range_dict["startPointer"]
102        if range_type == RangeType.BYTE:
103            start: int = start_pointer["offset"]
104            end: int = end_pointer["offset"]
105        else:
106            start: int = start_pointer["lineNumber"]
107            end: int = end_pointer["lineNumber"]
108        return start, end
def validate_range_and_get_type( self, range_dict: dict) -> RangeType:
110    def validate_range_and_get_type(self, range_dict: Dict) -> RangeType:
111        if "startPointer" not in range_dict:
112            raise ValueError("Startpointer missing in snippet ranges.")
113        if "endPointer" not in range_dict:
114            raise ValueError("Endpointer missing in snippet ranges.")
115        start_pointer_type: RangeType = self.validate_pointer_and_get_type(range_dict["startPointer"])
116        end_pointer_type: RangeType = self.validate_pointer_and_get_type(range_dict["endPointer"])
117        if start_pointer_type != end_pointer_type:
118            raise ValueError("Type of startpointer is not the same as type of endpointer.")
119        return start_pointer_type
@staticmethod
def validate_pointer_and_get_type( pointer: dict) -> RangeType:
121    @staticmethod
122    def validate_pointer_and_get_type(pointer: Dict) -> RangeType:
123        if "offset" in pointer and "lineNumber" in pointer:
124            raise ValueError('Couldn\'t determine type of pointer: "offset" and "lineNumber" provided as key.')
125        if "offset" not in pointer and "lineNumber" not in pointer:
126            raise ValueError('Couldn\'t determine type of pointer: neither "offset" nor "lineNumber" provided as key.')
127        return RangeType.BYTE if "offset" in pointer else RangeType.LINE
@staticmethod
def convert_range_from_str( _range: tuple[typing.Union[int, str], typing.Union[int, str]]) -> tuple[typing.Union[int, str], typing.Union[int, str]]:
129    @staticmethod
130    def convert_range_from_str(
131        _range: Tuple[Union[int, str], Union[int, str]]
132    ) -> Tuple[Union[int, str], Union[int, str]]:
133        # XML does not support integers, so we have to convert from string (if possible)
134        if not _range:
135            return _range
136
137        if isinstance(_range[0], str) and _range[0].isdigit():
138            _range = int(_range[0]), _range[1]
139        if isinstance(_range[1], str) and _range[1].isdigit():
140            _range = _range[0], int(_range[1])
141        return _range