spdx_tools.spdx.parser.tagvalue.parser

  1# Copyright (c) 2014 Ahmed H. Ismail
  2# Copyright (c) 2023 spdx contributors
  3# SPDX-License-Identifier: Apache-2.0
  4# Licensed under the Apache License, Version 2.0 (the "License");
  5# you may not use this file except in compliance with the License.
  6# You may obtain a copy of the License at
  7#     http://www.apache.org/licenses/LICENSE-2.0
  8# Unless required by applicable law or agreed to in writing, software
  9# distributed under the License is distributed on an "AS IS" BASIS,
 10# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 11# See the License for the specific language governing permissions and
 12# limitations under the License.
 13
 14import re
 15
 16from beartype.typing import Any, Dict, List
 17from license_expression import ExpressionError, get_spdx_licensing
 18from ply import yacc
 19from ply.yacc import LRParser
 20
 21from spdx_tools.spdx.datetime_conversions import datetime_from_str
 22from spdx_tools.spdx.model import (
 23    Annotation,
 24    AnnotationType,
 25    CreationInfo,
 26    Document,
 27    ExternalDocumentRef,
 28    ExternalPackageRef,
 29    ExternalPackageRefCategory,
 30    ExtractedLicensingInfo,
 31    File,
 32    FileType,
 33    Package,
 34    PackagePurpose,
 35    PackageVerificationCode,
 36    Relationship,
 37    RelationshipType,
 38    Snippet,
 39    SpdxNoAssertion,
 40    SpdxNone,
 41    Version,
 42)
 43from spdx_tools.spdx.parser.actor_parser import ActorParser
 44from spdx_tools.spdx.parser.error import SPDXParsingError
 45from spdx_tools.spdx.parser.logger import Logger
 46from spdx_tools.spdx.parser.parsing_functions import (
 47    construct_or_raise_parsing_error,
 48    raise_parsing_error_if_logger_has_messages,
 49)
 50from spdx_tools.spdx.parser.tagvalue.helper_methods import (
 51    TAG_DATA_MODEL_FIELD,
 52    grammar_rule,
 53    parse_checksum,
 54    set_value,
 55    str_from_text,
 56)
 57from spdx_tools.spdx.parser.tagvalue.lexer import SPDXLexer
 58
 59CLASS_MAPPING = dict(
 60    File="files",
 61    Annotation="annotations",
 62    Relationship="relationships",
 63    Snippet="snippets",
 64    Package="packages",
 65    ExtractedLicensingInfo="extracted_licensing_info",
 66)
 67ELEMENT_EXPECTED_START_TAG = dict(
 68    File="FileName",
 69    Annotation="Annotator",
 70    Relationship="Relationship",
 71    Snippet="SnippetSPDXID",
 72    Package="PackageName",
 73    ExtractedLicensingInfo="LicenseID",
 74)
 75
 76
class Parser:
    """Parser for SPDX documents in tag-value format, driven by a PLY yacc grammar.

    The grammar rules are the ``p_*`` methods; each one stores the parsed value either in the creation info
    or in the element that is currently being collected. ``parse`` is the entry point and returns a Document.
    """

    # Token names of the grammar, taken from SPDXLexer.tokens.
    tokens: List[str]
    # Parser-wide logger; its messages are raised together at the end of parse().
    logger: Logger
    # Values collected for the element currently being parsed, plus its own "logger" and, once started, "class".
    current_element: Dict[str, Any]
    # Values collected for the CreationInfo, plus its own "logger".
    creation_info: Dict[str, Any]
    # Finished elements, grouped under the keys from CLASS_MAPPING; used to construct the Document.
    elements_built: Dict[str, Any]
    lex: SPDXLexer
    yacc: LRParser
 85
 86    def __init__(self, **kwargs):
 87        self.tokens = SPDXLexer.tokens
 88        self.logger = Logger()
 89        self.current_element = {"logger": Logger()}
 90        self.creation_info = {"logger": Logger()}
 91        self.elements_built = dict()
 92        self.lex = SPDXLexer()
 93        self.lex.build(reflags=re.UNICODE)
 94        self.yacc = yacc.yacc(module=self, **kwargs)
 95
    @grammar_rule("start : start attrib ")
    def p_start_start_attrib(self, p):
        # Recursive case of the start symbol: an already parsed sequence followed by one more attribute.
        # Nothing to do here; the rules for the individual attributes store the values.
        pass
 99
    @grammar_rule("start : attrib ")
    def p_start_attrib(self, p):
        # Base case of the start symbol: a single attribute. No action needed.
        pass
103
    @grammar_rule(
        "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| "
        "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n"
        # attributes for file
        "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n"
        "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n"
        "| file_contributor\n"
        # attributes for annotation
        "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n"
        # attributes for relationship
        "| relationship\n"
        # attributes for snippet
        "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n"
        "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n"
        "| snippet_byte_range\n| snippet_line_range\n"
        # attributes for package
        "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n"
        "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n"
        "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n"
        "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n"
        "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n"
        # attributes for extracted licensing info
        "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n"
        "| unknown_tag "
    )
    def p_attrib(self, p):
        # An attribute is any one of the document-level or element-level tag rules listed above.
        # The alternatives carry their own actions, so this rule itself does nothing.
        pass
131
132    # general parsing methods
133    @grammar_rule(
134        "license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n "
135        "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n "
136        "extracted_text : LICENSE_TEXT error\n "
137        "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n "
138        "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n "
139        "file_license_comment : FILE_LICENSE_COMMENT error\n "
140        "file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n "
141        "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n "
142        "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n "
143        "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n "
144        "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n "
145        "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n "
146        "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n "
147        "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n "
148        "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n "
149        "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n "
150        "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n "
151        "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n "
152        "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n "
153        "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n "
154        "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n "
155        "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n "
156        "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n "
157        "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n "
158        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n "
159        "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n "
160        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n "
161        "snippet_license_info : SNIPPET_LICENSE_INFO error\n "
162        "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n "
163        "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n "
164        "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
165        "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error"
166    )
167    def p_current_element_error(self, p):
168        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
169            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
170        self.current_element["logger"].append(
171            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
172        )
173
174    @grammar_rule(
175        "license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n "
176        "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n "
177        "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n "
178        "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
179        "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n "
180        "file_comment : FILE_COMMENT text_or_line\n "
181        "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
182        "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n "
183        "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n "
184        "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n "
185        "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n "
186        "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n "
187        "pkg_comment : PKG_COMMENT text_or_line\n "
188        "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
189        "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n "
190        "pkg_file_name : PKG_FILE_NAME LINE\n "
191        "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
192        "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n "
193        "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n "
194        "snippet_comment : SNIPPET_COMMENT text_or_line\n "
195        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
196        "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n "
197        "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n "
198        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
199        "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
200        "annotation_comment : ANNOTATION_COMMENT text_or_line"
201    )
202    def p_generic_value(self, p):
203        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
204            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
205        if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)):
206            set_value(p, self.current_element)
207
208    @grammar_rule(
209        "unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
210        "| UNKNOWN_TAG"
211    )
212    def p_unknown_tag(self, p):
213        self.logger.append(f"Unknown tag provided in line {p.lineno(1)}")
214
    @grammar_rule("text_or_line : TEXT\n line_or_no_assertion_or_none : TEXT")
    def p_text(self, p):
        # The value is a TEXT token; str_from_text turns it into the plain string value
        # (presumably by removing the surrounding <text> markup - confirm in helper_methods).
        p[0] = str_from_text(p[1])
218
    @grammar_rule(
        "text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : LINE\n"
        "text_or_line : NO_ASSERTION\n text_or_line : NONE"
    )
    def p_line(self, p):
        # The token value is passed through unchanged. For text_or_line this also covers NO_ASSERTION and NONE,
        # which are therefore kept as their token values rather than converted to SpdxNoAssertion / SpdxNone.
        p[0] = p[1]
225
    @grammar_rule(
        "license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n"
        "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION"
    )
    def p_no_assertion(self, p):
        # For these value kinds a NO_ASSERTION token is represented by an SpdxNoAssertion instance.
        p[0] = SpdxNoAssertion()
232
    @grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE")
    def p_none(self, p):
        # For these value kinds a NONE token is represented by an SpdxNone instance.
        p[0] = SpdxNone()
236
237    @grammar_rule("license_or_no_assertion_or_none : LINE")
238    def p_license(self, p):
239        try:
240            p[0] = get_spdx_licensing().parse(p[1])
241        except ExpressionError as err:
242            error_message = f"Error while parsing license expression: {p[1]}"
243            if err.args:
244                error_message += f": {err.args[0]}"
245            self.current_element["logger"].append(error_message)
246
    @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE")
    def p_actor_values(self, p):
        # A person or organization value is converted by ActorParser.parse_actor.
        p[0] = ActorParser.parse_actor(p[1])
250
251    @grammar_rule("spdx_id : SPDX_ID LINE")
252    def p_spdx_id(self, p):
253        # As all SPDX Ids share the same tag, there is no knowing which spdx_id belongs to the document.
254        # We assume that to be the first spdx_id we encounter. As the specification does not explicitly require this,
255        # our approach might lead to unwanted behavior when the document's SPDX Id is defined later in the document.
256        if "spdx_id" in self.creation_info:
257            self.current_element["spdx_id"] = p[2]
258        else:
259            self.creation_info["spdx_id"] = p[2]
260
261    # parsing methods for creation info / document level
262
263    @grammar_rule(
264        "license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n "
265        "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n "
266        "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n "
267        "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error"
268    )
269    def p_creation_info_value_error(self, p):
270        self.creation_info["logger"].append(
271            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
272        )
273
    @grammar_rule(
        "document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n "
        "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n "
        "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE"
    )
    def p_generic_value_creation_info(self, p):
        # Shared action for document-level tags whose value is stored unchanged in the creation info.
        set_value(p, self.creation_info)
281
    @grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE")
    def p_license_list_version(self, p):
        # The value is converted with Version.from_string before it is stored in the creation info.
        set_value(p, self.creation_info, method_to_apply=Version.from_string)
285
286    @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
287    def p_external_document_ref(self, p):
288        external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")
289        external_doc_ref_match = external_doc_ref_regex.match(p[2])
290        if not external_doc_ref_match:
291            self.creation_info["logger"].append(
292                f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}"
293            )
294            return
295        try:
296            document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ")
297        except ValueError:
298            self.creation_info["logger"].append(
299                f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
300                f"document_ref_id and document_uri. Line: {p.lineno(1)}"
301            )
302            return
303        checksum = parse_checksum(external_doc_ref_match.group(2).strip())
304        external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
305        self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)
306
307    @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE")
308    def p_creator(self, p):
309        self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2]))
310
    @grammar_rule("created : CREATED ISO8601_DATE")
    def p_created(self, p):
        # The date string is converted with datetime_from_str before it is stored in the creation info.
        set_value(p, self.creation_info, method_to_apply=datetime_from_str)
314
315    # parsing methods for extracted licensing info
316
317    @grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE")
318    def p_extracted_cross_reference(self, p):
319        if self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)):
320            self.current_element.setdefault("cross_references", []).append(p[2])
321
322    # parsing methods for file
323
324    @grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE")
325    def p_file_contributor(self, p):
326        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
327            self.current_element.setdefault("contributors", []).append(p[2])
328
329    @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line")
330    def p_file_attribution_text(self, p):
331        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
332            self.current_element.setdefault("attribution_texts", []).append(p[2])
333
334    @grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none")
335    def p_file_license_info(self, p):
336        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
337            self.current_element.setdefault("license_info_in_file", []).append(p[2])
338
339    @grammar_rule("file_type : FILE_TYPE LINE")
340    def p_file_type(self, p):
341        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
342            return
343        try:
344            file_type = FileType[p[2].strip()]
345        except KeyError:
346            self.current_element["logger"].append(f"Invalid FileType: {p[2]}. Line {p.lineno(1)}")
347            return
348        self.current_element.setdefault("file_types", []).append(file_type)
349
350    @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM")
351    def p_file_checksum(self, p):
352        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
353            return
354        checksum = parse_checksum(p[2])
355        self.current_element.setdefault("checksums", []).append(checksum)
356
357    # parsing methods for package
358
359    @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line")
360    def p_pkg_attribution_text(self, p):
361        self.check_that_current_element_matches_class_for_value(Package, p.lineno(1))
362        self.current_element.setdefault("attribution_texts", []).append(p[2])
363
364    @grammar_rule(
365        "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE"
366    )
367    def p_pkg_external_refs(self, p):
368        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
369            return
370        try:
371            category, reference_type, locator = p[2].split(" ")
372        except ValueError:
373            self.current_element["logger"].append(
374                f"Couldn't split PackageExternalRef in category, reference_type and locator. Line: {p.lineno(1)}"
375            )
376            return
377        comment = None
378        if len(p) == 5:
379            comment = p[4]
380        try:
381            category = ExternalPackageRefCategory[category.replace("-", "_")]
382        except KeyError:
383            self.current_element["logger"].append(
384                f"Invalid ExternalPackageRefCategory: {category}. Line: {p.lineno(1)}"
385            )
386            return
387        try:
388            external_package_ref = construct_or_raise_parsing_error(
389                ExternalPackageRef,
390                {"category": category, "reference_type": reference_type, "locator": locator, "comment": comment},
391            )
392        except SPDXParsingError as err:
393            self.current_element["logger"].append(err.get_messages())
394            return
395        self.current_element.setdefault("external_references", []).append(external_package_ref)
396
397    @grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none")
398    def p_pkg_license_info_from_file(self, p):
399        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
400            self.current_element.setdefault("license_info_from_files", []).append(p[2])
401
402    @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM")
403    def p_pkg_checksum(self, p):
404        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
405            return
406        checksum = parse_checksum(p[2])
407        self.current_element.setdefault("checksums", []).append(checksum)
408
409    @grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE")
410    def p_pkg_verification_code(self, p):
411        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
412            return
413
414        if "verification_code" in self.current_element:
415            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
416            return
417        verif_code_regex = re.compile(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", re.UNICODE)
418        verif_code_code_grp = 1
419        verif_code_exc_files_grp = 3
420        match = verif_code_regex.match(p[2])
421        if not match:
422            self.current_element["logger"].append(
423                f"Error while parsing {p[1]}: Value did not match expected format. Line: {p.lineno(1)}"
424            )
425            return
426        value = match.group(verif_code_code_grp)
427        excluded_files = None
428        if match.group(verif_code_exc_files_grp):
429            excluded_files = match.group(verif_code_exc_files_grp).split(",")
430        self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files)
431
432    @grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE")
433    def p_pkg_files_analyzed(self, p):
434        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
435            return
436        if "files_analyzed" in self.current_element:
437            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
438            return
439        if p[2] == "true":
440            self.current_element["files_analyzed"] = True
441        elif p[2] == "false":
442            self.current_element["files_analyzed"] = False
443        else:
444            self.current_element["logger"].append(
445                f'The value of FilesAnalyzed must be either "true" or "false", but is: {p[2]}'
446            )
447
448    @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE")
449    def p_primary_package_purpose(self, p):
450        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
451            set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")])
452
453    @grammar_rule(
454        "built_date : BUILT_DATE ISO8601_DATE\n release_date : RELEASE_DATE ISO8601_DATE\n "
455        "valid_until_date : VALID_UNTIL_DATE ISO8601_DATE"
456    )
457    def p_package_dates(self, p):
458        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
459            set_value(p, self.current_element, method_to_apply=datetime_from_str)
460
461    # parsing methods for snippet
462
463    @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line")
464    def p_snippet_attribution_text(self, p):
465        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
466            self.current_element.setdefault("attribution_texts", []).append(p[2])
467
468    @grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none")
469    def p_snippet_license_info(self, p):
470        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
471            self.current_element.setdefault("license_info_in_snippet", []).append(p[2])
472
473    @grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE")
474    def p_snippet_range(self, p):
475        if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
476            return
477
478        argument_name = TAG_DATA_MODEL_FIELD[p[1]][1]
479        if argument_name in self.current_element:
480            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
481            return
482        range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE)
483        if not range_re.match(p[2].strip()):
484            self.current_element["logger"].append(
485                f"Value for {p[1]} doesn't match valid range pattern. " f"Line: {p.lineno(1)}"
486            )
487            return
488        startpoint = int(p[2].split(":")[0])
489        endpoint = int(p[2].split(":")[-1])
490        self.current_element[argument_name] = startpoint, endpoint
491
492    # parsing methods for annotation
493
    @grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| ANNOTATOR TOOL_VALUE\n| ANNOTATOR ORGANIZATION_VALUE")
    def p_annotator(self, p):
        # The annotator tag opens a new Annotation element: the previous element is finished first, then the
        # value, converted by ActorParser.parse_actor, is stored in the new element.
        self.initialize_new_current_element(Annotation)
        set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor)
498
499    @grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
500    def p_annotation_date(self, p):
501        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
502            set_value(p, self.current_element, method_to_apply=datetime_from_str)
503
504    @grammar_rule("annotation_type : ANNOTATION_TYPE LINE")
505    def p_annotation_type(self, p):
506        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
507            set_value(p, self.current_element, method_to_apply=lambda x: AnnotationType[x])
508
509    # parsing methods for relationship
510
511    @grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n " "| RELATIONSHIP LINE")
512    def p_relationship(self, p):
513        self.initialize_new_current_element(Relationship)
514        try:
515            spdx_element_id, relationship_type, related_spdx_element_id = p[2].split(" ")
516        except ValueError:
517            self.current_element["logger"].append(
518                f"Relationship couldn't be split in spdx_element_id, relationship_type and "
519                f"related_spdx_element. Line: {p.lineno(1)}"
520            )
521            return
522        try:
523            self.current_element["relationship_type"] = RelationshipType[relationship_type]
524        except KeyError:
525            self.current_element["logger"].append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}")
526        if related_spdx_element_id == "NONE":
527            related_spdx_element_id = SpdxNone()
528        if related_spdx_element_id == "NOASSERTION":
529            related_spdx_element_id = SpdxNoAssertion()
530        self.current_element["related_spdx_element_id"] = related_spdx_element_id
531        self.current_element["spdx_element_id"] = spdx_element_id
532        if len(p) == 5:
533            self.current_element["comment"] = p[4]
534
    def p_error(self, p):
        # Syntax error hook of the PLY parser. Intentionally empty: invalid values are reported by the
        # "<tag> error" productions of this grammar, which log a message on the matching logger.
        pass
537
538    def parse(self, text):
539        # entry point for the tag-value parser
540        self.yacc.parse(text, lexer=self.lex)
541        # this constructs the last remaining element; all other elements are constructed at the start of
542        # their subsequent element
543        self.construct_current_element()
544
545        # To be able to parse creation info values if they appear in between other elements, e.g. packages, we use
546        # two different dictionaries to collect the creation info and all other elements. Therefore, we have a separate
547        # logger for the creation info whose messages we need to add to the main logger to than raise all collected
548        # messages at once.
549        creation_info_logger = self.creation_info.pop("logger")
550        if creation_info_logger.has_messages():
551            self.logger.extend([f"Error while parsing CreationInfo: {creation_info_logger.get_messages()}"])
552
553        raise_parsing_error_if_logger_has_messages(self.logger)
554        creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
555        self.elements_built["creation_info"] = creation_info
556        document = construct_or_raise_parsing_error(Document, self.elements_built)
557        return document
558
    def initialize_new_current_element(self, clazz: Any):
        """Finish the element collected so far and start collecting a new element of class ``clazz``."""
        # Constructing the previous element also resets self.current_element (when there was one to construct).
        self.construct_current_element()
        self.current_element["class"] = clazz
562
563    def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool:
564        if "class" not in self.current_element or expected_class != self.current_element["class"]:
565            self.logger.append(
566                f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to "
567                f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. "
568                f"Line: {line_number}"
569            )
570            return False
571        return True
572
573    def construct_current_element(self):
574        if "class" not in self.current_element:
575            # This happens when the first element is initialized via initialize_new_current_element() or if the first
576            # element is missing its expected starting tag. In both cases we are unable to construct an element.
577            return
578
579        clazz = self.current_element.pop("class")
580        try:
581            raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), clazz.__name__)
582            self.elements_built.setdefault(CLASS_MAPPING[clazz.__name__], []).append(
583                construct_or_raise_parsing_error(clazz, self.current_element)
584            )
585            if clazz == File:
586                self.check_for_preceding_package_and_build_contains_relationship()
587        except SPDXParsingError as err:
588            self.logger.extend(err.get_messages())
589        self.current_element = {"logger": Logger()}
590
591    def check_for_preceding_package_and_build_contains_relationship(self):
592        file_spdx_id = self.current_element["spdx_id"]
593        if "packages" not in self.elements_built:
594            return
595        # We assume that all files that are not contained in a package precede any package information. Any file
596        # information that follows any package information is assigned to the last parsed package by creating a
597        # corresponding contains relationship.
598        # (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2)
599        if not self.elements_built["packages"]:
600            self.logger.append(
601                f"Error while building contains relationship for file {file_spdx_id}, "
602                f"preceding package was not parsed successfully."
603            )
604            return
605        package_spdx_id = self.elements_built["packages"][-1].spdx_id
606        relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id)
607        if relationship not in self.elements_built.setdefault("relationships", []):
608            self.elements_built["relationships"].append(relationship)
CLASS_MAPPING = {'File': 'files', 'Annotation': 'annotations', 'Relationship': 'relationships', 'Snippet': 'snippets', 'Package': 'packages', 'ExtractedLicensingInfo': 'extracted_licensing_info'}
ELEMENT_EXPECTED_START_TAG = {'File': 'FileName', 'Annotation': 'Annotator', 'Relationship': 'Relationship', 'Snippet': 'SnippetSPDXID', 'Package': 'PackageName', 'ExtractedLicensingInfo': 'LicenseID'}
class Parser:
 78class Parser:
 79    tokens: List[str]
 80    logger: Logger
 81    current_element: Dict[str, Any]
 82    creation_info: Dict[str, Any]
 83    elements_built: Dict[str, Any]
 84    lex: SPDXLexer
 85    yacc: LRParser
 86
 87    def __init__(self, **kwargs):
 88        self.tokens = SPDXLexer.tokens
 89        self.logger = Logger()
 90        self.current_element = {"logger": Logger()}
 91        self.creation_info = {"logger": Logger()}
 92        self.elements_built = dict()
 93        self.lex = SPDXLexer()
 94        self.lex.build(reflags=re.UNICODE)
 95        self.yacc = yacc.yacc(module=self, **kwargs)
 96
 97    @grammar_rule("start : start attrib ")
 98    def p_start_start_attrib(self, p):
 99        pass
100
101    @grammar_rule("start : attrib ")
102    def p_start_attrib(self, p):
103        pass
104
105    @grammar_rule(
106        "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| "
107        "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n"
108        # attributes for file
109        "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n"
110        "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n"
111        "| file_contributor\n"
112        # attributes for annotation
113        "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n"
114        # attributes for relationship
115        "| relationship\n"
116        # attributes for snippet
117        "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n"
118        "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n"
119        "| snippet_byte_range\n| snippet_line_range\n"
120        # attributes for package
121        "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n"
122        "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n"
123        "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n"
124        "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n"
125        "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n"
126        # attributes for extracted licensing info
127        "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n"
128        "| unknown_tag "
129    )
130    def p_attrib(self, p):
131        pass
132
133    # general parsing methods
134    @grammar_rule(
135        "license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n "
136        "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n "
137        "extracted_text : LICENSE_TEXT error\n "
138        "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n "
139        "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n "
140        "file_license_comment : FILE_LICENSE_COMMENT error\n "
141        "file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n "
142        "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n "
143        "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n "
144        "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n "
145        "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n "
146        "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n "
147        "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n "
148        "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n "
149        "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n "
150        "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n "
151        "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n "
152        "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n "
153        "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n "
154        "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n "
155        "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n "
156        "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n "
157        "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n "
158        "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n "
159        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n "
160        "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n "
161        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n "
162        "snippet_license_info : SNIPPET_LICENSE_INFO error\n "
163        "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n "
164        "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n "
165        "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
166        "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error"
167    )
168    def p_current_element_error(self, p):
169        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
170            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
171        self.current_element["logger"].append(
172            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
173        )
174
175    @grammar_rule(
176        "license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n "
177        "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n "
178        "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n "
179        "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
180        "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n "
181        "file_comment : FILE_COMMENT text_or_line\n "
182        "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
183        "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n "
184        "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n "
185        "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n "
186        "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n "
187        "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n "
188        "pkg_comment : PKG_COMMENT text_or_line\n "
189        "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
190        "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n "
191        "pkg_file_name : PKG_FILE_NAME LINE\n "
192        "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
193        "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n "
194        "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n "
195        "snippet_comment : SNIPPET_COMMENT text_or_line\n "
196        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
197        "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n "
198        "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n "
199        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
200        "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
201        "annotation_comment : ANNOTATION_COMMENT text_or_line"
202    )
203    def p_generic_value(self, p):
204        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
205            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
206        if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)):
207            set_value(p, self.current_element)
208
209    @grammar_rule(
210        "unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
211        "| UNKNOWN_TAG"
212    )
213    def p_unknown_tag(self, p):
214        self.logger.append(f"Unknown tag provided in line {p.lineno(1)}")
215
216    @grammar_rule("text_or_line : TEXT\n line_or_no_assertion_or_none : TEXT")
217    def p_text(self, p):
218        p[0] = str_from_text(p[1])
219
220    @grammar_rule(
221        "text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : LINE\n"
222        "text_or_line : NO_ASSERTION\n text_or_line : NONE"
223    )
224    def p_line(self, p):
225        p[0] = p[1]
226
227    @grammar_rule(
228        "license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n"
229        "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION"
230    )
231    def p_no_assertion(self, p):
232        p[0] = SpdxNoAssertion()
233
234    @grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE")
235    def p_none(self, p):
236        p[0] = SpdxNone()
237
238    @grammar_rule("license_or_no_assertion_or_none : LINE")
239    def p_license(self, p):
240        try:
241            p[0] = get_spdx_licensing().parse(p[1])
242        except ExpressionError as err:
243            error_message = f"Error while parsing license expression: {p[1]}"
244            if err.args:
245                error_message += f": {err.args[0]}"
246            self.current_element["logger"].append(error_message)
247
248    @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE")
249    def p_actor_values(self, p):
250        p[0] = ActorParser.parse_actor(p[1])
251
252    @grammar_rule("spdx_id : SPDX_ID LINE")
253    def p_spdx_id(self, p):
254        # As all SPDX Ids share the same tag, there is no knowing which spdx_id belongs to the document.
255        # We assume that to be the first spdx_id we encounter. As the specification does not explicitly require this,
256        # our approach might lead to unwanted behavior when the document's SPDX Id is defined later in the document.
257        if "spdx_id" in self.creation_info:
258            self.current_element["spdx_id"] = p[2]
259        else:
260            self.creation_info["spdx_id"] = p[2]
261
262    # parsing methods for creation info / document level
263
264    @grammar_rule(
265        "license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n "
266        "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n "
267        "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n "
268        "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error"
269    )
270    def p_creation_info_value_error(self, p):
271        self.creation_info["logger"].append(
272            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
273        )
274
275    @grammar_rule(
276        "document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n "
277        "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n "
278        "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE"
279    )
280    def p_generic_value_creation_info(self, p):
281        set_value(p, self.creation_info)
282
283    @grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE")
284    def p_license_list_version(self, p):
285        set_value(p, self.creation_info, method_to_apply=Version.from_string)
286
287    @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
288    def p_external_document_ref(self, p):
289        external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")
290        external_doc_ref_match = external_doc_ref_regex.match(p[2])
291        if not external_doc_ref_match:
292            self.creation_info["logger"].append(
293                f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}"
294            )
295            return
296        try:
297            document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ")
298        except ValueError:
299            self.creation_info["logger"].append(
300                f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
301                f"document_ref_id and document_uri. Line: {p.lineno(1)}"
302            )
303            return
304        checksum = parse_checksum(external_doc_ref_match.group(2).strip())
305        external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
306        self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)
307
308    @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE")
309    def p_creator(self, p):
310        self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2]))
311
312    @grammar_rule("created : CREATED ISO8601_DATE")
313    def p_created(self, p):
314        set_value(p, self.creation_info, method_to_apply=datetime_from_str)
315
316    # parsing methods for extracted licensing info
317
318    @grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE")
319    def p_extracted_cross_reference(self, p):
320        if self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)):
321            self.current_element.setdefault("cross_references", []).append(p[2])
322
323    # parsing methods for file
324
325    @grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE")
326    def p_file_contributor(self, p):
327        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
328            self.current_element.setdefault("contributors", []).append(p[2])
329
330    @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line")
331    def p_file_attribution_text(self, p):
332        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
333            self.current_element.setdefault("attribution_texts", []).append(p[2])
334
335    @grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none")
336    def p_file_license_info(self, p):
337        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
338            self.current_element.setdefault("license_info_in_file", []).append(p[2])
339
340    @grammar_rule("file_type : FILE_TYPE LINE")
341    def p_file_type(self, p):
342        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
343            return
344        try:
345            file_type = FileType[p[2].strip()]
346        except KeyError:
347            self.current_element["logger"].append(f"Invalid FileType: {p[2]}. Line {p.lineno(1)}")
348            return
349        self.current_element.setdefault("file_types", []).append(file_type)
350
351    @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM")
352    def p_file_checksum(self, p):
353        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
354            return
355        checksum = parse_checksum(p[2])
356        self.current_element.setdefault("checksums", []).append(checksum)
357
358    # parsing methods for package
359
360    @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line")
361    def p_pkg_attribution_text(self, p):
362        self.check_that_current_element_matches_class_for_value(Package, p.lineno(1))
363        self.current_element.setdefault("attribution_texts", []).append(p[2])
364
365    @grammar_rule(
366        "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE"
367    )
368    def p_pkg_external_refs(self, p):
369        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
370            return
371        try:
372            category, reference_type, locator = p[2].split(" ")
373        except ValueError:
374            self.current_element["logger"].append(
375                f"Couldn't split PackageExternalRef in category, reference_type and locator. Line: {p.lineno(1)}"
376            )
377            return
378        comment = None
379        if len(p) == 5:
380            comment = p[4]
381        try:
382            category = ExternalPackageRefCategory[category.replace("-", "_")]
383        except KeyError:
384            self.current_element["logger"].append(
385                f"Invalid ExternalPackageRefCategory: {category}. Line: {p.lineno(1)}"
386            )
387            return
388        try:
389            external_package_ref = construct_or_raise_parsing_error(
390                ExternalPackageRef,
391                {"category": category, "reference_type": reference_type, "locator": locator, "comment": comment},
392            )
393        except SPDXParsingError as err:
394            self.current_element["logger"].append(err.get_messages())
395            return
396        self.current_element.setdefault("external_references", []).append(external_package_ref)
397
398    @grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none")
399    def p_pkg_license_info_from_file(self, p):
400        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
401            self.current_element.setdefault("license_info_from_files", []).append(p[2])
402
403    @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM")
404    def p_pkg_checksum(self, p):
405        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
406            return
407        checksum = parse_checksum(p[2])
408        self.current_element.setdefault("checksums", []).append(checksum)
409
410    @grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE")
411    def p_pkg_verification_code(self, p):
412        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
413            return
414
415        if "verification_code" in self.current_element:
416            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
417            return
418        verif_code_regex = re.compile(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", re.UNICODE)
419        verif_code_code_grp = 1
420        verif_code_exc_files_grp = 3
421        match = verif_code_regex.match(p[2])
422        if not match:
423            self.current_element["logger"].append(
424                f"Error while parsing {p[1]}: Value did not match expected format. Line: {p.lineno(1)}"
425            )
426            return
427        value = match.group(verif_code_code_grp)
428        excluded_files = None
429        if match.group(verif_code_exc_files_grp):
430            excluded_files = match.group(verif_code_exc_files_grp).split(",")
431        self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files)
432
433    @grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE")
434    def p_pkg_files_analyzed(self, p):
435        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
436            return
437        if "files_analyzed" in self.current_element:
438            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
439            return
440        if p[2] == "true":
441            self.current_element["files_analyzed"] = True
442        elif p[2] == "false":
443            self.current_element["files_analyzed"] = False
444        else:
445            self.current_element["logger"].append(
446                f'The value of FilesAnalyzed must be either "true" or "false", but is: {p[2]}'
447            )
448
449    @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE")
450    def p_primary_package_purpose(self, p):
451        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
452            set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")])
453
454    @grammar_rule(
455        "built_date : BUILT_DATE ISO8601_DATE\n release_date : RELEASE_DATE ISO8601_DATE\n "
456        "valid_until_date : VALID_UNTIL_DATE ISO8601_DATE"
457    )
458    def p_package_dates(self, p):
459        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
460            set_value(p, self.current_element, method_to_apply=datetime_from_str)
461
462    # parsing methods for snippet
463
464    @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line")
465    def p_snippet_attribution_text(self, p):
466        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
467            self.current_element.setdefault("attribution_texts", []).append(p[2])
468
469    @grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none")
470    def p_snippet_license_info(self, p):
471        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
472            self.current_element.setdefault("license_info_in_snippet", []).append(p[2])
473
474    @grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE")
475    def p_snippet_range(self, p):
476        if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
477            return
478
479        argument_name = TAG_DATA_MODEL_FIELD[p[1]][1]
480        if argument_name in self.current_element:
481            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
482            return
483        range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE)
484        if not range_re.match(p[2].strip()):
485            self.current_element["logger"].append(
486                f"Value for {p[1]} doesn't match valid range pattern. " f"Line: {p.lineno(1)}"
487            )
488            return
489        startpoint = int(p[2].split(":")[0])
490        endpoint = int(p[2].split(":")[-1])
491        self.current_element[argument_name] = startpoint, endpoint
492
493    # parsing methods for annotation
494
495    @grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| ANNOTATOR TOOL_VALUE\n| ANNOTATOR ORGANIZATION_VALUE")
496    def p_annotator(self, p):
497        self.initialize_new_current_element(Annotation)
498        set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor)
499
500    @grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
501    def p_annotation_date(self, p):
502        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
503            set_value(p, self.current_element, method_to_apply=datetime_from_str)
504
505    @grammar_rule("annotation_type : ANNOTATION_TYPE LINE")
506    def p_annotation_type(self, p):
507        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
508            set_value(p, self.current_element, method_to_apply=lambda x: AnnotationType[x])
509
510    # parsing methods for relationship
511
512    @grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n " "| RELATIONSHIP LINE")
513    def p_relationship(self, p):
514        self.initialize_new_current_element(Relationship)
515        try:
516            spdx_element_id, relationship_type, related_spdx_element_id = p[2].split(" ")
517        except ValueError:
518            self.current_element["logger"].append(
519                f"Relationship couldn't be split in spdx_element_id, relationship_type and "
520                f"related_spdx_element. Line: {p.lineno(1)}"
521            )
522            return
523        try:
524            self.current_element["relationship_type"] = RelationshipType[relationship_type]
525        except KeyError:
526            self.current_element["logger"].append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}")
527        if related_spdx_element_id == "NONE":
528            related_spdx_element_id = SpdxNone()
529        if related_spdx_element_id == "NOASSERTION":
530            related_spdx_element_id = SpdxNoAssertion()
531        self.current_element["related_spdx_element_id"] = related_spdx_element_id
532        self.current_element["spdx_element_id"] = spdx_element_id
533        if len(p) == 5:
534            self.current_element["comment"] = p[4]
535
536    def p_error(self, p):
537        pass
538
539    def parse(self, text):
540        # entry point for the tag-value parser
541        self.yacc.parse(text, lexer=self.lex)
542        # this constructs the last remaining element; all other elements are constructed at the start of
543        # their subsequent element
544        self.construct_current_element()
545
546        # To be able to parse creation info values if they appear in between other elements, e.g. packages, we use
547        # two different dictionaries to collect the creation info and all other elements. Therefore, we have a separate
548        # logger for the creation info whose messages we need to add to the main logger to than raise all collected
549        # messages at once.
550        creation_info_logger = self.creation_info.pop("logger")
551        if creation_info_logger.has_messages():
552            self.logger.extend([f"Error while parsing CreationInfo: {creation_info_logger.get_messages()}"])
553
554        raise_parsing_error_if_logger_has_messages(self.logger)
555        creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
556        self.elements_built["creation_info"] = creation_info
557        document = construct_or_raise_parsing_error(Document, self.elements_built)
558        return document
559
560    def initialize_new_current_element(self, clazz: Any):
561        self.construct_current_element()
562        self.current_element["class"] = clazz
563
564    def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool:
565        if "class" not in self.current_element or expected_class != self.current_element["class"]:
566            self.logger.append(
567                f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to "
568                f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. "
569                f"Line: {line_number}"
570            )
571            return False
572        return True
573
574    def construct_current_element(self):
575        if "class" not in self.current_element:
576            # This happens when the first element is initialized via initialize_new_current_element() or if the first
577            # element is missing its expected starting tag. In both cases we are unable to construct an element.
578            return
579
580        clazz = self.current_element.pop("class")
581        try:
582            raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), clazz.__name__)
583            self.elements_built.setdefault(CLASS_MAPPING[clazz.__name__], []).append(
584                construct_or_raise_parsing_error(clazz, self.current_element)
585            )
586            if clazz == File:
587                self.check_for_preceding_package_and_build_contains_relationship()
588        except SPDXParsingError as err:
589            self.logger.extend(err.get_messages())
590        self.current_element = {"logger": Logger()}
591
592    def check_for_preceding_package_and_build_contains_relationship(self):
593        file_spdx_id = self.current_element["spdx_id"]
594        if "packages" not in self.elements_built:
595            return
596        # We assume that all files that are not contained in a package precede any package information. Any file
597        # information that follows any package information is assigned to the last parsed package by creating a
598        # corresponding contains relationship.
599        # (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2)
600        if not self.elements_built["packages"]:
601            self.logger.append(
602                f"Error while building contains relationship for file {file_spdx_id}, "
603                f"preceding package was not parsed successfully."
604            )
605            return
606        package_spdx_id = self.elements_built["packages"][-1].spdx_id
607        relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id)
608        if relationship not in self.elements_built.setdefault("relationships", []):
609            self.elements_built["relationships"].append(relationship)
Parser(**kwargs)
87    def __init__(self, **kwargs):
88        self.tokens = SPDXLexer.tokens
89        self.logger = Logger()
90        self.current_element = {"logger": Logger()}
91        self.creation_info = {"logger": Logger()}
92        self.elements_built = dict()
93        self.lex = SPDXLexer()
94        self.lex.build(reflags=re.UNICODE)
95        self.yacc = yacc.yacc(module=self, **kwargs)
tokens: list[str]
current_element: dict[str, typing.Any]
creation_info: dict[str, typing.Any]
elements_built: dict[str, typing.Any]
yacc: ply.yacc.LRParser
@grammar_rule('start : start attrib ')
def p_start_start_attrib(self, p):
97    @grammar_rule("start : start attrib ")
98    def p_start_start_attrib(self, p):
99        pass

start : start attrib

@grammar_rule('start : attrib ')
def p_start_attrib(self, p):
101    @grammar_rule("start : attrib ")
102    def p_start_attrib(self, p):
103        pass

start : attrib

@grammar_rule('attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n| file_contributor\n| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n| relationship\n| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n| snippet_byte_range\n| snippet_line_range\n| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n| unknown_tag ')
def p_attrib(self, p):
    # Umbrella production: `attrib` is any one of the tag-specific non-terminals listed below.
    # The adjacent string literals are concatenated into a single grammar text, so the "\n| " separators
    # at the literal boundaries matter and must be kept when adding alternatives.
    @grammar_rule(
        "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| "
        "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n"
        # attributes for file
        "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n"
        "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n"
        "| file_contributor\n"
        # attributes for annotation
        "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n"
        # attributes for relationship
        "| relationship\n"
        # attributes for snippet
        "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n"
        "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n"
        "| snippet_byte_range\n| snippet_line_range\n"
        # attributes for package
        "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n"
        "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n"
        "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n"
        "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n"
        "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n"
        # attributes for extracted licensing info
        "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n"
        "| unknown_tag "
    )
    def p_attrib(self, p):
        # No action needed here: the work is done by the handlers of the individual alternatives.
        pass

attrib : spdx_version | spdx_id | data_license | doc_name | document_comment | document_namespace | creator | created | creator_comment | license_list_version | ext_doc_ref | file_name | file_type | file_checksum | file_license_concluded | file_license_info | file_copyright_text | file_license_comment | file_attribution_text | file_notice | file_comment | file_contributor | annotator | annotation_date | annotation_comment | annotation_type | annotation_spdx_id | relationship | snippet_spdx_id | snippet_name | snippet_comment | snippet_attribution_text | snippet_copyright_text | snippet_license_comment | file_spdx_id | snippet_license_concluded | snippet_license_info | snippet_byte_range | snippet_line_range | package_name | package_version | download_location | files_analyzed | homepage | summary | source_info | pkg_file_name | supplier | originator | pkg_checksum | verification_code | description | pkg_comment | pkg_attribution_text | pkg_license_declared | pkg_license_concluded | pkg_license_info | pkg_license_comment | pkg_copyright_text | pkg_external_ref | primary_package_purpose | built_date | release_date | valid_until_date | license_id | extracted_text | license_name | license_cross_ref | lic_comment | unknown_tag

@grammar_rule('license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n extracted_text : LICENSE_TEXT error\n file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n file_license_comment : FILE_LICENSE_COMMENT error\n file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n snippet_license_comment : 
SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n snippet_license_info : SNIPPET_LICENSE_INFO error\n snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error')
def p_current_element_error(self, p):
134    @grammar_rule(
135        "license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n "
136        "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n "
137        "extracted_text : LICENSE_TEXT error\n "
138        "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n "
139        "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n "
140        "file_license_comment : FILE_LICENSE_COMMENT error\n "
141        "file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n "
142        "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n "
143        "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n "
144        "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n "
145        "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n "
146        "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n "
147        "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n "
148        "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n "
149        "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n "
150        "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n "
151        "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n "
152        "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n "
153        "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n "
154        "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n "
155        "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n "
156        "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n "
157        "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n "
158        "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n "
159        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n "
160        "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n "
161        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n "
162        "snippet_license_info : SNIPPET_LICENSE_INFO error\n "
163        "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n "
164        "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n "
165        "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
166        "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error"
167    )
168    def p_current_element_error(self, p):
169        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
170            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
171        self.current_element["logger"].append(
172            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
173        )

license_id : LICENSE_ID error license_cross_ref : LICENSE_CROSS_REF error lic_comment : LICENSE_COMMENT error license_name : LICENSE_NAME error extracted_text : LICENSE_TEXT error file_name : FILE_NAME error file_contributor : FILE_CONTRIBUTOR error file_notice : FILE_NOTICE error file_copyright_text : FILE_COPYRIGHT_TEXT error file_license_comment : FILE_LICENSE_COMMENT error file_license_info : FILE_LICENSE_INFO error file_comment : FILE_COMMENT error file_checksum : FILE_CHECKSUM error file_license_concluded : FILE_LICENSE_CONCLUDED error file_type : FILE_TYPE error file_attribution_text : FILE_ATTRIBUTION_TEXT error package_name : PKG_NAME error pkg_attribution_text : PKG_ATTRIBUTION_TEXT error description : PKG_DESCRIPTION error pkg_comment : PKG_COMMENT error summary : PKG_SUMMARY error pkg_copyright_text : PKG_COPYRIGHT_TEXT error pkg_external_ref : PKG_EXTERNAL_REF error pkg_license_comment : PKG_LICENSE_COMMENT error pkg_license_declared : PKG_LICENSE_DECLARED error pkg_license_info : PKG_LICENSE_INFO error pkg_license_concluded : PKG_LICENSE_CONCLUDED error source_info : PKG_SOURCE_INFO error homepage : PKG_HOMEPAGE error pkg_checksum : PKG_CHECKSUM error verification_code : PKG_VERIFICATION_CODE error originator : PKG_ORIGINATOR error download_location : PKG_DOWNLOAD_LOCATION error files_analyzed : PKG_FILES_ANALYZED error supplier : PKG_SUPPLIER error pkg_file_name : PKG_FILE_NAME error package_version : PKG_VERSION error primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error built_date : BUILT_DATE error release_date : RELEASE_DATE error valid_until_date : VALID_UNTIL_DATE error snippet_spdx_id : SNIPPET_SPDX_ID error snippet_name : SNIPPET_NAME error snippet_comment : SNIPPET_COMMENT error snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error snippet_license_comment : SNIPPET_LICENSE_COMMENT error file_spdx_id : SNIPPET_FILE_SPDXID error snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED 
error snippet_license_info : SNIPPET_LICENSE_INFO error snippet_byte_range : SNIPPET_BYTE_RANGE error snippet_line_range : SNIPPET_LINE_RANGE error annotator : ANNOTATOR error annotation_date : ANNOTATION_DATE error annotation_comment : ANNOTATION_COMMENT error annotation_type : ANNOTATION_TYPE error annotation_spdx_id : ANNOTATION_SPDX_ID error relationship : RELATIONSHIP error

@grammar_rule('license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n file_license_comment : FILE_LICENSE_COMMENT text_or_line\n file_comment : FILE_COMMENT text_or_line\n file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n pkg_comment : PKG_COMMENT text_or_line\n pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n pkg_file_name : PKG_FILE_NAME LINE\n pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n snippet_comment : SNIPPET_COMMENT text_or_line\n snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n file_spdx_id : SNIPPET_FILE_SPDXID LINE\n snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n annotation_comment : ANNOTATION_COMMENT text_or_line')
def p_generic_value(self, p):
175    @grammar_rule(
176        "license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n "
177        "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n "
178        "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n "
179        "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
180        "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n "
181        "file_comment : FILE_COMMENT text_or_line\n "
182        "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
183        "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n "
184        "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n "
185        "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n "
186        "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n "
187        "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n "
188        "pkg_comment : PKG_COMMENT text_or_line\n "
189        "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
190        "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n "
191        "pkg_file_name : PKG_FILE_NAME LINE\n "
192        "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
193        "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n "
194        "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n "
195        "snippet_comment : SNIPPET_COMMENT text_or_line\n "
196        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
197        "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n "
198        "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n "
199        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
200        "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
201        "annotation_comment : ANNOTATION_COMMENT text_or_line"
202    )
203    def p_generic_value(self, p):
204        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
205            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
206        if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)):
207            set_value(p, self.current_element)

license_name : LICENSE_NAME line_or_no_assertion extracted_text : LICENSE_TEXT text_or_line lic_comment : LICENSE_COMMENT text_or_line license_id : LICENSE_ID LINE file_name : FILE_NAME LINE file_notice : FILE_NOTICE text_or_line file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none file_license_comment : FILE_LICENSE_COMMENT text_or_line file_comment : FILE_COMMENT text_or_line file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none package_name : PKG_NAME LINE description : PKG_DESCRIPTION text_or_line summary : PKG_SUMMARY text_or_line source_info : PKG_SOURCE_INFO text_or_line homepage : PKG_HOMEPAGE line_or_no_assertion_or_none download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none originator : PKG_ORIGINATOR actor_or_no_assertion supplier : PKG_SUPPLIER actor_or_no_assertion pkg_comment : PKG_COMMENT text_or_line pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none pkg_file_name : PKG_FILE_NAME LINE pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none package_version : PKG_VERSION LINE pkg_license_comment : PKG_LICENSE_COMMENT text_or_line snippet_spdx_id : SNIPPET_SPDX_ID LINE snippet_name : SNIPPET_NAME LINE snippet_comment : SNIPPET_COMMENT text_or_line snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line file_spdx_id : SNIPPET_FILE_SPDXID LINE snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none annotation_spdx_id : ANNOTATION_SPDX_ID LINE annotation_comment : ANNOTATION_COMMENT text_or_line

@grammar_rule('unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n| UNKNOWN_TAG')
def p_unknown_tag(self, p):
209    @grammar_rule(
210        "unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
211        "| UNKNOWN_TAG"
212    )
213    def p_unknown_tag(self, p):
214        self.logger.append(f"Unknown tag provided in line {p.lineno(1)}")

unknown_tag : UNKNOWN_TAG text_or_line | UNKNOWN_TAG ISO8601_DATE | UNKNOWN_TAG PERSON_VALUE | UNKNOWN_TAG

@grammar_rule('text_or_line : TEXT\n line_or_no_assertion_or_none : TEXT')
def p_text(self, p):
216    @grammar_rule("text_or_line : TEXT\n line_or_no_assertion_or_none : TEXT")
217    def p_text(self, p):
218        p[0] = str_from_text(p[1])

text_or_line : TEXT line_or_no_assertion_or_none : TEXT

@grammar_rule('text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : LINE\ntext_or_line : NO_ASSERTION\n text_or_line : NONE')
def p_line(self, p):
220    @grammar_rule(
221        "text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : LINE\n"
222        "text_or_line : NO_ASSERTION\n text_or_line : NONE"
223    )
224    def p_line(self, p):
225        p[0] = p[1]

text_or_line : LINE line_or_no_assertion : LINE line_or_no_assertion_or_none : LINE text_or_line : NO_ASSERTION text_or_line : NONE

@grammar_rule('license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\nline_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION')
def p_no_assertion(self, p):
227    @grammar_rule(
228        "license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n"
229        "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION"
230    )
231    def p_no_assertion(self, p):
232        p[0] = SpdxNoAssertion()

license_or_no_assertion_or_none : NO_ASSERTION actor_or_no_assertion : NO_ASSERTION line_or_no_assertion : NO_ASSERTION line_or_no_assertion_or_none : NO_ASSERTION

@grammar_rule('license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE')
def p_none(self, p):
234    @grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE")
235    def p_none(self, p):
236        p[0] = SpdxNone()

license_or_no_assertion_or_none : NONE line_or_no_assertion_or_none : NONE

@grammar_rule('license_or_no_assertion_or_none : LINE')
def p_license(self, p):
238    @grammar_rule("license_or_no_assertion_or_none : LINE")
239    def p_license(self, p):
240        try:
241            p[0] = get_spdx_licensing().parse(p[1])
242        except ExpressionError as err:
243            error_message = f"Error while parsing license expression: {p[1]}"
244            if err.args:
245                error_message += f": {err.args[0]}"
246            self.current_element["logger"].append(error_message)

license_or_no_assertion_or_none : LINE

@grammar_rule('actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE')
def p_actor_values(self, p):
248    @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE")
249    def p_actor_values(self, p):
250        p[0] = ActorParser.parse_actor(p[1])

actor_or_no_assertion : PERSON_VALUE | ORGANIZATION_VALUE

@grammar_rule('spdx_id : SPDX_ID LINE')
def p_spdx_id(self, p):
252    @grammar_rule("spdx_id : SPDX_ID LINE")
253    def p_spdx_id(self, p):
254        # As all SPDX Ids share the same tag, there is no knowing which spdx_id belongs to the document.
255        # We assume that to be the first spdx_id we encounter. As the specification does not explicitly require this,
256        # our approach might lead to unwanted behavior when the document's SPDX Id is defined later in the document.
257        if "spdx_id" in self.creation_info:
258            self.current_element["spdx_id"] = p[2]
259        else:
260            self.creation_info["spdx_id"] = p[2]

spdx_id : SPDX_ID LINE

@grammar_rule('license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error')
def p_creation_info_value_error(self, p):
264    @grammar_rule(
265        "license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n "
266        "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n "
267        "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n "
268        "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error"
269    )
270    def p_creation_info_value_error(self, p):
271        self.creation_info["logger"].append(
272            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
273        )

license_list_version : LICENSE_LIST_VERSION error document_comment : DOC_COMMENT error document_namespace : DOC_NAMESPACE error data_license : DOC_LICENSE error doc_name : DOC_NAME error ext_doc_ref : EXT_DOC_REF error spdx_version : DOC_VERSION error creator_comment : CREATOR_COMMENT error creator : CREATOR error created : CREATED error

@grammar_rule('document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE')
def p_generic_value_creation_info(self, p):
275    @grammar_rule(
276        "document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n "
277        "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n "
278        "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE"
279    )
280    def p_generic_value_creation_info(self, p):
281        set_value(p, self.creation_info)

document_comment : DOC_COMMENT text_or_line document_namespace : DOC_NAMESPACE LINE data_license : DOC_LICENSE LINE spdx_version : DOC_VERSION LINE creator_comment : CREATOR_COMMENT text_or_line doc_name : DOC_NAME LINE

@grammar_rule('license_list_version : LICENSE_LIST_VERSION LINE')
def p_license_list_version(self, p):
283    @grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE")
284    def p_license_list_version(self, p):
285        set_value(p, self.creation_info, method_to_apply=Version.from_string)

license_list_version : LICENSE_LIST_VERSION LINE

@grammar_rule('ext_doc_ref : EXT_DOC_REF LINE')
def p_external_document_ref(self, p):
287    @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
288    def p_external_document_ref(self, p):
289        external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")
290        external_doc_ref_match = external_doc_ref_regex.match(p[2])
291        if not external_doc_ref_match:
292            self.creation_info["logger"].append(
293                f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}"
294            )
295            return
296        try:
297            document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ")
298        except ValueError:
299            self.creation_info["logger"].append(
300                f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
301                f"document_ref_id and document_uri. Line: {p.lineno(1)}"
302            )
303            return
304        checksum = parse_checksum(external_doc_ref_match.group(2).strip())
305        external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
306        self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)

ext_doc_ref : EXT_DOC_REF LINE

@grammar_rule('creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE')
def p_creator(self, p):
308    @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE")
309    def p_creator(self, p):
310        self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2]))

creator : CREATOR PERSON_VALUE | CREATOR TOOL_VALUE | CREATOR ORGANIZATION_VALUE

@grammar_rule('created : CREATED ISO8601_DATE')
def p_created(self, p):
312    @grammar_rule("created : CREATED ISO8601_DATE")
313    def p_created(self, p):
314        set_value(p, self.creation_info, method_to_apply=datetime_from_str)

created : CREATED ISO8601_DATE

@grammar_rule('license_cross_ref : LICENSE_CROSS_REF LINE')
def p_extracted_cross_reference(self, p):
318    @grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE")
319    def p_extracted_cross_reference(self, p):
320        if self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)):
321            self.current_element.setdefault("cross_references", []).append(p[2])

license_cross_ref : LICENSE_CROSS_REF LINE

@grammar_rule('file_contributor : FILE_CONTRIBUTOR LINE')
def p_file_contributor(self, p):
325    @grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE")
326    def p_file_contributor(self, p):
327        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
328            self.current_element.setdefault("contributors", []).append(p[2])

file_contributor : FILE_CONTRIBUTOR LINE

@grammar_rule('file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line')
def p_file_attribution_text(self, p):
330    @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line")
331    def p_file_attribution_text(self, p):
332        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
333            self.current_element.setdefault("attribution_texts", []).append(p[2])

file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line

@grammar_rule('file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none')
def p_file_license_info(self, p):
335    @grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none")
336    def p_file_license_info(self, p):
337        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
338            self.current_element.setdefault("license_info_in_file", []).append(p[2])

file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none

@grammar_rule('file_type : FILE_TYPE LINE')
def p_file_type(self, p):
340    @grammar_rule("file_type : FILE_TYPE LINE")
341    def p_file_type(self, p):
342        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
343            return
344        try:
345            file_type = FileType[p[2].strip()]
346        except KeyError:
347            self.current_element["logger"].append(f"Invalid FileType: {p[2]}. Line {p.lineno(1)}")
348            return
349        self.current_element.setdefault("file_types", []).append(file_type)

file_type : FILE_TYPE LINE

@grammar_rule('file_checksum : FILE_CHECKSUM CHECKSUM')
def p_file_checksum(self, p):
351    @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM")
352    def p_file_checksum(self, p):
353        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
354            return
355        checksum = parse_checksum(p[2])
356        self.current_element.setdefault("checksums", []).append(checksum)

file_checksum : FILE_CHECKSUM CHECKSUM

@grammar_rule('pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line')
def p_pkg_attribution_text(self, p):
360    @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line")
361    def p_pkg_attribution_text(self, p):
362        self.check_that_current_element_matches_class_for_value(Package, p.lineno(1))
363        self.current_element.setdefault("attribution_texts", []).append(p[2])

pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line

@grammar_rule('pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE')
def p_pkg_external_refs(self, p):
365    @grammar_rule(
366        "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE"
367    )
368    def p_pkg_external_refs(self, p):
369        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
370            return
371        try:
372            category, reference_type, locator = p[2].split(" ")
373        except ValueError:
374            self.current_element["logger"].append(
375                f"Couldn't split PackageExternalRef in category, reference_type and locator. Line: {p.lineno(1)}"
376            )
377            return
378        comment = None
379        if len(p) == 5:
380            comment = p[4]
381        try:
382            category = ExternalPackageRefCategory[category.replace("-", "_")]
383        except KeyError:
384            self.current_element["logger"].append(
385                f"Invalid ExternalPackageRefCategory: {category}. Line: {p.lineno(1)}"
386            )
387            return
388        try:
389            external_package_ref = construct_or_raise_parsing_error(
390                ExternalPackageRef,
391                {"category": category, "reference_type": reference_type, "locator": locator, "comment": comment},
392            )
393        except SPDXParsingError as err:
394            self.current_element["logger"].append(err.get_messages())
395            return
396        self.current_element.setdefault("external_references", []).append(external_package_ref)

pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line | PKG_EXTERNAL_REF LINE

@grammar_rule('pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none')
def p_pkg_license_info_from_file(self, p):
398    @grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none")
399    def p_pkg_license_info_from_file(self, p):
400        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
401            self.current_element.setdefault("license_info_from_files", []).append(p[2])

pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none

@grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM")
def p_pkg_checksum(self, p):
    # Converts the checksum text in p[2] via parse_checksum and appends the result to the "checksums"
    # list of the package in scope. Nothing is stored when the current element is not a Package.
    if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        parsed_checksum = parse_checksum(p[2])
        package_checksums = self.current_element.setdefault("checksums", [])
        package_checksums.append(parsed_checksum)

pkg_checksum : PKG_CHECKSUM CHECKSUM

@grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE")
def p_pkg_verification_code(self, p):
    # Parses a value of the form "<40 lowercase hex digits> (excludes: <file>, <file>, ...)" and stores it
    # as a PackageVerificationCode under "verification_code" of the package in scope.
    # A second verification code for the same package, or a value that does not match the expected
    # format, is reported through the element's logger instead of being stored.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return

    if "verification_code" in self.current_element:
        self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
        return
    verif_code_regex = re.compile(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", re.UNICODE)
    verif_code_code_grp = 1
    verif_code_exc_files_grp = 3
    match = verif_code_regex.match(p[2])
    if not match:
        self.current_element["logger"].append(
            f"Error while parsing {p[1]}: Value did not match expected format. Line: {p.lineno(1)}"
        )
        return
    value = match.group(verif_code_code_grp)
    excluded_files = None
    excluded_files_text = match.group(verif_code_exc_files_grp)
    if excluded_files_text:
        # The list is commonly written as "a, b"; trim each entry so that the blank after the comma
        # does not become part of the file name.
        excluded_files = [file_name.strip() for file_name in excluded_files_text.split(",")]
    self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files)

verification_code : PKG_VERIFICATION_CODE LINE

@grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE")
def p_pkg_files_analyzed(self, p):
    # Stores the FilesAnalyzed flag of the package in scope as a bool.
    # Only the exact literals "true" and "false" are accepted; a repeated tag or any other value is
    # reported through the element's logger.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    package = self.current_element
    if "files_analyzed" in package:
        package["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
        return
    literal = p[2]
    if literal in ("true", "false"):
        package["files_analyzed"] = literal == "true"
        return
    package["logger"].append(f'The value of FilesAnalyzed must be either "true" or "false", but is: {p[2]}')

files_analyzed : PKG_FILES_ANALYZED LINE

@grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE")
def p_primary_package_purpose(self, p):
    # Hands the purpose of the package in scope to set_value, converted to a PackagePurpose member.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return

    def to_package_purpose(raw_value):
        # enum member names use "_" where the tag value uses "-"
        return PackagePurpose[raw_value.replace("-", "_")]

    set_value(p, self.current_element, method_to_apply=to_package_purpose)

primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE

@grammar_rule(
    "built_date : BUILT_DATE ISO8601_DATE\n "
    "release_date : RELEASE_DATE ISO8601_DATE\n "
    "valid_until_date : VALID_UNTIL_DATE ISO8601_DATE"
)
def p_package_dates(self, p):
    # Shared handler for the three package date tags: the value is converted with datetime_from_str
    # and handed to set_value for the package in scope.
    package_in_scope = self.check_that_current_element_matches_class_for_value(Package, p.lineno(1))
    if not package_in_scope:
        return
    set_value(p, self.current_element, method_to_apply=datetime_from_str)

built_date : BUILT_DATE ISO8601_DATE
release_date : RELEASE_DATE ISO8601_DATE
valid_until_date : VALID_UNTIL_DATE ISO8601_DATE

@grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line")
def p_snippet_attribution_text(self, p):
    # Appends the attribution text (p[2]) to the "attribution_texts" list of the snippet in scope.
    snippet_in_scope = self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1))
    if not snippet_in_scope:
        return
    attribution_texts = self.current_element.setdefault("attribution_texts", [])
    attribution_texts.append(p[2])

snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line

@grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none")
def p_snippet_license_info(self, p):
    # Appends the parsed license value (p[2]) to the "license_info_in_snippet" list of the snippet in scope.
    snippet_in_scope = self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1))
    if not snippet_in_scope:
        return
    license_infos = self.current_element.setdefault("license_info_in_snippet", [])
    license_infos.append(p[2])

snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none

@grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE")
def p_snippet_range(self, p):
    # Shared handler for the byte range and line range of a snippet. The value must look like
    # "<start>:<end>" and is stored as an (int, int) tuple under the field name that
    # TAG_DATA_MODEL_FIELD gives for the tag. Repeated tags and malformed values are logged.
    if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
        return

    snippet = self.current_element
    field_name = TAG_DATA_MODEL_FIELD[p[1]][1]
    if field_name in snippet:
        snippet["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
        return

    raw_range = p[2]
    if re.match(r"^(\d+):(\d+)$", raw_range.strip(), re.UNICODE) is None:
        snippet["logger"].append(f"Value for {p[1]} doesn't match valid range pattern. Line: {p.lineno(1)}")
        return

    # int() ignores surrounding whitespace, so converting the unstripped halves is fine
    bounds = raw_range.split(":")
    snippet[field_name] = (int(bounds[0]), int(bounds[-1]))

snippet_byte_range : SNIPPET_BYTE_RANGE LINE
snippet_line_range : SNIPPET_LINE_RANGE LINE

@grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| ANNOTATOR TOOL_VALUE\n| ANNOTATOR ORGANIZATION_VALUE")
def p_annotator(self, p):
    # The Annotator tag opens a new Annotation: the element collected so far is finished first, then
    # the actor parsed from p[2] is handed to set_value for the fresh element.
    self.initialize_new_current_element(Annotation)
    # must be read after the call above, which replaces self.current_element
    annotation = self.current_element
    set_value(p, annotation, method_to_apply=ActorParser.parse_actor)

annotator : ANNOTATOR PERSON_VALUE | ANNOTATOR TOOL_VALUE | ANNOTATOR ORGANIZATION_VALUE

@grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
def p_annotation_date(self, p):
    # Hands the annotation date, converted with datetime_from_str, to set_value for the annotation in scope.
    annotation_in_scope = self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1))
    if not annotation_in_scope:
        return
    set_value(p, self.current_element, method_to_apply=datetime_from_str)

annotation_date : ANNOTATION_DATE ISO8601_DATE

@grammar_rule("annotation_type : ANNOTATION_TYPE LINE")
def p_annotation_type(self, p):
    # Hands the annotation type, looked up by name in AnnotationType, to set_value for the annotation in scope.
    if not self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
        return

    def to_annotation_type(type_name):
        return AnnotationType[type_name]

    set_value(p, self.current_element, method_to_apply=to_annotation_type)

annotation_type : ANNOTATION_TYPE LINE

@grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n | RELATIONSHIP LINE")
def p_relationship(self, p):
    # Starts a new Relationship element from "<spdx_element_id> <relationship_type> <related_id>".
    # The related id may be the literal NONE or NOASSERTION, which is stored as SpdxNone() or
    # SpdxNoAssertion(). With the four-symbol form of the rule, p[4] holds the relationship comment.
    self.initialize_new_current_element(Relationship)
    relationship = self.current_element

    parts = p[2].split(" ")
    if len(parts) != 3:
        relationship["logger"].append(
            f"Relationship couldn't be split in spdx_element_id, relationship_type and "
            f"related_spdx_element. Line: {p.lineno(1)}"
        )
        return
    source_id, type_name, target = parts

    try:
        relationship["relationship_type"] = RelationshipType[type_name]
    except KeyError:
        # an unknown type is only logged; the ids and the comment are still recorded below
        relationship["logger"].append(f"Invalid RelationshipType {type_name}. Line: {p.lineno(1)}")

    if target == "NONE":
        target = SpdxNone()
    elif target == "NOASSERTION":
        target = SpdxNoAssertion()
    relationship["related_spdx_element_id"] = target
    relationship["spdx_element_id"] = source_id

    if len(p) == 5:
        relationship["comment"] = p[4]

relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line | RELATIONSHIP LINE

def p_error(self, p):
    """Error callback for the ply parser; intentionally does nothing.

    NOTE(review): presumably syntax errors are reported by dedicated error productions elsewhere in
    this parser, which is why nothing is logged here - confirm before adding handling.
    """
    return None
def parse(self, text):
    """Entry point of the tag-value parser: parse ``text`` and return the constructed ``Document``.

    All messages collected while parsing are passed to ``raise_parsing_error_if_logger_has_messages``
    in one go, before the creation info and the document are constructed.
    """
    self.yacc.parse(text, lexer=self.lex)
    # Elements are constructed when their successor starts, so the last one is still pending here.
    self.construct_current_element()

    # Creation info values may show up in between other elements (e.g. packages), so they are collected
    # in a dictionary of their own with a logger of their own. Its messages are merged into the main
    # logger so that everything can then be raised at once.
    creation_info_messages = self.creation_info.pop("logger")
    if creation_info_messages.has_messages():
        self.logger.extend([f"Error while parsing CreationInfo: {creation_info_messages.get_messages()}"])

    raise_parsing_error_if_logger_has_messages(self.logger)
    self.elements_built["creation_info"] = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
    return construct_or_raise_parsing_error(Document, self.elements_built)
def initialize_new_current_element(self, clazz: Any):
    """Finish the element collected so far and mark ``clazz`` as the class of the element starting now."""
    self.construct_current_element()
    # read the attribute only after the call above, so that the tag lands on whatever it left behind
    new_element = self.current_element
    new_element["class"] = clazz
def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool:
    """Return True if the element currently being collected is of ``expected_class``.

    Otherwise a message naming the expected start tag and ``line_number`` is appended to the parser's
    main logger and False is returned.
    """
    element = self.current_element
    if "class" in element and expected_class == element["class"]:
        return True

    self.logger.append(
        f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to "
        f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. "
        f"Line: {line_number}"
    )
    return False
def construct_current_element(self):
    """Build the element collected in ``self.current_element`` and start a fresh, empty one.

    The built object is appended to ``self.elements_built`` under the key that ``CLASS_MAPPING`` gives
    for its class. An ``SPDXParsingError`` raised on the way has its messages added to the main logger.
    """
    element = self.current_element
    if "class" not in element:
        # Either this is the very first call made through initialize_new_current_element(), or the
        # first element lacks its expected starting tag. There is nothing to construct in both cases.
        return

    clazz = element.pop("class")
    try:
        raise_parsing_error_if_logger_has_messages(element.pop("logger"), clazz.__name__)
        built_of_this_kind = self.elements_built.setdefault(CLASS_MAPPING[clazz.__name__], [])
        built_of_this_kind.append(construct_or_raise_parsing_error(clazz, element))
        if clazz == File:
            self.check_for_preceding_package_and_build_contains_relationship()
    except SPDXParsingError as err:
        self.logger.extend(err.get_messages())
    self.current_element = {"logger": Logger()}
def check_for_preceding_package_and_build_contains_relationship(self):
    """Link the file in ``self.current_element`` to the most recently built package.

    Files that belong to no package are assumed to precede all package information; a file that follows
    package information is assigned to the last parsed package through a CONTAINS relationship
    (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2).
    """
    file_spdx_id = self.current_element["spdx_id"]
    built = self.elements_built
    if "packages" not in built:
        # no package information seen so far, so the file stands on its own
        return

    packages = built["packages"]
    if not packages:
        self.logger.append(
            f"Error while building contains relationship for file {file_spdx_id}, "
            f"preceding package was not parsed successfully."
        )
        return

    contains = Relationship(packages[-1].spdx_id, RelationshipType.CONTAINS, file_spdx_id)
    known_relationships = built.setdefault("relationships", [])
    if contains not in known_relationships:
        known_relationships.append(contains)