spdx_tools.spdx.parser.tagvalue.parser

  1# Copyright (c) 2014 Ahmed H. Ismail
  2# Copyright (c) 2023 spdx contributors
  3# SPDX-License-Identifier: Apache-2.0
  4# Licensed under the Apache License, Version 2.0 (the "License");
  5# you may not use this file except in compliance with the License.
  6# You may obtain a copy of the License at
  7#     http://www.apache.org/licenses/LICENSE-2.0
  8# Unless required by applicable law or agreed to in writing, software
  9# distributed under the License is distributed on an "AS IS" BASIS,
 10# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 11# See the License for the specific language governing permissions and
 12# limitations under the License.
 13
 14import re
 15
 16from beartype.typing import Any, Dict, List
 17from license_expression import ExpressionError, get_spdx_licensing
 18from ply import yacc
 19from ply.yacc import LRParser
 20
 21from spdx_tools.spdx.datetime_conversions import datetime_from_str
 22from spdx_tools.spdx.model import (
 23    Annotation,
 24    AnnotationType,
 25    CreationInfo,
 26    Document,
 27    ExternalDocumentRef,
 28    ExternalPackageRef,
 29    ExternalPackageRefCategory,
 30    ExtractedLicensingInfo,
 31    File,
 32    FileType,
 33    Package,
 34    PackagePurpose,
 35    PackageVerificationCode,
 36    Relationship,
 37    RelationshipType,
 38    Snippet,
 39    SpdxNoAssertion,
 40    SpdxNone,
 41    Version,
 42)
 43from spdx_tools.spdx.parser.actor_parser import ActorParser
 44from spdx_tools.spdx.parser.error import SPDXParsingError
 45from spdx_tools.spdx.parser.logger import Logger
 46from spdx_tools.spdx.parser.parsing_functions import (
 47    construct_or_raise_parsing_error,
 48    raise_parsing_error_if_logger_has_messages,
 49)
 50from spdx_tools.spdx.parser.tagvalue.helper_methods import (
 51    TAG_DATA_MODEL_FIELD,
 52    grammar_rule,
 53    parse_checksum,
 54    set_value,
 55    str_from_text,
 56)
 57from spdx_tools.spdx.parser.tagvalue.lexer import SPDXLexer
 58
 59CLASS_MAPPING = dict(
 60    File="files",
 61    Annotation="annotations",
 62    Relationship="relationships",
 63    Snippet="snippets",
 64    Package="packages",
 65    ExtractedLicensingInfo="extracted_licensing_info",
 66)
 67ELEMENT_EXPECTED_START_TAG = dict(
 68    File="FileName",
 69    Annotation="Annotator",
 70    Relationship="Relationship",
 71    Snippet="SnippetSPDXID",
 72    Package="PackageName",
 73    ExtractedLicensingInfo="LicenseID",
 74)
 75
 76
class Parser:
    """Tag-value parser for SPDX documents, generated with PLY's yacc from the ``p_*`` rule methods of this class.

    Creation info values and element values are collected in separate dictionaries while the input is parsed;
    ``parse`` turns them into a ``Document`` at the end.
    """

    # token names taken from SPDXLexer.tokens
    tokens: List[str]
    # collects all error messages; checked once at the end of parse()
    logger: Logger
    # values of the element currently being parsed, plus its own "logger" and (once started) its "class"
    current_element: Dict[str, Any]
    # values for CreationInfo, plus a separate "logger" for creation info errors
    creation_info: Dict[str, Any]
    # successfully constructed elements, keyed by the values of CLASS_MAPPING; passed to Document in parse()
    elements_built: Dict[str, Any]
    # lexer handed to the yacc parser in parse()
    lex: SPDXLexer
    # parser generated by yacc.yacc(module=self)
    yacc: LRParser
 85
 86    def __init__(self, **kwargs):
 87        self.tokens = SPDXLexer.tokens
 88        self.logger = Logger()
 89        self.current_element = {"logger": Logger()}
 90        self.creation_info = {"logger": Logger()}
 91        self.elements_built = dict()
 92        self.lex = SPDXLexer()
 93        self.lex.build(reflags=re.UNICODE)
 94        self.yacc = yacc.yacc(module=self, **kwargs)
 95
    @grammar_rule("start : start attrib ")
    def p_start_start_attrib(self, p):
        # Left-recursive start rule: the input is a sequence of attributes. Nothing to do here, the values are
        # collected by the handlers of the individual attribute rules.
        pass
 99
    @grammar_rule("start : attrib ")
    def p_start_attrib(self, p):
        # Base case of the start rule: a single attribute. Nothing to do here.
        pass
103
    @grammar_rule(
        "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| "
        "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n"
        # attributes for file
        "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n"
        "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n"
        "| file_contributor\n"
        # attributes for annotation
        "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n"
        # attributes for relationship
        "| relationship\n"
        # attributes for snippet
        "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n"
        "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n"
        "| snippet_byte_range\n| snippet_line_range\n"
        # attributes for package
        "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n"
        "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n"
        "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n"
        "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n"
        "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n"
        # attributes for extracted licensing info
        "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n"
        "| unknown_tag "
    )
    def p_attrib(self, p):
        # An attribute is any one of the tag rules listed above (document level, file, annotation, relationship,
        # snippet, package, extracted licensing info) or an unknown tag. The work is done in the handlers of
        # those rules, so nothing happens here.
        pass
131
132    # general parsing methods
133    @grammar_rule(
134        "license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n "
135        "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n "
136        "extracted_text : LICENSE_TEXT error\n "
137        "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n "
138        "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n "
139        "file_license_comment : FILE_LICENSE_COMMENT error\n "
140        "file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n "
141        "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n "
142        "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n "
143        "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n "
144        "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n "
145        "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n "
146        "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n "
147        "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n "
148        "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n "
149        "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n "
150        "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n "
151        "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n "
152        "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n "
153        "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n "
154        "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n "
155        "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n "
156        "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n "
157        "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n "
158        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n "
159        "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n "
160        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n "
161        "snippet_license_info : SNIPPET_LICENSE_INFO error\n "
162        "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n "
163        "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n "
164        "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
165        "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error"
166    )
167    def p_current_element_error(self, p):
168        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
169            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
170        self.current_element["logger"].append(
171            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
172        )
173
174    @grammar_rule(
175        "license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n "
176        "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n "
177        "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n "
178        "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
179        "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n "
180        "file_comment : FILE_COMMENT text_or_line\n "
181        "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
182        "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n "
183        "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n "
184        "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n "
185        "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n "
186        "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n "
187        "pkg_comment : PKG_COMMENT text_or_line\n "
188        "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
189        "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n "
190        "pkg_file_name : PKG_FILE_NAME LINE\n "
191        "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
192        "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n "
193        "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n "
194        "snippet_comment : SNIPPET_COMMENT text_or_line\n "
195        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
196        "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n "
197        "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n "
198        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
199        "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
200        "annotation_comment : ANNOTATION_COMMENT text_or_line"
201    )
202    def p_generic_value(self, p):
203        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
204            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
205        if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)):
206            set_value(p, self.current_element)
207
208    @grammar_rule(
209        "unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
210        "| UNKNOWN_TAG"
211    )
212    def p_unknown_tag(self, p):
213        self.logger.append(f"Unknown tag provided in line {p.lineno(1)}")
214
215    @grammar_rule("text_or_line : TEXT")
216    def p_text(self, p):
217        p[0] = str_from_text(p[1])
218
219    @grammar_rule("text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : text_or_line")
220    def p_line(self, p):
221        p[0] = p[1]
222
223    @grammar_rule(
224        "license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n"
225        "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION"
226    )
227    def p_no_assertion(self, p):
228        p[0] = SpdxNoAssertion()
229
230    @grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE")
231    def p_none(self, p):
232        p[0] = SpdxNone()
233
234    @grammar_rule("license_or_no_assertion_or_none : LINE")
235    def p_license(self, p):
236        try:
237            p[0] = get_spdx_licensing().parse(p[1])
238        except ExpressionError as err:
239            error_message = f"Error while parsing license expression: {p[1]}"
240            if err.args:
241                error_message += f": {err.args[0]}"
242            self.current_element["logger"].append(error_message)
243
244    @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE")
245    def p_actor_values(self, p):
246        p[0] = ActorParser.parse_actor(p[1])
247
248    @grammar_rule("spdx_id : SPDX_ID LINE")
249    def p_spdx_id(self, p):
250        # As all SPDX Ids share the same tag, there is no knowing which spdx_id belongs to the document.
251        # We assume that to be the first spdx_id we encounter. As the specification does not explicitly require this,
252        # our approach might lead to unwanted behavior when the document's SPDX Id is defined later in the document.
253        if "spdx_id" in self.creation_info:
254            self.current_element["spdx_id"] = p[2]
255        else:
256            self.creation_info["spdx_id"] = p[2]
257
258    # parsing methods for creation info / document level
259
260    @grammar_rule(
261        "license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n "
262        "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n "
263        "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n "
264        "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error"
265    )
266    def p_creation_info_value_error(self, p):
267        self.creation_info["logger"].append(
268            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
269        )
270
271    @grammar_rule(
272        "document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n "
273        "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n "
274        "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE"
275    )
276    def p_generic_value_creation_info(self, p):
277        set_value(p, self.creation_info)
278
279    @grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE")
280    def p_license_list_version(self, p):
281        set_value(p, self.creation_info, method_to_apply=Version.from_string)
282
283    @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
284    def p_external_document_ref(self, p):
285        external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")
286        external_doc_ref_match = external_doc_ref_regex.match(p[2])
287        if not external_doc_ref_match:
288            self.creation_info["logger"].append(
289                f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}"
290            )
291            return
292        try:
293            document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ")
294        except ValueError:
295            self.creation_info["logger"].append(
296                f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
297                f"document_ref_id and document_uri. Line: {p.lineno(1)}"
298            )
299            return
300        checksum = parse_checksum(external_doc_ref_match.group(2).strip())
301        external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
302        self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)
303
304    @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE")
305    def p_creator(self, p):
306        self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2]))
307
308    @grammar_rule("created : CREATED ISO8601_DATE")
309    def p_created(self, p):
310        set_value(p, self.creation_info, method_to_apply=datetime_from_str)
311
312    # parsing methods for extracted licensing info
313
314    @grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE")
315    def p_extracted_cross_reference(self, p):
316        if self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)):
317            self.current_element.setdefault("cross_references", []).append(p[2])
318
319    # parsing methods for file
320
321    @grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE")
322    def p_file_contributor(self, p):
323        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
324            self.current_element.setdefault("contributors", []).append(p[2])
325
326    @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line")
327    def p_file_attribution_text(self, p):
328        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
329            self.current_element.setdefault("attribution_texts", []).append(p[2])
330
331    @grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none")
332    def p_file_license_info(self, p):
333        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
334            self.current_element.setdefault("license_info_in_file", []).append(p[2])
335
336    @grammar_rule("file_type : FILE_TYPE LINE")
337    def p_file_type(self, p):
338        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
339            return
340        try:
341            file_type = FileType[p[2].strip()]
342        except KeyError:
343            self.current_element["logger"].append(f"Invalid FileType: {p[2]}. Line {p.lineno(1)}")
344            return
345        self.current_element.setdefault("file_types", []).append(file_type)
346
347    @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM")
348    def p_file_checksum(self, p):
349        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
350            return
351        checksum = parse_checksum(p[2])
352        self.current_element.setdefault("checksums", []).append(checksum)
353
354    # parsing methods for package
355
356    @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line")
357    def p_pkg_attribution_text(self, p):
358        self.check_that_current_element_matches_class_for_value(Package, p.lineno(1))
359        self.current_element.setdefault("attribution_texts", []).append(p[2])
360
361    @grammar_rule(
362        "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE"
363    )
364    def p_pkg_external_refs(self, p):
365        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
366            return
367        try:
368            category, reference_type, locator = p[2].split(" ")
369        except ValueError:
370            self.current_element["logger"].append(
371                f"Couldn't split PackageExternalRef in category, reference_type and locator. Line: {p.lineno(1)}"
372            )
373            return
374        comment = None
375        if len(p) == 5:
376            comment = p[4]
377        try:
378            category = ExternalPackageRefCategory[category.replace("-", "_")]
379        except KeyError:
380            self.current_element["logger"].append(
381                f"Invalid ExternalPackageRefCategory: {category}. Line: {p.lineno(1)}"
382            )
383            return
384        try:
385            external_package_ref = construct_or_raise_parsing_error(
386                ExternalPackageRef,
387                {"category": category, "reference_type": reference_type, "locator": locator, "comment": comment},
388            )
389        except SPDXParsingError as err:
390            self.current_element["logger"].append(err.get_messages())
391            return
392        self.current_element.setdefault("external_references", []).append(external_package_ref)
393
394    @grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none")
395    def p_pkg_license_info_from_file(self, p):
396        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
397            self.current_element.setdefault("license_info_from_files", []).append(p[2])
398
399    @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM")
400    def p_pkg_checksum(self, p):
401        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
402            return
403        checksum = parse_checksum(p[2])
404        self.current_element.setdefault("checksums", []).append(checksum)
405
406    @grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE")
407    def p_pkg_verification_code(self, p):
408        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
409            return
410
411        if "verification_code" in self.current_element:
412            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
413            return
414        verif_code_regex = re.compile(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", re.UNICODE)
415        verif_code_code_grp = 1
416        verif_code_exc_files_grp = 3
417        match = verif_code_regex.match(p[2])
418        if not match:
419            self.current_element["logger"].append(
420                f"Error while parsing {p[1]}: Value did not match expected format. Line: {p.lineno(1)}"
421            )
422            return
423        value = match.group(verif_code_code_grp)
424        excluded_files = None
425        if match.group(verif_code_exc_files_grp):
426            excluded_files = match.group(verif_code_exc_files_grp).split(",")
427        self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files)
428
429    @grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE")
430    def p_pkg_files_analyzed(self, p):
431        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
432            return
433        if "files_analyzed" in self.current_element:
434            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
435            return
436        if p[2] == "true":
437            self.current_element["files_analyzed"] = True
438        elif p[2] == "false":
439            self.current_element["files_analyzed"] = False
440        else:
441            self.current_element["logger"].append(
442                f'The value of FilesAnalyzed must be either "true" or "false", but is: {p[2]}'
443            )
444
445    @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE")
446    def p_primary_package_purpose(self, p):
447        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
448            set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")])
449
450    @grammar_rule(
451        "built_date : BUILT_DATE ISO8601_DATE\n release_date : RELEASE_DATE ISO8601_DATE\n "
452        "valid_until_date : VALID_UNTIL_DATE ISO8601_DATE"
453    )
454    def p_package_dates(self, p):
455        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
456            set_value(p, self.current_element, method_to_apply=datetime_from_str)
457
458    # parsing methods for snippet
459
460    @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line")
461    def p_snippet_attribution_text(self, p):
462        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
463            self.current_element.setdefault("attribution_texts", []).append(p[2])
464
465    @grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none")
466    def p_snippet_license_info(self, p):
467        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
468            self.current_element.setdefault("license_info_in_snippet", []).append(p[2])
469
470    @grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE")
471    def p_snippet_range(self, p):
472        if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
473            return
474
475        argument_name = TAG_DATA_MODEL_FIELD[p[1]][1]
476        if argument_name in self.current_element:
477            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
478            return
479        range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE)
480        if not range_re.match(p[2].strip()):
481            self.current_element["logger"].append(
482                f"Value for {p[1]} doesn't match valid range pattern. " f"Line: {p.lineno(1)}"
483            )
484            return
485        startpoint = int(p[2].split(":")[0])
486        endpoint = int(p[2].split(":")[-1])
487        self.current_element[argument_name] = startpoint, endpoint
488
489    # parsing methods for annotation
490
491    @grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| ANNOTATOR TOOL_VALUE\n| ANNOTATOR ORGANIZATION_VALUE")
492    def p_annotator(self, p):
493        self.initialize_new_current_element(Annotation)
494        set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor)
495
496    @grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
497    def p_annotation_date(self, p):
498        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
499            set_value(p, self.current_element, method_to_apply=datetime_from_str)
500
501    @grammar_rule("annotation_type : ANNOTATION_TYPE LINE")
502    def p_annotation_type(self, p):
503        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
504            set_value(p, self.current_element, method_to_apply=lambda x: AnnotationType[x])
505
506    # parsing methods for relationship
507
508    @grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n " "| RELATIONSHIP LINE")
509    def p_relationship(self, p):
510        self.initialize_new_current_element(Relationship)
511        try:
512            spdx_element_id, relationship_type, related_spdx_element_id = p[2].split(" ")
513        except ValueError:
514            self.current_element["logger"].append(
515                f"Relationship couldn't be split in spdx_element_id, relationship_type and "
516                f"related_spdx_element. Line: {p.lineno(1)}"
517            )
518            return
519        try:
520            self.current_element["relationship_type"] = RelationshipType[relationship_type]
521        except KeyError:
522            self.current_element["logger"].append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}")
523        if related_spdx_element_id == "NONE":
524            related_spdx_element_id = SpdxNone()
525        if related_spdx_element_id == "NOASSERTION":
526            related_spdx_element_id = SpdxNoAssertion()
527        self.current_element["related_spdx_element_id"] = related_spdx_element_id
528        self.current_element["spdx_element_id"] = spdx_element_id
529        if len(p) == 5:
530            self.current_element["comment"] = p[4]
531
    def p_error(self, p):
        # Error hook of the yacc parser; deliberately does nothing. Values that do not match the grammar are
        # reported by the "<tag> error" rules (p_current_element_error, p_creation_info_value_error) instead.
        pass
534
535    def parse(self, text):
536        # entry point for the tag-value parser
537        self.yacc.parse(text, lexer=self.lex)
538        # this constructs the last remaining element; all other elements are constructed at the start of
539        # their subsequent element
540        self.construct_current_element()
541
542        # To be able to parse creation info values if they appear in between other elements, e.g. packages, we use
543        # two different dictionaries to collect the creation info and all other elements. Therefore, we have a separate
544        # logger for the creation info whose messages we need to add to the main logger to than raise all collected
545        # messages at once.
546        creation_info_logger = self.creation_info.pop("logger")
547        if creation_info_logger.has_messages():
548            self.logger.extend([f"Error while parsing CreationInfo: {creation_info_logger.get_messages()}"])
549
550        raise_parsing_error_if_logger_has_messages(self.logger)
551        creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
552        self.elements_built["creation_info"] = creation_info
553        document = construct_or_raise_parsing_error(Document, self.elements_built)
554        return document
555
556    def initialize_new_current_element(self, clazz: Any):
557        self.construct_current_element()
558        self.current_element["class"] = clazz
559
560    def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool:
561        if "class" not in self.current_element or expected_class != self.current_element["class"]:
562            self.logger.append(
563                f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to "
564                f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. "
565                f"Line: {line_number}"
566            )
567            return False
568        return True
569
570    def construct_current_element(self):
571        if "class" not in self.current_element:
572            # This happens when the first element is initialized via initialize_new_current_element() or if the first
573            # element is missing its expected starting tag. In both cases we are unable to construct an element.
574            return
575
576        clazz = self.current_element.pop("class")
577        try:
578            raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), clazz.__name__)
579            self.elements_built.setdefault(CLASS_MAPPING[clazz.__name__], []).append(
580                construct_or_raise_parsing_error(clazz, self.current_element)
581            )
582            if clazz == File:
583                self.check_for_preceding_package_and_build_contains_relationship()
584        except SPDXParsingError as err:
585            self.logger.extend(err.get_messages())
586        self.current_element = {"logger": Logger()}
587
588    def check_for_preceding_package_and_build_contains_relationship(self):
589        file_spdx_id = self.current_element["spdx_id"]
590        if "packages" not in self.elements_built:
591            return
592        # We assume that all files that are not contained in a package precede any package information. Any file
593        # information that follows any package information is assigned to the last parsed package by creating a
594        # corresponding contains relationship.
595        # (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2)
596        if not self.elements_built["packages"]:
597            self.logger.append(
598                f"Error while building contains relationship for file {file_spdx_id}, "
599                f"preceding package was not parsed successfully."
600            )
601            return
602        package_spdx_id = self.elements_built["packages"][-1].spdx_id
603        relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id)
604        if relationship not in self.elements_built.setdefault("relationships", []):
605            self.elements_built["relationships"].append(relationship)
# Class name of a parsed element -> key under which its built instances are collected.
CLASS_MAPPING = {
    "File": "files",
    "Annotation": "annotations",
    "Relationship": "relationships",
    "Snippet": "snippets",
    "Package": "packages",
    "ExtractedLicensingInfo": "extracted_licensing_info",
}
# Class name of a parsed element -> tag that is expected to open an element of that class.
ELEMENT_EXPECTED_START_TAG = {
    "File": "FileName",
    "Annotation": "Annotator",
    "Relationship": "Relationship",
    "Snippet": "SnippetSPDXID",
    "Package": "PackageName",
    "ExtractedLicensingInfo": "LicenseID",
}
class Parser:
 78class Parser:
 79    tokens: List[str]
 80    logger: Logger
 81    current_element: Dict[str, Any]
 82    creation_info: Dict[str, Any]
 83    elements_built: Dict[str, Any]
 84    lex: SPDXLexer
 85    yacc: LRParser
 86
 87    def __init__(self, **kwargs):
 88        self.tokens = SPDXLexer.tokens
 89        self.logger = Logger()
 90        self.current_element = {"logger": Logger()}
 91        self.creation_info = {"logger": Logger()}
 92        self.elements_built = dict()
 93        self.lex = SPDXLexer()
 94        self.lex.build(reflags=re.UNICODE)
 95        self.yacc = yacc.yacc(module=self, **kwargs)
 96
 97    @grammar_rule("start : start attrib ")
 98    def p_start_start_attrib(self, p):
 99        pass
100
101    @grammar_rule("start : attrib ")
102    def p_start_attrib(self, p):
103        pass
104
105    @grammar_rule(
106        "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| "
107        "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n"
108        # attributes for file
109        "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n"
110        "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n"
111        "| file_contributor\n"
112        # attributes for annotation
113        "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n"
114        # attributes for relationship
115        "| relationship\n"
116        # attributes for snippet
117        "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n"
118        "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n"
119        "| snippet_byte_range\n| snippet_line_range\n"
120        # attributes for package
121        "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n"
122        "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n"
123        "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n"
124        "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n"
125        "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n"
126        # attributes for extracted licensing info
127        "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n"
128        "| unknown_tag "
129    )
130    def p_attrib(self, p):
131        pass
132
133    # general parsing methods
134    @grammar_rule(
135        "license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n "
136        "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n "
137        "extracted_text : LICENSE_TEXT error\n "
138        "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n "
139        "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n "
140        "file_license_comment : FILE_LICENSE_COMMENT error\n "
141        "file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n "
142        "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n "
143        "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n "
144        "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n "
145        "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n "
146        "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n "
147        "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n "
148        "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n "
149        "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n "
150        "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n "
151        "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n "
152        "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n "
153        "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n "
154        "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n "
155        "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n "
156        "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n "
157        "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n "
158        "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n "
159        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n "
160        "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n "
161        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n "
162        "snippet_license_info : SNIPPET_LICENSE_INFO error\n "
163        "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n "
164        "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n "
165        "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
166        "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error"
167    )
168    def p_current_element_error(self, p):
169        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
170            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
171        self.current_element["logger"].append(
172            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
173        )
174
175    @grammar_rule(
176        "license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n "
177        "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n "
178        "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n "
179        "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
180        "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n "
181        "file_comment : FILE_COMMENT text_or_line\n "
182        "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
183        "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n "
184        "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n "
185        "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n "
186        "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n "
187        "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n "
188        "pkg_comment : PKG_COMMENT text_or_line\n "
189        "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
190        "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n "
191        "pkg_file_name : PKG_FILE_NAME LINE\n "
192        "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
193        "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n "
194        "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n "
195        "snippet_comment : SNIPPET_COMMENT text_or_line\n "
196        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
197        "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n "
198        "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n "
199        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
200        "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
201        "annotation_comment : ANNOTATION_COMMENT text_or_line"
202    )
203    def p_generic_value(self, p):
204        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
205            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
206        if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)):
207            set_value(p, self.current_element)
208
209    @grammar_rule(
210        "unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
211        "| UNKNOWN_TAG"
212    )
213    def p_unknown_tag(self, p):
214        self.logger.append(f"Unknown tag provided in line {p.lineno(1)}")
215
216    @grammar_rule("text_or_line : TEXT")
217    def p_text(self, p):
218        p[0] = str_from_text(p[1])
219
220    @grammar_rule("text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : text_or_line")
221    def p_line(self, p):
222        p[0] = p[1]
223
224    @grammar_rule(
225        "license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n"
226        "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION"
227    )
228    def p_no_assertion(self, p):
229        p[0] = SpdxNoAssertion()
230
231    @grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE")
232    def p_none(self, p):
233        p[0] = SpdxNone()
234
235    @grammar_rule("license_or_no_assertion_or_none : LINE")
236    def p_license(self, p):
237        try:
238            p[0] = get_spdx_licensing().parse(p[1])
239        except ExpressionError as err:
240            error_message = f"Error while parsing license expression: {p[1]}"
241            if err.args:
242                error_message += f": {err.args[0]}"
243            self.current_element["logger"].append(error_message)
244
245    @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE")
246    def p_actor_values(self, p):
247        p[0] = ActorParser.parse_actor(p[1])
248
249    @grammar_rule("spdx_id : SPDX_ID LINE")
250    def p_spdx_id(self, p):
251        # As all SPDX Ids share the same tag, there is no knowing which spdx_id belongs to the document.
252        # We assume that to be the first spdx_id we encounter. As the specification does not explicitly require this,
253        # our approach might lead to unwanted behavior when the document's SPDX Id is defined later in the document.
254        if "spdx_id" in self.creation_info:
255            self.current_element["spdx_id"] = p[2]
256        else:
257            self.creation_info["spdx_id"] = p[2]
258
259    # parsing methods for creation info / document level
260
261    @grammar_rule(
262        "license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n "
263        "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n "
264        "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n "
265        "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error"
266    )
267    def p_creation_info_value_error(self, p):
268        self.creation_info["logger"].append(
269            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
270        )
271
272    @grammar_rule(
273        "document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n "
274        "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n "
275        "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE"
276    )
277    def p_generic_value_creation_info(self, p):
278        set_value(p, self.creation_info)
279
280    @grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE")
281    def p_license_list_version(self, p):
282        set_value(p, self.creation_info, method_to_apply=Version.from_string)
283
284    @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
285    def p_external_document_ref(self, p):
286        external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")
287        external_doc_ref_match = external_doc_ref_regex.match(p[2])
288        if not external_doc_ref_match:
289            self.creation_info["logger"].append(
290                f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}"
291            )
292            return
293        try:
294            document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ")
295        except ValueError:
296            self.creation_info["logger"].append(
297                f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
298                f"document_ref_id and document_uri. Line: {p.lineno(1)}"
299            )
300            return
301        checksum = parse_checksum(external_doc_ref_match.group(2).strip())
302        external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
303        self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)
304
305    @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE")
306    def p_creator(self, p):
307        self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2]))
308
309    @grammar_rule("created : CREATED ISO8601_DATE")
310    def p_created(self, p):
311        set_value(p, self.creation_info, method_to_apply=datetime_from_str)
312
313    # parsing methods for extracted licensing info
314
315    @grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE")
316    def p_extracted_cross_reference(self, p):
317        if self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)):
318            self.current_element.setdefault("cross_references", []).append(p[2])
319
320    # parsing methods for file
321
322    @grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE")
323    def p_file_contributor(self, p):
324        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
325            self.current_element.setdefault("contributors", []).append(p[2])
326
327    @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line")
328    def p_file_attribution_text(self, p):
329        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
330            self.current_element.setdefault("attribution_texts", []).append(p[2])
331
332    @grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none")
333    def p_file_license_info(self, p):
334        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
335            self.current_element.setdefault("license_info_in_file", []).append(p[2])
336
337    @grammar_rule("file_type : FILE_TYPE LINE")
338    def p_file_type(self, p):
339        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
340            return
341        try:
342            file_type = FileType[p[2].strip()]
343        except KeyError:
344            self.current_element["logger"].append(f"Invalid FileType: {p[2]}. Line {p.lineno(1)}")
345            return
346        self.current_element.setdefault("file_types", []).append(file_type)
347
348    @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM")
349    def p_file_checksum(self, p):
350        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
351            return
352        checksum = parse_checksum(p[2])
353        self.current_element.setdefault("checksums", []).append(checksum)
354
355    # parsing methods for package
356
357    @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line")
358    def p_pkg_attribution_text(self, p):
359        self.check_that_current_element_matches_class_for_value(Package, p.lineno(1))
360        self.current_element.setdefault("attribution_texts", []).append(p[2])
361
362    @grammar_rule(
363        "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE"
364    )
365    def p_pkg_external_refs(self, p):
366        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
367            return
368        try:
369            category, reference_type, locator = p[2].split(" ")
370        except ValueError:
371            self.current_element["logger"].append(
372                f"Couldn't split PackageExternalRef in category, reference_type and locator. Line: {p.lineno(1)}"
373            )
374            return
375        comment = None
376        if len(p) == 5:
377            comment = p[4]
378        try:
379            category = ExternalPackageRefCategory[category.replace("-", "_")]
380        except KeyError:
381            self.current_element["logger"].append(
382                f"Invalid ExternalPackageRefCategory: {category}. Line: {p.lineno(1)}"
383            )
384            return
385        try:
386            external_package_ref = construct_or_raise_parsing_error(
387                ExternalPackageRef,
388                {"category": category, "reference_type": reference_type, "locator": locator, "comment": comment},
389            )
390        except SPDXParsingError as err:
391            self.current_element["logger"].append(err.get_messages())
392            return
393        self.current_element.setdefault("external_references", []).append(external_package_ref)
394
395    @grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none")
396    def p_pkg_license_info_from_file(self, p):
397        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
398            self.current_element.setdefault("license_info_from_files", []).append(p[2])
399
400    @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM")
401    def p_pkg_checksum(self, p):
402        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
403            return
404        checksum = parse_checksum(p[2])
405        self.current_element.setdefault("checksums", []).append(checksum)
406
407    @grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE")
408    def p_pkg_verification_code(self, p):
409        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
410            return
411
412        if "verification_code" in self.current_element:
413            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
414            return
415        verif_code_regex = re.compile(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", re.UNICODE)
416        verif_code_code_grp = 1
417        verif_code_exc_files_grp = 3
418        match = verif_code_regex.match(p[2])
419        if not match:
420            self.current_element["logger"].append(
421                f"Error while parsing {p[1]}: Value did not match expected format. Line: {p.lineno(1)}"
422            )
423            return
424        value = match.group(verif_code_code_grp)
425        excluded_files = None
426        if match.group(verif_code_exc_files_grp):
427            excluded_files = match.group(verif_code_exc_files_grp).split(",")
428        self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files)
429
430    @grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE")
431    def p_pkg_files_analyzed(self, p):
432        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
433            return
434        if "files_analyzed" in self.current_element:
435            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
436            return
437        if p[2] == "true":
438            self.current_element["files_analyzed"] = True
439        elif p[2] == "false":
440            self.current_element["files_analyzed"] = False
441        else:
442            self.current_element["logger"].append(
443                f'The value of FilesAnalyzed must be either "true" or "false", but is: {p[2]}'
444            )
445
446    @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE")
447    def p_primary_package_purpose(self, p):
448        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
449            set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")])
450
451    @grammar_rule(
452        "built_date : BUILT_DATE ISO8601_DATE\n release_date : RELEASE_DATE ISO8601_DATE\n "
453        "valid_until_date : VALID_UNTIL_DATE ISO8601_DATE"
454    )
455    def p_package_dates(self, p):
456        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
457            set_value(p, self.current_element, method_to_apply=datetime_from_str)
458
459    # parsing methods for snippet
460
461    @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line")
462    def p_snippet_attribution_text(self, p):
463        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
464            self.current_element.setdefault("attribution_texts", []).append(p[2])
465
466    @grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none")
467    def p_snippet_license_info(self, p):
468        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
469            self.current_element.setdefault("license_info_in_snippet", []).append(p[2])
470
471    @grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE")
472    def p_snippet_range(self, p):
473        if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
474            return
475
476        argument_name = TAG_DATA_MODEL_FIELD[p[1]][1]
477        if argument_name in self.current_element:
478            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
479            return
480        range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE)
481        if not range_re.match(p[2].strip()):
482            self.current_element["logger"].append(
483                f"Value for {p[1]} doesn't match valid range pattern. " f"Line: {p.lineno(1)}"
484            )
485            return
486        startpoint = int(p[2].split(":")[0])
487        endpoint = int(p[2].split(":")[-1])
488        self.current_element[argument_name] = startpoint, endpoint
489
490    # parsing methods for annotation
491
492    @grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| ANNOTATOR TOOL_VALUE\n| ANNOTATOR ORGANIZATION_VALUE")
493    def p_annotator(self, p):
494        self.initialize_new_current_element(Annotation)
495        set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor)
496
497    @grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
498    def p_annotation_date(self, p):
499        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
500            set_value(p, self.current_element, method_to_apply=datetime_from_str)
501
502    @grammar_rule("annotation_type : ANNOTATION_TYPE LINE")
503    def p_annotation_type(self, p):
504        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
505            set_value(p, self.current_element, method_to_apply=lambda x: AnnotationType[x])
506
507    # parsing methods for relationship
508
509    @grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n " "| RELATIONSHIP LINE")
510    def p_relationship(self, p):
511        self.initialize_new_current_element(Relationship)
512        try:
513            spdx_element_id, relationship_type, related_spdx_element_id = p[2].split(" ")
514        except ValueError:
515            self.current_element["logger"].append(
516                f"Relationship couldn't be split in spdx_element_id, relationship_type and "
517                f"related_spdx_element. Line: {p.lineno(1)}"
518            )
519            return
520        try:
521            self.current_element["relationship_type"] = RelationshipType[relationship_type]
522        except KeyError:
523            self.current_element["logger"].append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}")
524        if related_spdx_element_id == "NONE":
525            related_spdx_element_id = SpdxNone()
526        if related_spdx_element_id == "NOASSERTION":
527            related_spdx_element_id = SpdxNoAssertion()
528        self.current_element["related_spdx_element_id"] = related_spdx_element_id
529        self.current_element["spdx_element_id"] = spdx_element_id
530        if len(p) == 5:
531            self.current_element["comment"] = p[4]
532
533    def p_error(self, p):
534        pass
535
536    def parse(self, text):
537        # entry point for the tag-value parser
538        self.yacc.parse(text, lexer=self.lex)
539        # this constructs the last remaining element; all other elements are constructed at the start of
540        # their subsequent element
541        self.construct_current_element()
542
543        # To be able to parse creation info values if they appear in between other elements, e.g. packages, we use
544        # two different dictionaries to collect the creation info and all other elements. Therefore, we have a separate
545        # logger for the creation info whose messages we need to add to the main logger to than raise all collected
546        # messages at once.
547        creation_info_logger = self.creation_info.pop("logger")
548        if creation_info_logger.has_messages():
549            self.logger.extend([f"Error while parsing CreationInfo: {creation_info_logger.get_messages()}"])
550
551        raise_parsing_error_if_logger_has_messages(self.logger)
552        creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
553        self.elements_built["creation_info"] = creation_info
554        document = construct_or_raise_parsing_error(Document, self.elements_built)
555        return document
556
557    def initialize_new_current_element(self, clazz: Any):
558        self.construct_current_element()
559        self.current_element["class"] = clazz
560
561    def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool:
562        if "class" not in self.current_element or expected_class != self.current_element["class"]:
563            self.logger.append(
564                f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to "
565                f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. "
566                f"Line: {line_number}"
567            )
568            return False
569        return True
570
571    def construct_current_element(self):
572        if "class" not in self.current_element:
573            # This happens when the first element is initialized via initialize_new_current_element() or if the first
574            # element is missing its expected starting tag. In both cases we are unable to construct an element.
575            return
576
577        clazz = self.current_element.pop("class")
578        try:
579            raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), clazz.__name__)
580            self.elements_built.setdefault(CLASS_MAPPING[clazz.__name__], []).append(
581                construct_or_raise_parsing_error(clazz, self.current_element)
582            )
583            if clazz == File:
584                self.check_for_preceding_package_and_build_contains_relationship()
585        except SPDXParsingError as err:
586            self.logger.extend(err.get_messages())
587        self.current_element = {"logger": Logger()}
588
589    def check_for_preceding_package_and_build_contains_relationship(self):
590        file_spdx_id = self.current_element["spdx_id"]
591        if "packages" not in self.elements_built:
592            return
593        # We assume that all files that are not contained in a package precede any package information. Any file
594        # information that follows any package information is assigned to the last parsed package by creating a
595        # corresponding contains relationship.
596        # (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2)
597        if not self.elements_built["packages"]:
598            self.logger.append(
599                f"Error while building contains relationship for file {file_spdx_id}, "
600                f"preceding package was not parsed successfully."
601            )
602            return
603        package_spdx_id = self.elements_built["packages"][-1].spdx_id
604        relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id)
605        if relationship not in self.elements_built.setdefault("relationships", []):
606            self.elements_built["relationships"].append(relationship)
Parser(**kwargs)
87    def __init__(self, **kwargs):
88        self.tokens = SPDXLexer.tokens
89        self.logger = Logger()
90        self.current_element = {"logger": Logger()}
91        self.creation_info = {"logger": Logger()}
92        self.elements_built = dict()
93        self.lex = SPDXLexer()
94        self.lex.build(reflags=re.UNICODE)
95        self.yacc = yacc.yacc(module=self, **kwargs)
tokens: list[str]
current_element: dict[str, typing.Any]
creation_info: dict[str, typing.Any]
elements_built: dict[str, typing.Any]
yacc: ply.yacc.LRParser
@grammar_rule('start : start attrib ')
def p_start_start_attrib(self, p):
97    @grammar_rule("start : start attrib ")
98    def p_start_start_attrib(self, p):
99        pass

start : start attrib

@grammar_rule('start : attrib ')
def p_start_attrib(self, p):
101    @grammar_rule("start : attrib ")
102    def p_start_attrib(self, p):
103        pass

start : attrib

@grammar_rule('attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n| file_contributor\n| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n| relationship\n| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n| snippet_byte_range\n| snippet_line_range\n| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n| unknown_tag ')
def p_attrib(self, p):
    # `attrib` is the union of every tag-specific non-terminal. The grammar text
    # is assembled from adjacent string literals, grouped by the kind of element
    # the tags belong to; the interleaved comments are not part of the grammar.
    @grammar_rule(
        "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| "
        "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n"
        # attributes for file
        "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n"
        "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n"
        "| file_contributor\n"
        # attributes for annotation
        "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n"
        # attributes for relationship
        "| relationship\n"
        # attributes for snippet
        "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n"
        "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n"
        "| snippet_byte_range\n| snippet_line_range\n"
        # attributes for package
        "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n"
        "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n"
        "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n"
        "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n"
        "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n"
        # attributes for extracted licensing info
        "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n"
        "| unknown_tag "
    )
    def p_attrib(self, p):
        # No semantic action here: the alternatives are reduced by their own
        # rule functions before this production is applied.
        pass

attrib : spdx_version | spdx_id | data_license | doc_name | document_comment | document_namespace | creator | created | creator_comment | license_list_version | ext_doc_ref | file_name | file_type | file_checksum | file_license_concluded | file_license_info | file_copyright_text | file_license_comment | file_attribution_text | file_notice | file_comment | file_contributor | annotator | annotation_date | annotation_comment | annotation_type | annotation_spdx_id | relationship | snippet_spdx_id | snippet_name | snippet_comment | snippet_attribution_text | snippet_copyright_text | snippet_license_comment | file_spdx_id | snippet_license_concluded | snippet_license_info | snippet_byte_range | snippet_line_range | package_name | package_version | download_location | files_analyzed | homepage | summary | source_info | pkg_file_name | supplier | originator | pkg_checksum | verification_code | description | pkg_comment | pkg_attribution_text | pkg_license_declared | pkg_license_concluded | pkg_license_info | pkg_license_comment | pkg_copyright_text | pkg_external_ref | primary_package_purpose | built_date | release_date | valid_until_date | license_id | extracted_text | license_name | license_cross_ref | lic_comment | unknown_tag

@grammar_rule('license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n extracted_text : LICENSE_TEXT error\n file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n file_license_comment : FILE_LICENSE_COMMENT error\n file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n snippet_license_comment : 
SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n snippet_license_info : SNIPPET_LICENSE_INFO error\n snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error')
def p_current_element_error(self, p):
134    @grammar_rule(
135        "license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n "
136        "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n "
137        "extracted_text : LICENSE_TEXT error\n "
138        "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n "
139        "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n "
140        "file_license_comment : FILE_LICENSE_COMMENT error\n "
141        "file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n "
142        "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n "
143        "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n "
144        "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n "
145        "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n "
146        "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n "
147        "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n "
148        "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n "
149        "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n "
150        "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n "
151        "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n "
152        "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n "
153        "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n "
154        "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n "
155        "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n "
156        "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n "
157        "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n "
158        "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n "
159        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n "
160        "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n "
161        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n "
162        "snippet_license_info : SNIPPET_LICENSE_INFO error\n "
163        "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n "
164        "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n "
165        "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
166        "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error"
167    )
168    def p_current_element_error(self, p):
169        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
170            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
171        self.current_element["logger"].append(
172            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
173        )

license_id : LICENSE_ID error license_cross_ref : LICENSE_CROSS_REF error lic_comment : LICENSE_COMMENT error license_name : LICENSE_NAME error extracted_text : LICENSE_TEXT error file_name : FILE_NAME error file_contributor : FILE_CONTRIBUTOR error file_notice : FILE_NOTICE error file_copyright_text : FILE_COPYRIGHT_TEXT error file_license_comment : FILE_LICENSE_COMMENT error file_license_info : FILE_LICENSE_INFO error file_comment : FILE_COMMENT error file_checksum : FILE_CHECKSUM error file_license_concluded : FILE_LICENSE_CONCLUDED error file_type : FILE_TYPE error file_attribution_text : FILE_ATTRIBUTION_TEXT error package_name : PKG_NAME error pkg_attribution_text : PKG_ATTRIBUTION_TEXT error description : PKG_DESCRIPTION error pkg_comment : PKG_COMMENT error summary : PKG_SUMMARY error pkg_copyright_text : PKG_COPYRIGHT_TEXT error pkg_external_ref : PKG_EXTERNAL_REF error pkg_license_comment : PKG_LICENSE_COMMENT error pkg_license_declared : PKG_LICENSE_DECLARED error pkg_license_info : PKG_LICENSE_INFO error pkg_license_concluded : PKG_LICENSE_CONCLUDED error source_info : PKG_SOURCE_INFO error homepage : PKG_HOMEPAGE error pkg_checksum : PKG_CHECKSUM error verification_code : PKG_VERIFICATION_CODE error originator : PKG_ORIGINATOR error download_location : PKG_DOWNLOAD_LOCATION error files_analyzed : PKG_FILES_ANALYZED error supplier : PKG_SUPPLIER error pkg_file_name : PKG_FILE_NAME error package_version : PKG_VERSION error primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error built_date : BUILT_DATE error release_date : RELEASE_DATE error valid_until_date : VALID_UNTIL_DATE error snippet_spdx_id : SNIPPET_SPDX_ID error snippet_name : SNIPPET_NAME error snippet_comment : SNIPPET_COMMENT error snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error snippet_license_comment : SNIPPET_LICENSE_COMMENT error file_spdx_id : SNIPPET_FILE_SPDXID error snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED 
error snippet_license_info : SNIPPET_LICENSE_INFO error snippet_byte_range : SNIPPET_BYTE_RANGE error snippet_line_range : SNIPPET_LINE_RANGE error annotator : ANNOTATOR error annotation_date : ANNOTATION_DATE error annotation_comment : ANNOTATION_COMMENT error annotation_type : ANNOTATION_TYPE error annotation_spdx_id : ANNOTATION_SPDX_ID error relationship : RELATIONSHIP error

@grammar_rule('license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n file_license_comment : FILE_LICENSE_COMMENT text_or_line\n file_comment : FILE_COMMENT text_or_line\n file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n pkg_comment : PKG_COMMENT text_or_line\n pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n pkg_file_name : PKG_FILE_NAME LINE\n pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n snippet_comment : SNIPPET_COMMENT text_or_line\n snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n file_spdx_id : SNIPPET_FILE_SPDXID LINE\n snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n annotation_comment : ANNOTATION_COMMENT text_or_line')
def p_generic_value(self, p):
175    @grammar_rule(
176        "license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n "
177        "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n "
178        "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n "
179        "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
180        "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n "
181        "file_comment : FILE_COMMENT text_or_line\n "
182        "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
183        "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n "
184        "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n "
185        "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n "
186        "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n "
187        "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n "
188        "pkg_comment : PKG_COMMENT text_or_line\n "
189        "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
190        "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n "
191        "pkg_file_name : PKG_FILE_NAME LINE\n "
192        "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
193        "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n "
194        "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n "
195        "snippet_comment : SNIPPET_COMMENT text_or_line\n "
196        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
197        "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n "
198        "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n "
199        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
200        "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
201        "annotation_comment : ANNOTATION_COMMENT text_or_line"
202    )
203    def p_generic_value(self, p):
204        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
205            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
206        if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)):
207            set_value(p, self.current_element)

license_name : LICENSE_NAME line_or_no_assertion extracted_text : LICENSE_TEXT text_or_line lic_comment : LICENSE_COMMENT text_or_line license_id : LICENSE_ID LINE file_name : FILE_NAME LINE file_notice : FILE_NOTICE text_or_line file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none file_license_comment : FILE_LICENSE_COMMENT text_or_line file_comment : FILE_COMMENT text_or_line file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none package_name : PKG_NAME LINE description : PKG_DESCRIPTION text_or_line summary : PKG_SUMMARY text_or_line source_info : PKG_SOURCE_INFO text_or_line homepage : PKG_HOMEPAGE line_or_no_assertion_or_none download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none originator : PKG_ORIGINATOR actor_or_no_assertion supplier : PKG_SUPPLIER actor_or_no_assertion pkg_comment : PKG_COMMENT text_or_line pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none pkg_file_name : PKG_FILE_NAME LINE pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none package_version : PKG_VERSION LINE pkg_license_comment : PKG_LICENSE_COMMENT text_or_line snippet_spdx_id : SNIPPET_SPDX_ID LINE snippet_name : SNIPPET_NAME LINE snippet_comment : SNIPPET_COMMENT text_or_line snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line file_spdx_id : SNIPPET_FILE_SPDXID LINE snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none annotation_spdx_id : ANNOTATION_SPDX_ID LINE annotation_comment : ANNOTATION_COMMENT text_or_line

@grammar_rule('unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n| UNKNOWN_TAG')
def p_unknown_tag(self, p):
209    @grammar_rule(
210        "unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
211        "| UNKNOWN_TAG"
212    )
213    def p_unknown_tag(self, p):
214        self.logger.append(f"Unknown tag provided in line {p.lineno(1)}")

unknown_tag : UNKNOWN_TAG text_or_line | UNKNOWN_TAG ISO8601_DATE | UNKNOWN_TAG PERSON_VALUE | UNKNOWN_TAG

@grammar_rule('text_or_line : TEXT')
def p_text(self, p):
216    @grammar_rule("text_or_line : TEXT")
217    def p_text(self, p):
218        p[0] = str_from_text(p[1])

text_or_line : TEXT

@grammar_rule('text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : text_or_line')
def p_line(self, p):
220    @grammar_rule("text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : text_or_line")
221    def p_line(self, p):
222        p[0] = p[1]

text_or_line : LINE line_or_no_assertion : LINE line_or_no_assertion_or_none : text_or_line

@grammar_rule('license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\nline_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION')
def p_no_assertion(self, p):
224    @grammar_rule(
225        "license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n"
226        "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION"
227    )
228    def p_no_assertion(self, p):
229        p[0] = SpdxNoAssertion()

license_or_no_assertion_or_none : NO_ASSERTION actor_or_no_assertion : NO_ASSERTION line_or_no_assertion : NO_ASSERTION line_or_no_assertion_or_none : NO_ASSERTION

@grammar_rule('license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE')
def p_none(self, p):
231    @grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE")
232    def p_none(self, p):
233        p[0] = SpdxNone()

license_or_no_assertion_or_none : NONE line_or_no_assertion_or_none : NONE

@grammar_rule('license_or_no_assertion_or_none : LINE')
def p_license(self, p):
235    @grammar_rule("license_or_no_assertion_or_none : LINE")
236    def p_license(self, p):
237        try:
238            p[0] = get_spdx_licensing().parse(p[1])
239        except ExpressionError as err:
240            error_message = f"Error while parsing license expression: {p[1]}"
241            if err.args:
242                error_message += f": {err.args[0]}"
243            self.current_element["logger"].append(error_message)

license_or_no_assertion_or_none : LINE

@grammar_rule('actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE')
def p_actor_values(self, p):
245    @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE")
246    def p_actor_values(self, p):
247        p[0] = ActorParser.parse_actor(p[1])

actor_or_no_assertion : PERSON_VALUE | ORGANIZATION_VALUE

@grammar_rule('spdx_id : SPDX_ID LINE')
def p_spdx_id(self, p):
249    @grammar_rule("spdx_id : SPDX_ID LINE")
250    def p_spdx_id(self, p):
251        # As all SPDX Ids share the same tag, there is no knowing which spdx_id belongs to the document.
252        # We assume that to be the first spdx_id we encounter. As the specification does not explicitly require this,
253        # our approach might lead to unwanted behavior when the document's SPDX Id is defined later in the document.
254        if "spdx_id" in self.creation_info:
255            self.current_element["spdx_id"] = p[2]
256        else:
257            self.creation_info["spdx_id"] = p[2]

spdx_id : SPDX_ID LINE

@grammar_rule('license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error')
def p_creation_info_value_error(self, p):
261    @grammar_rule(
262        "license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n "
263        "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n "
264        "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n "
265        "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error"
266    )
267    def p_creation_info_value_error(self, p):
268        self.creation_info["logger"].append(
269            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
270        )

license_list_version : LICENSE_LIST_VERSION error document_comment : DOC_COMMENT error document_namespace : DOC_NAMESPACE error data_license : DOC_LICENSE error doc_name : DOC_NAME error ext_doc_ref : EXT_DOC_REF error spdx_version : DOC_VERSION error creator_comment : CREATOR_COMMENT error creator : CREATOR error created : CREATED error

@grammar_rule('document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE')
def p_generic_value_creation_info(self, p):
272    @grammar_rule(
273        "document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n "
274        "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n "
275        "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE"
276    )
277    def p_generic_value_creation_info(self, p):
278        set_value(p, self.creation_info)

document_comment : DOC_COMMENT text_or_line document_namespace : DOC_NAMESPACE LINE data_license : DOC_LICENSE LINE spdx_version : DOC_VERSION LINE creator_comment : CREATOR_COMMENT text_or_line doc_name : DOC_NAME LINE

@grammar_rule('license_list_version : LICENSE_LIST_VERSION LINE')
def p_license_list_version(self, p):
280    @grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE")
281    def p_license_list_version(self, p):
282        set_value(p, self.creation_info, method_to_apply=Version.from_string)

license_list_version : LICENSE_LIST_VERSION LINE

@grammar_rule('ext_doc_ref : EXT_DOC_REF LINE')
def p_external_document_ref(self, p):
284    @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
285    def p_external_document_ref(self, p):
286        external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")
287        external_doc_ref_match = external_doc_ref_regex.match(p[2])
288        if not external_doc_ref_match:
289            self.creation_info["logger"].append(
290                f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}"
291            )
292            return
293        try:
294            document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ")
295        except ValueError:
296            self.creation_info["logger"].append(
297                f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
298                f"document_ref_id and document_uri. Line: {p.lineno(1)}"
299            )
300            return
301        checksum = parse_checksum(external_doc_ref_match.group(2).strip())
302        external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
303        self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)

ext_doc_ref : EXT_DOC_REF LINE

@grammar_rule('creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE')
def p_creator(self, p):
305    @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE")
306    def p_creator(self, p):
307        self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2]))

creator : CREATOR PERSON_VALUE | CREATOR TOOL_VALUE | CREATOR ORGANIZATION_VALUE

@grammar_rule('created : CREATED ISO8601_DATE')
def p_created(self, p):
309    @grammar_rule("created : CREATED ISO8601_DATE")
310    def p_created(self, p):
311        set_value(p, self.creation_info, method_to_apply=datetime_from_str)

created : CREATED ISO8601_DATE

@grammar_rule('license_cross_ref : LICENSE_CROSS_REF LINE')
def p_extracted_cross_reference(self, p):
315    @grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE")
316    def p_extracted_cross_reference(self, p):
317        if self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)):
318            self.current_element.setdefault("cross_references", []).append(p[2])

license_cross_ref : LICENSE_CROSS_REF LINE

@grammar_rule('file_contributor : FILE_CONTRIBUTOR LINE')
def p_file_contributor(self, p):
322    @grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE")
323    def p_file_contributor(self, p):
324        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
325            self.current_element.setdefault("contributors", []).append(p[2])

file_contributor : FILE_CONTRIBUTOR LINE

@grammar_rule('file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line')
def p_file_attribution_text(self, p):
327    @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line")
328    def p_file_attribution_text(self, p):
329        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
330            self.current_element.setdefault("attribution_texts", []).append(p[2])

file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line

@grammar_rule('file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none')
def p_file_license_info(self, p):
332    @grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none")
333    def p_file_license_info(self, p):
334        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
335            self.current_element.setdefault("license_info_in_file", []).append(p[2])

file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none

@grammar_rule('file_type : FILE_TYPE LINE')
def p_file_type(self, p):
337    @grammar_rule("file_type : FILE_TYPE LINE")
338    def p_file_type(self, p):
339        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
340            return
341        try:
342            file_type = FileType[p[2].strip()]
343        except KeyError:
344            self.current_element["logger"].append(f"Invalid FileType: {p[2]}. Line {p.lineno(1)}")
345            return
346        self.current_element.setdefault("file_types", []).append(file_type)

file_type : FILE_TYPE LINE

@grammar_rule('file_checksum : FILE_CHECKSUM CHECKSUM')
def p_file_checksum(self, p):
348    @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM")
349    def p_file_checksum(self, p):
350        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
351            return
352        checksum = parse_checksum(p[2])
353        self.current_element.setdefault("checksums", []).append(checksum)

file_checksum : FILE_CHECKSUM CHECKSUM

@grammar_rule('pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line')
def p_pkg_attribution_text(self, p):
357    @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line")
358    def p_pkg_attribution_text(self, p):
359        self.check_that_current_element_matches_class_for_value(Package, p.lineno(1))
360        self.current_element.setdefault("attribution_texts", []).append(p[2])

pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line

@grammar_rule('pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE')
def p_pkg_external_refs(self, p):
362    @grammar_rule(
363        "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE"
364    )
365    def p_pkg_external_refs(self, p):
366        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
367            return
368        try:
369            category, reference_type, locator = p[2].split(" ")
370        except ValueError:
371            self.current_element["logger"].append(
372                f"Couldn't split PackageExternalRef in category, reference_type and locator. Line: {p.lineno(1)}"
373            )
374            return
375        comment = None
376        if len(p) == 5:
377            comment = p[4]
378        try:
379            category = ExternalPackageRefCategory[category.replace("-", "_")]
380        except KeyError:
381            self.current_element["logger"].append(
382                f"Invalid ExternalPackageRefCategory: {category}. Line: {p.lineno(1)}"
383            )
384            return
385        try:
386            external_package_ref = construct_or_raise_parsing_error(
387                ExternalPackageRef,
388                {"category": category, "reference_type": reference_type, "locator": locator, "comment": comment},
389            )
390        except SPDXParsingError as err:
391            self.current_element["logger"].append(err.get_messages())
392            return
393        self.current_element.setdefault("external_references", []).append(external_package_ref)

pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line | PKG_EXTERNAL_REF LINE

@grammar_rule('pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none')
def p_pkg_license_info_from_file(self, p):
395    @grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none")
396    def p_pkg_license_info_from_file(self, p):
397        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
398            self.current_element.setdefault("license_info_from_files", []).append(p[2])

pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none

@grammar_rule('pkg_checksum : PKG_CHECKSUM CHECKSUM')
def p_pkg_checksum(self, p):
400    @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM")
401    def p_pkg_checksum(self, p):
402        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
403            return
404        checksum = parse_checksum(p[2])
405        self.current_element.setdefault("checksums", []).append(checksum)

pkg_checksum : PKG_CHECKSUM CHECKSUM

@grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE")
def p_pkg_verification_code(self, p):
    # Parses a value of the form "<40 lowercase hex chars> (excludes: <file>[,<file>...])" into a
    # PackageVerificationCode. Only one verification code is accepted per package.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return

    element = self.current_element
    if "verification_code" in element:
        element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
        return

    # Group 1 is the code itself, group 2 the whole optional "(excludes: ...)" part and
    # group 3 the text listed inside it.
    pattern = re.compile(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", re.UNICODE)
    parsed = pattern.match(p[2])
    if parsed is None:
        element["logger"].append(
            f"Error while parsing {p[1]}: Value did not match expected format. Line: {p.lineno(1)}"
        )
        return

    code, _, excluded = parsed.groups()
    # Without an exclusion part the excluded files stay None instead of becoming an empty list.
    excluded_files = excluded.split(",") if excluded else None
    element["verification_code"] = PackageVerificationCode(code, excluded_files)

verification_code : PKG_VERIFICATION_CODE LINE

@grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE")
def p_pkg_files_analyzed(self, p):
    # Stores the FilesAnalyzed flag of the current package as a bool. Only the exact strings
    # "true" and "false" are accepted; anything else is reported on the element's logger.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return

    element = self.current_element
    if "files_analyzed" in element:
        element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
        return

    raw_value = p[2]
    if raw_value in ("true", "false"):
        element["files_analyzed"] = raw_value == "true"
        return
    element["logger"].append(
        f'The value of FilesAnalyzed must be either "true" or "false", but is: {raw_value}'
    )

files_analyzed : PKG_FILES_ANALYZED LINE

@grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE")
def p_primary_package_purpose(self, p):
    def to_package_purpose(raw_value):
        # The enum member is looked up by name after dashes were replaced with underscores.
        return PackagePurpose[raw_value.replace("-", "_")]

    # The conversion is handed to set_value(), which stores the result on the current package.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    set_value(p, self.current_element, method_to_apply=to_package_purpose)

primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE

@grammar_rule(
    "built_date : BUILT_DATE ISO8601_DATE\n release_date : RELEASE_DATE ISO8601_DATE\n "
    "valid_until_date : VALID_UNTIL_DATE ISO8601_DATE"
)
def p_package_dates(self, p):
    # One handler for the three package date tags; the date string is converted with
    # datetime_from_str by set_value().
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    set_value(p, self.current_element, method_to_apply=datetime_from_str)

built_date : BUILT_DATE ISO8601_DATE release_date : RELEASE_DATE ISO8601_DATE valid_until_date : VALID_UNTIL_DATE ISO8601_DATE

@grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line")
def p_snippet_attribution_text(self, p):
    # Appends one attribution text to the "attribution_texts" list of the current snippet.
    if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
        return
    attribution_texts = self.current_element.setdefault("attribution_texts", [])
    attribution_texts.append(p[2])

snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line

@grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none")
def p_snippet_license_info(self, p):
    # Appends one license value to the "license_info_in_snippet" list of the current snippet.
    if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
        return
    license_infos = self.current_element.setdefault("license_info_in_snippet", [])
    license_infos.append(p[2])

snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none

@grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE")
def p_snippet_range(self, p):
    # Shared handler for the byte range and the line range of a snippet. The value has to look
    # like "<start>:<end>" and is stored as a tuple of two ints.
    if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
        return

    element = self.current_element
    # The second entry of the TAG_DATA_MODEL_FIELD record for this tag is the key under which
    # the range is stored on the current element.
    field_name = TAG_DATA_MODEL_FIELD[p[1]][1]
    if field_name in element:
        element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
        return

    range_match = re.compile(r"^(\d+):(\d+)$", re.UNICODE).match(p[2].strip())
    if range_match is None:
        element["logger"].append(f"Value for {p[1]} doesn't match valid range pattern. Line: {p.lineno(1)}")
        return

    # The two captured groups are exactly the digits before and after the single colon.
    start, end = (int(bound) for bound in range_match.groups())
    element[field_name] = (start, end)

snippet_byte_range : SNIPPET_BYTE_RANGE LINE snippet_line_range : SNIPPET_LINE_RANGE LINE

@grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| ANNOTATOR TOOL_VALUE\n| ANNOTATOR ORGANIZATION_VALUE")
def p_annotator(self, p):
    # The Annotator tag opens a new Annotation element. The element collected so far is
    # constructed first, so self.current_element must only be read after this call.
    self.initialize_new_current_element(Annotation)
    annotation = self.current_element
    set_value(p, annotation, method_to_apply=ActorParser.parse_actor)

annotator : ANNOTATOR PERSON_VALUE | ANNOTATOR TOOL_VALUE | ANNOTATOR ORGANIZATION_VALUE

@grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
def p_annotation_date(self, p):
    # Stores the annotation date on the current annotation, converted with datetime_from_str.
    if not self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
        return
    set_value(p, self.current_element, method_to_apply=datetime_from_str)

annotation_date : ANNOTATION_DATE ISO8601_DATE

@grammar_rule("annotation_type : ANNOTATION_TYPE LINE")
def p_annotation_type(self, p):
    def to_annotation_type(name):
        # The value is used unchanged as the name of the AnnotationType member.
        return AnnotationType[name]

    if not self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
        return
    set_value(p, self.current_element, method_to_apply=to_annotation_type)

annotation_type : ANNOTATION_TYPE LINE

@grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n | RELATIONSHIP LINE")
def p_relationship(self, p):
    # Starts a new Relationship element from a value of the form
    # "<spdx_element_id> <relationship_type> <related_spdx_element_id>", optionally followed by a
    # relationship comment (p[4]). Problems are reported on the element's logger.
    self.initialize_new_current_element(Relationship)
    element = self.current_element
    try:
        # split() without a separator splits on any run of whitespace, so tabs or repeated blanks
        # between the three fields no longer make an otherwise valid relationship fail.
        spdx_element_id, relationship_type, related_spdx_element_id = p[2].split()
    except ValueError:
        element["logger"].append(
            f"Relationship couldn't be split in spdx_element_id, relationship_type and "
            f"related_spdx_element. Line: {p.lineno(1)}"
        )
        return
    try:
        element["relationship_type"] = RelationshipType[relationship_type]
    except KeyError:
        # No early return: the remaining fields are still recorded on the element.
        element["logger"].append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}")
    # The two special values are replaced by their model objects; any other id stays a string.
    if related_spdx_element_id == "NONE":
        related_spdx_element_id = SpdxNone()
    elif related_spdx_element_id == "NOASSERTION":
        related_spdx_element_id = SpdxNoAssertion()
    element["related_spdx_element_id"] = related_spdx_element_id
    element["spdx_element_id"] = spdx_element_id
    if len(p) == 5:
        # Only the long form of the grammar rule carries a comment.
        element["comment"] = p[4]

relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line | RELATIONSHIP LINE

def p_error(self, p):
    # Intentionally does nothing with the offending token p.
    return None
def parse(self, text):
    """Parse tag-value ``text`` and return the object constructed for ``Document``.

    All messages collected while parsing are passed to raise_parsing_error_if_logger_has_messages
    before the creation info and the document are constructed.
    """
    # Entry point for the tag-value parser.
    self.yacc.parse(text, lexer=self.lex)
    # Every element is constructed when its successor starts, so the last one is still pending here.
    self.construct_current_element()

    # Creation info values may appear in between other elements, e.g. packages. They are therefore collected
    # in a dictionary of their own that has a separate logger. Its messages are added to the main logger so
    # that all collected messages can then be raised at once.
    creation_info_messages = self.creation_info.pop("logger")
    if creation_info_messages.has_messages():
        self.logger.extend([f"Error while parsing CreationInfo: {creation_info_messages.get_messages()}"])

    raise_parsing_error_if_logger_has_messages(self.logger)
    self.elements_built["creation_info"] = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
    return construct_or_raise_parsing_error(Document, self.elements_built)
def initialize_new_current_element(self, clazz: Any):
    """Finish the element collected so far and mark the current element as an instance of ``clazz``."""
    self.construct_current_element()
    # Read the attribute only now: constructing the previous element may replace the dictionary.
    new_element = self.current_element
    new_element["class"] = clazz
def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool:
    """Return True if the current element was started as ``expected_class``.

    Otherwise a message containing ``line_number`` is appended to the parser's logger and False is returned.
    """
    element = self.current_element
    if "class" in element and expected_class == element["class"]:
        return True

    self.logger.append(
        f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to "
        f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. "
        f"Line: {line_number}"
    )
    return False
def construct_current_element(self):
    """Build the element collected in ``self.current_element`` and start a fresh, empty one.

    Parsing errors raised on the way are added to the parser's logger instead of being propagated.
    """
    element = self.current_element
    if "class" not in element:
        # Either the very first element is just being initialized via initialize_new_current_element(), or the
        # first element lacks its expected starting tag. There is nothing to construct in both cases.
        return

    clazz = element.pop("class")
    try:
        raise_parsing_error_if_logger_has_messages(element.pop("logger"), clazz.__name__)
        # The target list is created before the construction is attempted, so it exists (possibly empty)
        # even if the construction fails.
        built_elements = self.elements_built.setdefault(CLASS_MAPPING[clazz.__name__], [])
        built_elements.append(construct_or_raise_parsing_error(clazz, element))
        if clazz == File:
            self.check_for_preceding_package_and_build_contains_relationship()
    except SPDXParsingError as err:
        self.logger.extend(err.get_messages())
    self.current_element = {"logger": Logger()}
def check_for_preceding_package_and_build_contains_relationship(self):
    """Add a CONTAINS relationship from the last built package to the current file, if a package precedes it."""
    file_spdx_id = self.current_element["spdx_id"]
    built = self.elements_built
    if "packages" not in built:
        return

    # Files that are not contained in a package are assumed to precede any package information. A file that
    # follows package information is assigned to the last parsed package through a contains relationship.
    # (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2)
    preceding_packages = built["packages"]
    if not preceding_packages:
        self.logger.append(
            f"Error while building contains relationship for file {file_spdx_id}, "
            f"preceding package was not parsed successfully."
        )
        return

    contains = Relationship(preceding_packages[-1].spdx_id, RelationshipType.CONTAINS, file_spdx_id)
    relationships = built.setdefault("relationships", [])
    if contains not in relationships:
        relationships.append(contains)