spdx_tools.spdx.parser.tagvalue.parser
1# Copyright (c) 2014 Ahmed H. Ismail 2# Copyright (c) 2023 spdx contributors 3# SPDX-License-Identifier: Apache-2.0 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# http://www.apache.org/licenses/LICENSE-2.0 8# Unless required by applicable law or agreed to in writing, software 9# distributed under the License is distributed on an "AS IS" BASIS, 10# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11# See the License for the specific language governing permissions and 12# limitations under the License. 13 14import re 15 16from beartype.typing import Any, Dict, List 17from license_expression import ExpressionError, get_spdx_licensing 18from ply import yacc 19from ply.yacc import LRParser 20 21from spdx_tools.spdx.datetime_conversions import datetime_from_str 22from spdx_tools.spdx.model import ( 23 Annotation, 24 AnnotationType, 25 CreationInfo, 26 Document, 27 ExternalDocumentRef, 28 ExternalPackageRef, 29 ExternalPackageRefCategory, 30 ExtractedLicensingInfo, 31 File, 32 FileType, 33 Package, 34 PackagePurpose, 35 PackageVerificationCode, 36 Relationship, 37 RelationshipType, 38 Snippet, 39 SpdxNoAssertion, 40 SpdxNone, 41 Version, 42) 43from spdx_tools.spdx.parser.actor_parser import ActorParser 44from spdx_tools.spdx.parser.error import SPDXParsingError 45from spdx_tools.spdx.parser.logger import Logger 46from spdx_tools.spdx.parser.parsing_functions import ( 47 construct_or_raise_parsing_error, 48 raise_parsing_error_if_logger_has_messages, 49) 50from spdx_tools.spdx.parser.tagvalue.helper_methods import ( 51 TAG_DATA_MODEL_FIELD, 52 grammar_rule, 53 parse_checksum, 54 set_value, 55 str_from_text, 56) 57from spdx_tools.spdx.parser.tagvalue.lexer import SPDXLexer 58 59CLASS_MAPPING = dict( 60 File="files", 61 Annotation="annotations", 62 Relationship="relationships", 63 Snippet="snippets", 64 
Package="packages", 65 ExtractedLicensingInfo="extracted_licensing_info", 66) 67ELEMENT_EXPECTED_START_TAG = dict( 68 File="FileName", 69 Annotation="Annotator", 70 Relationship="Relationship", 71 Snippet="SnippetSPDXID", 72 Package="PackageName", 73 ExtractedLicensingInfo="LicenseID", 74) 75 76 77class Parser: 78 tokens: List[str] 79 logger: Logger 80 current_element: Dict[str, Any] 81 creation_info: Dict[str, Any] 82 elements_built: Dict[str, Any] 83 lex: SPDXLexer 84 yacc: LRParser 85 86 def __init__(self, **kwargs): 87 self.tokens = SPDXLexer.tokens 88 self.logger = Logger() 89 self.current_element = {"logger": Logger()} 90 self.creation_info = {"logger": Logger()} 91 self.elements_built = dict() 92 self.lex = SPDXLexer() 93 self.lex.build(reflags=re.UNICODE) 94 self.yacc = yacc.yacc(module=self, **kwargs) 95 96 @grammar_rule("start : start attrib ") 97 def p_start_start_attrib(self, p): 98 pass 99 100 @grammar_rule("start : attrib ") 101 def p_start_attrib(self, p): 102 pass 103 104 @grammar_rule( 105 "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| " 106 "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n" 107 # attributes for file 108 "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n" 109 "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n" 110 "| file_contributor\n" 111 # attributes for annotation 112 "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n" 113 # attributes for relationship 114 "| relationship\n" 115 # attributes for snippet 116 "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n" 117 "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n" 118 "| snippet_byte_range\n| snippet_line_range\n" 119 # attributes for package 120 
"| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n" 121 "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n" 122 "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n" 123 "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n" 124 "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n" 125 # attributes for extracted licensing info 126 "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n" 127 "| unknown_tag " 128 ) 129 def p_attrib(self, p): 130 pass 131 132 # general parsing methods 133 @grammar_rule( 134 "license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n " 135 "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n " 136 "extracted_text : LICENSE_TEXT error\n " 137 "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n " 138 "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n " 139 "file_license_comment : FILE_LICENSE_COMMENT error\n " 140 "file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n " 141 "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n " 142 "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n " 143 "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n " 144 "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n " 145 "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n " 146 "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n " 147 "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n " 148 "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : 
PKG_SOURCE_INFO error\n " 149 "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n " 150 "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n " 151 "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n " 152 "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n " 153 "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n " 154 "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n " 155 "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n " 156 "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n " 157 "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n " 158 "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n " 159 "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n " 160 "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n " 161 "snippet_license_info : SNIPPET_LICENSE_INFO error\n " 162 "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n " 163 "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n " 164 "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n " 165 "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error" 166 ) 167 def p_current_element_error(self, p): 168 if p[1] in ELEMENT_EXPECTED_START_TAG.values(): 169 self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0]) 170 self.current_element["logger"].append( 171 f"Error while parsing {p[1]}: Token did not match specified grammar rule. 
Line: {p.lineno(1)}" 172 ) 173 174 @grammar_rule( 175 "license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n " 176 "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n " 177 "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n " 178 "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n " 179 "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n " 180 "file_comment : FILE_COMMENT text_or_line\n " 181 "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n " 182 "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n " 183 "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n " 184 "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n " 185 "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n " 186 "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n " 187 "pkg_comment : PKG_COMMENT text_or_line\n " 188 "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n " 189 "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n " 190 "pkg_file_name : PKG_FILE_NAME LINE\n " 191 "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n " 192 "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n " 193 "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n " 194 "snippet_comment : SNIPPET_COMMENT text_or_line\n " 195 "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n " 196 "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n " 197 "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n " 198 "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n " 199 "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n " 200 "annotation_comment : 
ANNOTATION_COMMENT text_or_line" 201 ) 202 def p_generic_value(self, p): 203 if p[1] in ELEMENT_EXPECTED_START_TAG.values(): 204 self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0]) 205 if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)): 206 set_value(p, self.current_element) 207 208 @grammar_rule( 209 "unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n" 210 "| UNKNOWN_TAG" 211 ) 212 def p_unknown_tag(self, p): 213 self.logger.append(f"Unknown tag provided in line {p.lineno(1)}") 214 215 @grammar_rule("text_or_line : TEXT\n line_or_no_assertion_or_none : TEXT") 216 def p_text(self, p): 217 p[0] = str_from_text(p[1]) 218 219 @grammar_rule( 220 "text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : LINE\n" 221 "text_or_line : NO_ASSERTION\n text_or_line : NONE" 222 ) 223 def p_line(self, p): 224 p[0] = p[1] 225 226 @grammar_rule( 227 "license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n" 228 "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION" 229 ) 230 def p_no_assertion(self, p): 231 p[0] = SpdxNoAssertion() 232 233 @grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE") 234 def p_none(self, p): 235 p[0] = SpdxNone() 236 237 @grammar_rule("license_or_no_assertion_or_none : LINE") 238 def p_license(self, p): 239 try: 240 p[0] = get_spdx_licensing().parse(p[1]) 241 except ExpressionError as err: 242 error_message = f"Error while parsing license expression: {p[1]}" 243 if err.args: 244 error_message += f": {err.args[0]}" 245 self.current_element["logger"].append(error_message) 246 247 @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE") 248 def p_actor_values(self, p): 249 p[0] = ActorParser.parse_actor(p[1]) 250 251 @grammar_rule("spdx_id : SPDX_ID LINE") 252 def p_spdx_id(self, p): 253 # As all SPDX 
Ids share the same tag, there is no knowing which spdx_id belongs to the document. 254 # We assume that to be the first spdx_id we encounter. As the specification does not explicitly require this, 255 # our approach might lead to unwanted behavior when the document's SPDX Id is defined later in the document. 256 if "spdx_id" in self.creation_info: 257 self.current_element["spdx_id"] = p[2] 258 else: 259 self.creation_info["spdx_id"] = p[2] 260 261 # parsing methods for creation info / document level 262 263 @grammar_rule( 264 "license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n " 265 "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n " 266 "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n " 267 "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error" 268 ) 269 def p_creation_info_value_error(self, p): 270 self.creation_info["logger"].append( 271 f"Error while parsing {p[1]}: Token did not match specified grammar rule. 
Line: {p.lineno(1)}" 272 ) 273 274 @grammar_rule( 275 "document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n " 276 "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n " 277 "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE" 278 ) 279 def p_generic_value_creation_info(self, p): 280 set_value(p, self.creation_info) 281 282 @grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE") 283 def p_license_list_version(self, p): 284 set_value(p, self.creation_info, method_to_apply=Version.from_string) 285 286 @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE") 287 def p_external_document_ref(self, p): 288 external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})") 289 external_doc_ref_match = external_doc_ref_regex.match(p[2]) 290 if not external_doc_ref_match: 291 self.creation_info["logger"].append( 292 f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}" 293 ) 294 return 295 try: 296 document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ") 297 except ValueError: 298 self.creation_info["logger"].append( 299 f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into " 300 f"document_ref_id and document_uri. 
Line: {p.lineno(1)}" 301 ) 302 return 303 checksum = parse_checksum(external_doc_ref_match.group(2).strip()) 304 external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum) 305 self.creation_info.setdefault("external_document_refs", []).append(external_document_ref) 306 307 @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE") 308 def p_creator(self, p): 309 self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2])) 310 311 @grammar_rule("created : CREATED ISO8601_DATE") 312 def p_created(self, p): 313 set_value(p, self.creation_info, method_to_apply=datetime_from_str) 314 315 # parsing methods for extracted licensing info 316 317 @grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE") 318 def p_extracted_cross_reference(self, p): 319 if self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)): 320 self.current_element.setdefault("cross_references", []).append(p[2]) 321 322 # parsing methods for file 323 324 @grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE") 325 def p_file_contributor(self, p): 326 if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): 327 self.current_element.setdefault("contributors", []).append(p[2]) 328 329 @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line") 330 def p_file_attribution_text(self, p): 331 if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): 332 self.current_element.setdefault("attribution_texts", []).append(p[2]) 333 334 @grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none") 335 def p_file_license_info(self, p): 336 if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): 337 self.current_element.setdefault("license_info_in_file", []).append(p[2]) 338 339 @grammar_rule("file_type : FILE_TYPE LINE") 340 def p_file_type(self, p): 341 if not 
self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): 342 return 343 try: 344 file_type = FileType[p[2].strip()] 345 except KeyError: 346 self.current_element["logger"].append(f"Invalid FileType: {p[2]}. Line {p.lineno(1)}") 347 return 348 self.current_element.setdefault("file_types", []).append(file_type) 349 350 @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM") 351 def p_file_checksum(self, p): 352 if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): 353 return 354 checksum = parse_checksum(p[2]) 355 self.current_element.setdefault("checksums", []).append(checksum) 356 357 # parsing methods for package 358 359 @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line") 360 def p_pkg_attribution_text(self, p): 361 self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) 362 self.current_element.setdefault("attribution_texts", []).append(p[2]) 363 364 @grammar_rule( 365 "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE" 366 ) 367 def p_pkg_external_refs(self, p): 368 if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): 369 return 370 try: 371 category, reference_type, locator = p[2].split(" ") 372 except ValueError: 373 self.current_element["logger"].append( 374 f"Couldn't split PackageExternalRef in category, reference_type and locator. Line: {p.lineno(1)}" 375 ) 376 return 377 comment = None 378 if len(p) == 5: 379 comment = p[4] 380 try: 381 category = ExternalPackageRefCategory[category.replace("-", "_")] 382 except KeyError: 383 self.current_element["logger"].append( 384 f"Invalid ExternalPackageRefCategory: {category}. 
Line: {p.lineno(1)}" 385 ) 386 return 387 try: 388 external_package_ref = construct_or_raise_parsing_error( 389 ExternalPackageRef, 390 {"category": category, "reference_type": reference_type, "locator": locator, "comment": comment}, 391 ) 392 except SPDXParsingError as err: 393 self.current_element["logger"].append(err.get_messages()) 394 return 395 self.current_element.setdefault("external_references", []).append(external_package_ref) 396 397 @grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none") 398 def p_pkg_license_info_from_file(self, p): 399 if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): 400 self.current_element.setdefault("license_info_from_files", []).append(p[2]) 401 402 @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM") 403 def p_pkg_checksum(self, p): 404 if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): 405 return 406 checksum = parse_checksum(p[2]) 407 self.current_element.setdefault("checksums", []).append(checksum) 408 409 @grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE") 410 def p_pkg_verification_code(self, p): 411 if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): 412 return 413 414 if "verification_code" in self.current_element: 415 self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") 416 return 417 verif_code_regex = re.compile(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", re.UNICODE) 418 verif_code_code_grp = 1 419 verif_code_exc_files_grp = 3 420 match = verif_code_regex.match(p[2]) 421 if not match: 422 self.current_element["logger"].append( 423 f"Error while parsing {p[1]}: Value did not match expected format. 
Line: {p.lineno(1)}" 424 ) 425 return 426 value = match.group(verif_code_code_grp) 427 excluded_files = None 428 if match.group(verif_code_exc_files_grp): 429 excluded_files = match.group(verif_code_exc_files_grp).split(",") 430 self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files) 431 432 @grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE") 433 def p_pkg_files_analyzed(self, p): 434 if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): 435 return 436 if "files_analyzed" in self.current_element: 437 self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") 438 return 439 if p[2] == "true": 440 self.current_element["files_analyzed"] = True 441 elif p[2] == "false": 442 self.current_element["files_analyzed"] = False 443 else: 444 self.current_element["logger"].append( 445 f'The value of FilesAnalyzed must be either "true" or "false", but is: {p[2]}' 446 ) 447 448 @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE") 449 def p_primary_package_purpose(self, p): 450 if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): 451 set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")]) 452 453 @grammar_rule( 454 "built_date : BUILT_DATE ISO8601_DATE\n release_date : RELEASE_DATE ISO8601_DATE\n " 455 "valid_until_date : VALID_UNTIL_DATE ISO8601_DATE" 456 ) 457 def p_package_dates(self, p): 458 if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): 459 set_value(p, self.current_element, method_to_apply=datetime_from_str) 460 461 # parsing methods for snippet 462 463 @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line") 464 def p_snippet_attribution_text(self, p): 465 if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)): 466 self.current_element.setdefault("attribution_texts", []).append(p[2]) 467 468 
class Parser:
    """Tag-value parser that collects SPDX elements and assembles them into a ``Document``.

    Values are gathered into plain dictionaries while ply reduces the grammar rules below. An element is only
    constructed once the next element starts (or, for the last one, at the end of ``parse``). Problems are
    collected in loggers and raised together instead of aborting at the first error.
    """

    # NOTE: the p_* methods deliberately carry no docstrings; the grammar is attached through @grammar_rule.
    # Presumably that decorator supplies the docstring ply reads as the rule -- confirm in helper_methods.
    # The definition order of the p_* methods is preserved on purpose.

    # token names made available to ply, taken from SPDXLexer.tokens
    tokens: List[str]
    # messages that are raised together at the end of parse()
    logger: Logger
    # values of the element currently being parsed, plus its own "logger" and, once started, its "class"
    current_element: Dict[str, Any]
    # values of the creation info, collected separately from the elements, plus its own "logger"
    creation_info: Dict[str, Any]
    # successfully constructed elements, keyed by the Document argument name (see CLASS_MAPPING)
    elements_built: Dict[str, Any]
    lex: SPDXLexer
    yacc: LRParser

    def __init__(self, **kwargs):
        """Set up empty parsing state, build the lexer and generate the ply parser.

        :param kwargs: forwarded unchanged to ``yacc.yacc``.
        """
        self.tokens = SPDXLexer.tokens
        self.logger = Logger()
        self.current_element = {"logger": Logger()}
        self.creation_info = {"logger": Logger()}
        self.elements_built = dict()
        self.lex = SPDXLexer()
        self.lex.build(reflags=re.UNICODE)
        self.yacc = yacc.yacc(module=self, **kwargs)

    @grammar_rule("start : start attrib ")
    def p_start_start_attrib(self, p):
        pass

    @grammar_rule("start : attrib ")
    def p_start_attrib(self, p):
        pass

    @grammar_rule(
        "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| "
        "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n"
        # attributes for file
        "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n"
        "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n"
        "| file_contributor\n"
        # attributes for annotation
        "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n"
        # attributes for relationship
        "| relationship\n"
        # attributes for snippet
        "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n"
        "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n"
        "| snippet_byte_range\n| snippet_line_range\n"
        # attributes for package
        "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n"
        "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n"
        "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n"
        "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n"
        "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n"
        # attributes for extracted licensing info
        "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n"
        "| unknown_tag "
    )
    def p_attrib(self, p):
        pass

    # general parsing methods

    # Error productions for all element-level tags: the tag was recognised but its value was not.
    @grammar_rule(
        "license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n "
        "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n "
        "extracted_text : LICENSE_TEXT error\n "
        "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n "
        "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n "
        "file_license_comment : FILE_LICENSE_COMMENT error\n "
        "file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n "
        "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n "
        "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n "
        "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n "
        "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n "
        "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n "
        "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n "
        "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n "
        "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n "
        "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n "
        "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n "
        "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n "
        "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n "
        "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n "
        "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n "
        "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n "
        "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n "
        "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n "
        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n "
        "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n "
        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n "
        "snippet_license_info : SNIPPET_LICENSE_INFO error\n "
        "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n "
        "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n "
        "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
        "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error"
    )
    def p_current_element_error(self, p):
        # A broken start tag still opens its element, so that the error is attributed to the new element
        # rather than to the previous one.
        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
        self.current_element["logger"].append(
            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
        )

    # Single-valued element tags that need no conversion beyond what the value rules below already do.
    @grammar_rule(
        "license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n "
        "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n "
        "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n "
        "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
        "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n "
        "file_comment : FILE_COMMENT text_or_line\n "
        "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
        "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n "
        "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n "
        "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n "
        "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n "
        "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n "
        "pkg_comment : PKG_COMMENT text_or_line\n "
        "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
        "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n "
        "pkg_file_name : PKG_FILE_NAME LINE\n "
        "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
        "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n "
        "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n "
        "snippet_comment : SNIPPET_COMMENT text_or_line\n "
        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
        "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n "
        "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n "
        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
        "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
        "annotation_comment : ANNOTATION_COMMENT text_or_line"
    )
    def p_generic_value(self, p):
        # Tags listed in ELEMENT_EXPECTED_START_TAG open a new element; this also constructs the previous one.
        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
        if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)):
            set_value(p, self.current_element)

    @grammar_rule(
        "unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
        "| UNKNOWN_TAG"
    )
    def p_unknown_tag(self, p):
        self.logger.append(f"Unknown tag provided in line {p.lineno(1)}")

    @grammar_rule("text_or_line : TEXT\n line_or_no_assertion_or_none : TEXT")
    def p_text(self, p):
        p[0] = str_from_text(p[1])

    @grammar_rule(
        "text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : LINE\n"
        "text_or_line : NO_ASSERTION\n text_or_line : NONE"
    )
    def p_line(self, p):
        # For text_or_line the NO_ASSERTION / NONE tokens are passed through as-is, not converted to objects.
        p[0] = p[1]

    @grammar_rule(
        "license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n"
        "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION"
    )
    def p_no_assertion(self, p):
        p[0] = SpdxNoAssertion()

    @grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE")
    def p_none(self, p):
        p[0] = SpdxNone()

    @grammar_rule("license_or_no_assertion_or_none : LINE")
    def p_license(self, p):
        # On an ExpressionError the message is logged on the current element and p[0] is left unset.
        try:
            p[0] = get_spdx_licensing().parse(p[1])
        except ExpressionError as err:
            error_message = f"Error while parsing license expression: {p[1]}"
            if err.args:
                error_message += f": {err.args[0]}"
            self.current_element["logger"].append(error_message)

    @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE")
    def p_actor_values(self, p):
        p[0] = ActorParser.parse_actor(p[1])

    @grammar_rule("spdx_id : SPDX_ID LINE")
    def p_spdx_id(self, p):
        # As all SPDX Ids share the same tag, there is no knowing which spdx_id belongs to the document.
        # We assume that to be the first spdx_id we encounter. As the specification does not explicitly require this,
        # our approach might lead to unwanted behavior when the document's SPDX Id is defined later in the document.
        if "spdx_id" in self.creation_info:
            self.current_element["spdx_id"] = p[2]
        else:
            self.creation_info["spdx_id"] = p[2]

    # parsing methods for creation info / document level

    # Error productions for the document-level tags; these are logged on the creation info logger.
    @grammar_rule(
        "license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n "
        "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n "
        "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n "
        "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error"
    )
    def p_creation_info_value_error(self, p):
        self.creation_info["logger"].append(
            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
        )

    @grammar_rule(
        "document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n "
        "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n "
        "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE"
    )
    def p_generic_value_creation_info(self, p):
        set_value(p, self.creation_info)

    @grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE")
    def p_license_list_version(self, p):
        set_value(p, self.creation_info, method_to_apply=Version.from_string)

    @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
    def p_external_document_ref(self, p):
        # group 1: everything before the checksum (document ref id and URI), group 2: the SHA1 checksum
        external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")
        external_doc_ref_match = external_doc_ref_regex.match(p[2])
        if not external_doc_ref_match:
            self.creation_info["logger"].append(
                f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}"
            )
            return
        try:
            document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ")
        except ValueError:
            self.creation_info["logger"].append(
                f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
                f"document_ref_id and document_uri. Line: {p.lineno(1)}"
            )
            return
        checksum = parse_checksum(external_doc_ref_match.group(2).strip())
        external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
        self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)

    @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE")
    def p_creator(self, p):
        self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2]))

    @grammar_rule("created : CREATED ISO8601_DATE")
    def p_created(self, p):
        set_value(p, self.creation_info, method_to_apply=datetime_from_str)

    # parsing methods for extracted licensing info

    @grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE")
    def p_extracted_cross_reference(self, p):
        if self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)):
            self.current_element.setdefault("cross_references", []).append(p[2])

    # parsing methods for file

    @grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE")
    def p_file_contributor(self, p):
        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            self.current_element.setdefault("contributors", []).append(p[2])

    @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line")
    def p_file_attribution_text(self, p):
        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            self.current_element.setdefault("attribution_texts", []).append(p[2])

    @grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none")
    def p_file_license_info(self, p):
        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            self.current_element.setdefault("license_info_in_file", []).append(p[2])

    @grammar_rule("file_type : FILE_TYPE LINE")
    def p_file_type(self, p):
        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            return
        try:
            file_type = FileType[p[2].strip()]
        except KeyError:
            self.current_element["logger"].append(f"Invalid FileType: {p[2]}. Line {p.lineno(1)}")
            return
        self.current_element.setdefault("file_types", []).append(file_type)

    @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM")
    def p_file_checksum(self, p):
        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            return
        checksum = parse_checksum(p[2])
        self.current_element.setdefault("checksums", []).append(checksum)

    # parsing methods for package

    @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line")
    def p_pkg_attribution_text(self, p):
        # Only record the text when a Package is the element in scope, exactly like the file and snippet
        # attribution rules do; otherwise the text would be attached to an unrelated element.
        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            self.current_element.setdefault("attribution_texts", []).append(p[2])

    @grammar_rule(
        "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE"
    )
    def p_pkg_external_refs(self, p):
        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            return
        try:
            category, reference_type, locator = p[2].split(" ")
        except ValueError:
            self.current_element["logger"].append(
                f"Couldn't split PackageExternalRef in category, reference_type and locator. Line: {p.lineno(1)}"
            )
            return
        comment = None
        # len(p) == 5 means the production with the trailing comment matched
        if len(p) == 5:
            comment = p[4]
        try:
            category = ExternalPackageRefCategory[category.replace("-", "_")]
        except KeyError:
            self.current_element["logger"].append(
                f"Invalid ExternalPackageRefCategory: {category}. Line: {p.lineno(1)}"
            )
            return
        try:
            external_package_ref = construct_or_raise_parsing_error(
                ExternalPackageRef,
                {"category": category, "reference_type": reference_type, "locator": locator, "comment": comment},
            )
        except SPDXParsingError as err:
            # NOTE(review): construct_current_element() passes err.get_messages() to extend(), here it is
            # passed to append(). Left unchanged because it affects the rendered error text -- confirm intent.
            self.current_element["logger"].append(err.get_messages())
            return
        self.current_element.setdefault("external_references", []).append(external_package_ref)

    @grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none")
    def p_pkg_license_info_from_file(self, p):
        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            self.current_element.setdefault("license_info_from_files", []).append(p[2])

    @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM")
    def p_pkg_checksum(self, p):
        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            return
        checksum = parse_checksum(p[2])
        self.current_element.setdefault("checksums", []).append(checksum)

    @grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE")
    def p_pkg_verification_code(self, p):
        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            return

        if "verification_code" in self.current_element:
            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
            return
        # group 1: the 40 character hex value, group 3: the optional list inside "(excludes: ...)"
        verif_code_regex = re.compile(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", re.UNICODE)
        verif_code_code_grp = 1
        verif_code_exc_files_grp = 3
        match = verif_code_regex.match(p[2])
        if not match:
            self.current_element["logger"].append(
                f"Error while parsing {p[1]}: Value did not match expected format. Line: {p.lineno(1)}"
            )
            return
        value = match.group(verif_code_code_grp)
        excluded_files = None
        if match.group(verif_code_exc_files_grp):
            excluded_files = match.group(verif_code_exc_files_grp).split(",")
        self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files)

    @grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE")
    def p_pkg_files_analyzed(self, p):
        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            return
        if "files_analyzed" in self.current_element:
            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
            return
        if p[2] == "true":
            self.current_element["files_analyzed"] = True
        elif p[2] == "false":
            self.current_element["files_analyzed"] = False
        else:
            self.current_element["logger"].append(
                f'The value of FilesAnalyzed must be either "true" or "false", but is: {p[2]}'
            )

    @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE")
    def p_primary_package_purpose(self, p):
        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")])

    @grammar_rule(
        "built_date : BUILT_DATE ISO8601_DATE\n release_date : RELEASE_DATE ISO8601_DATE\n "
        "valid_until_date : VALID_UNTIL_DATE ISO8601_DATE"
    )
    def p_package_dates(self, p):
        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            set_value(p, self.current_element, method_to_apply=datetime_from_str)

    # parsing methods for snippet

    @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line")
    def p_snippet_attribution_text(self, p):
        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
            self.current_element.setdefault("attribution_texts", []).append(p[2])

    @grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none")
    def p_snippet_license_info(self, p):
        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
            self.current_element.setdefault("license_info_in_snippet", []).append(p[2])

    @grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE")
    def p_snippet_range(self, p):
        if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
            return

        # the same method serves both range tags; the target key is looked up from the tag
        argument_name = TAG_DATA_MODEL_FIELD[p[1]][1]
        if argument_name in self.current_element:
            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
            return
        range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE)
        if not range_re.match(p[2].strip()):
            self.current_element["logger"].append(
                f"Value for {p[1]} doesn't match valid range pattern. " f"Line: {p.lineno(1)}"
            )
            return
        startpoint = int(p[2].split(":")[0])
        endpoint = int(p[2].split(":")[-1])
        self.current_element[argument_name] = startpoint, endpoint

    # parsing methods for annotation

    @grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| ANNOTATOR TOOL_VALUE\n| ANNOTATOR ORGANIZATION_VALUE")
    def p_annotator(self, p):
        # the annotator tag always starts a new Annotation element
        self.initialize_new_current_element(Annotation)
        set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor)

    @grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
    def p_annotation_date(self, p):
        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
            set_value(p, self.current_element, method_to_apply=datetime_from_str)

    @grammar_rule("annotation_type : ANNOTATION_TYPE LINE")
    def p_annotation_type(self, p):
        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
            set_value(p, self.current_element, method_to_apply=lambda x: AnnotationType[x])

    # parsing methods for relationship

    @grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n " "| RELATIONSHIP LINE")
    def p_relationship(self, p):
        # every relationship tag starts a new Relationship element
        self.initialize_new_current_element(Relationship)
        try:
            spdx_element_id, relationship_type, related_spdx_element_id = p[2].split(" ")
        except ValueError:
            self.current_element["logger"].append(
                f"Relationship couldn't be split in spdx_element_id, relationship_type and "
                f"related_spdx_element. Line: {p.lineno(1)}"
            )
            return
        try:
            self.current_element["relationship_type"] = RelationshipType[relationship_type]
        except KeyError:
            # the error is logged, but the remaining values are still collected
            self.current_element["logger"].append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}")
        if related_spdx_element_id == "NONE":
            related_spdx_element_id = SpdxNone()
        if related_spdx_element_id == "NOASSERTION":
            related_spdx_element_id = SpdxNoAssertion()
        self.current_element["related_spdx_element_id"] = related_spdx_element_id
        self.current_element["spdx_element_id"] = spdx_element_id
        # len(p) == 5 means the production with the trailing comment matched
        if len(p) == 5:
            self.current_element["comment"] = p[4]

    def p_error(self, p):
        # intentionally empty: errors are reported by the "... error" productions above
        pass

    def parse(self, text):
        """Parse tag-value ``text`` and return the constructed ``Document``.

        All messages collected while parsing are raised together through
        ``raise_parsing_error_if_logger_has_messages`` before the document is constructed.
        """
        # entry point for the tag-value parser
        self.yacc.parse(text, lexer=self.lex)
        # this constructs the last remaining element; all other elements are constructed at the start of
        # their subsequent element
        self.construct_current_element()

        # To be able to parse creation info values if they appear in between other elements, e.g. packages, we use
        # two different dictionaries to collect the creation info and all other elements. Therefore, we have a separate
        # logger for the creation info whose messages we need to add to the main logger to then raise all collected
        # messages at once.
        creation_info_logger = self.creation_info.pop("logger")
        if creation_info_logger.has_messages():
            self.logger.extend([f"Error while parsing CreationInfo: {creation_info_logger.get_messages()}"])

        raise_parsing_error_if_logger_has_messages(self.logger)
        creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
        self.elements_built["creation_info"] = creation_info
        document = construct_or_raise_parsing_error(Document, self.elements_built)
        return document

    def initialize_new_current_element(self, clazz: Any):
        """Construct the element collected so far and start collecting values for a new ``clazz`` element."""
        self.construct_current_element()
        self.current_element["class"] = clazz

    def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool:
        """Return True if the element in scope is of ``expected_class``; otherwise log a message and return False.

        ``line_number`` is only used for the logged message.
        """
        if "class" not in self.current_element or expected_class != self.current_element["class"]:
            self.logger.append(
                f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to "
                f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. "
                f"Line: {line_number}"
            )
            return False
        return True

    def construct_current_element(self):
        """Build the element in scope from the collected values, store it and reset the collection state.

        Parsing errors are moved to the main logger instead of being raised.
        """
        if "class" not in self.current_element:
            # This happens when the first element is initialized via initialize_new_current_element() or if the first
            # element is missing its expected starting tag. In both cases we are unable to construct an element.
            return

        clazz = self.current_element.pop("class")
        try:
            raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), clazz.__name__)
            self.elements_built.setdefault(CLASS_MAPPING[clazz.__name__], []).append(
                construct_or_raise_parsing_error(clazz, self.current_element)
            )
            if clazz == File:
                self.check_for_preceding_package_and_build_contains_relationship()
        except SPDXParsingError as err:
            self.logger.extend(err.get_messages())
        # start over with a fresh logger, whether or not the element could be constructed
        self.current_element = {"logger": Logger()}

    def check_for_preceding_package_and_build_contains_relationship(self):
        """Add a CONTAINS relationship from the last built package to the file currently in scope, if any."""
        file_spdx_id = self.current_element["spdx_id"]
        if "packages" not in self.elements_built:
            return
        # We assume that all files that are not contained in a package precede any package information. Any file
        # information that follows any package information is assigned to the last parsed package by creating a
        # corresponding contains relationship.
        # (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2)
        if not self.elements_built["packages"]:
            self.logger.append(
                f"Error while building contains relationship for file {file_spdx_id}, "
                f"preceding package was not parsed successfully."
            )
            return
        package_spdx_id = self.elements_built["packages"][-1].spdx_id
        relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id)
        # avoid duplicating a relationship that is already present
        if relationship not in self.elements_built.setdefault("relationships", []):
            self.elements_built["relationships"].append(relationship)
@grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none")
def p_snippet_license_info(self, p):
    # Collect one more license found in the snippet, provided a Snippet is the element in scope.
    if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
        return
    licenses = self.current_element.setdefault("license_info_in_snippet", [])
    licenses.append(p[2])


@grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE")
def p_snippet_range(self, p):
    # Parse a "<start>:<end>" value into an (int, int) tuple for the byte or line range of a snippet.
    if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
        return

    tag = p[1]
    field_name = TAG_DATA_MODEL_FIELD[tag][1]
    if field_name in self.current_element:
        # each range may only be given once per snippet
        self.current_element["logger"].append(f"Multiple values for {tag} found. Line: {p.lineno(1)}")
        return

    range_match = re.match(r"^(\d+):(\d+)$", p[2].strip(), re.UNICODE)
    if range_match is None:
        self.current_element["logger"].append(
            f"Value for {tag} doesn't match valid range pattern. Line: {p.lineno(1)}"
        )
        return

    lower, upper = range_match.groups()
    self.current_element[field_name] = int(lower), int(upper)


# parsing methods for annotation


@grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| ANNOTATOR TOOL_VALUE\n| ANNOTATOR ORGANIZATION_VALUE")
def p_annotator(self, p):
    # The annotator tag opens a new Annotation; the previous element is finalized first.
    self.initialize_new_current_element(Annotation)
    annotation = self.current_element
    set_value(p, annotation, method_to_apply=ActorParser.parse_actor)


@grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
def p_annotation_date(self, p):
    if not self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
        return
    set_value(p, self.current_element, method_to_apply=datetime_from_str)


@grammar_rule("annotation_type : ANNOTATION_TYPE LINE")
def p_annotation_type(self, p):
    if not self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
        return
    # look the enum member up by its name
    set_value(p, self.current_element, method_to_apply=lambda type_name: AnnotationType[type_name])


# parsing methods for relationship


@grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n " "| RELATIONSHIP LINE")
def p_relationship(self, p):
    # Every relationship tag opens a new Relationship element.
    self.initialize_new_current_element(Relationship)

    parts = p[2].split(" ")
    if len(parts) != 3:
        self.current_element["logger"].append(
            f"Relationship couldn't be split in spdx_element_id, relationship_type and "
            f"related_spdx_element. Line: {p.lineno(1)}"
        )
        return
    spdx_element_id, relationship_type, related_id_text = parts

    try:
        self.current_element["relationship_type"] = RelationshipType[relationship_type]
    except KeyError:
        # keep filling in the remaining fields; the logged message marks the element as erroneous
        self.current_element["logger"].append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}")

    # the related element may be one of the two special values instead of an SPDX id
    related_element = related_id_text
    if related_id_text == "NONE":
        related_element = SpdxNone()
    if related_id_text == "NOASSERTION":
        related_element = SpdxNoAssertion()

    self.current_element["related_spdx_element_id"] = related_element
    self.current_element["spdx_element_id"] = spdx_element_id

    has_comment = len(p) == 5
    if has_comment:
        self.current_element["comment"] = p[4]


def p_error(self, p):
    # Intentionally a no-op: nothing is recorded or raised here.
    pass


def parse(self, text):
    """Entry point for the tag-value parser: parse `text` and return the constructed Document.

    Messages collected during parsing are raised together via
    raise_parsing_error_if_logger_has_messages before the Document is constructed.
    """
    self.yacc.parse(text, lexer=self.lex)
    # Elements are constructed when their successor starts, so the last one still has to be built here.
    self.construct_current_element()

    # Creation info values may appear in between other elements (e.g. packages), so they are collected in a
    # dictionary of their own with a separate logger. Its messages are merged into the main logger to then
    # raise all collected messages at once.
    creation_info_logger = self.creation_info.pop("logger")
    if creation_info_logger.has_messages():
        creation_info_error = f"Error while parsing CreationInfo: {creation_info_logger.get_messages()}"
        self.logger.extend([creation_info_error])

    raise_parsing_error_if_logger_has_messages(self.logger)
    self.elements_built["creation_info"] = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
    return construct_or_raise_parsing_error(Document, self.elements_built)


def initialize_new_current_element(self, clazz: Any):
    """Finalize the element currently being collected and start collecting one of type `clazz`."""
    self.construct_current_element()
    self.current_element["class"] = clazz


def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool:
    """Return True if the element in scope is of `expected_class`; otherwise log an error and return False."""
    if "class" in self.current_element and expected_class == self.current_element["class"]:
        return True

    class_name = expected_class.__name__
    self.logger.append(
        f"Element {class_name} is not the current element in scope, probably the expected tag to "
        f"start the element ({ELEMENT_EXPECTED_START_TAG[class_name]}) is missing. "
        f"Line: {line_number}"
    )
    return False


def construct_current_element(self):
    """Build the model object for the element in scope, store it in elements_built and reset the scope."""
    if "class" not in self.current_element:
        # Either no element has been started yet or the first element lacks its expected starting tag.
        # In both cases there is nothing that could be constructed.
        return

    clazz = self.current_element.pop("class")
    element_name = clazz.__name__
    try:
        element_logger = self.current_element.pop("logger")
        raise_parsing_error_if_logger_has_messages(element_logger, element_name)
        # The target list is fetched/created BEFORE the construction attempt, so it exists (possibly empty)
        # even when constructing the element fails.
        built_elements = self.elements_built.setdefault(CLASS_MAPPING[element_name], [])
        built_elements.append(construct_or_raise_parsing_error(clazz, self.current_element))
        if clazz == File:
            self.check_for_preceding_package_and_build_contains_relationship()
    except SPDXParsingError as err:
        self.logger.extend(err.get_messages())
    self.current_element = {"logger": Logger()}


def check_for_preceding_package_and_build_contains_relationship(self):
    """Add a CONTAINS relationship from the most recently built package to the file in scope."""
    file_spdx_id = self.current_element["spdx_id"]
    if "packages" not in self.elements_built:
        return
    # We assume that all files that are not contained in a package precede any package information. Any file
    # information that follows any package information is assigned to the last parsed package by creating a
    # corresponding contains relationship.
    # (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2)
    packages = self.elements_built["packages"]
    if not packages:
        self.logger.append(
            f"Error while building contains relationship for file {file_spdx_id}, "
            f"preceding package was not parsed successfully."
        )
        return

    contains = Relationship(packages[-1].spdx_id, RelationshipType.CONTAINS, file_spdx_id)
    known_relationships = self.elements_built.setdefault("relationships", [])
    if contains not in known_relationships:
        self.elements_built["relationships"].append(contains)
def __init__(self, **kwargs):
    """Set up the parser state, build the lexer and generate the PLY parser.

    Any keyword arguments are forwarded unchanged to yacc.yacc().
    """
    # token list taken from the lexer class
    self.tokens = SPDXLexer.tokens

    # one parser-wide logger plus one logger per dictionary that collects values
    self.logger = Logger()
    self.current_element = {"logger": Logger()}
    self.creation_info = {"logger": Logger()}
    self.elements_built = {}

    lexer = SPDXLexer()
    self.lex = lexer
    lexer.build(reflags=re.UNICODE)

    self.yacc = yacc.yacc(module=self, **kwargs)
# Top-level production: an attrib is any single tag/value pair known to the grammar. The alternatives are
# grouped below by the element they belong to.
@grammar_rule(
    "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| "
    "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n"
    # attributes for file
    "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n"
    "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n"
    "| file_contributor\n"
    # attributes for annotation
    "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n"
    # attributes for relationship
    "| relationship\n"
    # attributes for snippet
    "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n"
    "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n"
    "| snippet_byte_range\n| snippet_line_range\n"
    # attributes for package
    "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n"
    "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n"
    "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n"
    "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n"
    "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n"
    # attributes for extracted licensing info
    "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n"
    "| unknown_tag "
)
def p_attrib(self, p):
    # Nothing to do here: the handlers of the individual alternatives store the parsed values.
    pass
attrib : spdx_version | spdx_id | data_license | doc_name | document_comment | document_namespace | creator | created | creator_comment | license_list_version | ext_doc_ref | file_name | file_type | file_checksum | file_license_concluded | file_license_info | file_copyright_text | file_license_comment | file_attribution_text | file_notice | file_comment | file_contributor | annotator | annotation_date | annotation_comment | annotation_type | annotation_spdx_id | relationship | snippet_spdx_id | snippet_name | snippet_comment | snippet_attribution_text | snippet_copyright_text | snippet_license_comment | file_spdx_id | snippet_license_concluded | snippet_license_info | snippet_byte_range | snippet_line_range | package_name | package_version | download_location | files_analyzed | homepage | summary | source_info | pkg_file_name | supplier | originator | pkg_checksum | verification_code | description | pkg_comment | pkg_attribution_text | pkg_license_declared | pkg_license_concluded | pkg_license_info | pkg_license_comment | pkg_copyright_text | pkg_external_ref | primary_package_purpose | built_date | release_date | valid_until_date | license_id | extracted_text | license_name | license_cross_ref | lic_comment | unknown_tag
# Error productions for all tags that belong to an element (as opposed to the creation info): the tag was
# recognized but its value did not match the grammar.
@grammar_rule(
    "license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n "
    "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n "
    "extracted_text : LICENSE_TEXT error\n "
    "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n "
    "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n "
    "file_license_comment : FILE_LICENSE_COMMENT error\n "
    "file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n "
    "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n "
    "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n "
    "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n "
    "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n "
    "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n "
    "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n "
    "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n "
    "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n "
    "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n "
    "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n "
    "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n "
    "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n "
    "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n "
    "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n "
    "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n "
    "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n "
    "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n "
    "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n "
    "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n "
    "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n "
    "snippet_license_info : SNIPPET_LICENSE_INFO error\n "
    "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n "
    "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n "
    "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
    "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error"
)
def p_current_element_error(self, p):
    tag = p[1]
    # A tag that starts an element still opens a new element even though its value is broken, so the
    # message ends up in the logger of the element it belongs to.
    opens_new_element = tag in ELEMENT_EXPECTED_START_TAG.values()
    if opens_new_element:
        element_class = TAG_DATA_MODEL_FIELD[tag][0]
        self.initialize_new_current_element(element_class)

    element_logger = self.current_element["logger"]
    element_logger.append(
        f"Error while parsing {tag}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
    )
license_id : LICENSE_ID error license_cross_ref : LICENSE_CROSS_REF error lic_comment : LICENSE_COMMENT error license_name : LICENSE_NAME error extracted_text : LICENSE_TEXT error file_name : FILE_NAME error file_contributor : FILE_CONTRIBUTOR error file_notice : FILE_NOTICE error file_copyright_text : FILE_COPYRIGHT_TEXT error file_license_comment : FILE_LICENSE_COMMENT error file_license_info : FILE_LICENSE_INFO error file_comment : FILE_COMMENT error file_checksum : FILE_CHECKSUM error file_license_concluded : FILE_LICENSE_CONCLUDED error file_type : FILE_TYPE error file_attribution_text : FILE_ATTRIBUTION_TEXT error package_name : PKG_NAME error pkg_attribution_text : PKG_ATTRIBUTION_TEXT error description : PKG_DESCRIPTION error pkg_comment : PKG_COMMENT error summary : PKG_SUMMARY error pkg_copyright_text : PKG_COPYRIGHT_TEXT error pkg_external_ref : PKG_EXTERNAL_REF error pkg_license_comment : PKG_LICENSE_COMMENT error pkg_license_declared : PKG_LICENSE_DECLARED error pkg_license_info : PKG_LICENSE_INFO error pkg_license_concluded : PKG_LICENSE_CONCLUDED error source_info : PKG_SOURCE_INFO error homepage : PKG_HOMEPAGE error pkg_checksum : PKG_CHECKSUM error verification_code : PKG_VERIFICATION_CODE error originator : PKG_ORIGINATOR error download_location : PKG_DOWNLOAD_LOCATION error files_analyzed : PKG_FILES_ANALYZED error supplier : PKG_SUPPLIER error pkg_file_name : PKG_FILE_NAME error package_version : PKG_VERSION error primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error built_date : BUILT_DATE error release_date : RELEASE_DATE error valid_until_date : VALID_UNTIL_DATE error snippet_spdx_id : SNIPPET_SPDX_ID error snippet_name : SNIPPET_NAME error snippet_comment : SNIPPET_COMMENT error snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error snippet_license_comment : SNIPPET_LICENSE_COMMENT error file_spdx_id : SNIPPET_FILE_SPDXID error snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED 
error snippet_license_info : SNIPPET_LICENSE_INFO error snippet_byte_range : SNIPPET_BYTE_RANGE error snippet_line_range : SNIPPET_LINE_RANGE error annotator : ANNOTATOR error annotation_date : ANNOTATION_DATE error annotation_comment : ANNOTATION_COMMENT error annotation_type : ANNOTATION_TYPE error annotation_spdx_id : ANNOTATION_SPDX_ID error relationship : RELATIONSHIP error
# All single-valued element attributes whose parsed value can be stored as-is.
@grammar_rule(
    "license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n "
    "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n "
    "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n "
    "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
    "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n "
    "file_comment : FILE_COMMENT text_or_line\n "
    "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
    "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n "
    "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n "
    "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n "
    "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n "
    "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n "
    "pkg_comment : PKG_COMMENT text_or_line\n "
    "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
    "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n "
    "pkg_file_name : PKG_FILE_NAME LINE\n "
    "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
    "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n "
    "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n "
    "snippet_comment : SNIPPET_COMMENT text_or_line\n "
    "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
    "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n "
    "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n "
    "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
    "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
    "annotation_comment : ANNOTATION_COMMENT text_or_line"
)
def p_generic_value(self, p):
    tag = p[1]
    # class of the element this tag belongs to
    element_class = TAG_DATA_MODEL_FIELD[tag][0]

    # tags that start an element finalize the previous element and open a new one
    if tag in ELEMENT_EXPECTED_START_TAG.values():
        self.initialize_new_current_element(element_class)

    if not self.check_that_current_element_matches_class_for_value(element_class, p.lineno(1)):
        return
    set_value(p, self.current_element)
license_name : LICENSE_NAME line_or_no_assertion extracted_text : LICENSE_TEXT text_or_line lic_comment : LICENSE_COMMENT text_or_line license_id : LICENSE_ID LINE file_name : FILE_NAME LINE file_notice : FILE_NOTICE text_or_line file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none file_license_comment : FILE_LICENSE_COMMENT text_or_line file_comment : FILE_COMMENT text_or_line file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none package_name : PKG_NAME LINE description : PKG_DESCRIPTION text_or_line summary : PKG_SUMMARY text_or_line source_info : PKG_SOURCE_INFO text_or_line homepage : PKG_HOMEPAGE line_or_no_assertion_or_none download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none originator : PKG_ORIGINATOR actor_or_no_assertion supplier : PKG_SUPPLIER actor_or_no_assertion pkg_comment : PKG_COMMENT text_or_line pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none pkg_file_name : PKG_FILE_NAME LINE pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none package_version : PKG_VERSION LINE pkg_license_comment : PKG_LICENSE_COMMENT text_or_line snippet_spdx_id : SNIPPET_SPDX_ID LINE snippet_name : SNIPPET_NAME LINE snippet_comment : SNIPPET_COMMENT text_or_line snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line file_spdx_id : SNIPPET_FILE_SPDXID LINE snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none annotation_spdx_id : ANNOTATION_SPDX_ID LINE annotation_comment : ANNOTATION_COMMENT text_or_line
@grammar_rule(
    "unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
    "| UNKNOWN_TAG"
)
def p_unknown_tag(self, p):
    # Unknown tags cannot be attributed to an element, so they are reported on the parser-wide logger.
    line_number = p.lineno(1)
    message = f"Unknown tag provided in line {line_number}"
    self.logger.append(message)
unknown_tag : UNKNOWN_TAG text_or_line | UNKNOWN_TAG ISO8601_DATE | UNKNOWN_TAG PERSON_VALUE | UNKNOWN_TAG
@grammar_rule("text_or_line : TEXT\n line_or_no_assertion_or_none : TEXT")
def p_text(self, p):
    # TEXT tokens are converted with str_from_text before being handed on as the value of the rule.
    text_token = p[1]
    p[0] = str_from_text(text_token)
text_or_line : TEXT line_or_no_assertion_or_none : TEXT
@grammar_rule(
    "text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : LINE\n"
    "text_or_line : NO_ASSERTION\n text_or_line : NONE"
)
def p_line(self, p):
    # The token value is passed through unchanged.
    token_value = p[1]
    p[0] = token_value
text_or_line : LINE line_or_no_assertion : LINE line_or_no_assertion_or_none : LINE text_or_line : NO_ASSERTION text_or_line : NONE
@grammar_rule(
    "license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n"
    "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION"
)
def p_no_assertion(self, p):
    # For these rules a NO_ASSERTION token is represented by a SpdxNoAssertion instance.
    no_assertion = SpdxNoAssertion()
    p[0] = no_assertion
license_or_no_assertion_or_none : NO_ASSERTION actor_or_no_assertion : NO_ASSERTION line_or_no_assertion : NO_ASSERTION line_or_no_assertion_or_none : NO_ASSERTION
@grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE")
def p_none(self, p):
    # For these rules a NONE token is represented by a SpdxNone instance.
    none_value = SpdxNone()
    p[0] = none_value
license_or_no_assertion_or_none : NONE line_or_no_assertion_or_none : NONE
@grammar_rule("license_or_no_assertion_or_none : LINE")
def p_license(self, p):
    # Parse the line as a license expression. On failure the rule value is left unset and the problem
    # is recorded in the logger of the element in scope.
    expression_text = p[1]
    try:
        parsed_expression = get_spdx_licensing().parse(expression_text)
    except ExpressionError as err:
        # append the reason given by the exception, if there is one
        reason = f": {err.args[0]}" if err.args else ""
        self.current_element["logger"].append(
            f"Error while parsing license expression: {expression_text}{reason}"
        )
        return
    p[0] = parsed_expression
license_or_no_assertion_or_none : LINE
@grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE")
def p_actor_values(self, p):
    # Person and organization values are converted by ActorParser.parse_actor.
    actor_text = p[1]
    p[0] = ActorParser.parse_actor(actor_text)
actor_or_no_assertion : PERSON_VALUE | ORGANIZATION_VALUE
@grammar_rule("spdx_id : SPDX_ID LINE")
def p_spdx_id(self, p):
    # All SPDX ids share one tag, so the tag alone does not tell which id belongs to the document itself.
    # The first id encountered is taken to be the document's; every later one goes to the element in scope.
    # The specification does not explicitly require this order, so a document whose own SPDX id is defined
    # later in the file may be parsed in an unwanted way.
    document_id_known = "spdx_id" in self.creation_info
    target = self.current_element if document_id_known else self.creation_info
    target["spdx_id"] = p[2]
spdx_id : SPDX_ID LINE
# Error productions for the creation info tags: the tag was recognized but its value did not match the grammar.
@grammar_rule(
    "license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n "
    "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n "
    "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n "
    "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error"
)
def p_creation_info_value_error(self, p):
    tag = p[1]
    line_number = p.lineno(1)
    creation_info_logger = self.creation_info["logger"]
    creation_info_logger.append(
        f"Error while parsing {tag}: Token did not match specified grammar rule. Line: {line_number}"
    )
license_list_version : LICENSE_LIST_VERSION error document_comment : DOC_COMMENT error document_namespace : DOC_NAMESPACE error data_license : DOC_LICENSE error doc_name : DOC_NAME error ext_doc_ref : EXT_DOC_REF error spdx_version : DOC_VERSION error creator_comment : CREATOR_COMMENT error creator : CREATOR error created : CREATED error
# Creation info attributes whose parsed value can be stored as-is.
@grammar_rule(
    "document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n "
    "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n "
    "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE"
)
def p_generic_value_creation_info(self, p):
    creation_info = self.creation_info
    set_value(p, creation_info)
document_comment : DOC_COMMENT text_or_line document_namespace : DOC_NAMESPACE LINE data_license : DOC_LICENSE LINE spdx_version : DOC_VERSION LINE creator_comment : CREATOR_COMMENT text_or_line doc_name : DOC_NAME LINE
@grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE")
def p_license_list_version(self, p):
    # The version text is converted with Version.from_string before it is stored in the creation info.
    creation_info = self.creation_info
    set_value(p, creation_info, method_to_apply=Version.from_string)
license_list_version : LICENSE_LIST_VERSION LINE
@grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
def p_external_document_ref(self, p):
    # Expected value: "<document_ref_id> <document_uri> SHA1: <40 hex digits>".
    ref_match = re.match(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})", p[2])
    if ref_match is None:
        self.creation_info["logger"].append(
            f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}"
        )
        return

    id_and_uri, checksum_text = ref_match.groups()
    # everything in front of the checksum has to consist of exactly two space-separated parts
    id_and_uri_parts = id_and_uri.strip().split(" ")
    if len(id_and_uri_parts) != 2:
        self.creation_info["logger"].append(
            f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
            f"document_ref_id and document_uri. Line: {p.lineno(1)}"
        )
        return
    document_ref_id, document_uri = id_and_uri_parts

    checksum = parse_checksum(checksum_text.strip())
    new_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
    self.creation_info.setdefault("external_document_refs", []).append(new_ref)
ext_doc_ref : EXT_DOC_REF LINE
@grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE")
def p_creator(self, p):
    # A document may have several creators, so they are accumulated in a list.
    creators = self.creation_info.setdefault("creators", [])
    creator = ActorParser.parse_actor(p[2])
    creators.append(creator)
creator : CREATOR PERSON_VALUE | CREATOR TOOL_VALUE | CREATOR ORGANIZATION_VALUE
@grammar_rule("created : CREATED ISO8601_DATE")
def p_created(self, p):
    # The date text is converted with datetime_from_str before it is stored in the creation info.
    creation_info = self.creation_info
    set_value(p, creation_info, method_to_apply=datetime_from_str)
created : CREATED ISO8601_DATE
@grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE")
def p_extracted_cross_reference(self, p):
    # Cross references are accumulated in a list; skipped if no ExtractedLicensingInfo is in scope.
    if not self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)):
        return
    cross_references = self.current_element.setdefault("cross_references", [])
    cross_references.append(p[2])
license_cross_ref : LICENSE_CROSS_REF LINE
@grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE")
def p_file_contributor(self, p):
    # Contributors are accumulated in a list; skipped if no File is in scope.
    if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
        return
    contributors = self.current_element.setdefault("contributors", [])
    contributors.append(p[2])
file_contributor : FILE_CONTRIBUTOR LINE
@grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line")
def p_file_attribution_text(self, p):
    # Attribution texts are accumulated in a list; skipped if no File is in scope.
    if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
        return
    attribution_texts = self.current_element.setdefault("attribution_texts", [])
    attribution_texts.append(p[2])
file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line
@grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none")
def p_file_license_info(self, p):
    # Licenses found in the file are accumulated in a list; skipped if no File is in scope.
    if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
        return
    licenses = self.current_element.setdefault("license_info_in_file", [])
    licenses.append(p[2])
file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none
@grammar_rule("file_type : FILE_TYPE LINE")
def p_file_type(self, p):
    # A file may carry several types; each one must be the name of a FileType member.
    if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
        return

    type_name = p[2].strip()
    try:
        parsed_type = FileType[type_name]
    except KeyError:
        # the message shows the value as given, i.e. without stripping
        self.current_element["logger"].append(f"Invalid FileType: {p[2]}. Line {p.lineno(1)}")
        return

    file_types = self.current_element.setdefault("file_types", [])
    file_types.append(parsed_type)
file_type : FILE_TYPE LINE
@grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM")
def p_file_checksum(self, p):
    # Checksums are accumulated in a list; skipped if no File is in scope.
    if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
        parsed_checksum = parse_checksum(p[2])
        checksums = self.current_element.setdefault("checksums", [])
        checksums.append(parsed_checksum)
file_checksum : FILE_CHECKSUM CHECKSUM
@grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line")
def p_pkg_attribution_text(self, p):
    # Attribution texts are accumulated in a list on the Package in scope.
    #
    # The result of the scope check must be honoured: when no Package is the current element the check
    # has already logged an error, and the text must not be attached to whatever other element (File,
    # Snippet, ...) happens to be in scope. This mirrors the file and snippet attribution text handlers.
    if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        self.current_element.setdefault("attribution_texts", []).append(p[2])
pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line
@grammar_rule(
    "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE"
)
def p_pkg_external_refs(self, p):
    # Parse "<category> <reference_type> <locator>" (optionally followed by a comment) into an
    # ExternalPackageRef and add it to the Package in scope. Any problem is recorded in the element's
    # logger and the reference is dropped.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    try:
        category, reference_type, locator = p[2].split(" ")
    except ValueError:
        self.current_element["logger"].append(
            f"Couldn't split PackageExternalRef in category, reference_type and locator. Line: {p.lineno(1)}"
        )
        return
    # the rule with a comment consists of five symbols, the comment text being the last one
    comment = p[4] if len(p) == 5 else None
    try:
        # enum member names use underscores where the tag value uses hyphens
        category = ExternalPackageRefCategory[category.replace("-", "_")]
    except KeyError:
        self.current_element["logger"].append(
            f"Invalid ExternalPackageRefCategory: {category}. Line: {p.lineno(1)}"
        )
        return
    try:
        external_package_ref = construct_or_raise_parsing_error(
            ExternalPackageRef,
            {"category": category, "reference_type": reference_type, "locator": locator, "comment": comment},
        )
    except SPDXParsingError as err:
        # get_messages() yields a list of messages; extend() adds them one by one (as is done for the
        # parser-wide logger elsewhere in this class) instead of nesting the whole list as one entry.
        self.current_element["logger"].extend(err.get_messages())
        return
    self.current_element.setdefault("external_references", []).append(external_package_ref)
pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line | PKG_EXTERNAL_REF LINE
@grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none")
def p_pkg_license_info_from_file(self, p):
    # Licenses found in the package's files are accumulated in a list; skipped if no Package is in scope.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    licenses = self.current_element.setdefault("license_info_from_files", [])
    licenses.append(p[2])
pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none
@grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM")
def p_pkg_checksum(self, p):
    # Checksums are accumulated in a list; skipped if no Package is in scope.
    if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        parsed_checksum = parse_checksum(p[2])
        checksums = self.current_element.setdefault("checksums", [])
        checksums.append(parsed_checksum)
pkg_checksum : PKG_CHECKSUM CHECKSUM
@grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE")
def p_pkg_verification_code(self, p):
    # Expected value: 40 lowercase hex digits, optionally followed by "(excludes: <file>[,<file>...])".
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return

    tag = p[1]
    if "verification_code" in self.current_element:
        # only one verification code is allowed per package
        self.current_element["logger"].append(f"Multiple values for {tag} found. Line: {p.lineno(1)}")
        return

    code_match = re.match(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", p[2], re.UNICODE)
    if code_match is None:
        self.current_element["logger"].append(
            f"Error while parsing {tag}: Value did not match expected format. Line: {p.lineno(1)}"
        )
        return

    # group 1 is the code itself, group 3 the comma-separated list inside "(excludes: ...)"
    code = code_match.group(1)
    excluded_text = code_match.group(3)
    excluded_files = excluded_text.split(",") if excluded_text else None
    self.current_element["verification_code"] = PackageVerificationCode(code, excluded_files)
verification_code : PKG_VERIFICATION_CODE LINE
@grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE")
def p_pkg_files_analyzed(self, p):
    # Only the exact, lowercase literals "true" and "false" are accepted.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    if "files_analyzed" in self.current_element:
        self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
        return

    literal = p[2]
    booleans = {"true": True, "false": False}
    if literal in booleans:
        self.current_element["files_analyzed"] = booleans[literal]
        return
    self.current_element["logger"].append(
        f'The value of FilesAnalyzed must be either "true" or "false", but is: {literal}'
    )
files_analyzed : PKG_FILES_ANALYZED LINE
@grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE")
def p_primary_package_purpose(self, p):
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return

    def to_purpose(purpose_text):
        # enum member names use underscores where the tag value uses hyphens
        return PackagePurpose[purpose_text.replace("-", "_")]

    set_value(p, self.current_element, method_to_apply=to_purpose)
primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE
@grammar_rule(
    "built_date : BUILT_DATE ISO8601_DATE\n release_date : RELEASE_DATE ISO8601_DATE\n "
    "valid_until_date : VALID_UNTIL_DATE ISO8601_DATE"
)
def p_package_dates(self, p):
    # Shared action for the three package date tags (built, release, valid-until):
    # the value is converted with datetime_from_str and stored via set_value.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    set_value(p, self.current_element, method_to_apply=datetime_from_str)
built_date : BUILT_DATE ISO8601_DATE release_date : RELEASE_DATE ISO8601_DATE valid_until_date : VALID_UNTIL_DATE ISO8601_DATE
@grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line")
def p_snippet_attribution_text(self, p):
    # Collects an attribution text for the current snippet; the tag may repeat,
    # so values accumulate in the "attribution_texts" list.
    if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
        return
    attribution_texts = self.current_element.setdefault("attribution_texts", [])
    attribution_texts.append(p[2])
snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line
@grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none")
def p_snippet_license_info(self, p):
    # Collects a license-info entry for the current snippet; the tag may repeat,
    # so values accumulate in the "license_info_in_snippet" list.
    if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
        return
    license_infos = self.current_element.setdefault("license_info_in_snippet", [])
    license_infos.append(p[2])
snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none
@grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE")
def p_snippet_range(self, p):
    # Shared action for the snippet byte-range and line-range tags. The value must
    # look like "<start>:<end>" and is stored as a (start, end) tuple of ints.
    if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
        return

    # The second item of the tag's TAG_DATA_MODEL_FIELD entry is the key under
    # which the range is stored in the current element.
    argument_name = TAG_DATA_MODEL_FIELD[p[1]][1]
    element = self.current_element
    if argument_name in element:
        element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
        return

    range_match = re.compile(r"^(\d+):(\d+)$", re.UNICODE).match(p[2].strip())
    if range_match is None:
        element["logger"].append(f"Value for {p[1]} doesn't match valid range pattern. Line: {p.lineno(1)}")
        return

    start_text, end_text = range_match.groups()
    element[argument_name] = int(start_text), int(end_text)
snippet_byte_range : SNIPPET_BYTE_RANGE LINE snippet_line_range : SNIPPET_LINE_RANGE LINE
@grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| ANNOTATOR TOOL_VALUE\n| ANNOTATOR ORGANIZATION_VALUE")
def p_annotator(self, p):
    # The annotator tag opens a new Annotation element; its value (person, tool or
    # organization) is converted with ActorParser.parse_actor and stored via set_value.
    self.initialize_new_current_element(Annotation)
    actor_converter = ActorParser.parse_actor
    set_value(p, self.current_element, method_to_apply=actor_converter)
annotator : ANNOTATOR PERSON_VALUE | ANNOTATOR TOOL_VALUE | ANNOTATOR ORGANIZATION_VALUE
@grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
def p_annotation_date(self, p):
    # Stores the date of the current annotation, converted with datetime_from_str.
    if not self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
        return
    set_value(p, self.current_element, method_to_apply=datetime_from_str)
annotation_date : ANNOTATION_DATE ISO8601_DATE
@grammar_rule("annotation_type : ANNOTATION_TYPE LINE")
def p_annotation_type(self, p):
    # Stores the type of the current annotation, looked up by name in AnnotationType.
    if not self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
        return

    def to_annotation_type(raw_value):
        return AnnotationType[raw_value]

    set_value(p, self.current_element, method_to_apply=to_annotation_type)
annotation_type : ANNOTATION_TYPE LINE
@grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n " "| RELATIONSHIP LINE")
def p_relationship(self, p):
    # The relationship tag opens a new Relationship element. p[2] must hold exactly
    # three whitespace-separated fields: "<spdx_element_id> <type> <related_spdx_element_id>".
    # With the four-symbol alternative, p[4] carries the relationship comment.
    # NOTE(review): kept as comments rather than a docstring because the grammar
    # text appears to be supplied as the docstring by @grammar_rule - confirm.
    self.initialize_new_current_element(Relationship)
    try:
        # split() without an argument tolerates tabs and repeated spaces between
        # the fields; a wrong number of fields still raises ValueError here.
        spdx_element_id, relationship_type, related_spdx_element_id = p[2].split()
    except ValueError:
        self.current_element["logger"].append(
            f"Relationship couldn't be split in spdx_element_id, relationship_type and "
            f"related_spdx_element. Line: {p.lineno(1)}"
        )
        return

    try:
        self.current_element["relationship_type"] = RelationshipType[relationship_type]
    except KeyError:
        # Keep going so the ids are still recorded alongside the logged error.
        self.current_element["logger"].append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}")

    # The two sentinel spellings are mutually exclusive, hence elif.
    if related_spdx_element_id == "NONE":
        related_spdx_element_id = SpdxNone()
    elif related_spdx_element_id == "NOASSERTION":
        related_spdx_element_id = SpdxNoAssertion()

    self.current_element["related_spdx_element_id"] = related_spdx_element_id
    self.current_element["spdx_element_id"] = spdx_element_id
    if len(p) == 5:
        self.current_element["comment"] = p[4]
relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line | RELATIONSHIP LINE
def parse(self, text):
    """Parse tag-value ``text`` and return the resulting ``Document``.

    This is the entry point of the tag-value parser. Messages collected on the
    loggers are raised through ``raise_parsing_error_if_logger_has_messages``
    before the document is built; construction itself goes through
    ``construct_or_raise_parsing_error``.
    """
    self.yacc.parse(text, lexer=self.lex)
    # An element is only constructed once its successor starts, so the last one
    # still has to be flushed after the grammar has run.
    self.construct_current_element()

    # Creation info values may appear in between other elements (e.g. packages),
    # so they are collected in their own dictionary with their own logger. Its
    # messages are merged into the main logger here so that everything collected
    # can be raised at once.
    creation_info_logger = self.creation_info.pop("logger")
    if creation_info_logger.has_messages():
        creation_info_messages = creation_info_logger.get_messages()
        self.logger.extend([f"Error while parsing CreationInfo: {creation_info_messages}"])

    raise_parsing_error_if_logger_has_messages(self.logger)
    self.elements_built["creation_info"] = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
    return construct_or_raise_parsing_error(Document, self.elements_built)
def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool:
    """Return True if the element currently being collected is of ``expected_class``.

    Otherwise a message naming the expected start tag and ``line_number`` is
    appended to the parser's logger and False is returned.
    """
    element = self.current_element
    if "class" in element and expected_class == element["class"]:
        return True

    class_name = expected_class.__name__
    self.logger.append(
        f"Element {class_name} is not the current element in scope, probably the expected tag to "
        f"start the element ({ELEMENT_EXPECTED_START_TAG[class_name]}) is missing. "
        f"Line: {line_number}"
    )
    return False
def construct_current_element(self):
    """Build the element collected so far and start a fresh, empty one.

    The built object is appended to the ``elements_built`` list selected through
    ``CLASS_MAPPING``. Any ``SPDXParsingError`` raised on the way is folded into
    the parser's logger instead of propagating.
    """
    if "class" not in self.current_element:
        # Nothing to build: either no element has been started yet (the very
        # first initialize_new_current_element() call) or the first element
        # lacks its expected starting tag.
        return

    element_class = self.current_element.pop("class")
    element_logger = self.current_element.pop("logger")
    class_name = element_class.__name__
    try:
        raise_parsing_error_if_logger_has_messages(element_logger, class_name)
        # The target list is created before construction is attempted, so it
        # exists (possibly empty) even when construction fails.
        built_elements = self.elements_built.setdefault(CLASS_MAPPING[class_name], [])
        built_elements.append(construct_or_raise_parsing_error(element_class, self.current_element))
        if element_class == File:
            self.check_for_preceding_package_and_build_contains_relationship()
    except SPDXParsingError as err:
        self.logger.extend(err.get_messages())

    self.current_element = {"logger": Logger()}
def check_for_preceding_package_and_build_contains_relationship(self):
    """Link the file in ``current_element`` to the most recently built package.

    Files that are not contained in a package are assumed to precede all package
    information; a file following package information is assigned to the last
    parsed package through a CONTAINS relationship
    (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2).
    Does nothing when no package information has been seen, and logs an error
    when the package list exists but is empty.
    """
    file_id = self.current_element["spdx_id"]
    if "packages" not in self.elements_built:
        return

    packages = self.elements_built["packages"]
    if not packages:
        self.logger.append(
            f"Error while building contains relationship for file {file_id}, "
            f"preceding package was not parsed successfully."
        )
        return

    contains = Relationship(packages[-1].spdx_id, RelationshipType.CONTAINS, file_id)
    relationships = self.elements_built.setdefault("relationships", [])
    if contains in relationships:
        # Already present; avoid a duplicate entry.
        return
    relationships.append(contains)