spdx_tools.spdx.parser.tagvalue.parser
# Copyright (c) 2014 Ahmed H. Ismail
# Copyright (c) 2023 spdx contributors
# SPDX-License-Identifier: Apache-2.0
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

from beartype.typing import Any, Dict, List
from license_expression import ExpressionError, get_spdx_licensing
from ply import yacc
from ply.yacc import LRParser

from spdx_tools.spdx.datetime_conversions import datetime_from_str
from spdx_tools.spdx.model import (
    Annotation,
    AnnotationType,
    CreationInfo,
    Document,
    ExternalDocumentRef,
    ExternalPackageRef,
    ExternalPackageRefCategory,
    ExtractedLicensingInfo,
    File,
    FileType,
    Package,
    PackagePurpose,
    PackageVerificationCode,
    Relationship,
    RelationshipType,
    Snippet,
    SpdxNoAssertion,
    SpdxNone,
    Version,
)
from spdx_tools.spdx.parser.actor_parser import ActorParser
from spdx_tools.spdx.parser.error import SPDXParsingError
from spdx_tools.spdx.parser.logger import Logger
from spdx_tools.spdx.parser.parsing_functions import (
    construct_or_raise_parsing_error,
    raise_parsing_error_if_logger_has_messages,
)
from spdx_tools.spdx.parser.tagvalue.helper_methods import (
    TAG_DATA_MODEL_FIELD,
    grammar_rule,
    parse_checksum,
    set_value,
    str_from_text,
)
from spdx_tools.spdx.parser.tagvalue.lexer import SPDXLexer

# Maps the name of an element class to the key under which built instances of that class are collected in
# Parser.elements_built (see Parser.construct_current_element()).
CLASS_MAPPING = dict(
    File="files",
    Annotation="annotations",
    Relationship="relationships",
    Snippet="snippets",
    Package="packages",
    ExtractedLicensingInfo="extracted_licensing_info",
)
# Maps the name of an element class to the tag that is expected to open a new element of that class. Used to
# decide when a new current element has to be started and to build helpful error messages.
ELEMENT_EXPECTED_START_TAG = dict(
    File="FileName",
    Annotation="Annotator",
    Relationship="Relationship",
    Snippet="SnippetSPDXID",
    Package="PackageName",
    ExtractedLicensingInfo="LicenseID",
)


class Parser:
    """Tag-value parser that collects parsed values element by element and builds a Document from them.

    Values belonging to the creation info are collected in `creation_info`, values of every other element in
    `current_element`. Whenever a new element starts, the previous one is constructed and stored in
    `elements_built`. Errors are collected in loggers and raised together at the end of `parse()`.

    NOTE: the `p_*` methods are grammar productions handed to ply via the `grammar_rule` decorator (defined in
    helper_methods, not visible here). They are documented with comments instead of docstrings on purpose, as ply
    derives grammar rules from `__doc__`.
    """

    tokens: List[str]
    logger: Logger  # collects the messages of all elements; raised as a whole in parse()
    current_element: Dict[str, Any]  # keyword arguments (plus "class" and "logger") of the element in scope
    creation_info: Dict[str, Any]  # keyword arguments (plus "logger") of the CreationInfo
    elements_built: Dict[str, Any]  # keyword arguments used to construct the Document
    lex: SPDXLexer
    yacc: LRParser

    def __init__(self, **kwargs):
        """Set up empty parsing state, build the lexer and generate the ply parser.

        :param kwargs: passed through unchanged to `yacc.yacc()`.
        """
        self.tokens = SPDXLexer.tokens
        self.logger = Logger()
        self.current_element = {"logger": Logger()}
        self.creation_info = {"logger": Logger()}
        self.elements_built = dict()
        self.lex = SPDXLexer()
        self.lex.build(reflags=re.UNICODE)
        self.yacc = yacc.yacc(module=self, **kwargs)

    @grammar_rule("start : start attrib ")
    def p_start_start_attrib(self, p):
        pass

    @grammar_rule("start : attrib ")
    def p_start_attrib(self, p):
        pass

    @grammar_rule(
        "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| "
        "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n"
        # attributes for file
        "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n"
        "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n"
        "| file_contributor\n"
        # attributes for annotation
        "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n"
        # attributes for relationship
        "| relationship\n"
        # attributes for snippet
        "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n"
        "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n"
        "| snippet_byte_range\n| snippet_line_range\n"
        # attributes for package
        "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n"
        "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n"
        "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n"
        "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n"
        "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n"
        # attributes for extracted licensing info
        "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n"
        "| unknown_tag "
    )
    def p_attrib(self, p):
        pass

    # general parsing methods
    @grammar_rule(
        "license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n "
        "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n "
        "extracted_text : LICENSE_TEXT error\n "
        "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n "
        "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n "
        "file_license_comment : FILE_LICENSE_COMMENT error\n "
        "file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n "
        "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n "
        "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n "
        "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n "
        "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n "
        "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n "
        "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n "
        "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n "
        "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n "
        "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n "
        "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n "
        "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n "
        "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n "
        "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n "
        "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n "
        "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n "
        "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n "
        "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n "
        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n "
        "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n "
        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n "
        "snippet_license_info : SNIPPET_LICENSE_INFO error\n "
        "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n "
        "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n "
        "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
        "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error"
    )
    def p_current_element_error(self, p):
        # Error productions for all tags that belong to an element other than the creation info.
        # If the malformed tag is one that starts an element, the new element is still opened so that the error
        # is attributed to it and following tags are not reported against the previous element.
        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
        self.current_element["logger"].append(
            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
        )

    @grammar_rule(
        "license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n "
        "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n "
        "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n "
        "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
        "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n "
        "file_comment : FILE_COMMENT text_or_line\n "
        "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
        "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n "
        "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n "
        "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n "
        "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n "
        "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n "
        "pkg_comment : PKG_COMMENT text_or_line\n "
        "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
        "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n "
        "pkg_file_name : PKG_FILE_NAME LINE\n "
        "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
        "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n "
        "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n "
        "snippet_comment : SNIPPET_COMMENT text_or_line\n "
        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
        "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n "
        "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n "
        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
        "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
        "annotation_comment : ANNOTATION_COMMENT text_or_line"
    )
    def p_generic_value(self, p):
        # Handles all single-valued tags of elements that need no special treatment.
        # TAG_DATA_MODEL_FIELD[tag][0] is the element class the tag belongs to.
        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
        if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)):
            set_value(p, self.current_element)

    @grammar_rule(
        "unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
        "| UNKNOWN_TAG"
    )
    def p_unknown_tag(self, p):
        self.logger.append(f"Unknown tag provided in line {p.lineno(1)}")

    @grammar_rule("text_or_line : TEXT")
    def p_text(self, p):
        p[0] = str_from_text(p[1])

    @grammar_rule("text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : text_or_line")
    def p_line(self, p):
        p[0] = p[1]

    @grammar_rule(
        "license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n"
        "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION"
    )
    def p_no_assertion(self, p):
        p[0] = SpdxNoAssertion()

    @grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE")
    def p_none(self, p):
        p[0] = SpdxNone()

    @grammar_rule("license_or_no_assertion_or_none : LINE")
    def p_license(self, p):
        # On an invalid expression p[0] is left unset; the message logged on the current element makes the
        # construction of that element fail later on.
        try:
            p[0] = get_spdx_licensing().parse(p[1])
        except ExpressionError as err:
            error_message = f"Error while parsing license expression: {p[1]}"
            if err.args:
                error_message += f": {err.args[0]}"
            self.current_element["logger"].append(error_message)

    @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE")
    def p_actor_values(self, p):
        p[0] = ActorParser.parse_actor(p[1])

    @grammar_rule("spdx_id : SPDX_ID LINE")
    def p_spdx_id(self, p):
        # As all SPDX Ids share the same tag, there is no knowing which spdx_id belongs to the document.
        # We assume that to be the first spdx_id we encounter. As the specification does not explicitly require this,
        # our approach might lead to unwanted behavior when the document's SPDX Id is defined later in the document.
        if "spdx_id" in self.creation_info:
            self.current_element["spdx_id"] = p[2]
        else:
            self.creation_info["spdx_id"] = p[2]

    # parsing methods for creation info / document level

    @grammar_rule(
        "license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n "
        "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n "
        "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n "
        "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error"
    )
    def p_creation_info_value_error(self, p):
        self.creation_info["logger"].append(
            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
        )

    @grammar_rule(
        "document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n "
        "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n "
        "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE"
    )
    def p_generic_value_creation_info(self, p):
        set_value(p, self.creation_info)

    @grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE")
    def p_license_list_version(self, p):
        set_value(p, self.creation_info, method_to_apply=Version.from_string)

    @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
    def p_external_document_ref(self, p):
        # The value is expected to look like "<document_ref_id> <document_uri> SHA1: <40 hex digits>".
        # Group 1 holds id and uri, group 2 the checksum part.
        external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")
        external_doc_ref_match = external_doc_ref_regex.match(p[2])
        if not external_doc_ref_match:
            self.creation_info["logger"].append(
                f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}"
            )
            return
        try:
            document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ")
        except ValueError:
            self.creation_info["logger"].append(
                f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
                f"document_ref_id and document_uri. Line: {p.lineno(1)}"
            )
            return
        checksum = parse_checksum(external_doc_ref_match.group(2).strip())
        external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
        self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)

    @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE")
    def p_creator(self, p):
        self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2]))

    @grammar_rule("created : CREATED ISO8601_DATE")
    def p_created(self, p):
        set_value(p, self.creation_info, method_to_apply=datetime_from_str)

    # parsing methods for extracted licensing info

    @grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE")
    def p_extracted_cross_reference(self, p):
        if self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)):
            self.current_element.setdefault("cross_references", []).append(p[2])

    # parsing methods for file

    @grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE")
    def p_file_contributor(self, p):
        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            self.current_element.setdefault("contributors", []).append(p[2])

    @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line")
    def p_file_attribution_text(self, p):
        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            self.current_element.setdefault("attribution_texts", []).append(p[2])

    @grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none")
    def p_file_license_info(self, p):
        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            self.current_element.setdefault("license_info_in_file", []).append(p[2])

    @grammar_rule("file_type : FILE_TYPE LINE")
    def p_file_type(self, p):
        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            return
        try:
            file_type = FileType[p[2].strip()]
        except KeyError:
            self.current_element["logger"].append(f"Invalid FileType: {p[2]}. Line {p.lineno(1)}")
            return
        self.current_element.setdefault("file_types", []).append(file_type)

    @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM")
    def p_file_checksum(self, p):
        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            return
        checksum = parse_checksum(p[2])
        self.current_element.setdefault("checksums", []).append(checksum)

    # parsing methods for package

    @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line")
    def p_pkg_attribution_text(self, p):
        # Only store the value if a package is in scope. Otherwise the text would be attached to an unrelated
        # element (or leak into the next element if none is in scope) in addition to the logged error.
        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            self.current_element.setdefault("attribution_texts", []).append(p[2])

    @grammar_rule(
        "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE"
    )
    def p_pkg_external_refs(self, p):
        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            return
        try:
            category, reference_type, locator = p[2].split(" ")
        except ValueError:
            self.current_element["logger"].append(
                f"Couldn't split PackageExternalRef in category, reference_type and locator. Line: {p.lineno(1)}"
            )
            return
        # The production with a trailing comment has four symbols, i.e. len(p) == 5.
        comment = None
        if len(p) == 5:
            comment = p[4]
        try:
            category = ExternalPackageRefCategory[category.replace("-", "_")]
        except KeyError:
            self.current_element["logger"].append(
                f"Invalid ExternalPackageRefCategory: {category}. Line: {p.lineno(1)}"
            )
            return
        try:
            external_package_ref = construct_or_raise_parsing_error(
                ExternalPackageRef,
                {"category": category, "reference_type": reference_type, "locator": locator, "comment": comment},
            )
        except SPDXParsingError as err:
            self.current_element["logger"].append(err.get_messages())
            return
        self.current_element.setdefault("external_references", []).append(external_package_ref)

    @grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none")
    def p_pkg_license_info_from_file(self, p):
        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            self.current_element.setdefault("license_info_from_files", []).append(p[2])

    @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM")
    def p_pkg_checksum(self, p):
        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            return
        checksum = parse_checksum(p[2])
        self.current_element.setdefault("checksums", []).append(checksum)

    @grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE")
    def p_pkg_verification_code(self, p):
        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            return

        if "verification_code" in self.current_element:
            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
            return
        # Group 1: the 40 hex digit code, group 3: the comma separated list of excluded files (optional).
        verif_code_regex = re.compile(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", re.UNICODE)
        verif_code_code_grp = 1
        verif_code_exc_files_grp = 3
        match = verif_code_regex.match(p[2])
        if not match:
            self.current_element["logger"].append(
                f"Error while parsing {p[1]}: Value did not match expected format. Line: {p.lineno(1)}"
            )
            return
        value = match.group(verif_code_code_grp)
        excluded_files = None
        if match.group(verif_code_exc_files_grp):
            excluded_files = match.group(verif_code_exc_files_grp).split(",")
        self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files)

    @grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE")
    def p_pkg_files_analyzed(self, p):
        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            return
        if "files_analyzed" in self.current_element:
            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
            return
        if p[2] == "true":
            self.current_element["files_analyzed"] = True
        elif p[2] == "false":
            self.current_element["files_analyzed"] = False
        else:
            self.current_element["logger"].append(
                f'The value of FilesAnalyzed must be either "true" or "false", but is: {p[2]}'
            )

    @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE")
    def p_primary_package_purpose(self, p):
        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")])

    @grammar_rule(
        "built_date : BUILT_DATE ISO8601_DATE\n release_date : RELEASE_DATE ISO8601_DATE\n "
        "valid_until_date : VALID_UNTIL_DATE ISO8601_DATE"
    )
    def p_package_dates(self, p):
        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            set_value(p, self.current_element, method_to_apply=datetime_from_str)

    # parsing methods for snippet

    @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line")
    def p_snippet_attribution_text(self, p):
        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
            self.current_element.setdefault("attribution_texts", []).append(p[2])

    @grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none")
    def p_snippet_license_info(self, p):
        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
            self.current_element.setdefault("license_info_in_snippet", []).append(p[2])

    @grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE")
    def p_snippet_range(self, p):
        if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
            return

        # TAG_DATA_MODEL_FIELD[tag][1] is the name of the constructor argument the tag maps to.
        argument_name = TAG_DATA_MODEL_FIELD[p[1]][1]
        if argument_name in self.current_element:
            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
            return
        range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE)
        if not range_re.match(p[2].strip()):
            self.current_element["logger"].append(
                f"Value for {p[1]} doesn't match valid range pattern. Line: {p.lineno(1)}"
            )
            return
        startpoint = int(p[2].split(":")[0])
        endpoint = int(p[2].split(":")[-1])
        self.current_element[argument_name] = startpoint, endpoint

    # parsing methods for annotation

    @grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| ANNOTATOR TOOL_VALUE\n| ANNOTATOR ORGANIZATION_VALUE")
    def p_annotator(self, p):
        self.initialize_new_current_element(Annotation)
        set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor)

    @grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
    def p_annotation_date(self, p):
        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
            set_value(p, self.current_element, method_to_apply=datetime_from_str)

    @grammar_rule("annotation_type : ANNOTATION_TYPE LINE")
    def p_annotation_type(self, p):
        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
            set_value(p, self.current_element, method_to_apply=lambda x: AnnotationType[x])

    # parsing methods for relationship

    @grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n | RELATIONSHIP LINE")
    def p_relationship(self, p):
        self.initialize_new_current_element(Relationship)
        try:
            spdx_element_id, relationship_type, related_spdx_element_id = p[2].split(" ")
        except ValueError:
            self.current_element["logger"].append(
                f"Relationship couldn't be split in spdx_element_id, relationship_type and "
                f"related_spdx_element. Line: {p.lineno(1)}"
            )
            return
        try:
            self.current_element["relationship_type"] = RelationshipType[relationship_type]
        except KeyError:
            # Parsing continues so that the remaining values are collected; the logged message makes the
            # construction of the relationship fail.
            self.current_element["logger"].append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}")
        if related_spdx_element_id == "NONE":
            related_spdx_element_id = SpdxNone()
        elif related_spdx_element_id == "NOASSERTION":
            related_spdx_element_id = SpdxNoAssertion()
        self.current_element["related_spdx_element_id"] = related_spdx_element_id
        self.current_element["spdx_element_id"] = spdx_element_id
        # The production with a trailing comment has four symbols, i.e. len(p) == 5.
        if len(p) == 5:
            self.current_element["comment"] = p[4]

    def p_error(self, p):
        # Intentionally empty: syntax errors are reported via the dedicated "<tag> error" productions above.
        pass

    def parse(self, text):
        """Parse a tag-value document and return the resulting Document.

        :param text: the content of the tag-value document.
        :return: the Document built from all parsed elements.
        :raises SPDXParsingError: if any message was logged while parsing or if the creation info or the document
            can't be constructed (raised by the imported parsing helper functions).
        """
        # entry point for the tag-value parser
        self.yacc.parse(text, lexer=self.lex)
        # this constructs the last remaining element; all other elements are constructed at the start of
        # their subsequent element
        self.construct_current_element()

        # To be able to parse creation info values if they appear in between other elements, e.g. packages, we use
        # two different dictionaries to collect the creation info and all other elements. Therefore, we have a separate
        # logger for the creation info whose messages we need to add to the main logger to then raise all collected
        # messages at once.
        creation_info_logger = self.creation_info.pop("logger")
        if creation_info_logger.has_messages():
            self.logger.extend([f"Error while parsing CreationInfo: {creation_info_logger.get_messages()}"])

        raise_parsing_error_if_logger_has_messages(self.logger)
        creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
        self.elements_built["creation_info"] = creation_info
        document = construct_or_raise_parsing_error(Document, self.elements_built)
        return document

    def initialize_new_current_element(self, clazz: Any):
        """Construct the element currently in scope (if any) and start collecting values for a `clazz` element."""
        self.construct_current_element()
        self.current_element["class"] = clazz

    def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool:
        """Return True if the element in scope is of `expected_class`; otherwise log an error and return False.

        :param expected_class: the element class the parsed tag belongs to.
        :param line_number: line of the parsed tag, only used for the error message.
        """
        if "class" not in self.current_element or expected_class != self.current_element["class"]:
            self.logger.append(
                f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to "
                f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. "
                f"Line: {line_number}"
            )
            return False
        return True

    def construct_current_element(self):
        """Build the element in scope from the collected values, store it in `elements_built` and reset the scope.

        Parsing errors of the element are moved to the main logger instead of being raised.
        """
        if "class" not in self.current_element:
            # This happens when the first element is initialized via initialize_new_current_element() or if the first
            # element is missing its expected starting tag. In both cases we are unable to construct an element.
            return

        clazz = self.current_element.pop("class")
        try:
            raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), clazz.__name__)
            self.elements_built.setdefault(CLASS_MAPPING[clazz.__name__], []).append(
                construct_or_raise_parsing_error(clazz, self.current_element)
            )
            if clazz == File:
                self.check_for_preceding_package_and_build_contains_relationship()
        except SPDXParsingError as err:
            self.logger.extend(err.get_messages())
        self.current_element = {"logger": Logger()}

    def check_for_preceding_package_and_build_contains_relationship(self):
        """Add a CONTAINS relationship from the last parsed package to the file that was just constructed."""
        file_spdx_id = self.current_element["spdx_id"]
        if "packages" not in self.elements_built:
            return
        # We assume that all files that are not contained in a package precede any package information. Any file
        # information that follows any package information is assigned to the last parsed package by creating a
        # corresponding contains relationship.
        # (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2)
        if not self.elements_built["packages"]:
            self.logger.append(
                f"Error while building contains relationship for file {file_spdx_id}, "
                f"preceding package was not parsed successfully."
            )
            return
        package_spdx_id = self.elements_built["packages"][-1].spdx_id
        relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id)
        if relationship not in self.elements_built.setdefault("relationships", []):
            self.elements_built["relationships"].append(relationship)
class Parser:
    """Tag-value parser for SPDX documents, driven by a ply ``yacc`` grammar.

    The grammar productions are attached to the ``p_*`` methods through the ``grammar_rule`` decorator. Parsed
    values are collected in plain dictionaries and only turned into model objects once an element is complete:

    * ``creation_info`` collects all document-level values (they may appear anywhere in the input),
    * ``current_element`` collects the values of the element that is currently being parsed together with the
      class to build (key ``"class"``) and an element-specific ``Logger`` (key ``"logger"``),
    * ``elements_built`` collects the finished elements, keyed by the names given in ``CLASS_MAPPING``.

    Errors are never raised while parsing a single line; they are collected in the loggers and raised together
    as one ``SPDXParsingError`` at the end of ``parse``.

    NOTE: the ``p_*`` methods are documented with ``#`` comments instead of docstrings on purpose. ply reads the
    grammar of a production from the function's ``__doc__`` (presumably set by ``grammar_rule``), so a regular
    docstring there would either be overwritten or misread as a grammar.
    """

    tokens: List[str]
    logger: Logger
    current_element: Dict[str, Any]
    creation_info: Dict[str, Any]
    elements_built: Dict[str, Any]
    lex: SPDXLexer
    yacc: LRParser

    def __init__(self, **kwargs):
        """Set up empty parsing state, build the lexer and generate the yacc parser.

        All keyword arguments are forwarded unchanged to ``ply.yacc.yacc``.
        """
        # ply looks up ``tokens`` on the module object passed to yacc.yacc(), so it has to be set first.
        self.tokens = SPDXLexer.tokens
        self.logger = Logger()
        self.current_element = {"logger": Logger()}
        self.creation_info = {"logger": Logger()}
        self.elements_built = dict()
        self.lex = SPDXLexer()
        self.lex.build(reflags=re.UNICODE)
        self.yacc = yacc.yacc(module=self, **kwargs)

    @grammar_rule("start : start attrib ")
    def p_start_start_attrib(self, p):
        # Structural rule only: a document is a sequence of attributes. Nothing to collect here.
        pass

    @grammar_rule("start : attrib ")
    def p_start_attrib(self, p):
        # Structural rule only: the first attribute of the document. Nothing to collect here.
        pass

    @grammar_rule(
        "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| "
        "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n"
        # attributes for file
        "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n"
        "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n"
        "| file_contributor\n"
        # attributes for annotation
        "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n"
        # attributes for relationship
        "| relationship\n"
        # attributes for snippet
        "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n"
        "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n"
        "| snippet_byte_range\n| snippet_line_range\n"
        # attributes for package
        "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n"
        "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n"
        "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n"
        "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n"
        "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n"
        # attributes for extracted licensing info
        "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n"
        "| unknown_tag "
    )
    def p_attrib(self, p):
        # Structural rule only: enumerates every attribute the grammar knows. The values are collected by the
        # rules of the individual attributes.
        pass

    # general parsing methods
    @grammar_rule(
        "license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n "
        "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n "
        "extracted_text : LICENSE_TEXT error\n "
        "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n "
        "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n "
        "file_license_comment : FILE_LICENSE_COMMENT error\n "
        "file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n "
        "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n "
        "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n "
        "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n "
        "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n "
        "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n "
        "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n "
        "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n "
        "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n "
        "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n "
        "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n "
        "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n "
        "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n "
        "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n "
        "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n "
        "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n "
        "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n "
        "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n "
        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n "
        "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n "
        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n "
        "snippet_license_info : SNIPPET_LICENSE_INFO error\n "
        "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n "
        "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n "
        "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
        "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error"
    )
    def p_current_element_error(self, p):
        # Error production for every element-level tag: the tag was recognised but its value did not match the
        # grammar. A tag that starts a new element still has to open that element, so that the error is logged
        # against the new element and not against the previous one.
        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
        self.current_element["logger"].append(
            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
        )

    @grammar_rule(
        "license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n "
        "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n "
        "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n "
        "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
        "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n "
        "file_comment : FILE_COMMENT text_or_line\n "
        "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
        "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n "
        "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n "
        "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n "
        "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n "
        "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n "
        "pkg_comment : PKG_COMMENT text_or_line\n "
        "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
        "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n "
        "pkg_file_name : PKG_FILE_NAME LINE\n "
        "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
        "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n "
        "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n "
        "snippet_comment : SNIPPET_COMMENT text_or_line\n "
        "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
        "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n "
        "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n "
        "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
        "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
        "annotation_comment : ANNOTATION_COMMENT text_or_line"
    )
    def p_generic_value(self, p):
        # Shared handler for all single-valued element attributes that need no special conversion.
        # A tag that starts a new element first finishes the previous element and opens the new one.
        if p[1] in ELEMENT_EXPECTED_START_TAG.values():
            self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0])
        # Only store the value if the tag belongs to the element that is currently in scope.
        if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)):
            set_value(p, self.current_element)

    @grammar_rule(
        "unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
        "| UNKNOWN_TAG"
    )
    def p_unknown_tag(self, p):
        # Unknown tags are reported on the document-level logger, they do not belong to any element.
        self.logger.append(f"Unknown tag provided in line {p.lineno(1)}")

    @grammar_rule("text_or_line : TEXT")
    def p_text(self, p):
        # TEXT tokens are converted to their plain string content by str_from_text.
        p[0] = str_from_text(p[1])

    @grammar_rule("text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : text_or_line")
    def p_line(self, p):
        # Pass the value through unchanged.
        p[0] = p[1]

    @grammar_rule(
        "license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n"
        "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION"
    )
    def p_no_assertion(self, p):
        p[0] = SpdxNoAssertion()

    @grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE")
    def p_none(self, p):
        p[0] = SpdxNone()

    @grammar_rule("license_or_no_assertion_or_none : LINE")
    def p_license(self, p):
        # Parse the value as a license expression. On failure the error is logged for the current element and
        # p[0] is left unset, i.e. the production yields None.
        try:
            p[0] = get_spdx_licensing().parse(p[1])
        except ExpressionError as err:
            error_message = f"Error while parsing license expression: {p[1]}"
            if err.args:
                error_message += f": {err.args[0]}"
            self.current_element["logger"].append(error_message)

    @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE")
    def p_actor_values(self, p):
        p[0] = ActorParser.parse_actor(p[1])

    @grammar_rule("spdx_id : SPDX_ID LINE")
    def p_spdx_id(self, p):
        # As all SPDX Ids share the same tag, there is no knowing which spdx_id belongs to the document.
        # We assume that to be the first spdx_id we encounter. As the specification does not explicitly require this,
        # our approach might lead to unwanted behavior when the document's SPDX Id is defined later in the document.
        if "spdx_id" in self.creation_info:
            self.current_element["spdx_id"] = p[2]
        else:
            self.creation_info["spdx_id"] = p[2]

    # parsing methods for creation info / document level

    @grammar_rule(
        "license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n "
        "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n "
        "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n "
        "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error"
    )
    def p_creation_info_value_error(self, p):
        # Error production for every document-level tag; logged on the creation info logger.
        self.creation_info["logger"].append(
            f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
        )

    @grammar_rule(
        "document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n "
        "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n "
        "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE"
    )
    def p_generic_value_creation_info(self, p):
        # Shared handler for all single-valued document-level attributes that need no conversion.
        set_value(p, self.creation_info)

    @grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE")
    def p_license_list_version(self, p):
        set_value(p, self.creation_info, method_to_apply=Version.from_string)

    @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
    def p_external_document_ref(self, p):
        # The value has the form "<document_ref_id> <document_uri> SHA1: <40 hex digits>". The regex separates
        # the trailing checksum (group 2) from everything in front of it (group 1).
        external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")
        external_doc_ref_match = external_doc_ref_regex.match(p[2])
        if not external_doc_ref_match:
            self.creation_info["logger"].append(
                f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}"
            )
            return
        try:
            # Unpacking raises ValueError unless the first part consists of exactly two space-separated values.
            document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ")
        except ValueError:
            self.creation_info["logger"].append(
                f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
                f"document_ref_id and document_uri. Line: {p.lineno(1)}"
            )
            return
        checksum = parse_checksum(external_doc_ref_match.group(2).strip())
        external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
        self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)

    @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE")
    def p_creator(self, p):
        # Creators may occur multiple times and are collected in a list.
        self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2]))

    @grammar_rule("created : CREATED ISO8601_DATE")
    def p_created(self, p):
        set_value(p, self.creation_info, method_to_apply=datetime_from_str)

    # parsing methods for extracted licensing info

    @grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE")
    def p_extracted_cross_reference(self, p):
        if self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)):
            self.current_element.setdefault("cross_references", []).append(p[2])

    # parsing methods for file

    @grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE")
    def p_file_contributor(self, p):
        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            self.current_element.setdefault("contributors", []).append(p[2])

    @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line")
    def p_file_attribution_text(self, p):
        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            self.current_element.setdefault("attribution_texts", []).append(p[2])

    @grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none")
    def p_file_license_info(self, p):
        if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            self.current_element.setdefault("license_info_in_file", []).append(p[2])

    @grammar_rule("file_type : FILE_TYPE LINE")
    def p_file_type(self, p):
        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            return
        try:
            file_type = FileType[p[2].strip()]
        except KeyError:
            self.current_element["logger"].append(f"Invalid FileType: {p[2]}. Line {p.lineno(1)}")
            return
        self.current_element.setdefault("file_types", []).append(file_type)

    @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM")
    def p_file_checksum(self, p):
        if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
            return
        checksum = parse_checksum(p[2])
        self.current_element.setdefault("checksums", []).append(checksum)

    # parsing methods for package

    @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line")
    def p_pkg_attribution_text(self, p):
        # Only store the text if a package is the element in scope; otherwise the text would silently end up
        # in an unrelated element (the mismatch itself is logged by the check).
        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            self.current_element.setdefault("attribution_texts", []).append(p[2])

    @grammar_rule(
        "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE"
    )
    def p_pkg_external_refs(self, p):
        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            return
        try:
            # Unpacking raises ValueError unless the value consists of exactly three space-separated parts.
            category, reference_type, locator = p[2].split(" ")
        except ValueError:
            self.current_element["logger"].append(
                f"Couldn't split PackageExternalRef in category, reference_type and locator. Line: {p.lineno(1)}"
            )
            return
        # The production with a comment has four symbols, i.e. len(p) == 5 including p[0].
        comment = None
        if len(p) == 5:
            comment = p[4]
        try:
            # Tag-value files spell categories with dashes, the enum members use underscores.
            category = ExternalPackageRefCategory[category.replace("-", "_")]
        except KeyError:
            self.current_element["logger"].append(
                f"Invalid ExternalPackageRefCategory: {category}. Line: {p.lineno(1)}"
            )
            return
        try:
            external_package_ref = construct_or_raise_parsing_error(
                ExternalPackageRef,
                {"category": category, "reference_type": reference_type, "locator": locator, "comment": comment},
            )
        except SPDXParsingError as err:
            # get_messages() yields a collection of messages: add them individually instead of nesting the
            # whole collection as a single logger entry.
            self.current_element["logger"].extend(err.get_messages())
            return
        self.current_element.setdefault("external_references", []).append(external_package_ref)

    @grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none")
    def p_pkg_license_info_from_file(self, p):
        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            self.current_element.setdefault("license_info_from_files", []).append(p[2])

    @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM")
    def p_pkg_checksum(self, p):
        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            return
        checksum = parse_checksum(p[2])
        self.current_element.setdefault("checksums", []).append(checksum)

    @grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE")
    def p_pkg_verification_code(self, p):
        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            return

        if "verification_code" in self.current_element:
            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
            return
        # Group 1 is the 40 digit hex value, group 3 the content of the optional "(excludes: ...)" suffix.
        verif_code_regex = re.compile(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", re.UNICODE)
        verif_code_code_grp = 1
        verif_code_exc_files_grp = 3
        match = verif_code_regex.match(p[2])
        if not match:
            self.current_element["logger"].append(
                f"Error while parsing {p[1]}: Value did not match expected format. Line: {p.lineno(1)}"
            )
            return
        value = match.group(verif_code_code_grp)
        excluded_files = None
        if match.group(verif_code_exc_files_grp):
            excluded_files = match.group(verif_code_exc_files_grp).split(",")
        self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files)

    @grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE")
    def p_pkg_files_analyzed(self, p):
        if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            return
        if "files_analyzed" in self.current_element:
            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
            return
        if p[2] == "true":
            self.current_element["files_analyzed"] = True
        elif p[2] == "false":
            self.current_element["files_analyzed"] = False
        else:
            self.current_element["logger"].append(
                f'The value of FilesAnalyzed must be either "true" or "false", but is: {p[2]}'
            )

    @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE")
    def p_primary_package_purpose(self, p):
        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            # Tag-value files spell purposes with dashes, the enum members use underscores.
            set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")])

    @grammar_rule(
        "built_date : BUILT_DATE ISO8601_DATE\n release_date : RELEASE_DATE ISO8601_DATE\n "
        "valid_until_date : VALID_UNTIL_DATE ISO8601_DATE"
    )
    def p_package_dates(self, p):
        if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
            set_value(p, self.current_element, method_to_apply=datetime_from_str)

    # parsing methods for snippet

    @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line")
    def p_snippet_attribution_text(self, p):
        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
            self.current_element.setdefault("attribution_texts", []).append(p[2])

    @grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none")
    def p_snippet_license_info(self, p):
        if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
            self.current_element.setdefault("license_info_in_snippet", []).append(p[2])

    @grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE")
    def p_snippet_range(self, p):
        if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
            return

        argument_name = TAG_DATA_MODEL_FIELD[p[1]][1]
        if argument_name in self.current_element:
            self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
            return
        # A range has the form "<start>:<end>"; the bounds are read from the groups of the validated match
        # so that validation and conversion cannot disagree.
        range_match = re.match(r"^(\d+):(\d+)$", p[2].strip(), re.UNICODE)
        if not range_match:
            self.current_element["logger"].append(
                f"Value for {p[1]} doesn't match valid range pattern. Line: {p.lineno(1)}"
            )
            return
        startpoint = int(range_match.group(1))
        endpoint = int(range_match.group(2))
        self.current_element[argument_name] = startpoint, endpoint

    # parsing methods for annotation

    @grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| ANNOTATOR TOOL_VALUE\n| ANNOTATOR ORGANIZATION_VALUE")
    def p_annotator(self, p):
        # The annotator tag always starts a new annotation.
        self.initialize_new_current_element(Annotation)
        set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor)

    @grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
    def p_annotation_date(self, p):
        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
            set_value(p, self.current_element, method_to_apply=datetime_from_str)

    @grammar_rule("annotation_type : ANNOTATION_TYPE LINE")
    def p_annotation_type(self, p):
        if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
            set_value(p, self.current_element, method_to_apply=lambda x: AnnotationType[x])

    # parsing methods for relationship

    @grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n | RELATIONSHIP LINE")
    def p_relationship(self, p):
        # Every relationship tag starts a new relationship element.
        self.initialize_new_current_element(Relationship)
        try:
            # Unpacking raises ValueError unless the value consists of exactly three space-separated parts.
            spdx_element_id, relationship_type, related_spdx_element_id = p[2].split(" ")
        except ValueError:
            self.current_element["logger"].append(
                f"Relationship couldn't be split in spdx_element_id, relationship_type and "
                f"related_spdx_element. Line: {p.lineno(1)}"
            )
            return
        try:
            self.current_element["relationship_type"] = RelationshipType[relationship_type]
        except KeyError:
            # Keep going after logging so that the remaining values of the relationship are still collected.
            self.current_element["logger"].append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}")
        if related_spdx_element_id == "NONE":
            related_spdx_element_id = SpdxNone()
        elif related_spdx_element_id == "NOASSERTION":
            related_spdx_element_id = SpdxNoAssertion()
        self.current_element["related_spdx_element_id"] = related_spdx_element_id
        self.current_element["spdx_element_id"] = spdx_element_id
        # The production with a comment has four symbols, i.e. len(p) == 5 including p[0].
        if len(p) == 5:
            self.current_element["comment"] = p[4]

    def p_error(self, p):
        # Intentionally empty: malformed values are reported by the "<tag> error" productions above.
        pass

    def parse(self, text):
        """Parse the given tag-value text and return the resulting ``Document``.

        This is the entry point of the tag-value parser. All messages collected while parsing are raised
        together as one ``SPDXParsingError``; a further ``SPDXParsingError`` may be raised while constructing
        the ``CreationInfo`` or the ``Document`` itself.
        """
        self.yacc.parse(text, lexer=self.lex)
        # this constructs the last remaining element; all other elements are constructed at the start of
        # their subsequent element
        self.construct_current_element()

        # To be able to parse creation info values if they appear in between other elements, e.g. packages, we use
        # two different dictionaries to collect the creation info and all other elements. Therefore, we have a separate
        # logger for the creation info whose messages we need to add to the main logger to then raise all collected
        # messages at once.
        creation_info_logger = self.creation_info.pop("logger")
        if creation_info_logger.has_messages():
            self.logger.extend([f"Error while parsing CreationInfo: {creation_info_logger.get_messages()}"])

        raise_parsing_error_if_logger_has_messages(self.logger)
        creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
        self.elements_built["creation_info"] = creation_info
        document = construct_or_raise_parsing_error(Document, self.elements_built)
        return document

    def initialize_new_current_element(self, clazz: Any):
        """Finish the element currently in scope and open a new element of class ``clazz``."""
        self.construct_current_element()
        self.current_element["class"] = clazz

    def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool:
        """Return whether the element in scope is of ``expected_class``; log an error on the main logger if not."""
        if "class" not in self.current_element or expected_class != self.current_element["class"]:
            self.logger.append(
                f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to "
                f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. "
                f"Line: {line_number}"
            )
            return False
        return True

    def construct_current_element(self):
        """Build the element in scope from the collected values and reset ``current_element``.

        The built element is stored in ``elements_built``. Errors of the element, either logged while parsing
        or raised during construction, are moved to the main logger instead of being raised.
        """
        if "class" not in self.current_element:
            # This happens when the first element is initialized via initialize_new_current_element() or if the first
            # element is missing its expected starting tag. In both cases we are unable to construct an element.
            return

        clazz = self.current_element.pop("class")
        try:
            raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), clazz.__name__)
            self.elements_built.setdefault(CLASS_MAPPING[clazz.__name__], []).append(
                construct_or_raise_parsing_error(clazz, self.current_element)
            )
            if clazz == File:
                self.check_for_preceding_package_and_build_contains_relationship()
        except SPDXParsingError as err:
            self.logger.extend(err.get_messages())
        # Start over with an empty element, whether construction succeeded or not.
        self.current_element = {"logger": Logger()}

    def check_for_preceding_package_and_build_contains_relationship(self):
        """Add a CONTAINS relationship from the last parsed package to the file that was just built.

        Nothing happens if no package has been encountered so far or if the relationship already exists.
        """
        file_spdx_id = self.current_element["spdx_id"]
        if "packages" not in self.elements_built:
            return
        # We assume that all files that are not contained in a package precede any package information. Any file
        # information that follows any package information is assigned to the last parsed package by creating a
        # corresponding contains relationship.
        # (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2)
        if not self.elements_built["packages"]:
            self.logger.append(
                f"Error while building contains relationship for file {file_spdx_id}, "
                f"preceding package was not parsed successfully."
            )
            return
        package_spdx_id = self.elements_built["packages"][-1].spdx_id
        relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id)
        if relationship not in self.elements_built.setdefault("relationships", []):
            self.elements_built["relationships"].append(relationship)
def __init__(self, **kwargs):
    """Create empty parsing state, build the lexer and generate the yacc parser.

    Any keyword arguments are handed on unchanged to ``ply.yacc.yacc``.
    """
    # State that is filled while parsing: the main logger, the finished elements and the two
    # value collections, each of the latter carrying its own logger.
    self.logger = Logger()
    self.elements_built = {}
    self.creation_info = {"logger": Logger()}
    self.current_element = {"logger": Logger()}

    # ``tokens`` must be present on this object before it is passed to yacc.yacc() as ``module``.
    self.tokens = SPDXLexer.tokens
    lexer = SPDXLexer()
    lexer.build(reflags=re.UNICODE)
    self.lex = lexer
    self.yacc = yacc.yacc(module=self, **kwargs)
# Top-level alternative of the grammar: an attribute is exactly one of the tag productions listed below.
# The string passed to grammar_rule is the grammar itself and must not be reformatted.
@grammar_rule(
    "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| "
    "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n"
    # attributes for file
    "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n"
    "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n"
    "| file_contributor\n"
    # attributes for annotation
    "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n"
    # attributes for relationship
    "| relationship\n"
    # attributes for snippet
    "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n"
    "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n"
    "| snippet_byte_range\n| snippet_line_range\n"
    # attributes for package
    "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n"
    "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n"
    "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n"
    "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n"
    "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n"
    # attributes for extracted licensing info
    "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n"
    "| unknown_tag "
)
def p_attrib(self, p):
    # Structural rule only: nothing is collected here, the values are handled by the rules of the
    # individual attributes.
    pass
attrib : spdx_version | spdx_id | data_license | doc_name | document_comment | document_namespace | creator | created | creator_comment | license_list_version | ext_doc_ref | file_name | file_type | file_checksum | file_license_concluded | file_license_info | file_copyright_text | file_license_comment | file_attribution_text | file_notice | file_comment | file_contributor | annotator | annotation_date | annotation_comment | annotation_type | annotation_spdx_id | relationship | snippet_spdx_id | snippet_name | snippet_comment | snippet_attribution_text | snippet_copyright_text | snippet_license_comment | file_spdx_id | snippet_license_concluded | snippet_license_info | snippet_byte_range | snippet_line_range | package_name | package_version | download_location | files_analyzed | homepage | summary | source_info | pkg_file_name | supplier | originator | pkg_checksum | verification_code | description | pkg_comment | pkg_attribution_text | pkg_license_declared | pkg_license_concluded | pkg_license_info | pkg_license_comment | pkg_copyright_text | pkg_external_ref | primary_package_purpose | built_date | release_date | valid_until_date | license_id | extracted_text | license_name | license_cross_ref | lic_comment | unknown_tag
@grammar_rule(
    "license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n "
    "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n "
    "extracted_text : LICENSE_TEXT error\n "
    "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n "
    "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n "
    "file_license_comment : FILE_LICENSE_COMMENT error\n "
    "file_license_info : FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n "
    "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n "
    "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n "
    "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n "
    "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n "
    "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n "
    "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n "
    "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n "
    "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n "
    "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n "
    "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n "
    "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n "
    "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n "
    "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n "
    "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n "
    "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n "
    "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n "
    "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n "
    "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n "
    "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n "
    "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n "
    "snippet_license_info : SNIPPET_LICENSE_INFO error\n "
    "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n "
    "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n "
    "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
    "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error"
)
def p_current_element_error(self, p):
    # Error production shared by all element-level tags: the tag itself was recognised, its value was not.
    tag = p[1]
    starts_new_element = tag in ELEMENT_EXPECTED_START_TAG.values()
    if starts_new_element:
        # Open the new element first, so the message below is attributed to it and not to its predecessor.
        self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[tag][0])
    element_logger = self.current_element["logger"]
    element_logger.append(
        f"Error while parsing {tag}: Token did not match specified grammar rule. Line: {p.lineno(1)}"
    )
license_id : LICENSE_ID error license_cross_ref : LICENSE_CROSS_REF error lic_comment : LICENSE_COMMENT error license_name : LICENSE_NAME error extracted_text : LICENSE_TEXT error file_name : FILE_NAME error file_contributor : FILE_CONTRIBUTOR error file_notice : FILE_NOTICE error file_copyright_text : FILE_COPYRIGHT_TEXT error file_license_comment : FILE_LICENSE_COMMENT error file_license_info : FILE_LICENSE_INFO error file_comment : FILE_COMMENT error file_checksum : FILE_CHECKSUM error file_license_concluded : FILE_LICENSE_CONCLUDED error file_type : FILE_TYPE error file_attribution_text : FILE_ATTRIBUTION_TEXT error package_name : PKG_NAME error pkg_attribution_text : PKG_ATTRIBUTION_TEXT error description : PKG_DESCRIPTION error pkg_comment : PKG_COMMENT error summary : PKG_SUMMARY error pkg_copyright_text : PKG_COPYRIGHT_TEXT error pkg_external_ref : PKG_EXTERNAL_REF error pkg_license_comment : PKG_LICENSE_COMMENT error pkg_license_declared : PKG_LICENSE_DECLARED error pkg_license_info : PKG_LICENSE_INFO error pkg_license_concluded : PKG_LICENSE_CONCLUDED error source_info : PKG_SOURCE_INFO error homepage : PKG_HOMEPAGE error pkg_checksum : PKG_CHECKSUM error verification_code : PKG_VERIFICATION_CODE error originator : PKG_ORIGINATOR error download_location : PKG_DOWNLOAD_LOCATION error files_analyzed : PKG_FILES_ANALYZED error supplier : PKG_SUPPLIER error pkg_file_name : PKG_FILE_NAME error package_version : PKG_VERSION error primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error built_date : BUILT_DATE error release_date : RELEASE_DATE error valid_until_date : VALID_UNTIL_DATE error snippet_spdx_id : SNIPPET_SPDX_ID error snippet_name : SNIPPET_NAME error snippet_comment : SNIPPET_COMMENT error snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error snippet_license_comment : SNIPPET_LICENSE_COMMENT error file_spdx_id : SNIPPET_FILE_SPDXID error snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED 
error snippet_license_info : SNIPPET_LICENSE_INFO error snippet_byte_range : SNIPPET_BYTE_RANGE error snippet_line_range : SNIPPET_LINE_RANGE error annotator : ANNOTATOR error annotation_date : ANNOTATION_DATE error annotation_comment : ANNOTATION_COMMENT error annotation_type : ANNOTATION_TYPE error annotation_spdx_id : ANNOTATION_SPDX_ID error relationship : RELATIONSHIP error
@grammar_rule(
    "license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n "
    "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n "
    "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n "
    "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
    "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n "
    "file_comment : FILE_COMMENT text_or_line\n "
    "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
    "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n "
    "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n "
    "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n "
    "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n "
    "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n "
    "pkg_comment : PKG_COMMENT text_or_line\n "
    "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
    "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n "
    "pkg_file_name : PKG_FILE_NAME LINE\n "
    "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
    "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n "
    "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n "
    "snippet_comment : SNIPPET_COMMENT text_or_line\n "
    "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n "
    "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n "
    "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n "
    "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n "
    "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
    "annotation_comment : ANNOTATION_COMMENT text_or_line"
)
def p_generic_value(self, p):
    # Shared handler for single-valued element attributes that need no special conversion.
    tag = p[1]
    if tag in ELEMENT_EXPECTED_START_TAG.values():
        # This tag opens a new element: finish the previous one and switch the scope.
        self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[tag][0])
    owning_class = TAG_DATA_MODEL_FIELD[tag][0]
    if not self.check_that_current_element_matches_class_for_value(owning_class, p.lineno(1)):
        # The tag does not belong to the element in scope; the check has already logged this.
        return
    set_value(p, self.current_element)
license_name : LICENSE_NAME line_or_no_assertion extracted_text : LICENSE_TEXT text_or_line lic_comment : LICENSE_COMMENT text_or_line license_id : LICENSE_ID LINE file_name : FILE_NAME LINE file_notice : FILE_NOTICE text_or_line file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none file_license_comment : FILE_LICENSE_COMMENT text_or_line file_comment : FILE_COMMENT text_or_line file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none package_name : PKG_NAME LINE description : PKG_DESCRIPTION text_or_line summary : PKG_SUMMARY text_or_line source_info : PKG_SOURCE_INFO text_or_line homepage : PKG_HOMEPAGE line_or_no_assertion_or_none download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none originator : PKG_ORIGINATOR actor_or_no_assertion supplier : PKG_SUPPLIER actor_or_no_assertion pkg_comment : PKG_COMMENT text_or_line pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none pkg_file_name : PKG_FILE_NAME LINE pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none package_version : PKG_VERSION LINE pkg_license_comment : PKG_LICENSE_COMMENT text_or_line snippet_spdx_id : SNIPPET_SPDX_ID LINE snippet_name : SNIPPET_NAME LINE snippet_comment : SNIPPET_COMMENT text_or_line snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line file_spdx_id : SNIPPET_FILE_SPDXID LINE snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none annotation_spdx_id : ANNOTATION_SPDX_ID LINE annotation_comment : ANNOTATION_COMMENT text_or_line
@grammar_rule(
    "unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG ISO8601_DATE\n | UNKNOWN_TAG PERSON_VALUE \n"
    "| UNKNOWN_TAG"
)
def p_unknown_tag(self, p):
    # Unknown tags are only reported on the main logger; any value that follows the tag is ignored.
    line_number = p.lineno(1)
    self.logger.append(f"Unknown tag provided in line {line_number}")
unknown_tag : UNKNOWN_TAG text_or_line | UNKNOWN_TAG ISO8601_DATE | UNKNOWN_TAG PERSON_VALUE | UNKNOWN_TAG
@grammar_rule("text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : text_or_line")
def p_line(self, p):
    # Pass-through: the value of the single matched symbol becomes the value of the production.
    matched_value = p[1]
    p[0] = matched_value
text_or_line : LINE line_or_no_assertion : LINE line_or_no_assertion_or_none : text_or_line
@grammar_rule(
    "license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n"
    "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION"
)
def p_no_assertion(self, p):
    # A NO_ASSERTION token is replaced by a fresh SpdxNoAssertion instance.
    no_assertion = SpdxNoAssertion()
    p[0] = no_assertion
license_or_no_assertion_or_none : NO_ASSERTION actor_or_no_assertion : NO_ASSERTION line_or_no_assertion : NO_ASSERTION line_or_no_assertion_or_none : NO_ASSERTION
@grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE")
def p_none(self, p):
    # A NONE token is replaced by a fresh SpdxNone instance.
    none_value = SpdxNone()
    p[0] = none_value
license_or_no_assertion_or_none : NONE line_or_no_assertion_or_none : NONE
@grammar_rule("license_or_no_assertion_or_none : LINE")
def p_license(self, p):
    # Parse the line as a license expression. On failure the problem is recorded on the current element's
    # logger and the production value is left unset.
    expression = p[1]
    try:
        p[0] = get_spdx_licensing().parse(expression)
    except ExpressionError as err:
        # Only add the detail suffix when the exception actually carries one.
        detail = f": {err.args[0]}" if err.args else ""
        self.current_element["logger"].append(f"Error while parsing license expression: {expression}{detail}")
license_or_no_assertion_or_none : LINE
@grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE")
def p_actor_values(self, p):
    # Person and organization values are both turned into an actor by ActorParser.
    actor = ActorParser.parse_actor(p[1])
    p[0] = actor
actor_or_no_assertion : PERSON_VALUE | ORGANIZATION_VALUE
@grammar_rule("spdx_id : SPDX_ID LINE")
def p_spdx_id(self, p):
    # Every SPDX id uses the same tag, so nothing in the input marks which one belongs to the document itself.
    # The first id encountered is taken to be the document's; all later ids go to the element currently in scope.
    # The specification does not explicitly require this ordering, so a document whose own SPDX id is defined
    # later in the file may be handled in an unwanted way.
    document_id_known = "spdx_id" in self.creation_info
    target = self.current_element if document_id_known else self.creation_info
    target["spdx_id"] = p[2]
spdx_id : SPDX_ID LINE
@grammar_rule(
    "license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n "
    "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n "
    "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n "
    "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error"
)
def p_creation_info_value_error(self, p):
    # Error productions for creation-info tags: report the offending tag and its line on the creation-info logger.
    tag = p[1]
    line_number = p.lineno(1)
    message = f"Error while parsing {tag}: Token did not match specified grammar rule. Line: {line_number}"
    self.creation_info["logger"].append(message)
license_list_version : LICENSE_LIST_VERSION error document_comment : DOC_COMMENT error document_namespace : DOC_NAMESPACE error data_license : DOC_LICENSE error doc_name : DOC_NAME error ext_doc_ref : EXT_DOC_REF error spdx_version : DOC_VERSION error creator_comment : CREATOR_COMMENT error creator : CREATOR error created : CREATED error
@grammar_rule(
    "document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n "
    "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n "
    "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE"
)
def p_generic_value_creation_info(self, p):
    # Creation-info values are collected in their own dictionary, separate from the current element.
    creation_info = self.creation_info
    set_value(p, creation_info)
document_comment : DOC_COMMENT text_or_line document_namespace : DOC_NAMESPACE LINE data_license : DOC_LICENSE LINE spdx_version : DOC_VERSION LINE creator_comment : CREATOR_COMMENT text_or_line doc_name : DOC_NAME LINE
@grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE")
def p_license_list_version(self, p):
    # The raw line is handed to Version.from_string as the conversion applied by set_value.
    version_converter = Version.from_string
    set_value(p, self.creation_info, method_to_apply=version_converter)
license_list_version : LICENSE_LIST_VERSION LINE
@grammar_rule("ext_doc_ref : EXT_DOC_REF LINE")
def p_external_document_ref(self, p):
    # Expected value layout: "<document_ref_id> <document_uri> SHA1: <40 lowercase hex digits>".
    match = re.match(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})", p[2])
    if match is None:
        self.creation_info["logger"].append(
            f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}"
        )
        return

    id_and_uri, checksum_text = match.groups()
    # Everything in front of the checksum has to be exactly two parts separated by a single space.
    parts = id_and_uri.strip().split(" ")
    if len(parts) != 2:
        self.creation_info["logger"].append(
            f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
            f"document_ref_id and document_uri. Line: {p.lineno(1)}"
        )
        return
    document_ref_id, document_uri = parts

    checksum = parse_checksum(checksum_text.strip())
    reference = ExternalDocumentRef(document_ref_id, document_uri, checksum)
    self.creation_info.setdefault("external_document_refs", []).append(reference)
ext_doc_ref : EXT_DOC_REF LINE
@grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE")
def p_creator(self, p):
    # Creators may be given several times; each parsed actor is added to the list under "creators".
    creators = self.creation_info.setdefault("creators", [])
    creators.append(ActorParser.parse_actor(p[2]))
creator : CREATOR PERSON_VALUE | CREATOR TOOL_VALUE | CREATOR ORGANIZATION_VALUE
@grammar_rule("created : CREATED ISO8601_DATE")
def p_created(self, p):
    # The date token is converted with datetime_from_str when set_value stores it in the creation info.
    date_converter = datetime_from_str
    set_value(p, self.creation_info, method_to_apply=date_converter)
created : CREATED ISO8601_DATE
@grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE")
def p_extracted_cross_reference(self, p):
    # Cross references are only collected while an ExtractedLicensingInfo is the element in scope.
    if not self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)):
        return
    cross_references = self.current_element.setdefault("cross_references", [])
    cross_references.append(p[2])
license_cross_ref : LICENSE_CROSS_REF LINE
@grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE")
def p_file_contributor(self, p):
    # Contributors are only collected while a File is the element in scope.
    if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
        return
    contributors = self.current_element.setdefault("contributors", [])
    contributors.append(p[2])
file_contributor : FILE_CONTRIBUTOR LINE
@grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line")
def p_file_attribution_text(self, p):
    # Attribution texts are only collected while a File is the element in scope.
    if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
        return
    attribution_texts = self.current_element.setdefault("attribution_texts", [])
    attribution_texts.append(p[2])
file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line
@grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none")
def p_file_license_info(self, p):
    # License info entries are only collected while a File is the element in scope.
    if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
        return
    license_infos = self.current_element.setdefault("license_info_in_file", [])
    license_infos.append(p[2])
file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none
@grammar_rule("file_type : FILE_TYPE LINE")
def p_file_type(self, p):
    # File types are looked up by name in the FileType enum; unknown names are logged on the element.
    if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
        type_name = p[2].strip()
        try:
            file_type = FileType[type_name]
        except KeyError:
            # The message shows the value exactly as it was given, i.e. before stripping.
            self.current_element["logger"].append(f"Invalid FileType: {p[2]}. Line {p.lineno(1)}")
        else:
            self.current_element.setdefault("file_types", []).append(file_type)
file_type : FILE_TYPE LINE
@grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM")
def p_file_checksum(self, p):
    # Checksums are only collected while a File is the element in scope.
    if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)):
        parsed_checksum = parse_checksum(p[2])
        self.current_element.setdefault("checksums", []).append(parsed_checksum)
file_checksum : FILE_CHECKSUM CHECKSUM
@grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line")
def p_pkg_attribution_text(self, p):
    # Attribution texts are only collected while a Package is the element in scope.
    # The check already logs the mismatch; its result must also stop the value from being stored, otherwise the
    # text would silently end up on whatever other element happens to be current.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    self.current_element.setdefault("attribution_texts", []).append(p[2])
pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line
@grammar_rule(
    "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE"
)
def p_pkg_external_refs(self, p):
    # Builds an ExternalPackageRef from "<category> <reference_type> <locator>" plus an optional comment and
    # adds it to the current Package. Every failure is logged on the element and the reference is skipped.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    try:
        category, reference_type, locator = p[2].split(" ")
    except ValueError:
        self.current_element["logger"].append(
            f"Couldn't split PackageExternalRef in category, reference_type and locator. Line: {p.lineno(1)}"
        )
        return

    # The production has five symbols only when the comment tag and its value are present.
    comment = p[4] if len(p) == 5 else None

    try:
        # The enum is looked up with underscores in place of the dashes used in the input.
        category = ExternalPackageRefCategory[category.replace("-", "_")]
    except KeyError:
        self.current_element["logger"].append(
            f"Invalid ExternalPackageRefCategory: {category}. Line: {p.lineno(1)}"
        )
        return

    try:
        external_package_ref = construct_or_raise_parsing_error(
            ExternalPackageRef,
            {"category": category, "reference_type": reference_type, "locator": locator, "comment": comment},
        )
    except SPDXParsingError as err:
        # get_messages() is a collection of messages, so it is merged with extend(), the same way
        # construct_current_element() does, instead of being appended as a single nested entry.
        self.current_element["logger"].extend(err.get_messages())
        return
    self.current_element.setdefault("external_references", []).append(external_package_ref)
pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line | PKG_EXTERNAL_REF LINE
@grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none")
def p_pkg_license_info_from_file(self, p):
    # License info entries are only collected while a Package is the element in scope.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    license_infos = self.current_element.setdefault("license_info_from_files", [])
    license_infos.append(p[2])
pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none
@grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM")
def p_pkg_checksum(self, p):
    # Checksums are only collected while a Package is the element in scope.
    if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        parsed_checksum = parse_checksum(p[2])
        self.current_element.setdefault("checksums", []).append(parsed_checksum)
pkg_checksum : PKG_CHECKSUM CHECKSUM
@grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE")
def p_pkg_verification_code(self, p):
    # Parses "<40 lowercase hex digits> (excludes: <file>,<file>,...)"; the parenthesised part is optional.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return

    element_logger = self.current_element["logger"]
    # Only one verification code may be given per package.
    if "verification_code" in self.current_element:
        element_logger.append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
        return

    match = re.match(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", p[2], re.UNICODE)
    if match is None:
        element_logger.append(
            f"Error while parsing {p[1]}: Value did not match expected format. Line: {p.lineno(1)}"
        )
        return

    # Group 1 is the code itself, group 3 the comma separated list inside "(excludes: ...)".
    code, _, excluded = match.groups()
    excluded_files = excluded.split(",") if excluded else None
    self.current_element["verification_code"] = PackageVerificationCode(code, excluded_files)
verification_code : PKG_VERIFICATION_CODE LINE
@grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE")
def p_pkg_files_analyzed(self, p):
    # Stores the FilesAnalyzed flag of the current Package; only the literals "true" and "false" are accepted.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    if "files_analyzed" in self.current_element:
        self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
        return

    accepted_literals = {"true": True, "false": False}
    raw_value = p[2]
    if raw_value in accepted_literals:
        self.current_element["files_analyzed"] = accepted_literals[raw_value]
    else:
        self.current_element["logger"].append(
            f'The value of FilesAnalyzed must be either "true" or "false", but is: {raw_value}'
        )
files_analyzed : PKG_FILES_ANALYZED LINE
@grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE")
def p_primary_package_purpose(self, p):
    def to_package_purpose(raw_value):
        # The enum is looked up with underscores in place of the dashes used in the input.
        return PackagePurpose[raw_value.replace("-", "_")]

    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    set_value(p, self.current_element, method_to_apply=to_package_purpose)
primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE
@grammar_rule(
    "built_date : BUILT_DATE ISO8601_DATE\n release_date : RELEASE_DATE ISO8601_DATE\n "
    "valid_until_date : VALID_UNTIL_DATE ISO8601_DATE"
)
def p_package_dates(self, p):
    # All three package dates are handled alike: converted with datetime_from_str and stored on the Package.
    if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)):
        return
    set_value(p, self.current_element, method_to_apply=datetime_from_str)
built_date : BUILT_DATE ISO8601_DATE release_date : RELEASE_DATE ISO8601_DATE valid_until_date : VALID_UNTIL_DATE ISO8601_DATE
@grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line")
def p_snippet_attribution_text(self, p):
    # Attribution texts are only collected while a Snippet is the element in scope.
    if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
        return
    attribution_texts = self.current_element.setdefault("attribution_texts", [])
    attribution_texts.append(p[2])
snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line
@grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none")
def p_snippet_license_info(self, p):
    # License info entries are only collected while a Snippet is the element in scope.
    if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
        return
    license_infos = self.current_element.setdefault("license_info_in_snippet", [])
    license_infos.append(p[2])
snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none
@grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE")
def p_snippet_range(self, p):
    # Parses a "<start>:<end>" range and stores it as an (int, int) tuple on the current Snippet.
    if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)):
        return

    # The second entry registered for the tag is the key under which the range is stored.
    argument_name = TAG_DATA_MODEL_FIELD[p[1]][1]
    if argument_name in self.current_element:
        self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}")
        return

    range_match = re.match(r"^(\d+):(\d+)$", p[2].strip(), re.UNICODE)
    if range_match is None:
        self.current_element["logger"].append(
            f"Value for {p[1]} doesn't match valid range pattern. Line: {p.lineno(1)}"
        )
        return

    startpoint, endpoint = (int(bound) for bound in range_match.groups())
    self.current_element[argument_name] = (startpoint, endpoint)
snippet_byte_range : SNIPPET_BYTE_RANGE LINE snippet_line_range : SNIPPET_LINE_RANGE LINE
@grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| ANNOTATOR TOOL_VALUE\n| ANNOTATOR ORGANIZATION_VALUE")
def p_annotator(self, p):
    # The annotator tag starts a new Annotation, so a fresh element is opened before the value is stored.
    self.initialize_new_current_element(Annotation)
    annotation = self.current_element
    set_value(p, annotation, method_to_apply=ActorParser.parse_actor)
annotator : ANNOTATOR PERSON_VALUE | ANNOTATOR TOOL_VALUE | ANNOTATOR ORGANIZATION_VALUE
@grammar_rule("annotation_date : ANNOTATION_DATE ISO8601_DATE")
def p_annotation_date(self, p):
    # The date is converted with datetime_from_str and stored only while an Annotation is in scope.
    if not self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
        return
    set_value(p, self.current_element, method_to_apply=datetime_from_str)
annotation_date : ANNOTATION_DATE ISO8601_DATE
@grammar_rule("annotation_type : ANNOTATION_TYPE LINE")
def p_annotation_type(self, p):
    def to_annotation_type(raw_value):
        # The value is used unchanged as the member name in the AnnotationType enum.
        return AnnotationType[raw_value]

    if not self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)):
        return
    set_value(p, self.current_element, method_to_apply=to_annotation_type)
annotation_type : ANNOTATION_TYPE LINE
@grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n " "| RELATIONSHIP LINE")
def p_relationship(self, p):
    # Every relationship line starts a new Relationship element built from
    # "<spdx_element_id> <relationship_type> <related_spdx_element_id>" plus an optional comment.
    self.initialize_new_current_element(Relationship)

    fields = p[2].split(" ")
    if len(fields) != 3:
        self.current_element["logger"].append(
            f"Relationship couldn't be split in spdx_element_id, relationship_type and "
            f"related_spdx_element. Line: {p.lineno(1)}"
        )
        return
    spdx_element_id, relationship_type, related_spdx_element_id = fields

    try:
        self.current_element["relationship_type"] = RelationshipType[relationship_type]
    except KeyError:
        # An unknown type is logged, but the remaining fields are still recorded on the element.
        self.current_element["logger"].append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}")

    # The two keywords stand for the dedicated marker objects instead of a referenced element id.
    keyword_markers = {"NONE": SpdxNone, "NOASSERTION": SpdxNoAssertion}
    if related_spdx_element_id in keyword_markers:
        related_spdx_element_id = keyword_markers[related_spdx_element_id]()

    self.current_element["related_spdx_element_id"] = related_spdx_element_id
    self.current_element["spdx_element_id"] = spdx_element_id

    # The production has five symbols only when the comment tag and its value are present.
    if len(p) == 5:
        self.current_element["comment"] = p[4]
relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line | RELATIONSHIP LINE
def parse(self, text):
    """Entry point of the tag-value parser: parse ``text`` and return the constructed Document.

    All messages collected while parsing are raised together through
    ``raise_parsing_error_if_logger_has_messages`` before the document is constructed.
    """
    self.yacc.parse(text, lexer=self.lex)
    # Elements are constructed when their successor starts, so the last one is still pending at this point.
    self.construct_current_element()

    # Creation info values may appear in between other elements, e.g. packages. They are therefore collected
    # in a dictionary of their own, with a logger of their own, whose messages are folded into the main logger
    # here so that all collected messages can then be raised at once.
    creation_info_logger = self.creation_info.pop("logger")
    if creation_info_logger.has_messages():
        creation_info_messages = creation_info_logger.get_messages()
        self.logger.extend([f"Error while parsing CreationInfo: {creation_info_messages}"])

    raise_parsing_error_if_logger_has_messages(self.logger)

    self.elements_built["creation_info"] = construct_or_raise_parsing_error(CreationInfo, self.creation_info)
    return construct_or_raise_parsing_error(Document, self.elements_built)
def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool:
    """Return True if the element currently in scope is of ``expected_class``.

    Otherwise a message naming the expected start tag and ``line_number`` is appended to the main logger
    and False is returned.
    """
    current = self.current_element
    if "class" in current and expected_class == current["class"]:
        return True

    expected_start_tag = ELEMENT_EXPECTED_START_TAG[expected_class.__name__]
    self.logger.append(
        f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to "
        f"start the element ({expected_start_tag}) is missing. "
        f"Line: {line_number}"
    )
    return False
def construct_current_element(self):
    """Build the element currently in scope, file it under ``elements_built`` and reset the scope.

    Messages from the element's own logger and from its construction are merged into the main logger.
    """
    if "class" not in self.current_element:
        # This happens when the first element is initialized via initialize_new_current_element() or if the
        # first element is missing its expected starting tag. In both cases no element can be constructed.
        return

    clazz = self.current_element.pop("class")
    element_logger = self.current_element.pop("logger")
    try:
        raise_parsing_error_if_logger_has_messages(element_logger, clazz.__name__)
        # The target list is looked up (and created if missing) before construction is attempted, so a failed
        # construction still leaves an empty list under the element's key.
        built_elements = self.elements_built.setdefault(CLASS_MAPPING[clazz.__name__], [])
        built_elements.append(construct_or_raise_parsing_error(clazz, self.current_element))
        if clazz == File:
            self.check_for_preceding_package_and_build_contains_relationship()
    except SPDXParsingError as err:
        self.logger.extend(err.get_messages())
    # Whatever happened above, the next element starts from a clean state with its own logger.
    self.current_element = {"logger": Logger()}
def check_for_preceding_package_and_build_contains_relationship(self):
    """Link the file in scope to the most recently built package with a CONTAINS relationship.

    Nothing happens when no package key exists yet. An error is logged when the package list exists but
    is empty.
    """
    file_spdx_id = self.current_element["spdx_id"]
    if "packages" not in self.elements_built:
        return

    # We assume that all files that are not contained in a package precede any package information. Any file
    # information that follows any package information is assigned to the last parsed package by creating a
    # corresponding contains relationship.
    # (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2)
    packages = self.elements_built["packages"]
    if not packages:
        self.logger.append(
            f"Error while building contains relationship for file {file_spdx_id}, "
            f"preceding package was not parsed successfully."
        )
        return

    contains = Relationship(packages[-1].spdx_id, RelationshipType.CONTAINS, file_spdx_id)
    relationships = self.elements_built.setdefault("relationships", [])
    # Do not add the relationship a second time if an equal one is already present.
    if contains not in relationships:
        relationships.append(contains)