spdx_tools.spdx.parser.xml.xml_parser
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
import xmltodict
from beartype.typing import Any, Dict

from spdx_tools.spdx.model import Document
from spdx_tools.spdx.parser.error import SPDXParsingError
from spdx_tools.spdx.parser.jsonlikedict.json_like_dict_parser import JsonLikeDictParser

# Keys whose values are forced into lists by _fix_list_like_fields, even when
# the parsed XML yielded only a single (non-list) value for them.
LIST_LIKE_FIELDS = [
    "creators",
    "externalDocumentRefs",
    "hasExtractedLicensingInfos",
    "seeAlsos",
    "annotations",
    "relationships",
    "snippets",
    "reviewers",
    "fileTypes",
    "licenseInfoFromFiles",
    "licenseInfoInFiles",
    "artifactOf",
    "fileContributors",
    "fileDependencies",
    "files",
    "documentDescribes",
    "packages",
    "checksums",
    "hasFiles",
    "externalRefs",
    "ranges",
    "licenseInfoInSnippets",
    "packageVerificationCodeExcludedFiles",
    "attributionTexts",
]


def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse the XML file at `file_name` and hand its "Document" element to JsonLikeDictParser.

    Args:
        file_name: Path of the XML file to read.
        encoding: Text encoding used to open (decode) the file.

    Returns:
        The result of JsonLikeDictParser().parse() for the "Document" dictionary.

    Raises:
        SPDXParsingError: If the parsed XML has no top level "Document" entry, or that entry is empty.
    """
    with open(file_name, encoding=encoding) as file:
        # file.read() returns already decoded text; the literal "utf-8" below is what is
        # passed on to xmltodict and is independent of the `encoding` argument.
        parsed_xml: Dict = xmltodict.parse(file.read(), encoding="utf-8")

    input_doc_as_dict: Dict = _fix_list_like_fields(parsed_xml).get("Document")

    # A missing key (None) and an empty element are both falsy and rejected alike.
    if not input_doc_as_dict:
        raise SPDXParsingError(['Did not find the XML top level tag "Document".'])

    return JsonLikeDictParser().parse(input_doc_as_dict)


def _fix_list_like_fields(data: Any) -> Any:
    """
    XML files do not contain lists. Thus, single fields that should be a list in SPDX have to be manually cast.
    This method takes a parsed dictionary and converts all values with key from LIST_LIKE_FIELDS to lists.

    Dictionaries and lists are rebuilt recursively (the input is not mutated); any other value is returned
    unchanged. A falsy non-list value under a list-like key becomes an empty list.
    """
    if isinstance(data, dict):
        new_data = {}
        for key, value in data.items():
            if key in LIST_LIKE_FIELDS and not isinstance(value, list):
                # Single value: wrap it in a one-element list, or use [] when it is empty/None.
                new_data[key] = [_fix_list_like_fields(value)] if value else []
            else:
                new_data[key] = _fix_list_like_fields(value)
        return new_data

    if isinstance(data, list):
        return [_fix_list_like_fields(element) for element in data]

    return data
LIST_LIKE_FIELDS = ['creators', 'externalDocumentRefs', 'hasExtractedLicensingInfos', 'seeAlsos', 'annotations', 'relationships', 'snippets', 'reviewers', 'fileTypes', 'licenseInfoFromFiles', 'licenseInfoInFiles', 'artifactOf', 'fileContributors', 'fileDependencies', 'files', 'documentDescribes', 'packages', 'checksums', 'hasFiles', 'externalRefs', 'ranges', 'licenseInfoInSnippets', 'packageVerificationCodeExcludedFiles', 'attributionTexts']
def parse_from_file(file_name: str, encoding: str = 'utf-8') -> spdx_tools.spdx.model.document.Document:
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Parse the XML file at `file_name` and hand its "Document" element to JsonLikeDictParser.

    Args:
        file_name: Path of the XML file to read.
        encoding: Text encoding used to open (decode) the file.

    Returns:
        The result of JsonLikeDictParser().parse() for the "Document" dictionary.

    Raises:
        SPDXParsingError: If the parsed XML has no top level "Document" entry, or that entry is empty.
    """
    with open(file_name, encoding=encoding) as file:
        # file.read() returns already decoded text; the literal "utf-8" below is what is
        # passed on to xmltodict and is independent of the `encoding` argument.
        parsed_xml: Dict = xmltodict.parse(file.read(), encoding="utf-8")

    input_doc_as_dict: Dict = _fix_list_like_fields(parsed_xml).get("Document")

    # A missing key (None) and an empty element are both falsy and rejected alike.
    if not input_doc_as_dict:
        raise SPDXParsingError(['Did not find the XML top level tag "Document".'])

    return JsonLikeDictParser().parse(input_doc_as_dict)