spdx_tools.spdx.parser.xml.xml_parser

 1# SPDX-FileCopyrightText: 2023 spdx contributors
 2#
 3# SPDX-License-Identifier: Apache-2.0
 4import xmltodict
 5from beartype.typing import Any, Dict
 6
 7from spdx_tools.spdx.model import Document
 8from spdx_tools.spdx.parser.error import SPDXParsingError
 9from spdx_tools.spdx.parser.jsonlikedict.json_like_dict_parser import JsonLikeDictParser
10
# Keys whose values _fix_list_like_fields always turns into a list: a field that occurs
# only once in the XML input is parsed as a single value and has to be cast manually.
LIST_LIKE_FIELDS = [
    "creators", "externalDocumentRefs", "hasExtractedLicensingInfos", "seeAlsos",
    "annotations", "relationships", "snippets", "reviewers",
    "fileTypes", "licenseInfoFromFiles", "licenseInfoInFiles", "artifactOf",
    "fileContributors", "fileDependencies", "files", "documentDescribes",
    "packages", "checksums", "hasFiles", "externalRefs",
    "ranges", "licenseInfoInSnippets", "packageVerificationCodeExcludedFiles", "attributionTexts",
]
37
38
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Read the XML file at file_name and parse its content into an SPDX Document.
    The file is opened with the given encoding and converted to a dictionary by xmltodict. After list-like fields
    have been normalised, the content of the top level tag "Document" is passed to the JsonLikeDictParser.
    Raises SPDXParsingError if the tag "Document" is missing or empty.
    """
    with open(file_name, encoding=encoding) as xml_file:
        xml_text = xml_file.read()

    xml_as_dict: Dict = xmltodict.parse(xml_text, encoding="utf-8")
    document_dict: Dict = _fix_list_like_fields(xml_as_dict).get("Document")

    # A missing tag yields None and an empty tag yields a falsy value; both are rejected alike.
    if document_dict:
        return JsonLikeDictParser().parse(document_dict)

    raise SPDXParsingError(['Did not find the XML top level tag "Document".'])
49
50
def _fix_list_like_fields(data: Any) -> Any:
    """
    Recursively rebuild parsed XML data so that every value stored under a key from LIST_LIKE_FIELDS is a list.
    XML has no list notation, so a field that occurs only once is parsed as a single value. Such a value is wrapped
    in a one-element list, or replaced by an empty list if it is falsy. Dictionaries and lists are copied, not
    modified in place; any other value is returned unchanged.
    """
    if isinstance(data, list):
        return [_fix_list_like_fields(item) for item in data]

    if not isinstance(data, dict):
        return data

    fixed = {}
    for name, content in data.items():
        if isinstance(content, list) or name not in LIST_LIKE_FIELDS:
            # Already a list, or not a list-like field: only descend into the value.
            fixed[name] = _fix_list_like_fields(content)
        elif content:
            fixed[name] = [_fix_list_like_fields(content)]
        else:
            # A falsy value for a list-like field becomes an empty list.
            fixed[name] = []
    return fixed
LIST_LIKE_FIELDS = ['creators', 'externalDocumentRefs', 'hasExtractedLicensingInfos', 'seeAlsos', 'annotations', 'relationships', 'snippets', 'reviewers', 'fileTypes', 'licenseInfoFromFiles', 'licenseInfoInFiles', 'artifactOf', 'fileContributors', 'fileDependencies', 'files', 'documentDescribes', 'packages', 'checksums', 'hasFiles', 'externalRefs', 'ranges', 'licenseInfoInSnippets', 'packageVerificationCodeExcludedFiles', 'attributionTexts']
def parse_from_file(file_name: str, encoding: str = 'utf-8') -> spdx_tools.spdx.model.document.Document:
def parse_from_file(file_name: str, encoding: str = "utf-8") -> Document:
    """
    Read the XML file at file_name and parse its content into an SPDX Document.
    The file is opened with the given encoding and converted to a dictionary by xmltodict. After list-like fields
    have been normalised, the content of the top level tag "Document" is passed to the JsonLikeDictParser.
    Raises SPDXParsingError if the tag "Document" is missing or empty.
    """
    with open(file_name, encoding=encoding) as xml_file:
        xml_text = xml_file.read()

    xml_as_dict: Dict = xmltodict.parse(xml_text, encoding="utf-8")
    document_dict: Dict = _fix_list_like_fields(xml_as_dict).get("Document")

    # A missing tag yields None and an empty tag yields a falsy value; both are rejected alike.
    if document_dict:
        return JsonLikeDictParser().parse(document_dict)

    raise SPDXParsingError(['Did not find the XML top level tag "Document".'])