spdx_tools.spdx.validation.license_expression_validator

 1# SPDX-FileCopyrightText: 2022 spdx contributors
 2#
 3# SPDX-License-Identifier: Apache-2.0
 4
 5from beartype.typing import List, Optional, Union
 6from license_expression import ExpressionError, ExpressionParseError, LicenseExpression
 7
 8from spdx_tools.common.spdx_licensing import spdx_licensing
 9from spdx_tools.spdx.model import Document, SpdxNoAssertion, SpdxNone
10from spdx_tools.spdx.validation.spdx_id_validators import is_external_doc_ref_present_in_document
11from spdx_tools.spdx.validation.validation_message import SpdxElementType, ValidationContext, ValidationMessage
12
13
def validate_license_expressions(
    license_expressions: List[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]], document: Document, parent_id: str
) -> List[ValidationMessage]:
    """Validate each entry of ``license_expressions`` and return all resulting messages.

    One ``ValidationContext`` is created up front, with the complete list as its ``full_element``,
    and handed to ``validate_license_expression`` for every entry. The messages are returned in
    the order the entries were given.
    """
    shared_context = ValidationContext(
        parent_id=parent_id, element_type=SpdxElementType.LICENSE_EXPRESSION, full_element=license_expressions
    )
    # Flatten the per-expression message lists into one list.
    return [
        message
        for expression in license_expressions
        for message in validate_license_expression(expression, document, parent_id, shared_context)
    ]
26
27
def validate_license_expression(
    license_expression: Optional[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]],
    document: Document,
    parent_id: str,
    context: Optional[ValidationContext] = None,
) -> List[ValidationMessage]:
    """Validate a single license expression and return the problems found.

    ``None``, ``SpdxNoAssertion`` and ``SpdxNone`` are accepted as-is and yield no messages.

    Args:
        license_expression: The expression to check.
        document: Provides the extracted licensing info and is searched for external document references.
        parent_id: Used to build a ``ValidationContext`` when ``context`` is not supplied.
        context: Context attached to every message. If omitted, one is created from ``parent_id``
            with ``license_expression`` as its ``full_element``.

    Returns:
        A list with one ``ValidationMessage`` per reported problem; empty if nothing was reported.
    """
    if license_expression in [SpdxNoAssertion(), SpdxNone(), None]:
        return []

    if not context:
        context = ValidationContext(
            parent_id=parent_id, element_type=SpdxElementType.LICENSE_EXPRESSION, full_element=license_expression
        )

    validation_messages: List[ValidationMessage] = []
    # Only used for membership tests below, so a set avoids a linear scan per token.
    license_ref_ids = {license_ref.license_id for license_ref in document.extracted_licensing_info}

    for non_spdx_token in spdx_licensing.validate(license_expression).invalid_symbols:
        if ":" in non_spdx_token:
            # A colon marks a reference into another document; it has its own set of checks.
            validation_messages.extend(_validate_external_license_reference(non_spdx_token, document, context))
        elif non_spdx_token not in license_ref_ids:
            validation_messages.append(
                ValidationMessage(
                    f"Unrecognized license reference: {non_spdx_token}. license_expression must only use IDs from the "
                    f"license list or extracted licensing info, but is: {license_expression}",
                    context,
                )
            )

    try:
        spdx_licensing.parse(str(license_expression), validate=True, strict=True)
    except ExpressionParseError as err:
        # This error is raised when an exception symbol is used as a license symbol and vice versa.
        # So far, it only catches the first such error in the provided string.
        validation_messages.append(ValidationMessage(f"{err}. for license_expression: {license_expression}", context))
    except ExpressionError:
        # This error is raised for invalid symbols within the license_expression, but it provides only a string of
        # these. On the other hand, spdx_licensing.validate() gives an actual list of invalid symbols, so this is
        # handled above.
        pass

    return validation_messages


def _validate_external_license_reference(
    non_spdx_token: str, document: Document, context: ValidationContext
) -> List[ValidationMessage]:
    """Check a colon-containing token of the form ``<external document ref>:<license ref>``.

    Reports a token that does not split into exactly two parts on ":". Otherwise reports a license
    part that does not start with "LicenseRef-" and/or a document part that
    ``is_external_doc_ref_present_in_document`` does not find in ``document``.
    """
    split_token: List[str] = non_spdx_token.split(":")
    if len(split_token) != 2:
        return [
            ValidationMessage(
                f"Too many colons in license reference: {non_spdx_token}. "
                "A license reference must only contain a single colon to "
                "separate an external document reference from the license reference.",
                context,
            )
        ]

    document_ref, license_ref = split_token
    messages: List[ValidationMessage] = []
    if not license_ref.startswith("LicenseRef-"):
        messages.append(
            ValidationMessage(
                f'A license reference must start with "LicenseRef-", but is: {license_ref} '
                f"in external license reference {non_spdx_token}.",
                context,
            )
        )
    if not is_external_doc_ref_present_in_document(document_ref, document):
        messages.append(
            ValidationMessage(
                f'Did not find the external document reference "{document_ref}" in the SPDX document. '
                f"From the external license reference {non_spdx_token}.",
                context,
            )
        )
    return messages
def validate_license_expressions( license_expressions: list[typing.Union[boolean.boolean.Expression, spdx_tools.spdx.model.spdx_no_assertion.SpdxNoAssertion, spdx_tools.spdx.model.spdx_none.SpdxNone]], document: spdx_tools.spdx.model.document.Document, parent_id: str) -> list[spdx_tools.spdx.validation.validation_message.ValidationMessage]:
def validate_license_expressions(
    license_expressions: List[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]], document: Document, parent_id: str
) -> List[ValidationMessage]:
    """Validate each entry of ``license_expressions`` and return all resulting messages.

    One ``ValidationContext`` is created up front, with the complete list as its ``full_element``,
    and handed to ``validate_license_expression`` for every entry. The messages are returned in
    the order the entries were given.
    """
    shared_context = ValidationContext(
        parent_id=parent_id, element_type=SpdxElementType.LICENSE_EXPRESSION, full_element=license_expressions
    )
    # Flatten the per-expression message lists into one list.
    return [
        message
        for expression in license_expressions
        for message in validate_license_expression(expression, document, parent_id, shared_context)
    ]
def validate_license_expression(
    license_expression: Optional[Union[LicenseExpression, SpdxNoAssertion, SpdxNone]],
    document: Document,
    parent_id: str,
    context: Optional[ValidationContext] = None,
) -> List[ValidationMessage]:
    """Validate a single license expression and return the problems found.

    ``None``, ``SpdxNoAssertion`` and ``SpdxNone`` are accepted as-is and yield no messages.

    Args:
        license_expression: The expression to check.
        document: Provides the extracted licensing info and is searched for external document references.
        parent_id: Used to build a ``ValidationContext`` when ``context`` is not supplied.
        context: Context attached to every message. If omitted, one is created from ``parent_id``
            with ``license_expression`` as its ``full_element``.

    Returns:
        A list with one ``ValidationMessage`` per reported problem; empty if nothing was reported.
    """
    if license_expression in [SpdxNoAssertion(), SpdxNone(), None]:
        return []

    if not context:
        context = ValidationContext(
            parent_id=parent_id, element_type=SpdxElementType.LICENSE_EXPRESSION, full_element=license_expression
        )

    validation_messages: List[ValidationMessage] = []
    # Only used for membership tests below, so a set avoids a linear scan per token.
    license_ref_ids = {license_ref.license_id for license_ref in document.extracted_licensing_info}

    for non_spdx_token in spdx_licensing.validate(license_expression).invalid_symbols:
        if ":" in non_spdx_token:
            # A colon marks a reference into another document; it has its own set of checks.
            validation_messages.extend(_validate_external_license_reference(non_spdx_token, document, context))
        elif non_spdx_token not in license_ref_ids:
            validation_messages.append(
                ValidationMessage(
                    f"Unrecognized license reference: {non_spdx_token}. license_expression must only use IDs from the "
                    f"license list or extracted licensing info, but is: {license_expression}",
                    context,
                )
            )

    try:
        spdx_licensing.parse(str(license_expression), validate=True, strict=True)
    except ExpressionParseError as err:
        # This error is raised when an exception symbol is used as a license symbol and vice versa.
        # So far, it only catches the first such error in the provided string.
        validation_messages.append(ValidationMessage(f"{err}. for license_expression: {license_expression}", context))
    except ExpressionError:
        # This error is raised for invalid symbols within the license_expression, but it provides only a string of
        # these. On the other hand, spdx_licensing.validate() gives an actual list of invalid symbols, so this is
        # handled above.
        pass

    return validation_messages


def _validate_external_license_reference(
    non_spdx_token: str, document: Document, context: ValidationContext
) -> List[ValidationMessage]:
    """Check a colon-containing token of the form ``<external document ref>:<license ref>``.

    Reports a token that does not split into exactly two parts on ":". Otherwise reports a license
    part that does not start with "LicenseRef-" and/or a document part that
    ``is_external_doc_ref_present_in_document`` does not find in ``document``.
    """
    split_token: List[str] = non_spdx_token.split(":")
    if len(split_token) != 2:
        return [
            ValidationMessage(
                f"Too many colons in license reference: {non_spdx_token}. "
                "A license reference must only contain a single colon to "
                "separate an external document reference from the license reference.",
                context,
            )
        ]

    document_ref, license_ref = split_token
    messages: List[ValidationMessage] = []
    if not license_ref.startswith("LicenseRef-"):
        messages.append(
            ValidationMessage(
                f'A license reference must start with "LicenseRef-", but is: {license_ref} '
                f"in external license reference {non_spdx_token}.",
                context,
            )
        )
    if not is_external_doc_ref_present_in_document(document_ref, document):
        messages.append(
            ValidationMessage(
                f'Did not find the external document reference "{document_ref}" in the SPDX document. '
                f"From the external license reference {non_spdx_token}.",
                context,
            )
        )
    return messages