spdx_tools.spdx3.model.dataset.dataset

  1# SPDX-FileCopyrightText: 2023 spdx contributors
  2#
  3# SPDX-License-Identifier: Apache-2.0
  4from dataclasses import field
  5from datetime import datetime
  6from enum import Enum, auto
  7
  8from beartype.typing import Dict, List, Optional
  9
 10from spdx_tools.common.typing.dataclass_with_properties import dataclass_with_properties
 11from spdx_tools.common.typing.type_checks import check_types_and_set_values
 12from spdx_tools.spdx3.model import CreationInfo, ExternalIdentifier, ExternalReference, IntegrityMethod
 13from spdx_tools.spdx3.model.licensing import LicenseField
 14from spdx_tools.spdx3.model.software import Package, SoftwarePurpose
 15
 16
 17class DatasetType(Enum):
 18    STRUCTURED = auto()
 19    NUMERIC = auto()
 20    TEXT = auto()
 21    CATEGORICAL = auto()
 22    GRAPH = auto()
 23    TIMESERIES = auto()
 24    TIMESTAMP = auto()
 25    SENSOR = auto()
 26    IMAGE = auto()
 27    SYNTACTIC = auto()
 28    AUDIO = auto()
 29    VIDEO = auto()
 30    OTHER = auto()
 31    NO_ASSERTION = auto()
 32
 33
 34class ConfidentialityLevelType(Enum):
 35    RED = auto()
 36    AMBER = auto()
 37    GREEN = auto()
 38    CLEAR = auto()
 39
 40
 41class DatasetAvailabilityType(Enum):
 42    DIRECT_DOWNLOAD = auto()
 43    SCRAPING_SCRIPT = auto()
 44    QUERY = auto()
 45    CLICKTHROUGH = auto()
 46    REGISTRATION = auto()
 47
 48
 49@dataclass_with_properties
 50class Dataset(Package):
 51    dataset_type: List[DatasetType] = None
 52    data_collection_process: Optional[str] = None
 53    intended_use: Optional[str] = None
 54    dataset_size: Optional[int] = None
 55    dataset_noise: Optional[str] = None
 56    data_preprocessing: List[str] = field(default_factory=list)
 57    sensor: Dict[str, Optional[str]] = field(default_factory=dict)
 58    known_bias: List[str] = field(default_factory=list)
 59    sensitive_personal_information: Optional[bool] = None
 60    anonymization_method_used: List[str] = field(default_factory=list)
 61    confidentiality_level: Optional[ConfidentialityLevelType] = None
 62    dataset_update_mechanism: Optional[str] = None
 63    dataset_availability: Optional[DatasetAvailabilityType] = None
 64
 65    def __init__(
 66        self,
 67        spdx_id: str,
 68        name: str,
 69        originated_by: List[str],
 70        download_location: str,
 71        primary_purpose: SoftwarePurpose,
 72        built_time: datetime,
 73        release_time: datetime,
 74        dataset_type: List[DatasetType],
 75        creation_info: Optional[CreationInfo] = None,
 76        summary: Optional[str] = None,
 77        description: Optional[str] = None,
 78        comment: Optional[str] = None,
 79        verified_using: List[IntegrityMethod] = None,
 80        external_reference: List[ExternalReference] = None,
 81        external_identifier: List[ExternalIdentifier] = None,
 82        extension: Optional[str] = None,
 83        supplied_by: List[str] = None,
 84        valid_until_time: Optional[datetime] = None,
 85        standard: List[str] = None,
 86        content_identifier: Optional[str] = None,
 87        additional_purpose: List[SoftwarePurpose] = None,
 88        concluded_license: Optional[LicenseField] = None,
 89        declared_license: Optional[LicenseField] = None,
 90        copyright_text: Optional[str] = None,
 91        attribution_text: Optional[str] = None,
 92        package_version: Optional[str] = None,
 93        package_url: Optional[str] = None,
 94        homepage: Optional[str] = None,
 95        source_info: Optional[str] = None,
 96        data_collection_process: Optional[str] = None,
 97        intended_use: Optional[str] = None,
 98        dataset_size: Optional[int] = None,
 99        dataset_noise: Optional[str] = None,
100        data_preprocessing: List[str] = None,
101        sensor: Dict[str, Optional[str]] = None,
102        known_bias: List[str] = None,
103        sensitive_personal_information: Optional[bool] = None,
104        anonymization_method_used: List[str] = None,
105        confidentiality_level: Optional[ConfidentialityLevelType] = None,
106        dataset_update_mechanism: Optional[str] = None,
107        dataset_availability: Optional[DatasetAvailabilityType] = None,
108    ):
109        verified_using = [] if verified_using is None else verified_using
110        external_reference = [] if external_reference is None else external_reference
111        external_identifier = [] if external_identifier is None else external_identifier
112        originated_by = [] if originated_by is None else originated_by
113        additional_purpose = [] if additional_purpose is None else additional_purpose
114        supplied_by = [] if supplied_by is None else supplied_by
115        standard = [] if standard is None else standard
116        data_preprocessing = [] if data_preprocessing is None else data_preprocessing
117        sensor = {} if sensor is None else sensor
118        known_bias = [] if known_bias is None else known_bias
119        anonymization_method_used = [] if anonymization_method_used is None else anonymization_method_used
120        check_types_and_set_values(self, locals())
class DatasetType(enum.Enum):
18class DatasetType(Enum):
19    STRUCTURED = auto()
20    NUMERIC = auto()
21    TEXT = auto()
22    CATEGORICAL = auto()
23    GRAPH = auto()
24    TIMESERIES = auto()
25    TIMESTAMP = auto()
26    SENSOR = auto()
27    IMAGE = auto()
28    SYNTACTIC = auto()
29    AUDIO = auto()
30    VIDEO = auto()
31    OTHER = auto()
32    NO_ASSERTION = auto()
STRUCTURED = <DatasetType.STRUCTURED: 1>
NUMERIC = <DatasetType.NUMERIC: 2>
TEXT = <DatasetType.TEXT: 3>
CATEGORICAL = <DatasetType.CATEGORICAL: 4>
GRAPH = <DatasetType.GRAPH: 5>
TIMESERIES = <DatasetType.TIMESERIES: 6>
TIMESTAMP = <DatasetType.TIMESTAMP: 7>
SENSOR = <DatasetType.SENSOR: 8>
IMAGE = <DatasetType.IMAGE: 9>
SYNTACTIC = <DatasetType.SYNTACTIC: 10>
AUDIO = <DatasetType.AUDIO: 11>
VIDEO = <DatasetType.VIDEO: 12>
OTHER = <DatasetType.OTHER: 13>
NO_ASSERTION = <DatasetType.NO_ASSERTION: 14>
Inherited Members
enum.Enum
name
value
class ConfidentialityLevelType(enum.Enum):
35class ConfidentialityLevelType(Enum):
36    RED = auto()
37    AMBER = auto()
38    GREEN = auto()
39    CLEAR = auto()
Inherited Members
enum.Enum
name
value
class DatasetAvailabilityType(enum.Enum):
42class DatasetAvailabilityType(Enum):
43    DIRECT_DOWNLOAD = auto()
44    SCRAPING_SCRIPT = auto()
45    QUERY = auto()
46    CLICKTHROUGH = auto()
47    REGISTRATION = auto()
Inherited Members
enum.Enum
name
value
@dataclass_with_properties
class Dataset(spdx_tools.spdx3.model.software.package.Package):
 50@dataclass_with_properties
 51class Dataset(Package):
 52    dataset_type: List[DatasetType] = None
 53    data_collection_process: Optional[str] = None
 54    intended_use: Optional[str] = None
 55    dataset_size: Optional[int] = None
 56    dataset_noise: Optional[str] = None
 57    data_preprocessing: List[str] = field(default_factory=list)
 58    sensor: Dict[str, Optional[str]] = field(default_factory=dict)
 59    known_bias: List[str] = field(default_factory=list)
 60    sensitive_personal_information: Optional[bool] = None
 61    anonymization_method_used: List[str] = field(default_factory=list)
 62    confidentiality_level: Optional[ConfidentialityLevelType] = None
 63    dataset_update_mechanism: Optional[str] = None
 64    dataset_availability: Optional[DatasetAvailabilityType] = None
 65
 66    def __init__(
 67        self,
 68        spdx_id: str,
 69        name: str,
 70        originated_by: List[str],
 71        download_location: str,
 72        primary_purpose: SoftwarePurpose,
 73        built_time: datetime,
 74        release_time: datetime,
 75        dataset_type: List[DatasetType],
 76        creation_info: Optional[CreationInfo] = None,
 77        summary: Optional[str] = None,
 78        description: Optional[str] = None,
 79        comment: Optional[str] = None,
 80        verified_using: List[IntegrityMethod] = None,
 81        external_reference: List[ExternalReference] = None,
 82        external_identifier: List[ExternalIdentifier] = None,
 83        extension: Optional[str] = None,
 84        supplied_by: List[str] = None,
 85        valid_until_time: Optional[datetime] = None,
 86        standard: List[str] = None,
 87        content_identifier: Optional[str] = None,
 88        additional_purpose: List[SoftwarePurpose] = None,
 89        concluded_license: Optional[LicenseField] = None,
 90        declared_license: Optional[LicenseField] = None,
 91        copyright_text: Optional[str] = None,
 92        attribution_text: Optional[str] = None,
 93        package_version: Optional[str] = None,
 94        package_url: Optional[str] = None,
 95        homepage: Optional[str] = None,
 96        source_info: Optional[str] = None,
 97        data_collection_process: Optional[str] = None,
 98        intended_use: Optional[str] = None,
 99        dataset_size: Optional[int] = None,
100        dataset_noise: Optional[str] = None,
101        data_preprocessing: List[str] = None,
102        sensor: Dict[str, Optional[str]] = None,
103        known_bias: List[str] = None,
104        sensitive_personal_information: Optional[bool] = None,
105        anonymization_method_used: List[str] = None,
106        confidentiality_level: Optional[ConfidentialityLevelType] = None,
107        dataset_update_mechanism: Optional[str] = None,
108        dataset_availability: Optional[DatasetAvailabilityType] = None,
109    ):
110        verified_using = [] if verified_using is None else verified_using
111        external_reference = [] if external_reference is None else external_reference
112        external_identifier = [] if external_identifier is None else external_identifier
113        originated_by = [] if originated_by is None else originated_by
114        additional_purpose = [] if additional_purpose is None else additional_purpose
115        supplied_by = [] if supplied_by is None else supplied_by
116        standard = [] if standard is None else standard
117        data_preprocessing = [] if data_preprocessing is None else data_preprocessing
118        sensor = {} if sensor is None else sensor
119        known_bias = [] if known_bias is None else known_bias
120        anonymization_method_used = [] if anonymization_method_used is None else anonymization_method_used
121        check_types_and_set_values(self, locals())
Dataset( spdx_id: str, name: str, originated_by: list[str], download_location: str, primary_purpose: spdx_tools.spdx3.model.software.software_purpose.SoftwarePurpose, built_time: datetime.datetime, release_time: datetime.datetime, dataset_type: list[DatasetType], creation_info: Optional[spdx_tools.spdx3.model.creation_info.CreationInfo] = None, summary: Optional[str] = None, description: Optional[str] = None, comment: Optional[str] = None, verified_using: list[spdx_tools.spdx3.model.integrity_method.IntegrityMethod] = None, external_reference: list[spdx_tools.spdx3.model.external_reference.ExternalReference] = None, external_identifier: list[spdx_tools.spdx3.model.external_identifier.ExternalIdentifier] = None, extension: Optional[str] = None, supplied_by: list[str] = None, valid_until_time: Optional[datetime.datetime] = None, standard: list[str] = None, content_identifier: Optional[str] = None, additional_purpose: list[spdx_tools.spdx3.model.software.software_purpose.SoftwarePurpose] = None, concluded_license: Optional[spdx_tools.spdx3.model.licensing.license_field.LicenseField] = None, declared_license: Optional[spdx_tools.spdx3.model.licensing.license_field.LicenseField] = None, copyright_text: Optional[str] = None, attribution_text: Optional[str] = None, package_version: Optional[str] = None, package_url: Optional[str] = None, homepage: Optional[str] = None, source_info: Optional[str] = None, data_collection_process: Optional[str] = None, intended_use: Optional[str] = None, dataset_size: Optional[int] = None, dataset_noise: Optional[str] = None, data_preprocessing: list[str] = None, sensor: dict[str, typing.Optional[str]] = None, known_bias: list[str] = None, sensitive_personal_information: Optional[bool] = None, anonymization_method_used: list[str] = None, confidentiality_level: Optional[ConfidentialityLevelType] = None, dataset_update_mechanism: Optional[str] = None, dataset_availability: Optional[DatasetAvailabilityType] = None)
 66    def __init__(
 67        self,
 68        spdx_id: str,
 69        name: str,
 70        originated_by: List[str],
 71        download_location: str,
 72        primary_purpose: SoftwarePurpose,
 73        built_time: datetime,
 74        release_time: datetime,
 75        dataset_type: List[DatasetType],
 76        creation_info: Optional[CreationInfo] = None,
 77        summary: Optional[str] = None,
 78        description: Optional[str] = None,
 79        comment: Optional[str] = None,
 80        verified_using: List[IntegrityMethod] = None,
 81        external_reference: List[ExternalReference] = None,
 82        external_identifier: List[ExternalIdentifier] = None,
 83        extension: Optional[str] = None,
 84        supplied_by: List[str] = None,
 85        valid_until_time: Optional[datetime] = None,
 86        standard: List[str] = None,
 87        content_identifier: Optional[str] = None,
 88        additional_purpose: List[SoftwarePurpose] = None,
 89        concluded_license: Optional[LicenseField] = None,
 90        declared_license: Optional[LicenseField] = None,
 91        copyright_text: Optional[str] = None,
 92        attribution_text: Optional[str] = None,
 93        package_version: Optional[str] = None,
 94        package_url: Optional[str] = None,
 95        homepage: Optional[str] = None,
 96        source_info: Optional[str] = None,
 97        data_collection_process: Optional[str] = None,
 98        intended_use: Optional[str] = None,
 99        dataset_size: Optional[int] = None,
100        dataset_noise: Optional[str] = None,
101        data_preprocessing: List[str] = None,
102        sensor: Dict[str, Optional[str]] = None,
103        known_bias: List[str] = None,
104        sensitive_personal_information: Optional[bool] = None,
105        anonymization_method_used: List[str] = None,
106        confidentiality_level: Optional[ConfidentialityLevelType] = None,
107        dataset_update_mechanism: Optional[str] = None,
108        dataset_availability: Optional[DatasetAvailabilityType] = None,
109    ):
110        verified_using = [] if verified_using is None else verified_using
111        external_reference = [] if external_reference is None else external_reference
112        external_identifier = [] if external_identifier is None else external_identifier
113        originated_by = [] if originated_by is None else originated_by
114        additional_purpose = [] if additional_purpose is None else additional_purpose
115        supplied_by = [] if supplied_by is None else supplied_by
116        standard = [] if standard is None else standard
117        data_preprocessing = [] if data_preprocessing is None else data_preprocessing
118        sensor = {} if sensor is None else sensor
119        known_bias = [] if known_bias is None else known_bias
120        anonymization_method_used = [] if anonymization_method_used is None else anonymization_method_used
121        check_types_and_set_values(self, locals())
dataset_type: list[DatasetType]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")
data_collection_process: Optional[str]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")
intended_use: Optional[str]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")
dataset_size: Optional[int]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")
dataset_noise: Optional[str]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")
data_preprocessing: list[str]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")
sensor: dict[str, typing.Optional[str]]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")
known_bias: list[str]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")
sensitive_personal_information: Optional[bool]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")
anonymization_method_used: list[str]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")
confidentiality_level: Optional[ConfidentialityLevelType]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")
dataset_update_mechanism: Optional[str]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")
dataset_availability: Optional[DatasetAvailabilityType]
47    def get_field(self) -> field_type:
48        return getattr(self, f"_{field_name}")