spdx_tools.spdx.validation.uri_validators

 1# SPDX-FileCopyrightText: 2022 spdx contributors
 2#
 3# SPDX-License-Identifier: Apache-2.0
 4
 5import re
 6
 7from beartype.typing import List
 8from uritools import isabsuri, urisplit
 9
# Regular expression source for a URL, assembled from its components.
# Only lower-case letters are listed in the host part; upper-case input is
# covered by compiling with re.IGNORECASE below.
url_pattern = (
    # optional scheme prefix (http/https may additionally be followed by "www.")
    r"(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/|ssh:\/\/|git:\/\/|svn:\/\/|sftp:\/\/|ftp:\/\/)?"
    # optional user-info part terminated by "@"
    r"([\w\-.!~*'()%;:&=+$,]+@)?"
    # host: labels joined by a single "-" or ".", ending in a 2-5 letter label
    r"[a-z0-9]+([\-\.]{1}[a-z0-9]+){0,100}\.[a-z]{2,5}"
    # optional port of 1-5 digits
    r"(:[0-9]{1,5})?"
    # optional path: "/" followed by anything
    r"(\/.*)?"
)
url_pattern_ignore_case = re.compile(url_pattern, flags=re.IGNORECASE)

# Building blocks for download locations: a version-control prefix, the
# "git+git@host:path" form and the "bzr+lp:project" form.
supported_download_repos: str = r"(git|hg|svn|bzr)"
git_pattern = r"(git\+git@[a-zA-Z0-9\.\-]+:[a-zA-Z0-9/\\.@\-]+)"
bazaar_pattern = r"(bzr\+lp:[a-zA-Z0-9\.\-]+)"

# A download location is, anchored at both ends, one of:
#   * a URL, optionally prefixed with "<vcs>+",
#   * the git form, or
#   * the bazaar form.
download_location_pattern = (
    rf"^((({supported_download_repos}\+)?{url_pattern})|{git_pattern}|{bazaar_pattern})$"
)
compiled_pattern = re.compile(download_location_pattern, flags=re.IGNORECASE)
24
25
def validate_url(url: str) -> List[str]:
    """Check ``url`` against the module's case-insensitive URL pattern.

    The pattern is applied with ``match``, so it is anchored at the start of
    the string only.

    Returns:
        An empty list when the pattern matches, otherwise a one-element list
        holding the validation message.
    """
    if url_pattern_ignore_case.match(url) is not None:
        return []

    return [f"must be a valid URL, but is: {url}"]
31
32
def validate_download_location(location: str) -> List[str]:
    """Check that ``location`` is a URL or a recognised download location.

    A value is accepted when ``validate_url`` reports no problems or when it
    matches ``compiled_pattern``. The second check only runs if the first fails.

    Returns:
        An empty list when the value is accepted, otherwise a one-element list
        holding the validation message.
    """
    url_errors = validate_url(location)
    # Rejected only if it is neither a plain URL nor a download location.
    if url_errors and compiled_pattern.match(location) is None:
        return [f"must be a valid URL or download location according to the specification, but is: {location}"]

    return []
38
39
def validate_uri(uri: str) -> List[str]:
    """Check that ``uri`` is an absolute URI that carries a scheme.

    The value must first pass ``uritools.isabsuri``; afterwards the scheme
    obtained from ``uritools.urisplit`` must not be ``None``.

    Returns:
        An empty list when both checks pass, otherwise a one-element list
        holding the message of the first failing check.
    """
    if not isabsuri(uri):
        return [f"must be a valid URI specified in RFC-3986 and must contain no fragment (#), but is: {uri}"]

    if urisplit(uri).scheme is None:
        return [f"must have a URI scheme, but is: {uri}"]

    return []
# Regular expression source for a URL: optional scheme, optional user-info,
# dotted host ending in a 2-5 letter label, optional port, optional path.
url_pattern = (
    "(http:\\/\\/www\\.|https:\\/\\/www\\.|http:\\/\\/|https:\\/\\/|ssh:\\/\\/|git:\\/\\/|svn:\\/\\/|sftp:"
    "\\/\\/|ftp:\\/\\/)?([\\w\\-.!~*'()%;:&=+$,]+@)?[a-z0-9]+([\\-\\.]{1}[a-z0-9]+){0,100}\\.[a-z]{2,5}"
    "(:[0-9]{1,5})?(\\/.*)?"
)
# Compile from the named constant so the complete pattern (including the port
# and path parts) is used; the host part lists lower-case letters only, hence
# the IGNORECASE flag.
url_pattern_ignore_case = re.compile(url_pattern, re.IGNORECASE)

# Building blocks for download locations: a version-control prefix, the
# "git+git@host:path" form and the "bzr+lp:project" form.
supported_download_repos: str = "(git|hg|svn|bzr)"
git_pattern = "(git\\+git@[a-zA-Z0-9\\.\\-]+:[a-zA-Z0-9/\\\\.@\\-]+)"
bazaar_pattern = "(bzr\\+lp:[a-zA-Z0-9\\.\\-]+)"

# Anchored at both ends: a URL with an optional "<vcs>+" prefix, or the git
# form, or the bazaar form.
download_location_pattern = (
    "^(((" + supported_download_repos + "\\+)?" + url_pattern + ")|" + git_pattern + "|" + bazaar_pattern + ")$"
)
compiled_pattern = re.compile(download_location_pattern, re.IGNORECASE)
def validate_url(url: str) -> List[str]:
    """Validate ``url`` against the case-insensitive URL pattern.

    The compiled pattern is applied with ``match``, i.e. from the start of the
    string.

    Returns:
        ``[]`` if the pattern matches, otherwise a list containing a single
        validation message.
    """
    if not url_pattern_ignore_case.match(url):
        return [f"must be a valid URL, but is: {url}"]

    return []
def validate_download_location(location: str) -> List[str]:
    """Validate ``location`` as a URL or as a download location.

    The value passes if ``validate_url`` returns no messages or if it matches
    ``compiled_pattern``.

    Returns:
        ``[]`` if the value passes, otherwise a list containing a single
        validation message.
    """
    if not (validate_url(location) == [] or compiled_pattern.match(location)):
        return [f"must be a valid URL or download location according to the specification, but is: {location}"]

    return []
def validate_uri(uri: str) -> List[str]:
    """Validate that ``uri`` is an absolute URI with a scheme.

    ``uritools.isabsuri`` is checked first; if it passes, the URI is split with
    ``uritools.urisplit`` and its scheme must not be ``None``.

    Returns:
        ``[]`` if both checks pass, otherwise a list containing the message of
        the first failing check.
    """
    if not isabsuri(uri):
        return [f"must be a valid URI specified in RFC-3986 and must contain no fragment (#), but is: {uri}"]

    split = urisplit(uri)
    if split.scheme is None:
        return [f"must have a URI scheme, but is: {uri}"]

    return []