spdx_tools.spdx.validation.uri_validators

 1# SPDX-FileCopyrightText: 2022 spdx contributors
 2#
 3# SPDX-License-Identifier: Apache-2.0
 4
 5import re
 6
 7from beartype.typing import List
 8from uritools import isabsuri, urisplit
 9
# Regular expression for a URL-like value, assembled from its logical parts.
url_pattern = (
    # Optional scheme prefix (with or without a leading "www.").
    r"(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/"
    r"|ssh:\/\/|git:\/\/|svn:\/\/|sftp:\/\/|ftp:\/\/)?"
    # Optional user-info part terminated by "@".
    r"([\w\-.!~*'()%;:&=+$,]+@)?"
    # Lower-case host labels separated by "-" or ".", then a 2-5 letter suffix.
    r"[a-z0-9]+([\-\.]{1}[a-z0-9]+){0,100}\.[a-z]{2,5}"
    # Optional port of one to five digits.
    r"(:[0-9]{1,5})?"
    # Optional path starting with "/".
    r"(\/.*)?"
)
# Version-control prefixes that may precede a URL as "<vcs>+".
supported_download_repos: str = r"(git|hg|svn|bzr)"
# "git+git@host:path" form.
git_pattern = r"(git\+git@[a-zA-Z0-9\.\-]+:[a-zA-Z0-9/\\.@\-]+)"
# "bzr+lp:name" form.
bazaar_pattern = r"(bzr\+lp:[a-zA-Z0-9\.\-]+)"
# Fully anchored pattern: optional "<vcs>+" followed by a URL, or one of the
# two special forms above.
download_location_pattern = (
    r"^(((" + supported_download_repos + r"\+)?" + url_pattern + r")|" + git_pattern + r"|" + bazaar_pattern + r")$"
)
21
22
def validate_url(url: str) -> List[str]:
    """Check ``url`` against the module-level ``url_pattern``.

    The check uses ``re.match``, which anchors only at the start of the
    string, so a value is accepted as soon as a leading portion of it matches.

    Returns:
        An empty list when the value is accepted, otherwise a list holding a
        single validation message.
    """
    if re.match(url_pattern, url):
        return []

    return [f"must be a valid URL, but is: {url}"]
28
29
def validate_download_location(location: str) -> List[str]:
    """Check that ``location`` is a URL or a recognised download location.

    The value is accepted when ``validate_url`` reports no messages, or
    failing that, when it matches ``download_location_pattern``.

    Returns:
        An empty list when the value is accepted, otherwise a list holding a
        single validation message.
    """
    # An empty message list from validate_url means the value passed as a URL.
    if not validate_url(location):
        return []

    if re.match(download_location_pattern, location):
        return []

    return [f"must be a valid URL or download location according to the specification, but is: {location}"]
35
36
def validate_uri(uri: str) -> List[str]:
    """Check ``uri`` with the ``uritools`` helpers ``isabsuri`` and ``urisplit``.

    Returns:
        An empty list when both checks pass, otherwise a list holding a single
        validation message for the first check that failed.
    """
    if not isabsuri(uri):
        return [f"must be a valid URI specified in RFC-3986 and must contain no fragment (#), but is: {uri}"]

    # NOTE(review): isabsuri presumably already implies a scheme is present,
    # in which case this check never fires; kept to preserve behavior.
    if urisplit(uri).scheme is None:
        return [f"must have a URI scheme, but is: {uri}"]

    return []
# Regular expression for a URL-like value, written out part by part.
url_pattern = (
    # Optional scheme prefix (with or without a leading "www.").
    r"(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/"
    r"|ssh:\/\/|git:\/\/|svn:\/\/|sftp:\/\/|ftp:\/\/)?"
    # Optional user-info part terminated by "@".
    r"([\w\-.!~*'()%;:&=+$,]+@)?"
    # Lower-case host labels separated by "-" or ".", then a 2-5 letter suffix.
    r"[a-z0-9]+([\-\.]{1}[a-z0-9]+){0,100}\.[a-z]{2,5}"
    # Optional port of one to five digits.
    r"(:[0-9]{1,5})?"
    # Optional path starting with "/".
    r"(\/.*)?"
)
# Version-control prefixes that may precede a URL as "<vcs>+".
supported_download_repos: str = r"(git|hg|svn|bzr)"
# "git+git@host:path" form.
git_pattern = r"(git\+git@[a-zA-Z0-9\.\-]+:[a-zA-Z0-9/\\.@\-]+)"
# "bzr+lp:name" form.
bazaar_pattern = r"(bzr\+lp:[a-zA-Z0-9\.\-]+)"
# Fully anchored pattern composed from the pieces above: an optional "<vcs>+"
# followed by a URL, or one of the two special forms.
download_location_pattern = (
    rf"^((({supported_download_repos}\+)?{url_pattern})|{git_pattern}|{bazaar_pattern})$"
)
def validate_url(url: str) -> List[str]:
    """Check ``url`` against the module-level ``url_pattern``.

    The check uses ``re.match``, which anchors only at the start of the
    string, so a value is accepted as soon as a leading portion of it matches.

    Returns:
        An empty list when the value is accepted, otherwise a list holding a
        single validation message.
    """
    matched = re.match(url_pattern, url)
    if matched:
        return []

    return [f"must be a valid URL, but is: {url}"]
def validate_download_location(location: str) -> List[str]:
    """Check that ``location`` is a URL or a recognised download location.

    The value is accepted when ``validate_url`` reports no messages, or
    failing that, when it matches ``download_location_pattern``.

    Returns:
        An empty list when the value is accepted, otherwise a list holding a
        single validation message.
    """
    # Short-circuits exactly like the URL check followed by the pattern check.
    accepted = validate_url(location) == [] or re.match(download_location_pattern, location)
    if accepted:
        return []

    return [f"must be a valid URL or download location according to the specification, but is: {location}"]
def validate_uri(uri: str) -> List[str]:
    """Check ``uri`` with the ``uritools`` helpers ``isabsuri`` and ``urisplit``.

    Returns:
        An empty list when both checks pass, otherwise a list holding a single
        validation message for the first check that failed.
    """
    if not isabsuri(uri):
        return [f"must be a valid URI specified in RFC-3986 and must contain no fragment (#), but is: {uri}"]

    # NOTE(review): isabsuri presumably already implies a scheme is present,
    # in which case this check never fires; kept to preserve behavior.
    parts = urisplit(uri)
    if parts.scheme is None:
        return [f"must have a URI scheme, but is: {uri}"]

    return []