metadata: define a clear DependencyMetadata interface

This CL adds a typed interface that exposes parsed metadata for
downstream consumption.

Conventionally:
- A validated field should be retrieved by the property of the same name
- A validated field returns "None" if said field is not provided, or is
  clearly invalid (e.g. "Unknown" values)
- Raw values can still be retrieved with get_entries()

When using the property accessors, fields are normalized and/or coerced to a suitable type (e.g. a list of str, or a str in a particular format).

Bug: b/321154076
Change-Id: Ia56969a838e682a7b7eb1dc0781d48e1e38a2ff0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5446637
Reviewed-by: Rachael Newitt <renewitt@google.com>
Commit-Queue: Jiewei Qian <qjw@chromium.org>
changes/37/5446637/10
Jiewei Qian 1 year ago committed by LUCI CQ
parent 1a61eb625d
commit b7ed76a09d

@ -6,7 +6,7 @@
from collections import defaultdict from collections import defaultdict
import os import os
import sys import sys
from typing import Dict, List, Set, Tuple from typing import Dict, List, Set, Tuple, Union, Optional, Literal, Any
_THIS_DIR = os.path.abspath(os.path.dirname(__file__)) _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory. # The repo's root directory.
@ -24,7 +24,19 @@ import metadata.validation_result as vr
class DependencyMetadata: class DependencyMetadata:
"""The metadata for a single dependency.""" """The metadata for a single dependency.
See @property declarations below to retrieve validated fields for
downstream consumption.
The property returns `None` if the provided value (e.g. in
README.chromium file) is clearly invalid.
Otherwise, it returns a suitably typed value (see comments on each
property).
To retrieve unvalidated (i.e. raw values) fields, use get_entries().
"""
# Fields that are always required. # Fields that are always required.
_MANDATORY_FIELDS = { _MANDATORY_FIELDS = {
@ -171,9 +183,11 @@ class DependencyMetadata:
version_value = self._metadata.get(known_fields.VERSION) version_value = self._metadata.get(known_fields.VERSION)
date_value = self._metadata.get(known_fields.DATE) date_value = self._metadata.get(known_fields.DATE)
revision_value = self._metadata.get(known_fields.REVISION) revision_value = self._metadata.get(known_fields.REVISION)
if ((not version_value or version_util.is_unknown(version_value)) if ((not version_value
and (not date_value or util.is_unknown(date_value)) or version_util.version_is_unknown(version_value)) and
and (not revision_value or util.is_unknown(revision_value))): (not date_value or version_util.version_is_unknown(date_value))
and (not revision_value
or version_util.version_is_unknown(revision_value))):
versioning_fields = [ versioning_fields = [
known_fields.VERSION, known_fields.DATE, known_fields.REVISION known_fields.VERSION, known_fields.DATE, known_fields.REVISION
] ]
@ -199,3 +213,105 @@ class DependencyMetadata:
results.append(result) results.append(result)
return results return results
def _return_as_property(self, field: field_types.MetadataField) -> Any:
    """Look up `field` in the parsed metadata and return its narrowed value.

    Backs the public properties of DependencyMetadata: an unset field
    yields None, otherwise the stored raw value is passed through the
    field's own narrow_type() and that result is returned.
    """
    assert field in known_fields.ALL_FIELDS

    stored = self._metadata.get(field)
    # None means the field was never recorded; skip coercion entirely.
    return None if stored is None else field.narrow_type(stored)
@property
def name(self) -> Optional[str]:
    """Returns the Name field's text, or None if the field is not set."""
    return self._return_as_property(known_fields.NAME)
@property
def short_name(self) -> Optional[str]:
    """Returns the Short Name field's text, or None if the field is not set."""
    return self._return_as_property(known_fields.SHORT_NAME)
@property
def url(self) -> Optional[List[str]]:
    """Returns the list of URLs that point to the upstream repo.

    Only URLs that parse and use a supported scheme are kept, and each is
    returned in canonicalized form. The list can therefore be empty when
    every provided URL was rejected.

    Returns None if the field is not set or is empty, or if this
    repository is the canonical repository of this dependency (see
    is_canonical below).
    """
    return self._return_as_property(known_fields.URL)
@property
def is_canonical(self) -> bool:
    """Returns whether this repository is the canonical public repository
    of this dependency.

    This is derived from a special value in the raw URL field; a missing
    URL field is treated as an empty string.
    """
    value = self._metadata.get(known_fields.URL, "")
    return known_fields.URL.repo_is_canonical(value)
@property
def version(self) -> Optional[str]:
    """Returns the version string.

    Returns None if the field is not set, is empty, or holds a value that
    denotes an unknown / not-applicable / known-invalid version.
    """
    return self._return_as_property(known_fields.VERSION)
@property
def date(self) -> Optional[str]:
    """Returns the date in "YYYY-MM-DD" format.

    Returns None if the field is not set or cannot be parsed as a date.
    An ambiguous value (e.g. day/month order unclear) still yields a
    best-effort date.
    """
    return self._return_as_property(known_fields.DATE)
@property
def revision(self) -> Optional[str]:
    """Returns the revision string.

    Returns None if the field is not set, is empty, or holds a value that
    denotes an unknown / not-applicable / known-invalid revision.
    """
    return self._return_as_property(known_fields.REVISION)
@property
def license(self) -> Optional[List[str]]:
    """Returns a list of license names.

    Returns None if the field is not set or is empty.
    """
    return self._return_as_property(known_fields.LICENSE)
@property
def license_file(self) -> Optional[List[str]]:
    # NOTE(review): the unit test added with this change expects
    # LICENSE_FILE.narrow_type("src/file") to return the raw string as-is,
    # which disagrees with the List[str] annotation here -- confirm which
    # one is intended.
    #
    # TODO(b/321154076): Consider excluding files that don't exist on
    # disk if it's not too hard.
    #
    # Plumbing src_root and dependency_dir into field validator is
    # required.
    return self._return_as_property(known_fields.LICENSE_FILE)
@property
def security_critical(self) -> Optional[bool]:
    """Returns the Security Critical yes/no value as a bool, or None if the
    field is not set.
    """
    return self._return_as_property(known_fields.SECURITY_CRITICAL)
@property
def shipped(self) -> Optional[bool]:
    """Returns the Shipped yes/no value as a bool, or None if the field is
    not set.
    """
    return self._return_as_property(known_fields.SHIPPED)
@property
def shipped_in_chromium(self) -> Optional[bool]:
    """Returns the Shipped in Chromium yes/no value as a bool, or None if
    the field is not set.
    """
    return self._return_as_property(known_fields.SHIPPED_IN_CHROMIUM)
@property
def license_android_compatible(self) -> Optional[bool]:
    """Returns the License Android Compatible yes/no value as a bool, or
    None if the field is not set.
    """
    return self._return_as_property(known_fields.LICENSE_ANDROID_COMPATIBLE)
@property
def cpe_prefix(self) -> Optional[str]:
    """Returns a lowercase string (CPE names are case-insensitive).

    Returns None if the field is not set or its value is not accepted by
    the CPE prefix field's validity check.
    """
    return self._return_as_property(known_fields.CPE_PREFIX)
@property
def description(self) -> Optional[str]:
    """Returns the Description text, or None if the field is not set."""
    return self._return_as_property(known_fields.DESCRIPTION)
@property
def local_modifications(self) -> Optional[Union[Literal[False], str]]:
    """Returns `False` if there are no local modifications.

    Otherwise returns the text content extracted from the metadata.
    Returns None if the field is not set at all.
    """
    return self._return_as_property(known_fields.LOCAL_MODIFICATIONS)

@ -68,12 +68,15 @@ class CPEPrefixField(field_types.SingleLineTextField):
def __init__(self): def __init__(self):
super().__init__(name="CPEPrefix") super().__init__(name="CPEPrefix")
def _is_valid(self, value: str) -> bool:
    """Returns whether `value` is 'unknown' or a CPE in either the
    formatted-string or the URI form.
    """
    if util.is_unknown(value):
        return True
    return is_formatted_string_cpe(value) or is_uri_cpe(value)
def validate(self, value: str) -> Optional[vr.ValidationResult]: def validate(self, value: str) -> Optional[vr.ValidationResult]:
"""Checks the given value is either 'unknown', or conforms to """Checks the given value is either 'unknown', or conforms to
either the CPE 2.3 or 2.2 format. either the CPE 2.3 or 2.2 format.
""" """
if (util.is_unknown(value) or is_formatted_string_cpe(value) if self._is_valid(value):
or is_uri_cpe(value)):
return None return None
return vr.ValidationError( return vr.ValidationError(
@ -85,3 +88,13 @@ class CPEPrefixField(field_types.SingleLineTextField):
"https://nvd.nist.gov/products/cpe/search.", "https://nvd.nist.gov/products/cpe/search.",
f"Current value: '{value}'.", f"Current value: '{value}'.",
]) ])
def narrow_type(self, value: str) -> Optional[str]:
    """Returns the lower-cased CPE prefix, or None if `value` is not
    accepted by `_is_valid`.
    """
    # CPE names are case-insensitive, we normalize to lowercase.
    # See https://cpe.mitre.org/specification/.
    return value.lower() if self._is_valid(value) else None

@ -6,7 +6,7 @@
import datetime import datetime
import os import os
import sys import sys
from typing import Optional from typing import Optional, Tuple
_THIS_DIR = os.path.abspath(os.path.dirname(__file__)) _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory. # The repo's root directory.
@ -59,13 +59,65 @@ _RECOGNIZED_DATE_FORMATS = (
) )
def format_matches(value: str, date_format: str): def parse_with_format(value: str,
"""Returns whether the given value matches the date format.""" date_format: str) -> Optional[datetime.datetime]:
"""Returns datetime object if `value` can be parsed with `date_format`"""
try: try:
datetime.datetime.strptime(value, date_format) return datetime.datetime.strptime(value, date_format)
except ValueError: except ValueError:
return False return None
return True
def to_preferred_format(dt: datetime.datetime) -> str:
    """Formats `dt` using the preferred date prefix format."""
    return dt.strftime(_PREFERRED_PREFIX_FORMAT)
def parse_date(value: str) -> Optional[Tuple[str, bool]]:
    """Try to parse `value` into a date rendered in the preferred format.

    Parsing is attempted in stages, and a later stage only runs when all
    earlier ones found nothing:
      1. the first whitespace-separated token, in the preferred format;
      2. that same token, in each recognized alternative prefix format;
      3. the complete string, in each recognized full date format;
      4. the complete string, as ISO 8601.

    Returns:
      None if `value` is blank or no stage produced a match. Otherwise a
      (str, bool) tuple:
        - the str is the first match, rendered in the preferred format;
        - the bool is True when the matches disagree on the resulting
          date, i.e. `value` is ambiguous. For example, "2020/03/05"
          matches both "YYYY/MM/DD" and "YYYY/DD/MM".
    """
    text = value.strip()
    if not text:
        return None

    head = text.split()[0]
    candidates = []

    # Stage 1: preferred prefix format.
    preferred = parse_with_format(head, _PREFERRED_PREFIX_FORMAT)
    if preferred:
        candidates.append(preferred)

    # Stage 2: alternative prefix formats.
    if not candidates:
        candidates = [
            dt for date_format in _RECOGNIZED_PREFIX_FORMATS
            if (dt := parse_with_format(head, date_format))
        ]

    # Stage 3: formats that must match the complete string.
    if not candidates:
        candidates = [
            dt for date_format in _RECOGNIZED_DATE_FORMATS
            if (dt := parse_with_format(text, date_format))
        ]

    # Stage 4: ISO 8601.
    if not candidates:
        try:
            candidates.append(datetime.datetime.fromisoformat(text))
        except ValueError:
            pass

    if not candidates:
        return None

    # More than one distinct rendered date means the input is ambiguous.
    rendered = [to_preferred_format(dt) for dt in candidates]
    return rendered[0], len(set(rendered)) > 1
class DateField(field_types.SingleLineTextField): class DateField(field_types.SingleLineTextField):
@ -81,32 +133,29 @@ class DateField(field_types.SingleLineTextField):
reason=f"{self._name} is empty.", reason=f"{self._name} is empty.",
additional=["Provide date in format YYYY-MM-DD."]) additional=["Provide date in format YYYY-MM-DD."])
# Check if the first part (to ignore timezone info) uses the if not (parsed := parse_date(value)):
# preferred format. return vr.ValidationError(
parts = value.split() reason=f"{self._name} is invalid.",
if format_matches(parts[0], _PREFERRED_PREFIX_FORMAT): additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])
parsed_date, is_ambiguous = parsed
if is_ambiguous:
return vr.ValidationError(
reason=f"{self._name} is ambiguous.",
additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])
if not parse_with_format(value, _PREFERRED_PREFIX_FORMAT):
return vr.ValidationWarning(
reason=f"{self._name} isn't using the canonical format.",
additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])
return None
def narrow_type(self, value: str) -> Optional[str]:
"""Returns ISO 8601 date string, guarantees to be YYYY-MM-DD or None."""
if not (parsed := parse_date(value)):
return None return None
# Check if the first part (to ignore timezone info) uses a # We still return a date even if the parsing result is ambiguous. An
# recognized format. # date that's a few month off is better than nothing at all.
for prefix_format in _RECOGNIZED_PREFIX_FORMATS: return parsed[0]
if format_matches(parts[0], prefix_format):
return vr.ValidationWarning(
reason=f"{self._name} is not in the preferred format.",
additional=[
"Use YYYY-MM-DD.", f"Current value is '{value}'."
])
# Check the entire value for recognized date formats.
for date_format in _RECOGNIZED_DATE_FORMATS:
if format_matches(value, date_format):
return vr.ValidationWarning(
reason=f"{self._name} is not in the preferred format.",
additional=[
"Use YYYY-MM-DD.", f"Current value is '{value}'."
])
# Return an error as the value's format was not recognized.
return vr.ValidationError(
reason=f"{self._name} is invalid.",
additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])

@ -49,6 +49,11 @@ _PATTERN_LICENSE_ALLOWED = re.compile(
_PATTERN_VERBOSE_DELIMITER = re.compile(r" and | or | / ") _PATTERN_VERBOSE_DELIMITER = re.compile(r" and | or | / ")
# Split on the canonical delimiter, or any of the non-canonical delimiters.
# NOTE(review): the canonical delimiter is interpolated into the regex
# unescaped -- fine as long as it contains no regex metacharacters; confirm.
_PATTERN_SPLIT_LICENSE = re.compile("{}|{}".format(
    _PATTERN_VERBOSE_DELIMITER.pattern,
    field_types.MetadataField.VALUE_DELIMITER))
def process_license_value(value: str, def process_license_value(value: str,
atomic_delimiter: str) -> List[Tuple[str, bool]]: atomic_delimiter: str) -> List[Tuple[str, bool]]:
@ -134,3 +139,11 @@ class LicenseField(field_types.SingleLineTextField):
reason=f"Separate licenses using a '{self.VALUE_DELIMITER}'.") reason=f"Separate licenses using a '{self.VALUE_DELIMITER}'.")
return None return None
def narrow_type(self, value: str) -> Optional[List[str]]:
    """Returns the individual license names found in `value`.

    The value is split on the canonical and the verbose delimiters; each
    piece is stripped and empty pieces are dropped.
    """
    if not value:
        # Empty License field is equivalent to "not declared".
        return None

    stripped = (piece.strip()
                for piece in _PATTERN_SPLIT_LICENSE.split(value))
    return [piece for piece in stripped if piece]

@ -6,6 +6,7 @@
import os import os
import re import re
import sys import sys
from typing import Optional, Union, Literal
_THIS_DIR = os.path.abspath(os.path.dirname(__file__)) _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory. # The repo's root directory.
@ -39,13 +40,28 @@ class LocalModificationsField(field_types.FreeformTextField):
def __init__(self): def __init__(self):
super().__init__(name="Local Modifications", structured=False) super().__init__(name="Local Modifications", structured=False)
def should_terminate_field(self, field_value) -> bool: def _is_no_modification(self, value) -> bool:
field_value = field_value.strip() for pattern in _PATTERNS_NOT_MODIFIED:
if pattern.match(value):
return True
return False
def should_terminate_field(self, value) -> bool:
value = value.strip()
# If we can reasonably infer the field value means "No modification", # If we can reasonably infer the field value means "No modification",
# terminate this field to avoid over extraction. # terminate this field to avoid over extraction.
for pattern in _PATTERNS_NOT_MODIFIED: if self._is_no_modification(value):
if pattern.match(field_value): return True
return True
return False return False
def narrow_type(self, value) -> Optional[Union[Literal[False], str]]:
    """Returns False when `value` is empty or means "no modification";
    otherwise returns `value` unchanged.
    """
    if not value or self._is_no_modification(value):
        return False
    return value

@ -0,0 +1,41 @@
#!/usr/bin/env python3
# Copyright 2024 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import re
import sys
from typing import Optional
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory.
_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..", ".."))
# Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR)
import metadata.fields.field_types as field_types
import metadata.fields.custom.version as version_field
import metadata.fields.util as util
import metadata.validation_result as vr
class RevisionField(field_types.SingleLineTextField):
    """Custom field for the revision."""

    def __init__(self):
        super().__init__(name="Revision")

    def narrow_type(self, value: str) -> Optional[str]:
        """Returns the revision text, or None when it carries no usable
        information (empty, unknown / not applicable, or a known
        placeholder value).
        """
        narrowed = super().narrow_type(value)
        unusable = (not narrowed
                    or version_field.version_is_unknown(narrowed)
                    or util.is_known_invalid_value(narrowed))
        return None if unusable else narrowed

@ -6,7 +6,9 @@
import os import os
import re import re
import sys import sys
from typing import Optional from typing import Optional, List
from urllib.parse import urlparse, urlunparse
from itertools import filterfalse
_THIS_DIR = os.path.abspath(os.path.dirname(__file__)) _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory. # The repo's root directory.
@ -19,29 +21,73 @@ import metadata.fields.field_types as field_types
import metadata.fields.util as util import metadata.fields.util as util
import metadata.validation_result as vr import metadata.validation_result as vr
_PATTERN_URL_ALLOWED = re.compile(r"^(https?|ftp|git):\/\/\S+$")
_PATTERN_URL_CANONICAL_REPO = re.compile( _PATTERN_URL_CANONICAL_REPO = re.compile(
r"^This is the canonical (public )?repo(sitory)?\.?$", re.IGNORECASE) r"^This is the canonical (public )?repo(sitory)?\.?$", re.IGNORECASE)
# URL schemes accepted for the URL field.
_SUPPORTED_SCHEMES = {
    'http',
    'https',
    'git',
    'ftp',
}

# URLs can't contain whitespace. Treat whitespace as a delimiter so we can
# handle cases where the URL field contains one URL per line (without a
# comma delimiter).
_PATTERN_URL_DELIMITER = re.compile("{}|{}".format(
    r'\s+', field_types.MetadataField.VALUE_DELIMITER))
def _split_urls(value: str) -> List[str]:
    """Split the URL field value into individual, non-empty URL strings."""
    pieces = (piece.strip() for piece in _PATTERN_URL_DELIMITER.split(value))
    return [piece for piece in pieces if piece]
def _url_canonicalize(url: str) -> str:
"""Return the canonicalized URL (e.g. make scheme lower case)."""
return urlunparse(urlparse(url))
def _url_is_canonical(url: str) -> bool:
    """Returns whether `url` is already in the form `_url_canonicalize`
    would produce.
    """
    return url == _url_canonicalize(url)
def _url_is_valid(url: str) -> bool:
    """Checks whether the given `url` is acceptable:
    * url can be parsed without an error.
    * url uses a supported scheme / protocol.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse signals malformed input (e.g. an invalid IPv6 netloc)
        # with ValueError. Catch only that, so interpreter-level
        # exceptions such as KeyboardInterrupt are not swallowed.
        return False

    return parsed.scheme in _SUPPORTED_SCHEMES
class URLField(field_types.MetadataField): class URLField(field_types.MetadataField):
"""Custom field for the package URL(s).""" """Custom field for the package URL(s)."""
def __init__(self): def __init__(self):
super().__init__(name="URL") super().__init__(name="URL")
def repo_is_canonical(self, value: str) -> bool:
    """Returns whether `value` (the raw URL field text) indicates that
    this repository is the canonical repository.

    Surrounding whitespace in `value` is ignored.
    """
    return util.matches(_PATTERN_URL_CANONICAL_REPO, value.strip())
def validate(self, value: str) -> Optional[vr.ValidationResult]: def validate(self, value: str) -> Optional[vr.ValidationResult]:
"""Checks the given value has acceptable URL values only. """Checks the given value has acceptable URL values only.
Note: this field supports multiple values. Note: this field supports multiple values.
""" """
if util.matches(_PATTERN_URL_CANONICAL_REPO, value): if self.repo_is_canonical(value):
return None return None
invalid_values = [] urls = _split_urls(value)
for url in value.split(self.VALUE_DELIMITER): if not urls:
url = url.strip() return vr.ValidationError(reason=f"{self._name} must be provided.")
if not util.matches(_PATTERN_URL_ALLOWED, url):
invalid_values.append(url) invalid_values = list(filterfalse(_url_is_valid, urls))
if invalid_values: if invalid_values:
return vr.ValidationError( return vr.ValidationError(
@ -53,4 +99,26 @@ class URLField(field_types.MetadataField):
f"Invalid values: {util.quoted(invalid_values)}.", f"Invalid values: {util.quoted(invalid_values)}.",
]) ])
non_canon_values = list(filterfalse(_url_is_canonical, urls))
if non_canon_values:
canon_values = list(map(_url_canonicalize, non_canon_values))
return vr.ValidationWarning(
reason=f"{self._name} is contains non-canonical URLs.",
additional=[
"URLs should be canonical and well-formed."
f"Non canonical values: {util.quoted(non_canon_values)}.",
f"Canonicalized URLs should be: {util.quoted(canon_values)}."
])
return None return None
def narrow_type(self, value) -> Optional[List[str]]:
    """Returns the valid URLs found in `value`, each canonicalized.

    Returns None when `value` is empty or states that this repository is
    the canonical one. Invalid URLs are dropped, so the returned list may
    be empty.
    """
    if not value or self.repo_is_canonical(value):
        return None

    # Filter out invalid URLs, and canonicalize the URLs.
    return [
        _url_canonicalize(url) for url in _split_urls(value)
        if _url_is_valid(url)
    ]

@ -19,17 +19,16 @@ import metadata.fields.field_types as field_types
import metadata.fields.util as util import metadata.fields.util as util
import metadata.validation_result as vr import metadata.validation_result as vr
_PATTERN_NOT_APPLICABLE = re.compile(r"^N ?\/ ?A$", re.IGNORECASE)
def version_is_unknown(value: str) -> bool:
def is_unknown(value: str) -> bool:
"""Returns whether the value denotes the version being unknown.""" """Returns whether the value denotes the version being unknown."""
return (value == "0" or util.matches(_PATTERN_NOT_APPLICABLE, value) return (value == "0" or util.is_not_applicable(value)
or util.is_unknown(value)) or util.is_unknown(value))
class VersionField(field_types.SingleLineTextField): class VersionField(field_types.SingleLineTextField):
"""Custom field for the package version.""" """Custom field for the package version."""
def __init__(self): def __init__(self):
super().__init__(name="Version") super().__init__(name="Version")
@ -55,3 +54,16 @@ class VersionField(field_types.SingleLineTextField):
]) ])
return None return None
def narrow_type(self, value: str) -> Optional[str]:
    """Returns the version text, or None when it carries no usable
    information (empty, unknown / not applicable, or a known placeholder
    value).
    """
    narrowed = super().narrow_type(value)
    unusable = (not narrowed or version_is_unknown(narrowed)
                or util.is_known_invalid_value(narrowed))
    return None if unusable else narrowed

@ -7,6 +7,7 @@ import os
import re import re
import sys import sys
from typing import Optional from typing import Optional
from enum import Enum
_THIS_DIR = os.path.abspath(os.path.dirname(__file__)) _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory. # The repo's root directory.
@ -26,7 +27,6 @@ _PATTERN_YES_OR_NO = re.compile(r"^(yes|no)$", re.IGNORECASE)
# case-insensitive. e.g. "No (test only)", "Yes?" # case-insensitive. e.g. "No (test only)", "Yes?"
_PATTERN_STARTS_WITH_YES_OR_NO = re.compile(r"^(yes|no)", re.IGNORECASE) _PATTERN_STARTS_WITH_YES_OR_NO = re.compile(r"^(yes|no)", re.IGNORECASE)
class MetadataField: class MetadataField:
"""Base class for all metadata fields.""" """Base class for all metadata fields."""
@ -73,6 +73,15 @@ class MetadataField:
""" """
raise NotImplementedError(f"{self._name} field validation not defined.") raise NotImplementedError(f"{self._name} field validation not defined.")
def narrow_type(self, value):
    """Returns a narrowly typed (e.g. bool) value for this field for
    downstream consumption.

    The alternative being the downstream parses the string again.

    Subclasses are expected to override this; the base implementation
    always raises.

    Raises:
      NotImplementedError: always, naming the field that lacks an
        implementation.
    """
    raise NotImplementedError(
        f"{self._name} field value coercion not defined.")
class FreeformTextField(MetadataField): class FreeformTextField(MetadataField):
"""Field where the value is freeform text.""" """Field where the value is freeform text."""
@ -86,6 +95,9 @@ class FreeformTextField(MetadataField):
return None return None
def narrow_type(self, value):
    """Returns the freeform text unchanged.

    `value` must not be None; an empty string is returned as-is.
    """
    assert value is not None
    return value
class SingleLineTextField(FreeformTextField): class SingleLineTextField(FreeformTextField):
"""Field where the field as a whole is a single line of text.""" """Field where the field as a whole is a single line of text."""
@ -126,3 +138,6 @@ class YesNoField(SingleLineTextField):
f"This field must be {util.YES} or {util.NO}.", f"This field must be {util.YES} or {util.NO}.",
f"Current value is '{value}'.", f"Current value is '{value}'.",
]) ])
def narrow_type(self, value) -> Optional[bool]:
    """Returns the yes/no value as a bool, as inferred by
    util.infer_as_boolean from the field text.
    """
    text = super().narrow_type(value)
    return util.infer_as_boolean(text)

@ -21,12 +21,12 @@ import metadata.fields.custom.license_file
import metadata.fields.custom.local_modifications import metadata.fields.custom.local_modifications
import metadata.fields.custom.url import metadata.fields.custom.url
import metadata.fields.custom.version import metadata.fields.custom.version
import metadata.fields.custom.revision
import metadata.fields.field_types as field_types import metadata.fields.field_types as field_types
# Freeform text fields. # Freeform text fields.
NAME = field_types.SingleLineTextField("Name") NAME = field_types.SingleLineTextField("Name")
SHORT_NAME = field_types.SingleLineTextField("Short Name") SHORT_NAME = field_types.SingleLineTextField("Short Name")
REVISION = field_types.SingleLineTextField("Revision")
DESCRIPTION = field_types.FreeformTextField("Description", structured=False) DESCRIPTION = field_types.FreeformTextField("Description", structured=False)
# Yes/no fields. # Yes/no fields.
@ -43,6 +43,7 @@ LICENSE = metadata.fields.custom.license.LicenseField()
LICENSE_FILE = metadata.fields.custom.license_file.LicenseFileField() LICENSE_FILE = metadata.fields.custom.license_file.LicenseFileField()
URL = metadata.fields.custom.url.URLField() URL = metadata.fields.custom.url.URLField()
VERSION = metadata.fields.custom.version.VersionField() VERSION = metadata.fields.custom.version.VersionField()
REVISION = metadata.fields.custom.revision.RevisionField()
LOCAL_MODIFICATIONS = metadata.fields.custom.local_modifications.LocalModificationsField( LOCAL_MODIFICATIONS = metadata.fields.custom.local_modifications.LocalModificationsField(
) )

@ -26,6 +26,22 @@ _PATTERN_STARTS_WITH_YES = re.compile(r"^yes", re.IGNORECASE)
# case-insensitive. # case-insensitive.
_PATTERN_STARTS_WITH_NO = re.compile(r"^no", re.IGNORECASE) _PATTERN_STARTS_WITH_NO = re.compile(r"^no", re.IGNORECASE)
# Variants of N/A (Not Applicable).
_PATTERN_NOT_APPLICABLE = re.compile(r"^(N ?\/ ?A)\.?|na\.?|not applicable\.?$",
re.IGNORECASE)
# A collection of placeholder values that provide little information.
# Entries are lower-case, because lookups lower-case the candidate value
# before comparing.
_KNOWN_INVALID_VALUES = {
    "0",
    "varies",
    "-",
    "unknown",
    "head",
    "see deps",
    "deps",
}
def matches(pattern: re.Pattern, value: str) -> bool: def matches(pattern: re.Pattern, value: str) -> bool:
"""Returns whether the value matches the pattern.""" """Returns whether the value matches the pattern."""
@ -61,3 +77,20 @@ def infer_as_boolean(value: str, default: bool = True) -> bool:
return False return False
else: else:
return default return default
def is_known_invalid_value(value: str):
    """Returns whether `value` is among the known bad values that provide
    little machine readable information.

    The comparison is case-insensitive. An empty value is not considered
    a known invalid value.
    """
    return bool(value) and value.lower() in _KNOWN_INVALID_VALUES
def is_not_applicable(value: str) -> bool:
    """Returns whether `value` is a variant of "N/A" (not applicable)."""
    return matches(_PATTERN_NOT_APPLICABLE, value)

@ -95,17 +95,18 @@ class FieldValidationTest(unittest.TestCase):
def test_date_validation(self): def test_date_validation(self):
self._run_field_validation( self._run_field_validation(
field=known_fields.DATE, field=known_fields.DATE,
valid_values=[ valid_values=["2012-03-04"],
"2012-03-04", "2012-03-04 UTC", "2012-03-04 UTC+10:00"
],
error_values=[ error_values=[
"", "",
"\n", "\n",
"N/A", "N/A",
"03-04-12", # Ambiguous month and day.
"04/03/2012", # Ambiguous month and day.
], ],
warning_values=[ warning_values=[
"2012-03-04 UTC", "2012-03-04 UTC+10:00",
"2012/03/04 UTC+10:00", "20120304", "April 3, 2012", "2012/03/04 UTC+10:00", "20120304", "April 3, 2012",
"3 Apr 2012", "03-04-12", "04/03/2012", "3 Apr 2012", "30/12/2000", "20-03-2020",
"Tue Apr 3 05:06:07 2012 +0800" "Tue Apr 3 05:06:07 2012 +0800"
], ],
) )
@ -181,14 +182,18 @@ class FieldValidationTest(unittest.TestCase):
"https://www.example.com/a", "https://www.example.com/a",
"http://www.example.com/b", "http://www.example.com/b",
"ftp://www.example.com/c,git://www.example.com/d", "ftp://www.example.com/c,git://www.example.com/d",
"https://www.example.com/a\n https://example.com/b",
"This is the canonical public repository", "This is the canonical public repository",
], ],
warning_values=[
# Scheme is case-insensitive, but should be lower case.
"Https://www.example.com/g",
],
error_values=[ error_values=[
"", "",
"\n", "\n",
"ghttps://www.example.com/e", "ghttps://www.example.com/e",
"https://www.example.com/ f", "https://www.example.com/ f",
"Https://www.example.com/g",
"This is an unrecognized message for the URL", "This is an unrecognized message for the URL",
], ],
) )

@ -0,0 +1,187 @@
#!/usr/bin/env python3
# Copyright 2024 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import sys
import unittest
from typing import Any, Callable
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory.
_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
# Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR)
from metadata.fields.field_types import MetadataField
import metadata.fields.known as fields
from metadata.dependency_metadata import DependencyMetadata
class FieldValidationTest(unittest.TestCase):
    """Tests narrow_type() on fields we validate and extract structural data."""

    def _test_on_field(self, field: MetadataField) -> Callable:
        """Returns an `expect(value, expected_value, reason)` helper that
        asserts `field.narrow_type(value)` equals `expected_value`.

        `reason` completes the sentence 'Field "<name>" should ...' in the
        failure message.
        """

        def expect(value: str, expected_value: Any, reason: str):
            output = field.narrow_type(value)
            self.assertEqual(
                output, expected_value,
                f'Field "{field.get_name()}" should {reason}. Input value'
                f' was: "{value}", but got coerced into {repr(output)}')

        return expect

    def test_name(self):
        expect = self._test_on_field(fields.NAME)
        expect("package name", "package name", "return as-is")
        expect("", "", "not coerce empty string to `None`")

    def test_short_name(self):
        expect = self._test_on_field(fields.SHORT_NAME)
        expect("pkg-name", "pkg-name", "return as-is")
        expect("", "", "not coerce empty string to `None`")

    def test_url(self):
        expect = self._test_on_field(fields.URL)
        expect("", None, "treat empty string as None")
        expect("https://example.com/", ["https://example.com/"],
               "return valid url")
        expect("https://example.com/,\nhttps://example2.com/",
               ["https://example.com/", "https://example2.com/"],
               "return multiple valid urls")
        expect("file://test", [], "reject unsupported scheme")
        expect("file://test,\nhttps://example.com", ["https://example.com"],
               "reject unsupported scheme")
        expect("HTTPS://example.com", ["https://example.com"],
               "canonicalize url")
        expect("http", [], "reject invalid url")
        expect("This is the canonical repo.", None,
               "understand the this repo is canonical message")

    def test_version(self):
        expect = self._test_on_field(fields.VERSION)
        expect("", None, "treat empty string as None")
        expect("0", None, "treat invalid value as None")
        expect("varies", None, "treat invalid value as None")
        expect("see deps", None, "treat invalid value as None")
        expect("N/A", None, "N/A is treated as None")
        expect("Not applicable.", None, "N/A is treated as None")

    def test_date(self):
        expect = self._test_on_field(fields.DATE)
        expect("", None, "treat empty string as None")
        expect("0", None, "treat invalid value as None")
        expect("varies", None, "treat invalid value as None")
        expect("2024-01-02", "2024-01-02", "accepts ISO 8601 date")
        expect("2024-01-02T03:04:05Z", "2024-01-02",
               "accepts ISO 8601 date time")
        expect("Jan 2 2024", "2024-01-02", "accepts locale format")
        expect(
            "02/03/2000", "2000-03-02",
            "accepts ambiguous MM/DD format (better than no date info at all)")
        expect("11/30/2000", "2000-11-30", "accepts unambiguous MM/DD format")

    def test_revision(self):
        expect = self._test_on_field(fields.REVISION)
        expect("", None, "treat empty string as None")
        expect("0", None, "treat invalid value as None")
        expect("varies", None, "treat invalid value as None")
        expect("see deps", None, "treat invalid value as None")
        expect("N/A", None, "N/A is treated as None")
        expect("Not applicable.", None, "N/A is treated as None")

    def test_license(self):
        expect = self._test_on_field(fields.LICENSE)
        expect("", None, "treat empty string as None")
        expect("LICENSE-1", ["LICENSE-1"], "return as a list")
        expect("LGPL v2 and BSD", ["LGPL v2", "BSD"], "return as a list")

    def test_license_file(self):
        # TODO(b/321154076): Consider excluding files that don't exist on
        # disk if it's not too hard.
        #
        # Right now, we return the unparsed license file field as-is.
        expect = self._test_on_field(fields.LICENSE_FILE)
        expect("src/file", "src/file", "return value as-is")

    def test_security_critical(self):
        expect = self._test_on_field(fields.SECURITY_CRITICAL)
        expect("yes", True, "understand truthy value")
        expect("Yes", True, "understand truthy value")
        expect("no", False, "understand falsey value")
        expect("No, because", False,
               "understand falsey value, with description")

    def test_shipped(self):
        expect = self._test_on_field(fields.SHIPPED)
        expect("yes", True, "understand truthy value")
        expect("Yes, but", True, "understand truthy value with extra comment")
        expect("no", False, "understand falsey value")
        expect("no, because", False,
               "understand falsey value, with extra comment")

    def test_shipped_in_chromium(self):
        expect = self._test_on_field(fields.SHIPPED_IN_CHROMIUM)
        expect("yes", True, "understand truthy value")
        expect("Yes", True, "understand truthy value")
        expect("no", False, "understand falsey value")
        expect("no, because", False,
               "understand falsey value, with extra comment")

    def test_license_android_compatible(self):
        expect = self._test_on_field(fields.LICENSE_ANDROID_COMPATIBLE)
        expect("yes", True, "understand truthy value")
        expect("Yes", True, "understand truthy value")
        expect("no", False, "understand falsey value")
        expect("no, because", False,
               "understand falsey value, with extra comment")

    def test_cpe_prefix(self):
        expect = self._test_on_field(fields.CPE_PREFIX)
        expect("unknown", "unknown", "understand unknown")
        expect("bad_cpe_format", None, "rejects invalid value")
        expect("cpe:/a:d3", "cpe:/a:d3", "accept a valid cpe prefix")
        expect("cpe:/a:D3", "cpe:/a:d3", "normalize to lowercase")

    def test_description(self):
        expect = self._test_on_field(fields.DESCRIPTION)
        expect("desc", "desc", "return value as-is")

    def test_local_modification(self):
        expect = self._test_on_field(fields.LOCAL_MODIFICATIONS)
        expect("none", False, "understands none")
        expect("(none)", False, "understands none")
        expect("not applicable", False, "understands N/A")
        expect("", False, "treat empty string as False")
        expect("modified X file", "modified X file",
               "return value as-is if it doesn't mean no modification")

    def test_dependency_data_return_as_property(self):
        # Exercises the DependencyMetadata property accessors end to end,
        # including a field that is set but empty (revision) and a field
        # that is never set (version).
        dm = DependencyMetadata()
        dm.add_entry("name", "package")
        dm.add_entry("url", "git://git@example.com,\nbad_url://example.com")
        dm.add_entry("security critical", "no")
        dm.add_entry("date", "2024-01-02")
        dm.add_entry("revision", "")

        self.assertEqual(dm.name, "package")
        self.assertEqual(dm.url, ["git://git@example.com"])
        self.assertEqual(dm.security_critical, False)
        self.assertEqual(dm.date, "2024-01-02")
        self.assertEqual(dm.revision, None)
        self.assertEqual(dm.version, None)

    def test_dependency_data_repo_is_canonical(self):
        # The "canonical repo" message yields no URLs but sets is_canonical.
        dm = DependencyMetadata()
        dm.add_entry("name", "package")
        dm.add_entry("url", "This is the canonical repo.")

        self.assertEqual(dm.url, None)
        self.assertEqual(dm.is_canonical, True)
if __name__ == "__main__":
unittest.main()
Loading…
Cancel
Save