From a1cfc693af3505461fea2a59eaf484720c897c4f Mon Sep 17 00:00:00 2001
From: Anne Redulla <aredulla@google.com>
Date: Wed, 23 Aug 2023 00:13:23 +0000
Subject: [PATCH] [ssci] Added CheckChromiumMetadataFiles in
 presubmit_canned_checks

Bug: b:277147404
Change-Id: I14a2f11b256bc85fdfe225443ef533c38463ca3e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4796694
Reviewed-by: Gavin Mak <gavinmak@google.com>
Reviewed-by: Rachael Newitt <renewitt@google.com>
Commit-Queue: Anne Redulla <aredulla@google.com>
---
 metadata/README.md                            |  4 +
 metadata/dependency_metadata.py               |  4 +-
 ...est => README.chromium.test.multi-invalid} |  6 +-
 .../data/README.chromium.test.multi-valid     | 32 ++++++++
 .../data/README.chromium.test.single-valid    | 20 +++++
 metadata/tests/parse_test.py                  | 42 ++++++++--
 metadata/tests/validate_test.py               | 76 ++++++++++++++++++
 metadata/validate.py                          | 80 +++++++++++++++++++
 metadata/validation_result.py                 | 26 ++++--
 presubmit_canned_checks.py                    | 30 ++++++-
 10 files changed, 302 insertions(+), 18 deletions(-)
 create mode 100644 metadata/README.md
 rename metadata/tests/data/{README.chromium.test => README.chromium.test.multi-invalid} (82%)
 create mode 100644 metadata/tests/data/README.chromium.test.multi-valid
 create mode 100644 metadata/tests/data/README.chromium.test.single-valid
 create mode 100644 metadata/tests/validate_test.py
 create mode 100644 metadata/validate.py

diff --git a/metadata/README.md b/metadata/README.md
new file mode 100644
index 000000000..94accc471
--- /dev/null
+++ b/metadata/README.md
@@ -0,0 +1,4 @@
+# Validation for Chromium's Third Party Metadata Files
+
+This directory contains the code to validate Chromium's third party metadata
+files, i.e. `README.chromium` files.
diff --git a/metadata/dependency_metadata.py b/metadata/dependency_metadata.py
index 0f2a251d1..08d5b5536 100644
--- a/metadata/dependency_metadata.py
+++ b/metadata/dependency_metadata.py
@@ -119,7 +119,7 @@ class DependencyMetadata:
     if repeated_field_info:
       repeated = ", ".join(repeated_field_info)
       error = vr.ValidationError(
-          f"Multiple entries for the same field: {repeated}")
+          f"Multiple entries for the same field: {repeated}.")
       error.set_tag(tag="reason", value="repeated field")
       results.append(error)
 
@@ -128,7 +128,7 @@ class DependencyMetadata:
     for field in required_fields:
       if field not in self._metadata:
         field_name = field.get_name()
-        error = vr.ValidationError(f"Required field '{field_name}' is missing")
+        error = vr.ValidationError(f"Required field '{field_name}' is missing.")
         error.set_tag(tag="reason", value="missing required field")
         results.append(error)
 
diff --git a/metadata/tests/data/README.chromium.test b/metadata/tests/data/README.chromium.test.multi-invalid
similarity index 82%
rename from metadata/tests/data/README.chromium.test
rename to metadata/tests/data/README.chromium.test.multi-invalid
index 84bf001b6..5264e3119 100644
--- a/metadata/tests/data/README.chromium.test
+++ b/metadata/tests/data/README.chromium.test.multi-invalid
@@ -1,4 +1,4 @@
-Name: Test-A README for Chromium metadata
+Name: Test-A README for Chromium metadata (0 errors, 0 warnings)
 Short Name: metadata-test-valid
 URL: https://www.example.com/metadata,
      https://www.example.com/parser
@@ -21,7 +21,7 @@ EXCEPT:
 
 -------------------- DEPENDENCY DIVIDER --------------------
 
-Name: Test-B README for Chromium metadata
+Name: Test-B README for Chromium metadata (4 errors, 1 warning)
 SHORT NAME: metadata-test-invalid
 URL: file://home/drive/chromium/src/metadata
 Version:0
@@ -37,7 +37,7 @@ Local Modifications:     None.
 -------------------- DEPENDENCY DIVIDER --------------------
 -------------------- DEPENDENCY DIVIDER --------------------
 
-Name: Test-C README for Chromium metadata
+Name: Test-C README for Chromium metadata (5 errors, 1 warning)
 URL: https://www.example.com/first
 URL: https://www.example.com/second
 Version: N/A
diff --git a/metadata/tests/data/README.chromium.test.multi-valid b/metadata/tests/data/README.chromium.test.multi-valid
new file mode 100644
index 000000000..50e7dda4d
--- /dev/null
+++ b/metadata/tests/data/README.chromium.test.multi-valid
@@ -0,0 +1,32 @@
+Name: Test-A README for Chromium metadata
+Short Name: metadata-test-valid
+URL: https://www.example.com/metadata,
+     https://www.example.com/parser
+Version: 1.0.12
+Date: 2020-12-03
+License: Apache, 2.0 and MIT
+License File: LICENSE
+Security Critical: yes
+Shipped: yes
+CPEPrefix: unknown
+This line should be ignored because CPEPrefix is a one-liner field.
+Description:
+A test metadata file, with a
+ multi-line description.
+
+Local Modifications:
+None,
+EXCEPT:
+* nothing.
+
+-------------------- DEPENDENCY DIVIDER --------------------
+
+Name: Test-B README for Chromium metadata
+Short Name: metadata-test-valid-again
+URL: https://www.example.com/metadata
+Version: 1.0.12
+Date: 2020-12-03
+License: Apache, 2.0 and MIT
+License File: LICENSE
+Security Critical: yes
+Shipped: yes
diff --git a/metadata/tests/data/README.chromium.test.single-valid b/metadata/tests/data/README.chromium.test.single-valid
new file mode 100644
index 000000000..e53b57651
--- /dev/null
+++ b/metadata/tests/data/README.chromium.test.single-valid
@@ -0,0 +1,20 @@
+Name: Test-A README for Chromium metadata
+Short Name: metadata-test-valid
+URL: https://www.example.com/metadata,
+     https://www.example.com/parser
+Version: 1.0.12
+Date: 2020-12-03
+License: Apache, 2.0 and MIT
+License File: LICENSE
+Security Critical: yes
+Shipped: yes
+CPEPrefix: unknown
+This line should be ignored because CPEPrefix is a one-liner field.
+Description:
+A test metadata file, with a
+ multi-line description.
+
+Local Modifications:
+None,
+EXCEPT:
+* nothing.
diff --git a/metadata/tests/parse_test.py b/metadata/tests/parse_test.py
index 8e8a722f4..23819e154 100644
--- a/metadata/tests/parse_test.py
+++ b/metadata/tests/parse_test.py
@@ -18,8 +18,37 @@ import metadata.parse
 
 
 class ParseTest(unittest.TestCase):
-  def test_parse(self):
-    filepath = os.path.join(_THIS_DIR, "data", "README.chromium.test")
+  def test_parse_single(self):
+    """Check parsing works for a single dependency's metadata."""
+    filepath = os.path.join(_THIS_DIR, "data",
+                            "README.chromium.test.single-valid")
+    all_metadata = metadata.parse.parse_file(filepath)
+
+    self.assertEqual(len(all_metadata), 1)
+    self.assertListEqual(
+        all_metadata[0].get_entries(),
+        [
+            ("Name", "Test-A README for Chromium metadata"),
+            ("Short Name", "metadata-test-valid"),
+            ("URL", "https://www.example.com/metadata,\n"
+             "     https://www.example.com/parser"),
+            ("Version", "1.0.12"),
+            ("Date", "2020-12-03"),
+            ("License", "Apache, 2.0 and MIT"),
+            ("License File", "LICENSE"),
+            ("Security Critical", "yes"),
+            ("Shipped", "yes"),
+            ("CPEPrefix", "unknown"),
+            ("Description", "A test metadata file, with a\n"
+             " multi-line description."),
+            ("Local Modifications", "None,\nEXCEPT:\n* nothing."),
+        ],
+    )
+
+  def test_parse_multiple(self):
+    """Check parsing works for multiple dependencies' metadata."""
+    filepath = os.path.join(_THIS_DIR, "data",
+                            "README.chromium.test.multi-invalid")
     all_metadata = metadata.parse.parse_file(filepath)
 
     # Dependency metadata with no entries at all are ignored.
@@ -29,7 +58,8 @@ class ParseTest(unittest.TestCase):
     self.assertListEqual(
         all_metadata[0].get_entries(),
         [
-            ("Name", "Test-A README for Chromium metadata"),
+            ("Name",
+             "Test-A README for Chromium metadata (0 errors, 0 warnings)"),
             ("Short Name", "metadata-test-valid"),
             ("URL", "https://www.example.com/metadata,\n"
              "     https://www.example.com/parser"),
@@ -51,7 +81,8 @@ class ParseTest(unittest.TestCase):
     self.assertListEqual(
         all_metadata[1].get_entries(),
         [
-            ("Name", "Test-B README for Chromium metadata"),
+            ("Name",
+             "Test-B README for Chromium metadata (4 errors, 1 warning)"),
             ("SHORT NAME", "metadata-test-invalid"),
             ("URL", "file://home/drive/chromium/src/metadata"),
             ("Version", "0"),
@@ -68,7 +99,8 @@ class ParseTest(unittest.TestCase):
     self.assertListEqual(
         all_metadata[2].get_entries(),
         [
-            ("Name", "Test-C README for Chromium metadata"),
+            ("Name",
+             "Test-C README for Chromium metadata (5 errors, 1 warning)"),
             ("URL", "https://www.example.com/first"),
             ("URL", "https://www.example.com/second"),
             ("Version", "N/A"),
diff --git a/metadata/tests/validate_test.py b/metadata/tests/validate_test.py
new file mode 100644
index 000000000..33a6978a5
--- /dev/null
+++ b/metadata/tests/validate_test.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+# Copyright 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import sys
+import unittest
+
+_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
+# The repo's root directory.
+_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
+
+# Add the repo's root directory for clearer imports.
+sys.path.insert(0, _ROOT_DIR)
+
+import metadata.validate
+
+
+class ValidateTest(unittest.TestCase):
+  def test_validate_file(self):
+    # Validate a valid file (no errors or warnings).
+    test_filepath = os.path.join(_THIS_DIR, "data",
+                                 "README.chromium.test.multi-valid")
+    results = metadata.validate.validate_file(
+        filepath=test_filepath,
+        repo_root_dir=_THIS_DIR,
+    )
+    self.assertEqual(len(results), 0)
+
+    # Validate an invalid file (both errors and warnings).
+    test_filepath = os.path.join(_THIS_DIR, "data",
+                                 "README.chromium.test.multi-invalid")
+    results = metadata.validate.validate_file(
+        filepath=test_filepath,
+        repo_root_dir=_THIS_DIR,
+    )
+    self.assertEqual(len(results), 11)
+    error_count = 0
+    warning_count = 0
+    for result in results:
+      if result.is_fatal():
+        error_count += 1
+      else:
+        warning_count += 1
+    self.assertEqual(error_count, 9)
+    self.assertEqual(warning_count, 2)
+
+  def test_check_file(self):
+    # Check a valid file (no errors or warnings).
+    test_filepath = os.path.join(_THIS_DIR, "data",
+                                 "README.chromium.test.multi-valid")
+    errors, warnings = metadata.validate.check_file(
+        filepath=test_filepath,
+        repo_root_dir=_THIS_DIR,
+    )
+    self.assertEqual(len(errors), 0)
+    self.assertEqual(len(warnings), 0)
+
+    # Check an invalid file (both errors and warnings).
+    test_filepath = os.path.join(_THIS_DIR, "data",
+                                 "README.chromium.test.multi-invalid")
+    errors, warnings = metadata.validate.check_file(
+        filepath=test_filepath,
+        repo_root_dir=_THIS_DIR,
+    )
+    # TODO(aredulla): update this test once validation errors can be returned
+    #                 as errors.
+    # self.assertEqual(len(errors), 9)
+    # self.assertEqual(len(warnings), 2)
+    self.assertEqual(len(errors), 0)
+    self.assertEqual(len(warnings), 11)
+
+
+if __name__ == "__main__":
+  unittest.main()
diff --git a/metadata/validate.py b/metadata/validate.py
new file mode 100644
index 000000000..460660fb5
--- /dev/null
+++ b/metadata/validate.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+# Copyright 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import sys
+from typing import List, Tuple
+
+_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
+# The repo's root directory.
+_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
+
+# Add the repo's root directory for clearer imports.
+sys.path.insert(0, _ROOT_DIR)
+
+import metadata.parse
+import metadata.validation_result as vr
+
+
+def validate_file(filepath: str,
+                  repo_root_dir: str) -> List[vr.ValidationResult]:
+  """Validate the metadata file."""
+  if not os.path.exists(filepath):
+    result = vr.ValidationError(f"'{filepath}' not found.")
+    result.set_tag(tag="reason", value="file not found")
+    return [result]
+
+  # Get the directory the metadata file is in.
+  parent_dir = os.path.dirname(filepath)
+
+  results = []
+  dependencies = metadata.parse.parse_file(filepath)
+  for dependency in dependencies:
+    results.extend(
+        dependency.validate(
+            source_file_dir=parent_dir,
+            repo_root_dir=repo_root_dir,
+        ))
+
+  return results
+
+
+def check_file(filepath: str,
+               repo_root_dir: str) -> Tuple[List[str], List[str]]:
+  """Run metadata validation on the given file, and return all validation
+  errors and validation warnings.
+
+  Args:
+    filepath: the path to a metadata file,
+              e.g. "/chromium/src/third_party/libname/README.chromium"
+    repo_root_dir: the repository's root directory; this is needed to construct
+                   file paths to license files.
+
+  Returns:
+    error_messages: the fatal validation issues present in the file;
+                    i.e. presubmit should fail.
+    warning_messages: the non-fatal validation issues present in the file;
+                      i.e. presubmit should still pass.
+  """
+  error_messages = []
+  warning_messages = []
+  for result in validate_file(filepath, repo_root_dir):
+    # Construct the message.
+    if result.get_tag("reason") == "file not found":
+      message = result.get_message(postscript="", width=60)
+    else:
+      message = result.get_message(width=60)
+
+    # TODO(aredulla): Actually distinguish between validation errors and
+    # warnings. The quality of metadata is currently being uplifted, but is not
+    # yet guaranteed to pass validation. So for now, all validation results will
+    # be returned as warnings so CLs are not blocked by invalid metadata in
+    # presubmits yet.
+    # if result.is_fatal():
+    #   error_messages.append(message)
+    # else:
+    warning_messages.append(message)
+
+  return error_messages, warning_messages
diff --git a/metadata/validation_result.py b/metadata/validation_result.py
index d86d8b3cc..45a1d1dca 100644
--- a/metadata/validation_result.py
+++ b/metadata/validation_result.py
@@ -7,6 +7,11 @@ import textwrap
 from typing import Dict, Union
 
 
+_CHROMIUM_METADATA_PRESCRIPT = "Third party metadata issue:"
+_CHROMIUM_METADATA_POSTSCRIPT = ("Check //third_party/README.chromium.template "
+                                 "for details.")
+
+
 class ValidationResult:
   """Base class for validation issues."""
   def __init__(self, message: str, fatal: bool):
@@ -33,20 +38,27 @@ class ValidationResult:
   def get_all_tags(self) -> Dict[str, str]:
     return dict(self._tags)
 
-  def get_message(self, width: int = 0) -> str:
+  def get_message(self,
+                  prescript: str = _CHROMIUM_METADATA_PRESCRIPT,
+                  postscript: str = _CHROMIUM_METADATA_POSTSCRIPT,
+                  width: int = 0) -> str:
+    components = [prescript, self._message, postscript]
+    message = " ".join(
+        [component for component in components if len(component) > 0])
+
     if width > 0:
-      return textwrap.fill(text=self._message, width=width)
+      return textwrap.fill(text=message, width=width)
 
-    return self._message
+    return message
 
 
 class ValidationError(ValidationResult):
   """Fatal validation issue. Presubmit should fail."""
-  def __init__(self, message: str, **tags: Dict[str, str]):
-    super().__init__(message=message, fatal=True, **tags)
+  def __init__(self, message: str):
+    super().__init__(message=message, fatal=True)
 
 
 class ValidationWarning(ValidationResult):
   """Non-fatal validation issue. Presubmit should pass."""
-  def __init__(self, message: str, **tags: Dict[str, str]):
-    super().__init__(message=message, fatal=False, **tags)
+  def __init__(self, message: str):
+    super().__init__(message=message, fatal=False)
diff --git a/presubmit_canned_checks.py b/presubmit_canned_checks.py
index b038e13d8..e9ad8638d 100644
--- a/presubmit_canned_checks.py
+++ b/presubmit_canned_checks.py
@@ -10,6 +10,8 @@ import io as _io
 import os as _os
 import zlib
 
+import metadata.validate
+
 _HERE = _os.path.dirname(_os.path.abspath(__file__))
 
 # These filters will be disabled if callers do not explicitly supply a
@@ -375,6 +377,33 @@ def CheckChangeHasNoCrAndHasOnlyOneEol(input_api, output_api,
       items=eof_files))
   return outputs
 
+
+def CheckChromiumMetadataFiles(input_api, output_api, file_filter=None):
+  """Check affected Chromium metadata files are valid."""
+  # Restrict this check to README.chromium files if the file filter is not
+  # specified.
+  if file_filter is None:
+    file_filter = lambda f: f.LocalPath().endswith('README.chromium')
+
+  # The repo's root directory is required to check license files.
+  repo_root_dir = input_api.change.RepositoryRoot()
+
+  outputs = []
+  for f in input_api.AffectedFiles(file_filter=file_filter):
+    errors, warnings = metadata.validate.check_file(
+        filepath=f.AbsoluteLocalPath(),
+        repo_root_dir=repo_root_dir,
+    )
+
+    for warning in warnings:
+      outputs.append(output_api.PresubmitPromptWarning(warning, [f]))
+
+    for error in errors:
+      outputs.append(output_api.PresubmitError(error, [f]))
+
+  return outputs
+
+
 def CheckGenderNeutral(input_api, output_api, source_file_filter=None):
   """Checks that there are no gendered pronouns in any of the text files to be
   submitted.
@@ -398,7 +427,6 @@ def CheckGenderNeutral(input_api, output_api, source_file_filter=None):
   return []
 
 
-
 def _ReportErrorFileAndLine(filename, line_num, dummy_line):
   """Default error formatter for _FindNewViolationsOfRule."""
   return '%s:%s' % (filename, line_num)