depot_tools/metadata/parse.py

#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os
import re
import sys
from typing import List

_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory.
_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))

# Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR)

import metadata.fields.known as known_fields
import metadata.dependency_metadata as dm

# Delimiter used to separate a field's name from its value.
FIELD_DELIMITER = ":"

# A line consisting of twenty dashes, the words "DEPENDENCY DIVIDER"
# and twenty more dashes separates dependencies within the same
# metadata file.
DEPENDENCY_DIVIDER = re.compile(r"^-{20} DEPENDENCY DIVIDER -{20}$")

# Whether a field is treated as structured text when no field
# definition says otherwise.
_DEFAULT_TO_STRUCTURED_TEXT = False

# Heuristic for detecting unknown field names. A "word" is an uppercase
# ASCII letter followed by at least one more word character; a field
# name is one or more such words separated by single spaces, located at
# the start of the line and followed by the field delimiter.
# NOTE(review): inside a character class `\b` is the backspace
# character rather than a word boundary, so in practice the delimiter
# has to be followed by whitespace for this pattern to match.
_PATTERN_FIELD_NAME_WORD_HEURISTIC = r"[A-Z]\w+"
_PATTERN_FIELD_NAME_HEURISTIC = re.compile(
    r"^({}(?: {})*){}[\b\s]".format(
        _PATTERN_FIELD_NAME_WORD_HEURISTIC,
        _PATTERN_FIELD_NAME_WORD_HEURISTIC,
        FIELD_DELIMITER,
    ))

# Pattern used to check if a line from a metadata file declares a new
# field: one of the known field names (matched case-insensitively) at
# the start of the line, immediately followed by the field delimiter.
# Each name is escaped so that it is always matched literally, even if
# a field name ever contains a regex metacharacter.
_PATTERN_KNOWN_FIELD_DECLARATION = re.compile(
    "^({}){}".format(
        "|".join(re.escape(name) for name in known_fields.ALL_FIELD_NAMES),
        FIELD_DELIMITER), re.IGNORECASE)


def parse_content(content: str) -> List[dm.DependencyMetadata]:
    """Reads and parses the metadata from the given string.

    The content is processed line by line:
      * a line matching `DEPENDENCY_DIVIDER` finishes the dependency
        being read and starts a new one;
      * a line declaring a field (a known field name followed by
        `FIELD_DELIMITER`, or, while the current field is structured,
        anything matching the field name heuristic) starts a new field;
      * any other line is appended to the value of the field currently
        being read, or ignored if no field is being read.

    Args:
        content: the string to parse metadata from.

    Returns: all the metadata, which may be for zero or more
             dependencies, from the given string.
    """
    dependencies = []
    current_metadata = dm.DependencyMetadata()
    current_field_name = None
    current_field_value = ""
    current_field_is_structured = _DEFAULT_TO_STRUCTURED_TEXT
    for line in content.splitlines(keepends=True):
        # Check if a new dependency is being described. The line ending
        # is removed before matching because the `$` in the divider
        # pattern only tolerates a trailing "\n"; without this, a
        # divider in a file with "\r\n" line endings is not recognized.
        if DEPENDENCY_DIVIDER.match(line.rstrip("\r\n")):
            if current_field_name:
                # Save the field value for the previous dependency.
                current_metadata.add_entry(current_field_name,
                                           current_field_value)
            if current_metadata.has_entries():
                # Add the previous dependency to the results.
                dependencies.append(current_metadata)
            # Reset for the new dependency's metadata,
            # and reset the field state to the same defaults used
            # before the first line was read.
            current_metadata = dm.DependencyMetadata()
            current_field_name = None
            current_field_value = ""
            current_field_is_structured = _DEFAULT_TO_STRUCTURED_TEXT

        elif (_PATTERN_KNOWN_FIELD_DECLARATION.match(line)
              or (current_field_is_structured
                  and _PATTERN_FIELD_NAME_HEURISTIC.match(line))):
            # Save the field value to the current dependency's metadata.
            if current_field_name:
                current_metadata.add_entry(current_field_name,
                                           current_field_value)

            # Both patterns above include the delimiter, so the split
            # always yields exactly a name and a value. The value keeps
            # everything after the first delimiter, including the line
            # ending.
            current_field_name, current_field_value = line.split(
                FIELD_DELIMITER, 1)
            field = known_fields.get_field(current_field_name)

            # Treats unknown fields as `_DEFAULT_TO_STRUCTURED_TEXT`.
            if field:
                current_field_is_structured = field.is_structured()
            else:
                current_field_is_structured = _DEFAULT_TO_STRUCTURED_TEXT

            if field and field.is_one_liner():
                # The field should be on one line, so add it now.
                current_metadata.add_entry(current_field_name,
                                           current_field_value)
                # Reset the field state, so that following lines which
                # do not declare a field are not appended to this one.
                current_field_name = None
                current_field_value = ""

        elif current_field_name:
            # The field is on multiple lines, so add this line to the
            # field value.
            current_field_value += line

    # At this point, the end of the file has been reached.
    # Save any remaining field data and metadata.
    if current_field_name:
        current_metadata.add_entry(current_field_name, current_field_value)
    if current_metadata.has_entries():
        dependencies.append(current_metadata)

    return dependencies
[ssci] Added parser for README validator Bug: b:277147404 Change-Id: I7ee0fe35e1017eb477255f12045d00e855f7dfb4 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4787830 Reviewed-by: Rachael Newitt <renewitt@google.com> Auto-Submit: Anne Redulla <aredulla@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`#!/usr/bin/env python3`
			`# Copyright 2023 The Chromium Authors. All rights reserved.`
			`# Use of this source code is governed by a BSD-style license that can be`
			`# found in the LICENSE file.`

			`import os`
			`import re`
			`import sys`
			`from typing import List`

			`_THIS_DIR = os.path.abspath(os.path.dirname(__file__))`
			`# The repo's root directory.`
			`_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))`

			`# Add the repo's root directory for clearer imports.`
			`sys.path.insert(0, _ROOT_DIR)`

			`import metadata.fields.known as known_fields`
			`import metadata.dependency_metadata as dm`

			`# Line used to separate dependencies within the same metadata file.`
			`DEPENDENCY_DIVIDER = re.compile(r"^-{20} DEPENDENCY DIVIDER -{20}$")`

			`# Delimiter used to separate a field's name from its value.`
			`FIELD_DELIMITER = ":"`

metadata: add "structured" field parsing This CL adds a "structured" concept to the parser. In a structured field, the parser will proactively look for field-like patterns to start a new field (even if they aren't known fields). This mitigates the issue when an unknown field immediately follows a multi-line text field, such as: URL: https://example.com UnknownField: abc And URL field value parses to "https://example.com<newline>UnknownField:abc". Bug: b/324149233 Change-Id: I54807bd7b242fc14c679483453ade83f8fd20225 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5379679 Reviewed-by: Anne Redulla <aredulla@google.com> Commit-Queue: Jiewei Qian <qjw@chromium.org> 1 year ago			`# Heuristic for detecting unknown field names.`
			`_PATTERN_FIELD_NAME_WORD_HEURISTIC = r"[A-Z]\w+"`
			`_PATTERN_FIELD_NAME_HEURISTIC = re.compile(r"^({}(?: {})*){}[\b\s]".format(`
			`_PATTERN_FIELD_NAME_WORD_HEURISTIC, _PATTERN_FIELD_NAME_WORD_HEURISTIC,`
			`FIELD_DELIMITER))`
			`_DEFAULT_TO_STRUCTURED_TEXT = False`

[ssci] PEP8 formatting for metadata directory All files in metadata/ are new, so they should follow the PEP-8 style. Change-Id: I5d8424536c3d7b703e6b8087e0e2d70c06a1549c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4834909 Reviewed-by: Rachael Newitt <renewitt@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`# Pattern used to check if a line from a metadata file declares a new`
			`# field.`
metadata: add "structured" field parsing This CL adds a "structured" concept to the parser. In a structured field, the parser will proactively look for field-like patterns to start a new field (even if they aren't known fields). This mitigates the issue when an unknown field immediately follows a multi-line text field, such as: URL: https://example.com UnknownField: abc And URL field value parses to "https://example.com<newline>UnknownField:abc". Bug: b/324149233 Change-Id: I54807bd7b242fc14c679483453ade83f8fd20225 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5379679 Reviewed-by: Anne Redulla <aredulla@google.com> Commit-Queue: Jiewei Qian <qjw@chromium.org> 1 year ago			`_PATTERN_KNOWN_FIELD_DECLARATION = re.compile(`
[ssci] Added parser for README validator Bug: b:277147404 Change-Id: I7ee0fe35e1017eb477255f12045d00e855f7dfb4 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4787830 Reviewed-by: Rachael Newitt <renewitt@google.com> Auto-Submit: Anne Redulla <aredulla@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`"^({}){}".format("\|".join(known_fields.ALL_FIELD_NAMES), FIELD_DELIMITER),`
[ssci] PEP8 formatting for metadata directory All files in metadata/ are new, so they should follow the PEP-8 style. Change-Id: I5d8424536c3d7b703e6b8087e0e2d70c06a1549c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4834909 Reviewed-by: Rachael Newitt <renewitt@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`re.IGNORECASE)`
[ssci] Added parser for README validator Bug: b:277147404 Change-Id: I7ee0fe35e1017eb477255f12045d00e855f7dfb4 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4787830 Reviewed-by: Rachael Newitt <renewitt@google.com> Auto-Submit: Anne Redulla <aredulla@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago

[ssci] Added CheckChromiumDependencyMetadata in presubmit_canned_checks This CL adds a new function `CheckChromiumDependencyMetadata` in `presubmit_canned_checks.py`. It can be used to check that files satisfy the format defined by `README.chromium.template` (https://chromium.googlesource.com/chromium/src/+/main/third_party/README.chromium.template). The code for metadata validation can be found in `//metadata`. Note that all metadata validation issues will be returned as warnings only for now, while the quality of metadata is being uplifted. Bug: b:277147404 Change-Id: Iacf1b3a11219ab752549f6dc6e882c93c0fbe780 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4812578 Commit-Queue: Anne Redulla <aredulla@google.com> Reviewed-by: Rachael Newitt <renewitt@google.com> Reviewed-by: Gavin Mak <gavinmak@google.com> Reviewed-by: Bruce Dawson <brucedawson@chromium.org> 2 years ago			`def parse_content(content: str) -> List[dm.DependencyMetadata]:`
[ssci] PEP8 formatting for metadata directory All files in metadata/ are new, so they should follow the PEP-8 style. Change-Id: I5d8424536c3d7b703e6b8087e0e2d70c06a1549c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4834909 Reviewed-by: Rachael Newitt <renewitt@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`"""Reads and parses the metadata from the given string.`
[ssci] Added parser for README validator Bug: b:277147404 Change-Id: I7ee0fe35e1017eb477255f12045d00e855f7dfb4 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4787830 Reviewed-by: Rachael Newitt <renewitt@google.com> Auto-Submit: Anne Redulla <aredulla@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago
			`Args:`
[ssci] Added CheckChromiumDependencyMetadata in presubmit_canned_checks This CL adds a new function `CheckChromiumDependencyMetadata` in `presubmit_canned_checks.py`. It can be used to check that files satisfy the format defined by `README.chromium.template` (https://chromium.googlesource.com/chromium/src/+/main/third_party/README.chromium.template). The code for metadata validation can be found in `//metadata`. Note that all metadata validation issues will be returned as warnings only for now, while the quality of metadata is being uplifted. Bug: b:277147404 Change-Id: Iacf1b3a11219ab752549f6dc6e882c93c0fbe780 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4812578 Commit-Queue: Anne Redulla <aredulla@google.com> Reviewed-by: Rachael Newitt <renewitt@google.com> Reviewed-by: Gavin Mak <gavinmak@google.com> Reviewed-by: Bruce Dawson <brucedawson@chromium.org> 2 years ago			`content: the string to parse metadata from.`
[ssci] Added parser for README validator Bug: b:277147404 Change-Id: I7ee0fe35e1017eb477255f12045d00e855f7dfb4 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4787830 Reviewed-by: Rachael Newitt <renewitt@google.com> Auto-Submit: Anne Redulla <aredulla@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago
[ssci] PEP8 formatting for metadata directory All files in metadata/ are new, so they should follow the PEP-8 style. Change-Id: I5d8424536c3d7b703e6b8087e0e2d70c06a1549c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4834909 Reviewed-by: Rachael Newitt <renewitt@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`Returns: all the metadata, which may be for zero or more`
			`dependencies, from the given string.`
[ssci] Added parser for README validator Bug: b:277147404 Change-Id: I7ee0fe35e1017eb477255f12045d00e855f7dfb4 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4787830 Reviewed-by: Rachael Newitt <renewitt@google.com> Auto-Submit: Anne Redulla <aredulla@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`"""`
[ssci] PEP8 formatting for metadata directory All files in metadata/ are new, so they should follow the PEP-8 style. Change-Id: I5d8424536c3d7b703e6b8087e0e2d70c06a1549c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4834909 Reviewed-by: Rachael Newitt <renewitt@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`dependencies = []`
			`current_metadata = dm.DependencyMetadata()`
			`current_field_name = None`
			`current_field_value = ""`
metadata: add "structured" field parsing This CL adds a "structured" concept to the parser. In a structured field, the parser will proactively look for field-like patterns to start a new field (even if they aren't known fields). This mitigates the issue when an unknown field immediately follows a multi-line text field, such as: URL: https://example.com UnknownField: abc And URL field value parses to "https://example.com<newline>UnknownField:abc". Bug: b/324149233 Change-Id: I54807bd7b242fc14c679483453ade83f8fd20225 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5379679 Reviewed-by: Anne Redulla <aredulla@google.com> Commit-Queue: Jiewei Qian <qjw@chromium.org> 1 year ago			`current_field_is_structured = _DEFAULT_TO_STRUCTURED_TEXT`
[ssci] PEP8 formatting for metadata directory All files in metadata/ are new, so they should follow the PEP-8 style. Change-Id: I5d8424536c3d7b703e6b8087e0e2d70c06a1549c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4834909 Reviewed-by: Rachael Newitt <renewitt@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`for line in content.splitlines(keepends=True):`
			`# Check if a new dependency is being described.`
			`if DEPENDENCY_DIVIDER.match(line):`
			`if current_field_name:`
			`# Save the field value for the previous dependency.`
			`current_metadata.add_entry(current_field_name,`
			`current_field_value)`
			`if current_metadata.has_entries():`
			`# Add the previous dependency to the results.`
			`dependencies.append(current_metadata)`
			`# Reset for the new dependency's metadata,`
			`# and reset the field state.`
			`current_metadata = dm.DependencyMetadata()`
			`current_field_name = None`
			`current_field_value = ""`
metadata: add "structured" field parsing This CL adds a "structured" concept to the parser. In a structured field, the parser will proactively look for field-like patterns to start a new field (even if they aren't known fields). This mitigates the issue when an unknown field immediately follows a multi-line text field, such as: URL: https://example.com UnknownField: abc And URL field value parses to "https://example.com<newline>UnknownField:abc". Bug: b/324149233 Change-Id: I54807bd7b242fc14c679483453ade83f8fd20225 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5379679 Reviewed-by: Anne Redulla <aredulla@google.com> Commit-Queue: Jiewei Qian <qjw@chromium.org> 1 year ago			`current_field_is_structured = False`
[ssci] PEP8 formatting for metadata directory All files in metadata/ are new, so they should follow the PEP-8 style. Change-Id: I5d8424536c3d7b703e6b8087e0e2d70c06a1549c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4834909 Reviewed-by: Rachael Newitt <renewitt@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago
metadata: add "structured" field parsing This CL adds a "structured" concept to the parser. In a structured field, the parser will proactively look for field-like patterns to start a new field (even if they aren't known fields). This mitigates the issue when an unknown field immediately follows a multi-line text field, such as: URL: https://example.com UnknownField: abc And URL field value parses to "https://example.com<newline>UnknownField:abc". Bug: b/324149233 Change-Id: I54807bd7b242fc14c679483453ade83f8fd20225 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5379679 Reviewed-by: Anne Redulla <aredulla@google.com> Commit-Queue: Jiewei Qian <qjw@chromium.org> 1 year ago			`elif (_PATTERN_KNOWN_FIELD_DECLARATION.match(line)`
			`or (current_field_is_structured`
			`and _PATTERN_FIELD_NAME_HEURISTIC.match(line))):`
[ssci] PEP8 formatting for metadata directory All files in metadata/ are new, so they should follow the PEP-8 style. Change-Id: I5d8424536c3d7b703e6b8087e0e2d70c06a1549c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4834909 Reviewed-by: Rachael Newitt <renewitt@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`# Save the field value to the current dependency's metadata.`
			`if current_field_name:`
			`current_metadata.add_entry(current_field_name,`
			`current_field_value)`

			`current_field_name, current_field_value = line.split(`
			`FIELD_DELIMITER, 1)`
			`field = known_fields.get_field(current_field_name)`
metadata: add "structured" field parsing This CL adds a "structured" concept to the parser. In a structured field, the parser will proactively look for field-like patterns to start a new field (even if they aren't known fields). This mitigates the issue when an unknown field immediately follows a multi-line text field, such as: URL: https://example.com UnknownField: abc And URL field value parses to "https://example.com<newline>UnknownField:abc". Bug: b/324149233 Change-Id: I54807bd7b242fc14c679483453ade83f8fd20225 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5379679 Reviewed-by: Anne Redulla <aredulla@google.com> Commit-Queue: Jiewei Qian <qjw@chromium.org> 1 year ago
			# Treats unknown fields as `_DEFAULT_TO_STRUCTURED_TEXT`.
			`current_field_is_structured = field.is_structured(`
			`) if field else _DEFAULT_TO_STRUCTURED_TEXT`

[ssci] PEP8 formatting for metadata directory All files in metadata/ are new, so they should follow the PEP-8 style. Change-Id: I5d8424536c3d7b703e6b8087e0e2d70c06a1549c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4834909 Reviewed-by: Rachael Newitt <renewitt@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`if field and field.is_one_liner():`
			`# The field should be on one line, so add it now.`
			`current_metadata.add_entry(current_field_name,`
			`current_field_value)`
			`# Reset the field state.`
			`current_field_name = None`
			`current_field_value = ""`

			`elif current_field_name:`
			`# The field is on multiple lines, so add this line to the`
			`# field value.`
			`current_field_value += line`

			`# At this point, the end of the file has been reached.`
			`# Save any remaining field data and metadata.`
			`if current_field_name:`
[ssci] Added parser for README validator Bug: b:277147404 Change-Id: I7ee0fe35e1017eb477255f12045d00e855f7dfb4 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4787830 Reviewed-by: Rachael Newitt <renewitt@google.com> Auto-Submit: Anne Redulla <aredulla@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`current_metadata.add_entry(current_field_name, current_field_value)`
[ssci] PEP8 formatting for metadata directory All files in metadata/ are new, so they should follow the PEP-8 style. Change-Id: I5d8424536c3d7b703e6b8087e0e2d70c06a1549c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4834909 Reviewed-by: Rachael Newitt <renewitt@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`if current_metadata.has_entries():`
[ssci] Added parser for README validator Bug: b:277147404 Change-Id: I7ee0fe35e1017eb477255f12045d00e855f7dfb4 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4787830 Reviewed-by: Rachael Newitt <renewitt@google.com> Auto-Submit: Anne Redulla <aredulla@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`dependencies.append(current_metadata)`

[ssci] PEP8 formatting for metadata directory All files in metadata/ are new, so they should follow the PEP-8 style. Change-Id: I5d8424536c3d7b703e6b8087e0e2d70c06a1549c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4834909 Reviewed-by: Rachael Newitt <renewitt@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> 2 years ago			`return dependencies`