You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
depot_tools/metadata/parse.py

109 lines
4.1 KiB
Python

#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import re
import sys
from typing import List
# Absolute path of the directory that contains this file.
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory (the parent of this file's directory).
_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
# Add the repo's root directory for clearer imports.
# It is inserted at index 0, and this must happen before the `metadata.*`
# imports below so that they can be resolved from the root directory.
sys.path.insert(0, _ROOT_DIR)
import metadata.fields.known as known_fields
import metadata.dependency_metadata as dm

# Line used to separate dependencies within the same metadata file.
# The pattern is anchored at both ends: exactly 20 dashes, the text
# " DEPENDENCY DIVIDER ", then exactly 20 dashes. `$` matches at the end
# of the string or just before a single trailing "\n".
DEPENDENCY_DIVIDER = re.compile(r"^-{20} DEPENDENCY DIVIDER -{20}$")
# Delimiter used to separate a field's name from its value.
FIELD_DELIMITER = ":"

# Heuristic for detecting unknown field names.
# A "word" is an uppercase ASCII letter followed by one or more word
# characters; a field name is one or more such words separated by single
# spaces, starting at the beginning of the line and followed by the
# delimiter and then one character from the class `[\b\s]`.
# NOTE(review): inside a character class `\b` is the backspace character,
# not a word boundary, so in practice the delimiter must be followed by
# whitespace - confirm this is the intended behavior.
_PATTERN_FIELD_NAME_WORD_HEURISTIC = r"[A-Z]\w+"
_PATTERN_FIELD_NAME_HEURISTIC = re.compile(r"^({}(?: {})*){}[\b\s]".format(
_PATTERN_FIELD_NAME_WORD_HEURISTIC, _PATTERN_FIELD_NAME_WORD_HEURISTIC,
FIELD_DELIMITER))

# Whether field text is considered structured when nothing more specific
# is known: used as the parser's initial state and for field names that
# `known_fields.get_field` does not recognize.
_DEFAULT_TO_STRUCTURED_TEXT = False

# Pattern used to check if a line from a metadata file declares a new
# field.
# It matches, case-insensitively, any of the known field names at the
# start of the line when immediately followed by the delimiter.
# NOTE(review): the names are interpolated without `re.escape`; presumably
# none of them contains regex metacharacters - verify against
# `known_fields.ALL_FIELD_NAMES`.
_PATTERN_KNOWN_FIELD_DECLARATION = re.compile(
"^({}){}".format("|".join(known_fields.ALL_FIELD_NAMES), FIELD_DELIMITER),
re.IGNORECASE)
def parse_content(content: str) -> List[dm.DependencyMetadata]:
    """Reads and parses the metadata from the given string.

    The content is processed line by line, with line terminators kept:
      * a dependency divider line finishes the current dependency and
        starts a new one;
      * a line declaring a field (a known field name, or - only while the
        current field is structured - anything that looks like a field
        name) starts a new field; the text after the first delimiter,
        including the line terminator, becomes the start of its value;
      * any other line is appended to the value of the field currently
        being read, or ignored if no field is being read.

    Args:
        content: the string to parse metadata from.

    Returns: all the metadata, which may be for zero or more
             dependencies, from the given string. Dependencies without
             any entries are omitted.
    """
    dependencies = []
    current_metadata = dm.DependencyMetadata()
    current_field_name = None
    current_field_value = ""
    current_field_is_structured = _DEFAULT_TO_STRUCTURED_TEXT

    for line in content.splitlines(keepends=True):
        # Check if a new dependency is being described.
        # The line terminator is stripped for this check only: the
        # divider pattern ends with `$`, which tolerates a trailing "\n"
        # but not "\r\n" or "\r", so dividers in content with such line
        # endings would otherwise go undetected.
        if DEPENDENCY_DIVIDER.match(line.rstrip("\r\n")):
            if current_field_name:
                # Save the field value for the previous dependency.
                current_metadata.add_entry(current_field_name,
                                           current_field_value)

            if current_metadata.has_entries():
                # Add the previous dependency to the results.
                dependencies.append(current_metadata)

            # Reset for the new dependency's metadata,
            # and reset the field state.
            current_metadata = dm.DependencyMetadata()
            current_field_name = None
            current_field_value = ""
            current_field_is_structured = _DEFAULT_TO_STRUCTURED_TEXT

        elif (_PATTERN_KNOWN_FIELD_DECLARATION.match(line)
              or (current_field_is_structured
                  and _PATTERN_FIELD_NAME_HEURISTIC.match(line))):
            # Save the field value to the current dependency's metadata.
            if current_field_name:
                current_metadata.add_entry(current_field_name,
                                           current_field_value)

            # Both patterns guarantee the delimiter is present, so the
            # split always yields exactly a name and a value.
            current_field_name, current_field_value = line.split(
                FIELD_DELIMITER, 1)
            field = known_fields.get_field(current_field_name)

            # Treats unknown fields as `_DEFAULT_TO_STRUCTURED_TEXT`.
            current_field_is_structured = (field.is_structured() if field
                                           else _DEFAULT_TO_STRUCTURED_TEXT)

            if field and field.is_one_liner():
                # The field should be on one line, so add it now.
                current_metadata.add_entry(current_field_name,
                                           current_field_value)
                # Reset the field state. Lines that follow are ignored
                # until the next field declaration or divider.
                current_field_name = None
                current_field_value = ""

        elif current_field_name:
            # The field is on multiple lines, so add this line to the
            # field value.
            current_field_value += line

    # At this point, the end of the file has been reached.
    # Save any remaining field data and metadata.
    if current_field_name:
        current_metadata.add_entry(current_field_name, current_field_value)

    if current_metadata.has_entries():
        dependencies.append(current_metadata)

    return dependencies