diff --git a/presubmit_diff.py b/presubmit_diff.py new file mode 100755 index 000000000..259a10358 --- /dev/null +++ b/presubmit_diff.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +# Copyright (c) 2024 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Tool for generating a unified git diff outside of a git workspace. + +This is intended as a preprocessor for presubmit_support.py. +""" +from __future__ import annotations + +import argparse +import base64 +import os +import platform +import sys + +import gclient_utils +from gerrit_util import CreateHttpConn, ReadHttpResponse +import subprocess2 + +DEV_NULL = "/dev/null" +HEADER_DELIMITER = "@@" + + +def fetch_content(host: str, repo: str, ref: str, file: str) -> str: + """Fetches the content of a file from Gitiles. + + If the file does not exist at the commit, it returns an empty string. + + Args: + host: Gerrit host. + repo: Gerrit repo. + ref: Gerrit commit. + file: Path of file to fetch. + + Returns: + A string containing the content of the file at the commit, or an empty + string if the file does not exist at the commit. + """ + conn = CreateHttpConn(f"{host}.googlesource.com", + f"{repo}/+show/{ref}/{file}?format=text") + response = ReadHttpResponse(conn, accept_statuses=[200, 404]) + return base64.b64decode(response.read()).decode("utf-8") + + +def git_diff(src: str | None, dest: str | None) -> str: + """Returns the result of `git diff --no-index` between two paths. + + If a path is not specified, the diff is against /dev/null. At least one of + src or dest must be specified. + + Args: + src: Source path. + dest: Destination path. + + Returns: + A string containing the git diff. + """ + return subprocess2.capture( + ["git", "diff", "--no-index", "--", src or DEV_NULL, dest + or DEV_NULL]).decode("utf-8") + + +def _process_diff(diff: str, src_root: str, dst_root: str) -> str: + """Adjust paths in the diff header so they're relative to the root. + + This also modifies paths on Windows to use forward slashes. + """ + if not diff: + return "" + + header, body = diff.split(HEADER_DELIMITER, maxsplit=1) + norm_src = src_root.rstrip(os.sep) + norm_dst = dst_root.rstrip(os.sep) + + if platform.system() == "Windows": + # Absolute paths on Windows use the format: + # "a/C:\\abspath\\to\\file.txt" + header = header.replace("\\\\", "\\") + header = header.replace('"', "") + header = header.replace(norm_src + "\\", "") + header = header.replace(norm_dst + "\\", "") + else: + # Other systems use: + # a/abspath/to/file.txt + header = header.replace(norm_src, "") + header = header.replace(norm_dst, "") + + return header + HEADER_DELIMITER + body + + +def create_diffs(host: str, repo: str, ref: str, root: str, + files: list[str]) -> dict[str, str]: + """Calculates diffs of files in a directory against a commit. + + Args: + host: Gerrit host. + repo: Gerrit repo. + ref: Gerrit commit. + root: Path of local directory containing modified files. + files: List of file paths relative to root. + + Returns: + A dict mapping file paths to diffs. + + Raises: + RuntimeError: If a file is missing in both the root and the repo. + """ + diffs = {} + with gclient_utils.temporary_directory() as tmp_root: + # TODO(gavinmak): Parallelize fetching content. + for file in files: + new_file = os.path.join(root, file) + if not os.path.exists(new_file): + new_file = None + + old_file = None + old_content = fetch_content(host, repo, ref, file) + if old_content: + old_file = os.path.join(tmp_root, file) + os.makedirs(os.path.dirname(old_file), exist_ok=True) + with open(old_file, "w") as f: + f.write(old_content) + + if not old_file and not new_file: + raise RuntimeError(f"Could not access file {file} from {root} " + f"or from {host}/{repo}:{ref}.") + + diff = git_diff(old_file, new_file) + diffs[file] = _process_diff(diff, tmp_root, root) + + return diffs + + +def main(argv): + parser = argparse.ArgumentParser( + usage="%(prog)s [options] ", + description="Makes a unified git diff against a Gerrit commit.", + ) + parser.add_argument("--output", help="File to write the diff to.") + parser.add_argument("--host", required=True, help="Gerrit host.") + parser.add_argument("--repo", required=True, help="Gerrit repo.") + parser.add_argument("--ref", + required=True, + help="Gerrit ref to diff against.") + parser.add_argument("--root", + required=True, + help="Folder containing modified files.") + parser.add_argument( + "files", + nargs="+", + help="List of changed files. Paths are relative to the repo root.", + ) + options = parser.parse_args(argv) + + diffs = create_diffs(options.host, options.repo, options.ref, options.root, + options.files) + + unified_diff = "\n".join([d for d in diffs.values() if d]) + if options.output: + with open(options.output, "w") as f: + f.write(unified_diff) + else: + print(unified_diff) + + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/tests/presubmit_diff_test.py b/tests/presubmit_diff_test.py new file mode 100755 index 000000000..e0336d829 --- /dev/null +++ b/tests/presubmit_diff_test.py @@ -0,0 +1,223 @@ +#!/usr/bin/env vpython3 +# Copyright (c) 2024 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Unit tests for presubmit_diff.py.""" + +import os +import sys +import tempfile +import unittest +from unittest import mock + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import gclient_utils +import presubmit_diff + + +class PresubmitDiffTest(unittest.TestCase): + + def setUp(self): + # State of the local directory. + self.root = tempfile.mkdtemp() + os.makedirs(os.path.join(self.root, "nested")) + with open(os.path.join(self.root, "unchanged.txt"), "w") as f: + f.write("unchanged\n") + with open(os.path.join(self.root, "added.txt"), "w") as f: + f.write("added\n") + with open(os.path.join(self.root, "modified.txt"), "w") as f: + f.write("modified... foo\n") + with open(os.path.join(self.root, "nested/modified.txt"), "w") as f: + f.write("goodbye\n") + + # State of the remote repository. + fetch_data = { + "unchanged.txt": "unchanged\n", + "deleted.txt": "deleted\n", + "modified.txt": "modified... bar\n", + "nested/modified.txt": "hello\n", + } + + def fetch_side_effect(host, repo, ref, file): + return fetch_data.get(file, "") + + fetch_content_mock = mock.patch("presubmit_diff.fetch_content", + side_effect=fetch_side_effect) + fetch_content_mock.start() + + self.addCleanup(mock.patch.stopall) + + def tearDown(self): + gclient_utils.rmtree(self.root) + + def _test_create_diffs(self, files: list[str], expected: dict[str, str]): + actual = presubmit_diff.create_diffs("host", "repo", "ref", self.root, + files) + self.assertEqual(actual.keys(), expected.keys()) + + # Manually check each line in the diffs except the "index" line because + # hashes can differ in length. + for file, diff in actual.items(): + expected_lines = expected[file].splitlines() + for idx, line in enumerate(diff.splitlines()): + if line.startswith("index "): + continue + self.assertEqual(line, expected_lines[idx]) + + def test_create_diffs_with_nonexistent_file_raises_error(self): + self.assertRaises( + RuntimeError, + presubmit_diff.create_diffs, + "host", + "repo", + "ref", + self.root, + ["doesnotexist.txt"], + ) + + def test_create_diffs_with_unchanged_file(self): + self._test_create_diffs( + ["unchanged.txt"], + {"unchanged.txt": ""}, + ) + + def test_create_diffs_with_added_file(self): + expected_diff = """diff --git a/added.txt b/added.txt +new file mode 100644 +index 00000000..d5f7fc3f +--- /dev/null ++++ b/added.txt +@@ -0,0 +1 @@ ++added +""" + self._test_create_diffs( + ["added.txt"], + {"added.txt": expected_diff}, + ) + + def test_create_diffs_with_deleted_file(self): + expected_diff = """diff --git a/deleted.txt b/deleted.txt +deleted file mode 100644 +index 71779d2c..00000000 +--- a/deleted.txt ++++ /dev/null +@@ -1 +0,0 @@ +-deleted +""" + self._test_create_diffs( + ["deleted.txt"], + {"deleted.txt": expected_diff}, + ) + + # pylint: disable=line-too-long + + def test_create_diffs_with_modified_files(self): + expected_diff = """diff --git a/modified.txt b/modified.txt +index a7dd0b00..12d68703 100644 +--- a/modified.txt ++++ b/modified.txt +@@ -1 +1 @@ +-modified... bar ++modified... foo +""" + expected_nested_diff = """diff --git a/nested/modified.txt b/nested/modified.txt +index ce013625..dd7e1c6f 100644 +--- a/nested/modified.txt ++++ b/nested/modified.txt +@@ -1 +1 @@ +-hello ++goodbye +""" + self._test_create_diffs( + ["modified.txt", "nested/modified.txt"], + { + "modified.txt": expected_diff, + "nested/modified.txt": expected_nested_diff, + }, + ) + + # Test cases for _process_diff. + def test_process_diff_with_no_changes(self): + self.assertEqual( + presubmit_diff._process_diff( + "", + "/path/to/src", + "/path/to/dst", + ), + "", + ) + + @mock.patch("platform.system", return_value="Linux") + @mock.patch("os.sep", new="/") + def test_process_diff_handles_unix_paths(self, sys_mock): + diff = """diff --git a/path/to/src/file.txt b/path/to/dst/file.txt +index ce013625..dd7e1c6f 100644 +--- a/path/to/file.txt ++++ b/path/to/file.txt +@@ -1 +1 @@ +-random ++content +""" + expected = """diff --git a/file.txt b/file.txt +index ce013625..dd7e1c6f 100644 +--- a/path/to/file.txt ++++ b/path/to/file.txt +@@ -1 +1 @@ +-random ++content +""" + self.assertEqual( + presubmit_diff._process_diff( + diff, + "/path/to/src", + "/path/to/dst", + ), + expected, + ) + + # Trailing slashes are handled. + self.assertEqual( + presubmit_diff._process_diff( + diff, + "/path/to/src/", + "/path/to/dst/", + ), + expected, + ) + + @mock.patch("platform.system", return_value="Windows") + @mock.patch("os.sep", new="\\") + def test_process_diff_handles_windows_paths(self, sys_mock): + diff = """diff --git "a/C:\\\\path\\\\to\\\\src\\\\file.txt" "b/C:\\\\path\\\\to\\\\dst\\\\file.txt" +index ce013625..dd7e1c6f 100644 +--- "a/C:\\\\path\\\\to\\\\src\\\\file.txt ++++ "b/C:\\\\path\\\\to\\\\dst\\\\file.txt" +@@ -1 +1 @@ +-random ++content +""" + expected = """diff --git a/file.txt b/file.txt +index ce013625..dd7e1c6f 100644 +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-random ++content +""" + self.assertEqual( + expected, + presubmit_diff._process_diff(diff, "C:\\path\\to\\src", + "C:\\path\\to\\dst"), + ) + + # Trailing slashes are handled. + self.assertEqual( + expected, + presubmit_diff._process_diff(diff, "C:\\path\\to\\src\\", + "C:\\path\\to\\dst\\"), + ) + + +if __name__ == "__main__": + unittest.main()