Handle binaries in presubmit_diff

presubmit_diff always tries to decode files from Gitiles with UTF-8
but if the file is binary, it gets a UnicodeDecodeError. Don't
assume that a file is text and just write the bytes directly.

Bug: b/358175830
Change-Id: Iaa259051b4157737397933e9d994c00e9da5836c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5771938
Reviewed-by: Scott Lee <ddoman@chromium.org>
Commit-Queue: Gavin Mak <gavinmak@google.com>
changes/38/5771938/5
Gavin Mak 1 year ago committed by LUCI CQ
parent a603aded9a
commit f52dad9747

@ -10,10 +10,10 @@ from __future__ import annotations
import argparse
import base64
import concurrent.futures
import os
import platform
import sys
import concurrent.futures
import gclient_utils
from gerrit_util import (CreateHttpConn, ReadHttpResponse,
@ -24,10 +24,10 @@ DEV_NULL = "/dev/null"
HEADER_DELIMITER = "@@"
def fetch_content(host: str, repo: str, ref: str, file: str) -> str:
def fetch_content(host: str, repo: str, ref: str, file: str) -> bytes:
"""Fetches the content of a file from Gitiles.
If the file does not exist at the commit, it returns an empty string.
If the file does not exist at the commit, returns an empty bytes object.
Args:
host: Gerrit host.
@ -36,13 +36,13 @@ def fetch_content(host: str, repo: str, ref: str, file: str) -> str:
file: Path of file to fetch.
Returns:
A string containing the content of the file at the commit, or an empty
string if the file does not exist at the commit.
Bytes of the file at the commit or an empty bytes object if the file
does not exist at the commit.
"""
conn = CreateHttpConn(f"{host}.googlesource.com",
f"{repo}/+show/{ref}/{file}?format=text")
response = ReadHttpResponse(conn, accept_statuses=[200, 404])
return base64.b64decode(response.read()).decode("utf-8")
return base64.b64decode(response.read())
def git_diff(src: str | None, dest: str | None) -> str:
@ -110,7 +110,7 @@ def _create_diff(host: str, repo: str, ref: str, root: str, file: str) -> str:
if old_content:
old_file = os.path.join(tmp_root, file)
os.makedirs(os.path.dirname(old_file), exist_ok=True)
with open(old_file, "w") as f:
with open(old_file, "wb") as f:
f.write(old_content)
if not old_file and not new_file:

@ -23,25 +23,31 @@ class PresubmitDiffTest(unittest.TestCase):
# State of the local directory.
self.root = tempfile.mkdtemp()
os.makedirs(os.path.join(self.root, "nested"))
with open(os.path.join(self.root, "unchanged.txt"), "w") as f:
f.write("unchanged\n")
with open(os.path.join(self.root, "added.txt"), "w") as f:
f.write("added\n")
with open(os.path.join(self.root, "modified.txt"), "w") as f:
f.write("modified... foo\n")
with open(os.path.join(self.root, "nested/modified.txt"), "w") as f:
f.write("goodbye\n")
# On Windows, writing "\n" in text mode becomes "\r\n". Write in binary
# so that doesn't happen, otherwise tests will fail.
with open(os.path.join(self.root, "unchanged.txt"), "wb") as f:
f.write("unchanged\n".encode("utf-8"))
with open(os.path.join(self.root, "added.txt"), "wb") as f:
f.write("added\n".encode("utf-8"))
with open(os.path.join(self.root, "modified.txt"), "wb") as f:
f.write("modified... foo\n".encode("utf-8"))
with open(os.path.join(self.root, "nested/modified.txt"), "wb") as f:
f.write("goodbye\n".encode("utf-8"))
# State of the remote repository.
fetch_data = {
"unchanged.txt": "unchanged\n",
"deleted.txt": "deleted\n",
"modified.txt": "modified... bar\n",
"nested/modified.txt": "hello\n",
"unchanged.txt": "unchanged\n".encode("utf-8"),
"deleted.txt": "deleted\n".encode("utf-8"),
"modified.txt": "modified... bar\n".encode("utf-8"),
"nested/modified.txt": "hello\n".encode("utf-8"),
# Intenionally invalid start byte for utf-8.
"deleted_binary": b"\xff\x00",
}
def fetch_side_effect(host, repo, ref, file):
return fetch_data.get(file, "")
return fetch_data.get(file, b"")
fetch_content_mock = mock.patch("presubmit_diff.fetch_content",
side_effect=fetch_side_effect)
@ -111,6 +117,17 @@ index 71779d2c..00000000
{"deleted.txt": expected_diff},
)
def test_create_diffs_with_binary_file(self):
expected_diff = """diff --git a/deleted_binary b/deleted_binary
deleted file mode 100644
index ce542efaa..00000000
Binary files a/deleted_binary and /dev/null differ
"""
self._test_create_diffs(
["deleted_binary"],
{"deleted_binary": expected_diff},
)
# pylint: disable=line-too-long
def test_create_diffs_with_modified_files(self):

Loading…
Cancel
Save