Fix git diff calls when a lot of files modified in a branch

Running a command line longer that 32k on Windows lead to issues (as
usual). We run git diff in multiple scenarios on a list of changed
files. This list is not partitioned and may lead to a command line
much longer than OS can allow.

This change replaces BuildGitDiffCmd with RunGitDiffCmd that splits
the file list internally and runs git diff multiple times if required.

Change-Id: Icfd8efc7520105b8d98125e90bb96c2bb9bd3a0a
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5613673
Reviewed-by: Bruce Dawson <brucedawson@chromium.org>
Commit-Queue: Aleksei Khoroshilov <akhoroshilov@brave.com>
Reviewed-by: Gavin Mak <gavinmak@google.com>
changes/73/5613673/10
Aleksey Khoroshilov 1 year ago committed by LUCI CQ
parent e757e08663
commit c0883c509e

@ -644,11 +644,10 @@ def _ComputeFormatDiffLineRanges(files, upstream_commit, expand=0):
return {} return {}
# Take the git diff and find the line ranges where there are changes. # Take the git diff and find the line ranges where there are changes.
diff_cmd = BuildGitDiffCmd(['-U0'], diff_output = RunGitDiffCmd(['-U0'],
upstream_commit, upstream_commit,
files, files,
allow_prefix=True) allow_prefix=True)
diff_output = RunGit(diff_cmd)
pattern = r'(?:^diff --git a/(?:.*) b/(.*))|(?:^@@.*\+(.*) @@)' pattern = r'(?:^diff --git a/(?:.*) b/(.*))|(?:^@@.*\+(.*) @@)'
# 2 capture groups # 2 capture groups
@ -6146,8 +6145,34 @@ def CMDowners(parser, args):
ignore_author=options.ignore_self).run() ignore_author=options.ignore_self).run()
def BuildGitDiffCmd(diff_type, upstream_commit, args, allow_prefix=False): def _SplitArgsByCmdLineLimit(args):
"""Generates a diff command.""" """Splits a list of arguments into shorter lists that fit within the command
line limit."""
# The maximum command line length is 32768 characters on Windows and 2097152
# characters on other platforms. Use a lower limit to be safe.
command_line_limit = 30000 if sys.platform.startswith('win32') else 2000000
batch_args = []
batch_length = 0
for arg in args:
# Add 1 to account for the space between arguments.
arg_length = len(arg) + 1
# If the current argument is too long to fit in a single command line,
# split it into smaller parts.
if batch_length + arg_length > command_line_limit and batch_args:
yield batch_args
batch_args = []
batch_length = 0
batch_args.append(arg)
batch_length += arg_length
if batch_args:
yield batch_args
def RunGitDiffCmd(diff_type, upstream_commit, files, allow_prefix=False):
"""Generates and runs diff command."""
# Generate diff for the current branch's changes. # Generate diff for the current branch's changes.
diff_cmd = ['-c', 'core.quotePath=false', 'diff', '--no-ext-diff'] diff_cmd = ['-c', 'core.quotePath=false', 'diff', '--no-ext-diff']
@ -6161,14 +6186,18 @@ def BuildGitDiffCmd(diff_type, upstream_commit, args, allow_prefix=False):
diff_cmd += diff_type diff_cmd += diff_type
diff_cmd += [upstream_commit, '--'] diff_cmd += [upstream_commit, '--']
if args: if not files:
for arg in args: return RunGit(diff_cmd)
if arg == '-' or os.path.isdir(arg) or os.path.isfile(arg):
diff_cmd.append(arg) for file in files:
else: if file != '-' and not os.path.isdir(file) and not os.path.isfile(file):
DieWithError('Argument "%s" is not a file or a directory' % arg) DieWithError('Argument "%s" is not a file or a directory' % file)
output = ''
for files_batch in _SplitArgsByCmdLineLimit(files):
output += RunGit(diff_cmd + files_batch)
return diff_cmd return output
def _RunClangFormatDiff(opts, clang_diff_files, top_dir, upstream_commit): def _RunClangFormatDiff(opts, clang_diff_files, top_dir, upstream_commit):
@ -6212,8 +6241,7 @@ def _RunClangFormatDiff(opts, clang_diff_files, top_dir, upstream_commit):
if not opts.dry_run and not opts.diff: if not opts.dry_run and not opts.diff:
cmd.append('-i') cmd.append('-i')
diff_cmd = BuildGitDiffCmd(['-U0'], upstream_commit, clang_diff_files) diff_output = RunGitDiffCmd(['-U0'], upstream_commit, clang_diff_files)
diff_output = RunGit(diff_cmd).encode('utf-8')
env = os.environ.copy() env = os.environ.copy()
env['PATH'] = (str(os.path.dirname(clang_format_tool)) + os.pathsep + env['PATH'] = (str(os.path.dirname(clang_format_tool)) + os.pathsep +
@ -6223,7 +6251,7 @@ def _RunClangFormatDiff(opts, clang_diff_files, top_dir, upstream_commit):
# to die with an error if `error_ok` is not set. # to die with an error if `error_ok` is not set.
stdout = RunCommand(cmd, stdout = RunCommand(cmd,
error_ok=True, error_ok=True,
stdin=diff_output, stdin=diff_output.encode(),
cwd=top_dir, cwd=top_dir,
env=env, env=env,
shell=sys.platform.startswith('win32')) shell=sys.platform.startswith('win32'))
@ -6276,8 +6304,7 @@ def _RunGoogleJavaFormat(opts, paths, top_dir, upstream_commit):
# If --diff is passed, google-java-format will output formatted content. # If --diff is passed, google-java-format will output formatted content.
# Diff it with the existing file in the checkout and output the result. # Diff it with the existing file in the checkout and output the result.
if opts.diff: if opts.diff:
diff_cmd = BuildGitDiffCmd(['-U3'], '--no-index', [path, '-']) stdout = RunGitDiffCmd(['-U3'], '--no-index', [path, '-'])
stdout = RunGit(diff_cmd, stdin=stdout.encode(), **kwds)
return stdout return stdout
results = [] results = []
@ -6602,11 +6629,11 @@ def CMDformat(parser, args):
action='store_true', action='store_true',
help='Disable auto-formatting of .java') help='Disable auto-formatting of .java')
opts, args = parser.parse_args(args) opts, files = parser.parse_args(args)
# Normalize any remaining args against the current path, so paths relative # Normalize files against the current path, so paths relative to the
# to the current directory are still resolved as expected. # current directory are still resolved as expected.
args = [os.path.join(os.getcwd(), arg) for arg in args] files = [os.path.join(os.getcwd(), file) for file in files]
# git diff generates paths against the root of the repository. Change # git diff generates paths against the root of the repository. Change
# to that directory so clang-format can find files even within subdirs. # to that directory so clang-format can find files even within subdirs.
@ -6632,9 +6659,8 @@ def CMDformat(parser, args):
'Are you in detached state?') 'Are you in detached state?')
# Filter out copied/renamed/deleted files # Filter out copied/renamed/deleted files
changed_files_cmd = BuildGitDiffCmd(['--name-only', '--diff-filter=crd'], diff_output = RunGitDiffCmd(['--name-only', '--diff-filter=crd'],
upstream_commit, args) upstream_commit, files)
diff_output = RunGit(changed_files_cmd)
diff_files = diff_output.splitlines() diff_files = diff_output.splitlines()
if opts.js: if opts.js:

Loading…
Cancel
Save