util: Add clang-format helper script

pull/5346/head
Roland Fischer 5 years ago committed by Victor Julien
parent de7c7eeff0
commit 69ed6f253f

@ -0,0 +1,642 @@
#!/bin/bash
#
# Script to clang-format suricata C code changes
#
# The rewrite-branch part of it is inspired by
# https://www.thetopsites.net/article/53885283.shtml
#set -x
# We verify the minimal clang-format version for better error messaging as older clang-format
# will barf on unknown settings with a generic error.
# Minimum clang-format major version accepted by the version check further down.
CLANG_FORMAT_REQUIRED_VERSION=9
# Exit code used when an error was encountered (see Die).
EXIT_CODE_ERROR=2
# Exit code used by check-branch when the branch is not correctly formatted.
EXIT_CODE_FORMATTING_REQUIRED=1
# Exit code used when everything went fine.
EXIT_CODE_OK=0
# Set to 1 to make Debug print its messages.
PRINT_DEBUG=0
# Print the given message prefixed with "DEBUG: ", but only when PRINT_DEBUG
# is non-zero. Silent otherwise.
Debug() {
    [ $PRINT_DEBUG -ne 0 ] || return 0
    echo "DEBUG: $*"
}
# Text attributes used in help and error output. They stay empty (plain text)
# unless $TERM names a real terminal and tput is available.
bold=""
normal=""
italic=""
# $TERM is set to dumb when calling scripts in github actions.
if [ -z "$TERM" ] || [ "$TERM" = "dumb" ]; then
    Debug "No text formatting"
else
    Debug "TERM: '$TERM'"
    # tput, albeit unlikely, might not be installed (command -v: built-in which)
    if command -v tput >/dev/null 2>&1; then
        Debug "Setting text formatting"
        bold=$(tput bold)
        normal=$(tput sgr0)
        italic=$(echo -e '\E[3m')
    fi
fi
# Name of this script as shown in usage and error messages.
# "$0" is quoted so that a script path containing whitespace is passed to
# basename as a single argument.
EXEC=$(basename "$0")
# we might change dir - save so that we can revert (ExitWith pops it again)
pushd . >/dev/null
# Top-level usage text. Printed for --help, for "help" without a command and
# alongside argument errors.
USAGE=$(cat << EOM
usage: $EXEC --help
$EXEC help <command>
$EXEC <command> [<args>]
Format selected changes using clang-format.
Note: This does ONLY format the changed code, not the whole file! It
uses ${italic}git-clang-format${normal} for the actual formatting. If you want to format
whole files, use ${italic}clang-format -i <file>${normal}.
It auto-detects the correct clang-format version and compared to ${italic}git-clang-format${normal}
proper it provides additional functionality such as reformatting of all commits on a branch.
Commands used in various situations:
Formatting branch changes (compared to master):
branch Format all changes in branch as additional commit
rewrite-branch Format every commit in branch and rewrite history
Formatting single changes:
cached Format changes in git staging
commit Format changes in most recent commit
Checking if formatting is correct:
check-branch Checks if formatting for branch changes is correct
More info on a command:
help Display more info for a particular <command>
EOM
)
# Manual page for the "branch" command. Shown by "help branch" and when
# ReformatBranch rejects its arguments.
HELP_BRANCH=$(cat << EOM
${bold}NAME${normal}
$EXEC branch - Format all changes in branch as additional commit
${bold}SYNOPSIS${normal}
$EXEC branch [--force]
${bold}DESCRIPTION${normal}
Format all changes in your branch enabling you to add it as an additional
formatting commit. It automatically detects all commits on your branch.
Requires that all changes are committed unless --force is provided.
You will need to commit the reformatted code.
This is equivalent to calling:
$ git clang-format --extensions c,h [--force] first_commit_on_current_branch^
${bold}OPTIONS${normal}
-f, --force
Allow changes to unstaged files.
${bold}EXAMPLES${normal}
On your branch whose changes you want to reformat:
$ $EXEC branch
${bold}EXIT STATUS${normal}
$EXEC exits with a status of zero if the changes were successfully
formatted, or if no formatting change was required. A status of two will
be returned if any errors were encountered.
EOM
)
# Manual page for the "cached" command. Shown by "help cached" and when
# ReformatCached rejects its arguments.
HELP_CACHED=$(cat << EOM
${bold}NAME${normal}
$EXEC cached - Format changes in git staging
${bold}SYNOPSIS${normal}
$EXEC cached [--force]
${bold}DESCRIPTION${normal}
Format staged changes using clang-format.
You will need to commit the reformatted code.
This is equivalent to calling:
$ git clang-format --extensions c,h [--force]
${bold}OPTIONS${normal}
-f, --force
Allow changes to unstaged files.
${bold}EXAMPLES${normal}
Format all changes in staging, i.e. in files added with ${italic}git add <file>${normal}.
$ $EXEC cached
${bold}EXIT STATUS${normal}
$EXEC exits with a status of zero if the changes were successfully
formatted, or if no formatting change was required. A status of two will
be returned if any errors were encountered.
EOM
)
# Manual page for the "check-branch" command. Shown by "help check-branch"
# and when CheckBranch rejects its arguments.
HELP_CHECK_BRANCH=$(cat << EOM
${bold}NAME${normal}
$EXEC check-branch - Checks if formatting for branch changes is correct
${bold}SYNOPSIS${normal}
$EXEC check-branch [--show-commits] [--quiet]
$EXEC check-branch --diff [--show-commits] [--quiet]
$EXEC check-branch --diffstat [--show-commits] [--quiet]
${bold}DESCRIPTION${normal}
Check if all branch changes are correctly formatted.
Note, it does not check every commit's formatting, but rather the
overall diff between HEAD and master.
Returns 1 if formatting is off, 0 if it is correct.
${bold}OPTIONS${normal}
-d, --diff
Print formatting diff, i.e. diff of each file with correct formatting.
-s, --diffstat
Print formatting diffstat output, i.e. files with wrong formatting.
-c, --show-commits
Print branch commits.
-q, --quiet
Do not print any error if formatting is off, only set exit code.
${bold}EXIT STATUS${normal}
$EXEC exits with a status of zero if the formatting is correct. A
status of one will be returned if the formatting is not correct. A status
of two will be returned if any errors were encountered.
EOM
)
# Manual page for the "commit" command, shown by "help commit".
# The note in EXAMPLES is spelled without apostrophes on purpose so the
# here-document contains no quote characters in that sentence.
HELP_COMMIT=$(cat << EOM
${bold}NAME${normal}
$EXEC commit - Format changes in most recent commit
${bold}SYNOPSIS${normal}
$EXEC commit
${bold}DESCRIPTION${normal}
Format changes in most recent commit using clang-format.
You will need to commit the reformatted code.
This is equivalent to calling:
$ git clang-format --extensions c,h HEAD^
${bold}EXAMPLES${normal}
Format all changes in most recent commit:
$ $EXEC commit
Note that this modifies the files, but does not commit them - you will likely want to run
$ git commit --amend -a
${bold}EXIT STATUS${normal}
$EXEC exits with a status of zero if the changes were successfully
formatted, or if no formatting change was required. A status of two will
be returned if any errors were encountered.
EOM
)
# Manual page for the "rewrite-branch" command, shown by "help rewrite-branch".
HELP_REWRITE_BRANCH=$(cat << EOM
${bold}NAME${normal}
$EXEC rewrite-branch - Format every commit in branch and rewrite history
${bold}SYNOPSIS${normal}
$EXEC rewrite-branch
${bold}DESCRIPTION${normal}
Reformat all commits in branch off master one-by-one. This will ${bold}rewrite
the branch history${normal} using the existing commit metadata!
It automatically detects all commits on your branch.
This is handy in case you want to format all of your branch commits
while keeping the commits.
This can also be helpful if you have multiple commits in your branch and
the changed files have been reformatted, i.e. where a git rebase would
fail in many ways over-and-over again.
You can achieve the same manually on a separate branch by:
${italic}git checkout -n <original_commit>${normal},
${italic}git clang-format${normal} and ${italic}git commit${normal} for each original commit in your branch.
${bold}OPTIONS${normal}
None
${bold}EXAMPLES${normal}
In your branch that you want to reformat. Commit all your changes prior
to calling:
$ $EXEC rewrite-branch
${bold}EXIT STATUS${normal}
$EXEC exits with a status of zero if the changes were successfully
formatted, or if no formatting change was required. A status of two will
be returned if any errors were encountered.
EOM
)
# Write the given message to stderr, prefixed with an "ERROR" tag that is
# wrapped in the bold/normal text attributes.
Error() {
    local message="$*"
    echo "${bold}ERROR${normal}: ${message}" >&2
}
# Leave the script with the exit code given as the single argument, after
# popping the directory saved by the initial pushd.
# Called without exactly one argument it reports an internal error and exits
# with EXIT_CODE_ERROR.
ExitWith() {
    popd >/dev/null # we might have changed dir
    if [ $# -eq 1 ]; then
        exit $1
    fi
    # Huh? No exit value provided?
    Error "Internal: ExitWith requires parameter"
    exit $EXIT_CODE_ERROR
}
# Failure exit with error message: report the message via Error, then leave
# the script through ExitWith with EXIT_CODE_ERROR.
# The arguments are forwarded as "$@" so the message is neither word-split
# nor glob-expanded (an option such as '*' in the message stays literal).
Die() {
    Error "$@"
    ExitWith $EXIT_CODE_ERROR
}
# Ensure required program exists. Exits with failure (via Die) if not found.
# Call with
#   RequireProgram ENVVAR_TO_SET program ...
# One can provide multiple alternative programs. The result of "command -v"
# for the first program found in the provided list is stored in the variable
# named by ENVVAR_TO_SET.
RequireProgram() {
    if [ $# -lt 2 ]; then
        Die "Internal - RequireProgram: Need env and program parameters"
    fi
    # eat variable to set
    local envvar=$1
    shift
    local program program_path
    for program in "$@"; do
        # command -v is the built-in which
        if program_path=$(command -v "$program" 2>/dev/null); then
            # printf -v assigns the value verbatim. Unlike eval it does not
            # re-parse the path, so whitespace or shell metacharacters in it
            # are harmless.
            printf -v "$envvar" '%s' "$program_path"
            return
        fi
    done
    if [ $# -eq 1 ]; then
        Die "$1 not found"
    else
        Die "None of $* found"
    fi
}
# Make sure we are running from the top-level git directory.
# Same approach as for setup-decoder.sh. Good enough.
# We could probably use git rev-parse --show-toplevel to do so, as long as we
# handle the libhtp subfolder correctly.
#
# Stays put when ./src/suricata.c exists, moves one directory up when called
# from a direct sub-directory of the source tree, and dies otherwise.
SetTopLevelDir() {
    # Already at the top level: nothing to do.
    [ -e ./src/suricata.c ] && return 0
    if [ -e ./suricata.c ] || [ -e ../src/suricata.c ]; then
        cd ..
        return
    fi
    Die "This does not appear to be a suricata source directory."
}
# Print help for the given command.
#   $1 - command name; empty prints the general usage text.
# Known commands print their HELP_<COMMAND> text. The variable name is derived
# from the command by replacing '-' with '_' and upper-casing it. Unknown
# commands print the usage text and die.
HelpCommand() {
    local help_command=$1
    local help_var
    # The character classes are quoted: unquoted, the shell treats [:lower:]
    # as a glob and replaces it with any matching single-letter file name in
    # the current directory.
    help_var=$(echo "HELP_$help_command" | sed "s/-/_/g" | tr '[:lower:]' '[:upper:]')
    case $help_command in
        branch|cached|check-branch|commit|rewrite-branch)
            # indirect expansion: value of the variable named by help_var
            echo "${!help_var}"
            ;;
        "")
            echo "$USAGE"
            ;;
        *)
            echo "$USAGE"
            echo ""
            Die "No manual entry for $help_command"
            ;;
    esac
}
# Return first commit of branch (off master), i.e. the last entry that
# git rev-list reports for origin/master..HEAD. Printed on stdout.
#
# Use $first_commit^ if you need the commit on master we branched off.
# Do not compare with master directly as it will diff with the latest commit
# on master. If our branch has not been rebased on the latest master, this
# would result in including all new commits on master!
FirstCommitOfBranch() {
    local oldest_commit
    oldest_commit=$(git rev-list origin/master..HEAD | tail -n 1)
    echo $oldest_commit
}
# Check if branch formatting is correct.
# Compares with master branch as baseline which means it's limited to branches
# other than master.
#
# Options: -q|--quiet, -d|--diff, -s|--diffstat, -c|--show-commits
# (--diff and --diffstat are mutually exclusive).
# Returns 0 if the formatting is ok. If not, it returns 1 in quiet mode and
# otherwise exits the script with 1 after printing hints.
CheckBranch() {
    local quiet=0
    local show_diff=0
    local show_diffstat=0
    local show_commits=0
    local git_clang_format="$GIT_CLANG_FORMAT --diff"
    local option
    for option in "$@"; do
        case "$option" in
            -q|--quiet)
                quiet=1
                ;;
            -d|--diff)
                show_diff=1
                ;;
            -s|--diffstat)
                show_diffstat=1
                # only the custom git-clang-format knows --diffstat
                git_clang_format="$GIT_CLANG_FORMAT_DIFFSTAT --diffstat"
                ;;
            -c|--show-commits)
                show_commits=1
                ;;
            *) # unknown option
                echo "$HELP_CHECK_BRANCH"
                echo ""
                Die "Unknown $command option: $option"
                ;;
        esac
    done

    if (( show_diffstat == 1 && show_diff == 1 )); then
        echo "$HELP_CHECK_BRANCH"
        echo ""
        Die "Cannot combine $command options --diffstat with --diff"
    fi

    # Find first commit on branch. Use $first_commit^ if you need the
    # commit on master we branched off.
    local first_commit=$(FirstCommitOfBranch)

    # git-clang-format is a python script that does not like SIGPIPE shut down
    # by "| head" prematurely. Use work-around with writing to tmpfile first.
    local format_changes="$git_clang_format --extensions c,h $first_commit^"
    local tmpfile=$(mktemp /tmp/clang-format.check.XXXXXX)
    # intentionally unquoted: the string is split into command and arguments
    $format_changes >"$tmpfile"
    local changes=$(head -1 "$tmpfile")
    if (( show_diff == 1 || show_diffstat == 1 )); then
        cat "$tmpfile"
        echo ""
    fi
    rm "$tmpfile"

    # Branch commits can help with trouble shooting. Print after diff/diffstat
    # as output might be tail'd
    if (( show_commits == 1 )); then
        echo "Commits on branch (new -> old):"
        git log --oneline $first_commit^..HEAD
        echo ""
    elif (( quiet != 1 )); then
        echo "First commit on branch: $first_commit"
    fi

    # Exit code of git-clang-format is useless as it's 0 no matter if files
    # changed or not. Check actual output. Not ideal, but works.
    case "$changes" in
        "no modified files to format"|"clang-format did not modify any files")
            if (( quiet != 1 )); then
                echo "no modified files to format"
            fi
            return $EXIT_CODE_OK
            ;;
    esac

    if (( quiet == 1 )); then
        return $EXIT_CODE_FORMATTING_REQUIRED
    fi
    Error "Branch requires formatting"
    Debug "View required changes with clang-format: ${italic}$format_changes${normal}"
    Error "View required changes with: ${italic}$EXEC $command --diff${normal}"
    Error "Use ${italic}$EXEC rewrite-branch${normal} or ${italic}$EXEC branch${normal} to fix formatting"
    ExitWith $EXIT_CODE_FORMATTING_REQUIRED
}
# Reformat all changes in branch as a separate commit.
# Accepts at most one option, -f|--force, which is handed on to
# git-clang-format as --force. Dies on any other argument and when
# git-clang-format fails.
ReformatBranch() {
    local with_unstaged=
    case $# in
        0)
            ;;
        1)
            case "$1" in
                --force|-f)
                    with_unstaged='--force'
                    ;;
                *)
                    echo "$HELP_BRANCH"
                    echo ""
                    Die "Unknown $command option: $1"
                    ;;
            esac
            ;;
        *)
            echo "$HELP_BRANCH"
            echo ""
            Die "Too many $command options: $1"
            ;;
    esac

    # Find first commit on branch. Use $first_commit^ if you need the
    # commit on master we branched off.
    local first_commit=$(FirstCommitOfBranch)
    echo "First commit on branch: $first_commit"
    # $GIT_CLANG_FORMAT and $with_unstaged are unquoted on purpose: the former
    # may carry extra options, the latter must vanish when empty.
    if ! $GIT_CLANG_FORMAT --style file --extensions c,h $with_unstaged $first_commit^; then
        Die "Cannot reformat branch. git clang-format failed"
    fi
}
# Reformat changes in the most recent commit (diff against HEAD^).
# Takes no arguments; any argument prints the "commit" manual page and dies.
# Dies when git-clang-format fails.
ReformatCommit() {
    # check parameters
    local commit=HEAD^ # only most recent for now
    if [ $# -gt 0 ]; then
        # HELP_COMMIT is the manual page of this command
        echo "$HELP_COMMIT"
        echo ""
        Die "Too many $command options: $1"
    fi
    # $GIT_CLANG_FORMAT is unquoted on purpose: it may carry extra options.
    if ! $GIT_CLANG_FORMAT --style file --extensions c,h $commit; then
        Die "Cannot reformat most recent commit. git clang-format failed"
    fi
}
# Reformat currently staged changes.
# Accepts at most one option, -f|--force, which is handed on to
# git-clang-format as --force. Dies on any other argument and when
# git-clang-format fails.
ReformatCached() {
    local with_unstaged=
    case $# in
        0)
            ;;
        1)
            case "$1" in
                --force|-f)
                    with_unstaged='--force'
                    ;;
                *)
                    echo "$HELP_CACHED"
                    echo ""
                    Die "Unknown $command option: $1"
                    ;;
            esac
            ;;
        *)
            echo "$HELP_CACHED"
            echo ""
            Die "Too many $command options: $1"
            ;;
    esac

    # $GIT_CLANG_FORMAT and $with_unstaged are unquoted on purpose: the former
    # may carry extra options, the latter must vanish when empty.
    if ! $GIT_CLANG_FORMAT --style file --extensions c,h $with_unstaged; then
        Die "Cannot reformat staging. git clang-format failed"
    fi
}
# Reformat all commits of a branch (compared with master) and rewrites
# the history with the formatted commits one-by-one.
# This is helpful for quickly reformatting branches with multiple commits,
# or where the master version of a file has been reformatted.
#
# Takes no arguments. Dies when called on master or when git filter-branch
# fails. Does nothing but print a note when CheckBranch reports that the
# branch is already formatted.
ReformatCommitsOnBranch() {
    # Do not allow rewriting of master.
    # CheckBranch below will also tell us there are no changes compared with
    # master, but let's make this foolproof and explicit here.
    local current_branch
    current_branch=$(git rev-parse --abbrev-ref HEAD)
    if [ "$current_branch" == "master" ]; then
        Die "Must not rewrite master branch history."
    fi

    if CheckBranch "--quiet"; then
        echo "no modified files to format"
    else
        # Only rewrite if there are changes
        # Squelch warning. Our usage of git filter-branch is limited and should be ok.
        # Should investigate using git-filter-repo in the future instead.
        export FILTER_BRANCH_SQUELCH_WARNING=1

        # Find first commit on branch. Use $first_commit^ if you need the
        # commit on master we branched off.
        local first_commit
        first_commit=$(FirstCommitOfBranch)
        echo "First commit on branch: $first_commit"
        # The range starts at $first_commit^ so that the first commit of the
        # branch is rewritten as well. With $first_commit..HEAD it would be
        # skipped, and a branch with a single commit would give an empty range.
        # Use --force in case it's run a second time on the same branch
        if ! git filter-branch --force --tree-filter "$GIT_CLANG_FORMAT $first_commit^" -- "$first_commit^..HEAD"; then
            Die "Cannot rewrite branch. git filter-branch failed"
        fi
    fi
}
# A command argument is mandatory.
if (( $# == 0 )); then
    echo "$USAGE"
    Die "Missing arguments. Call with one argument"
fi

SetTopLevelDir

RequireProgram GIT git
# ubuntu uses clang-format-{version} name for newer versions. fedora not.
RequireProgram GIT_CLANG_FORMAT git-clang-format-11 git-clang-format-10 git-clang-format-9 git-clang-format

# default binary is clang-format; a versioned git-clang-format needs the
# clang-format binary of the same version.
# Alternative: git config clangformat.binary "clang-format-<version>"
GIT_CLANG_FORMAT_BINARY=clang-format
case "$GIT_CLANG_FORMAT" in
    *git-clang-format-11)
        GIT_CLANG_FORMAT_BINARY="clang-format-11"
        ;;
    *git-clang-format-10)
        GIT_CLANG_FORMAT_BINARY="clang-format-10"
        ;;
    *git-clang-format-9)
        GIT_CLANG_FORMAT_BINARY="clang-format-9"
        ;;
    *git-clang-format)
        Debug "Using regular clang-format"
        ;;
    *)
        Debug "Internal: unhandled clang-format version"
        ;;
esac

# enforce minimal clang-format version as required by .clang-format
clang_format_version=$($GIT_CLANG_FORMAT_BINARY --version | sed 's/.*clang-format version \([0-9]*\.[0-9]*\.[0-9]*\).*/\1/')
Debug "Found clang-format version: $clang_format_version"
clang_format_version_major=$(echo $clang_format_version | sed 's/\([0-9]*\)\.\([0-9]*\)\.\([0-9]*\).*/\1/')
Debug "clang-format version major: $clang_format_version_major"
if (( clang_format_version_major + 0 < CLANG_FORMAT_REQUIRED_VERSION + 0 )); then
    Die "Require clang version $CLANG_FORMAT_REQUIRED_VERSION, found $clang_format_version_major ($clang_format_version)."
fi

# A separate git-clang-format is used for --diffstat as upstream does not
# have that option yet.
RequireProgram GIT_CLANG_FORMAT_DIFFSTAT scripts/git-clang-format-custom
if [[ "$GIT_CLANG_FORMAT_BINARY" != "clang-format" ]]; then
    # tell both scripts which clang-format binary to run
    GIT_CLANG_FORMAT+=" --binary $GIT_CLANG_FORMAT_BINARY"
    GIT_CLANG_FORMAT_DIFFSTAT+=" --binary $GIT_CLANG_FORMAT_BINARY"
fi
Debug "Using $GIT_CLANG_FORMAT"
Debug "Using $GIT_CLANG_FORMAT_DIFFSTAT"
# Dispatch on the command given as first argument. Only check-branch reports
# its result through command_rc; every other command either succeeds or
# terminates the script itself via Die.
command_rc=0
command=$1
case "$command" in
    branch)
        shift
        ReformatBranch "$@"
        ;;
    check-branch)
        shift
        CheckBranch "$@"
        command_rc=$?
        ;;
    cached)
        shift
        ReformatCached "$@"
        ;;
    commit)
        shift
        ReformatCommit "$@"
        ;;
    rewrite-branch)
        ReformatCommitsOnBranch
        ;;
    help)
        shift
        # quoted so the help topic is passed on as one literal word
        HelpCommand "$1"
        ;;
    -h|--help)
        echo "$USAGE"
        ;;
    *)
        Die "$EXEC: '$command' is not a command. See '$EXEC --help'"
        ;;
esac
ExitWith $command_rc

@ -0,0 +1,600 @@
#!/usr/bin/env python
# Copy of https://github.com/llvm-mirror/clang/blob/master/tools/clang-format/git-clang-format
# Adds a --diffstat option to show the files needing formatting.
# This change will be upstreamed, but the current git-clang-format does not
# have it yet. We use it in the internal scripts/clang-format.sh
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#
r"""
clang-format git integration
============================
This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.
For further details, run:
git clang-format -h
Requires Python 2.7 or Python 3
"""
from __future__ import absolute_import, division, print_function
import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys
# Usage line handed to argparse.ArgumentParser in main().
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'
# Description text handed to argparse.ArgumentParser in main().
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.
If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.
The following git-config settings set the default of the corresponding option:
clangFormat.binary
clangFormat.commit
clangFormat.extensions
clangFormat.style
'''
# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'
# A run of changed lines: first line number and number of lines.
Range = collections.namedtuple('Range', 'start, count')
def main():
  """Command line entry point.

  Parses the arguments, determines the changed lines of the selected files,
  runs clang-format on them and then prints a diff, prints a diffstat or
  applies the result to the working directory."""
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
      'cu',  # CUDA
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      'cs',  # C Sharp
      ])

  p = argparse.ArgumentParser(
    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
    description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--diffstat', action='store_true',
                 help='print diffstat instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into the verbosity level: each -q lowers it by one.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  # Check the upper bound first; nested in the else branch of the two-commit
  # test it could never trigger.
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  # Comparing two commits must not touch the working directory, so one of the
  # print-only modes is required.
  if len(commits) > 1 and not (opts.diff or opts.diffstat):
    die('--diff or --diffstat is required when two commits are given')
  changed_lines = compute_diff_and_extract_lines(commits, files)
  if opts.verbose >= 1:
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print('Ignoring changes in the following files (wrong extension):')
      for filename in ignored_files:
        print(' %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print(' %s' % filename)
  if not changed_lines:
    print('no modified files to format')
    return
  # The computed diff outputs absolute paths, so we must cd before accessing
  # those files.
  cd_to_toplevel()
  if len(commits) > 1:
    old_tree = commits[1]
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision=commits[1],
                                                 binary=opts.binary,
                                                 style=opts.style)
  else:
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  elif opts.diffstat:
    print_diffstat(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print(' %s' % filename)
def load_git_config(non_string_options=None):
  """Return the git configuration as a dictionary.

  All options are assumed to be strings unless in `non_string_options`, which
  is a dictionary mapping option name (in lower case) to either "--bool" or
  "--int". Those options are read again through `git config <type> <name>`."""
  if non_string_options is None:
    non_string_options = {}
  out = {}
  for entry in run('git', 'config', '--list', '--null').split('\0'):
    if entry:
      if '\n' in entry:
        name, value = entry.split('\n', 1)
      else:
        # With --null a key is separated from its value by '\n'. An entry
        # without one is a setting given without '=', i.e. implicitly 'true'.
        name = entry
        value = 'true'
      if name in non_string_options:
        value = run('git', 'config', non_string_options[name], name)
      out[name] = value
  return out
def interpret_args(args, dash_dash, default_commit):
  """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

  It is assumed that "--" and everything that follows has been removed from
  args and placed in `dash_dash`.

  If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
  left (if present) are taken as commits. Otherwise, the arguments are checked
  from left to right if they are commits or files. If commits are not given,
  a list with `default_commit` is used. `args` itself is not modified."""
  if dash_dash:
    if len(args) == 0:
      commits = [default_commit]
    else:
      commits = args
    for commit in commits:
      object_type = get_object_type(commit)
      if object_type not in ('commit', 'tag'):
        if object_type is None:
          die("'%s' is not a commit" % commit)
        else:
          die("'%s' is a %s, but a commit was expected" % (commit, object_type))
    files = dash_dash[1:]
  elif args:
    commits = []
    # Work on a copy so that the caller's list is left untouched.
    files = list(args)
    while files:
      if not disambiguate_revision(files[0]):
        break
      commits.append(files.pop(0))
    if not commits:
      commits = [default_commit]
  else:
    commits = [default_commit]
    files = []
  return commits, files
def disambiguate_revision(value):
  """Returns True if `value` is a revision, False if it is a file, or dies.

  A value counts as a revision when git reports its object type as 'commit'
  or 'tag', and as a file when it is not a git object at all."""
  # If `value` is ambiguous (neither a commit nor a file), the following
  # command will die with an appropriate error message.
  run('git', 'rev-parse', value, verbose=False)
  object_type = get_object_type(value)
  if object_type is None:
    return False
  if object_type in ('commit', 'tag'):
    return True
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, object_type))
def get_object_type(value):
  """Returns a string description of an object's type, or None if it is not
  a valid git object.

  The type is whatever `git cat-file -t` prints for `value`."""
  cmd = ['git', 'cat-file', '-t', value]
  # stderr is captured only so that git's error message for unknown objects
  # is not shown; its content is not used.
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout, stderr = p.communicate()
  if p.returncode != 0:
    return None
  return convert_string(stdout.strip())
def compute_diff_and_extract_lines(commits, files):
  """Calls compute_diff() followed by extract_lines().

  Returns the dictionary produced by extract_lines(). Exits with status 2
  when the diff process reports a failure."""
  diff_process = compute_diff(commits, files)
  changed_lines = extract_lines(diff_process.stdout)
  diff_process.stdout.close()
  diff_process.wait()
  if diff_process.returncode != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return changed_lines
def compute_diff(commits, files):
  """Return a subprocess object producing the diff from `commits`.

  The return value's `stdout` file object will produce a patch with the
  differences between the working directory and the first commit if a single
  one was specified, or the difference between both specified commits, filtered
  on `files` (if non-empty). Zero context lines are used in the patch."""
  git_tool = 'diff-index'
  if len(commits) > 1:
    git_tool = 'diff-tree'
  cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
  cmd.extend(files)
  p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # Nothing is fed to git; close its stdin right away.
  p.stdin.close()
  return p
def extract_lines(patch_file):
  """Extract the changed lines in `patch_file`.

  The return value is a dictionary mapping filename to a list of (start_line,
  line_count) pairs.

  The input must have been produced with ``-U0``, meaning unidiff format with
  zero lines of context. The return value is a dict mapping filename to a
  list of line `Range`s. Hunks that add no lines are left out."""
  matches = {}
  for line in patch_file:
    line = convert_string(line)
    # '+++ b/<path>': start of the changes for a new file.
    match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
    if match:
      filename = match.group(1).rstrip('\r\n')
    # '@@ -a,b +c,d @@': hunk header; the count defaults to 1 when omitted.
    match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
    if match:
      start_line = int(match.group(1))
      line_count = 1
      if match.group(3):
        line_count = int(match.group(3))
      if line_count > 0:
        matches.setdefault(filename, []).append(Range(start_line, line_count))
  return matches
def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period. Keys without any extension are kept only if the empty
  string is one of the allowed extensions. `dictionary` is modified in
  place."""
  allowed_extensions = frozenset(allowed_extensions)
  # Iterate over a copy of the keys since entries are deleted on the way.
  for filename in list(dictionary.keys()):
    base_ext = filename.rsplit('.', 1)
    if len(base_ext) == 1 and '' in allowed_extensions:
      continue
    if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
      del dictionary[filename]
def cd_to_toplevel():
  """Change to the top level of the git repository.

  The directory is the one reported by `git rev-parse --show-toplevel`."""
  toplevel = run('git', 'rev-parse', '--show-toplevel')
  os.chdir(toplevel)
def create_tree_from_workdir(filenames):
  """Create a new git tree with the given files from the working directory.

  Returns the object ID (SHA-1) of the created tree."""
  return create_tree(filenames, '--stdin')
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Run clang-format on each file and save the result to a git tree.

  `changed_lines` maps each filename to the line ranges to format. With
  `revision` set, the file mode is read from that revision via `git ls-tree`;
  otherwise it is taken from the file in the working directory.

  Returns the object ID (SHA-1) of the created tree."""
  def iteritems(container):
    try:
      return container.iteritems()  # Python 2
    except AttributeError:
      return container.items()  # Python 3

  def index_info_generator():
    # Yields one "<mode> <blob id>\t<filename>" entry per formatted file.
    for filename, line_ranges in iteritems(changed_lines):
      if revision:
        git_metadata_cmd = ['git', 'ls-tree',
                            '%s:%s' % (revision, os.path.dirname(filename)),
                            os.path.basename(filename)]
        git_metadata = subprocess.Popen(git_metadata_cmd, stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
        stdout = git_metadata.communicate()[0]
        # The first field of the ls-tree output is the octal file mode.
        mode = oct(int(stdout.split()[0], 8))
      else:
        mode = oct(os.stat(filename).st_mode)
      # Adjust python3 octal format so that it matches what git expects
      if mode.startswith('0o'):
        mode = '0' + mode[2:]
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)

  return create_tree(index_info_generator(), '--index-info')
def create_tree(input_lines, mode):
  """Create a tree object from the given input.

  If mode is '--stdin', it must be a list of filenames. If mode is
  '--index-info' it must be a list of values suitable for "git update-index
  --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any other mode
  is invalid.

  Returns the object ID printed by `git write-tree`."""
  assert mode in ('--stdin', '--index-info')
  cmd = ['git', 'update-index', '--add', '-z', mode]
  # The entries go into a temporary index file, not the user's index.
  with temporary_index_file():
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    for line in input_lines:
      # -z: entries are NUL-terminated
      p.stdin.write(to_bytes('%s\0' % line))
    p.stdin.close()
    if p.wait() != 0:
      die('`%s` failed' % ' '.join(cmd))
    tree_id = run('git', 'write-tree')
    return tree_id
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Run clang-format on the given file and save the result to a git blob.

  Runs on the file in `revision` if not None, or on the file in the working
  directory if `revision` is None. Only the lines in `line_ranges` are passed
  to clang-format for formatting.

  Returns the object ID (SHA-1) of the created blob."""
  clang_format_cmd = [binary]
  if style:
    clang_format_cmd.extend(['-style='+style])
  clang_format_cmd.extend([
      '-lines=%s:%s' % (start_line, start_line+line_count-1)
      for start_line, line_count in line_ranges])
  if revision:
    # The content comes from git via stdin; tell clang-format the file name.
    clang_format_cmd.extend(['-assume-filename='+filename])
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    clang_format_stdin = git_show.stdout
  else:
    clang_format_cmd.extend([filename])
    git_show = None
    clang_format_stdin = subprocess.PIPE
  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
                                    stdout=subprocess.PIPE)
    if clang_format_stdin == subprocess.PIPE:
      clang_format_stdin = clang_format.stdin
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  clang_format_stdin.close()
  # Pipe the formatted output straight into git to store it as a blob.
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  clang_format.stdout.close()
  stdout = hash_object.communicate()[0]
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return convert_string(stdout).rstrip('\r\n')
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a temporary index.

  The index is created by create_temporary_index(tree). On exit the previous
  value of GIT_INDEX_FILE is restored (the variable is removed again if it
  was not set before) and the temporary index file is deleted."""
  temp_index = create_temporary_index(tree)
  saved_index = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = temp_index
  try:
    yield
  finally:
    if saved_index is not None:
      os.environ['GIT_INDEX_FILE'] = saved_index
    else:
      del os.environ['GIT_INDEX_FILE']
    os.remove(temp_index)
def create_temporary_index(tree=None):
  """Write a temporary git index file and return its path.

  The file is placed in the repository's git directory under the name
  `temp_index_basename`. It is populated from `tree` when one is given and
  is an empty index otherwise."""
  git_dir = run('git', 'rev-parse', '--git-dir')
  index_path = os.path.join(git_dir, temp_index_basename)
  # `git read-tree --empty` stands in for a tree when none was requested.
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output=' + index_path, source)
  return index_path
def print_diff(old_tree, new_tree):
  """Show the diff from `old_tree` to `new_tree` on stdout."""
  # The porcelain `git diff` is used instead of the plumbing `git diff-tree`
  # because a person reads this output, and only the porcelain command adds
  # niceties such as color and pagination.
  #
  # `new_tree` holds only the files that were modified, so without the
  # `--diff-filter=M` restriction every untouched file would be reported as
  # deleted.
  diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
  subprocess.check_call(diff_cmd)
def print_diffstat(old_tree, new_tree):
  """Show the diffstat from `old_tree` to `new_tree` on stdout."""
  # The porcelain `git diff` is used instead of the plumbing `git diff-tree`
  # because a person reads this output, and only the porcelain command adds
  # niceties such as color and pagination.
  #
  # `new_tree` holds only the files that were modified, so without the
  # `--diff-filter=M` restriction every untouched file would be reported as
  # deleted.
  diffstat_cmd = ['git', 'diff', '--diff-filter=M', '--stat', old_tree,
                  new_tree, '--']
  subprocess.check_call(diffstat_cmd)
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails out with exit status 2 if any of the affected files has unstaged
  changes, unless `force` is set. If `patch_mode` is set, runs
  `git checkout --patch` so the user can select hunks interactively.

  Returns the list of files that differ between `old_tree` and `new_tree`;
  the list is empty, and nothing is touched, when the trees do not differ in
  any modified file."""
  diff_output = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                    '--name-only', old_tree, new_tree)
  # `-z` output is NUL-terminated. Dropping empty fields makes "no changes"
  # yield [] instead of [''], which would otherwise reach git as an empty
  # pathspec.
  changed_files = [name for name in diff_output.split('\0') if name]
  if not changed_files:
    return changed_files
  if not force:
    # `--` keeps file names that start with '-' from being parsed as options.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      print('The following files would be modified but '
            'have unstaged changes:', file=sys.stderr)
      print(unstaged_files, file=sys.stderr)
      print('Please commit, stage, or stash them first.', file=sys.stderr)
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
def run(*args, **kwargs):
  """Run the command given by `args` and return its standard output.

  Keyword arguments:
    stdin   -- data written to the child's standard input; text is encoded
               as UTF-8 first (default: '', i.e. nothing is sent).
    verbose -- if true (default), announce on stderr when the command wrote
               to stderr or returned a non-zero status.
    strip   -- if true (default), strip trailing CR/LF from the result.

  The child's stderr output is always forwarded to our stderr. If the command
  returns a non-zero status the program exits with status 2.

  Raises TypeError for any other keyword argument."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  for name in kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
  # The pipes are opened in binary mode, so communicate() needs bytes.
  # type(u'') is the text type on both Python 2 (unicode) and Python 3 (str).
  if isinstance(stdin, type(u'')):
    stdin = stdin.encode('utf-8')
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       stdin=subprocess.PIPE)
  stdout, stderr = p.communicate(input=stdin)

  stdout = convert_string(stdout)
  stderr = convert_string(stderr)
  if p.returncode == 0:
    if stderr:
      if verbose:
        print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
      print(stderr.rstrip(), file=sys.stderr)
    if strip:
      stdout = stdout.rstrip('\r\n')
    return stdout
  if verbose:
    print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
  if stderr:
    print(stderr.rstrip(), file=sys.stderr)
  sys.exit(2)
def die(message):
  """Report `message` on stderr, prefixed with 'error:', and exit with 2."""
  print('error:', message, file=sys.stderr)
  raise SystemExit(2)
def to_bytes(str_input):
  """Return `str_input` as binary data.

  A value that already is `bytes` is returned unchanged; anything else is
  encoded as UTF-8."""
  already_binary = isinstance(str_input, bytes)
  return str_input if already_binary else str_input.encode('utf-8')
def to_string(bytes_input):
  """Return `bytes_input` as a native `str`.

  A `str` is returned unchanged. On Python 3, `bytes` are decoded as UTF-8;
  previously they fell through to `.encode()`, which `bytes` does not have.
  On Python 2, where `bytes` is `str`, the remaining case is `unicode`,
  which is encoded as UTF-8."""
  if isinstance(bytes_input, str):
    return bytes_input
  if isinstance(bytes_input, bytes):
    # Only reachable on Python 3: there `bytes` is not `str`.
    return bytes_input.decode('utf-8')
  return bytes_input.encode('utf-8')
def convert_string(bytes_input):
try:
return to_string(bytes_input.decode('utf-8'))
except AttributeError: # 'str' object has no attribute 'decode'.
return str(bytes_input)
except UnicodeError:
return str(bytes_input)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()
Loading…
Cancel
Save