You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
397 lines
14 KiB
Python
397 lines
14 KiB
Python
#!/usr/bin/env python
|
|
# Copyright 2017 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
"""Splits a branch into smaller branches and uploads CLs."""
|
|
|
|
from __future__ import print_function
|
|
|
|
import collections
|
|
import os
|
|
import random
|
|
import re
|
|
import subprocess2
|
|
import sys
|
|
import tempfile
|
|
|
|
import git_footers
|
|
import owners
|
|
import owners_finder
|
|
|
|
import git_common as git
|
|
|
|
import third_party.pygtrie as trie
|
|
|
|
|
|
# Threshold on the number of CLs a single `git cl split` invocation may
# generate before the user is asked to confirm that they really want to
# proceed. Very large batches of CLs produced by `git cl split` have caused
# infrastructure issues in the past.
CL_SPLIT_FORCE_LIMIT = 10
|
|
|
|
|
|
def ReadFile(file_path):
  """Reads |file_path| and returns everything it contains.

  The file is opened with open()'s default mode and is closed before
  returning.
  """
  with open(file_path) as input_file:
    return input_file.read()
|
|
|
|
|
|
def EnsureInGitRepository():
  """Verifies that the current directory is inside a git repository.

  Runs `git rev-parse`; any exception raised by git.run() for a failing
  command is propagated to the caller.
  """
  rev_parse_args = ('rev-parse',)
  git.run(*rev_parse_args)
|
|
|
|
|
|
def CreateBranchForDirectory(prefix, cl_index, directory, upstream):
  """Creates a branch named |prefix| + "_" + |cl_index| + "_" + |directory|.

  Args:
    prefix: First component of the branch name.
    cl_index: Index of the CL the branch is created for. May be an int or a
        string; it is converted with str() when building the name.
    directory: Last component of the branch name.
    upstream: Used as upstream for the created branch.

  Returns:
    False if a branch with that name already exists (nothing is created in
    that case), True after the branch has been created with
    `git checkout -t |upstream| -b <name>`.
  """
  # str.join() only accepts strings, while callers pass the numeric index
  # produced by enumerate(), so convert explicitly.
  branch_name = '_'.join([prefix, str(cl_index), directory])
  if branch_name in set(git.branches(use_limit=False)):
    return False
  git.run('checkout', '-t', upstream, '-b', branch_name)
  return True
|
|
|
|
|
|
def FormatDescriptionOrComment(txt, directory, cl_index, num_cls):
  """Expands the placeholders of a CL description or comment template.

  In |txt|, $directory is replaced with '/' + |directory|, $cl_index with
  str(|cl_index|) and $num_cls with str(|num_cls|). The replacements are
  applied one after the other, in that order.

  Returns:
    The expanded text.
  """
  substitutions = (
      ('$directory', '/' + directory),
      ('$cl_index', str(cl_index)),
      ('$num_cls', str(num_cls)),
  )
  formatted = txt
  for placeholder, value in substitutions:
    formatted = formatted.replace(placeholder, value)
  return formatted
|
|
|
|
|
|
def AddUploadedByGitClSplitToDescription(description):
  """Adds a 'This CL was uploaded by git cl split.' line to |description|.

  The line is added before footers, or at the end of |description| if it has no
  footers. It is separated from the preceding text by a blank line.

  Args:
    description: The CL description, as a string.

  Returns:
    The description with the extra line inserted, joined with '\\n'.
  """
  # Index 0 of the result is used as the non-footer lines and index 1 as the
  # footer lines.
  split_footers = git_footers.split_footers(description)
  # Work on a copy so the list returned by split_footers() is never mutated.
  lines = list(split_footers[0])
  # Only insert a separating blank line when there is preceding text that does
  # not already end with one. This also keeps an empty description from
  # raising IndexError on lines[-1].
  if lines and lines[-1] and not lines[-1].isspace():
    lines.append('')
  lines.append('This CL was uploaded by git cl split.')
  if split_footers[1]:
    lines += [''] + split_footers[1]
  return '\n'.join(lines)
|
|
|
|
|
|
def UploadCl(cl_index, num_cls, refactor_branch, refactor_branch_upstream,
             directory, files, description, comment, reviewer, changelist,
             cmd_upload, cq_dry_run, enable_auto_submit):
  """Uploads a CL with all changes to |files| in |refactor_branch|.

  Nothing is uploaded if the branch for this CL already exists; a message is
  printed and the function returns.

  Args:
    cl_index: The index of this CL in the list of CLs to upload.
    num_cls: The total number of CLs that will be uploaded.
    refactor_branch: Name of the branch that contains the changes to upload.
    refactor_branch_upstream: Name of the upstream of |refactor_branch|.
    directory: Path to the directory that contains the OWNERS file for which
        to upload a CL.
    files: List of AffectedFile instances to include in the uploaded CL.
    description: Description of the uploaded CL.
    comment: Comment to post on the uploaded CL.
    reviewer: The reviewer for the CL.
    changelist: The Changelist class.
    cmd_upload: The function associated with the git cl upload command.
    cq_dry_run: If CL uploads should also do a cq dry run.
    enable_auto_submit: If CL uploads should also enable auto submit.
  """
  # Create a branch.
  if not CreateBranchForDirectory(refactor_branch, cl_index, directory,
                                  refactor_branch_upstream):
    # format() is used because |cl_index| is usually an int and cannot be
    # concatenated to a string with '+'.
    print('Skipping CL {} for directory "{}" for which a branch already '
          'exists.'.format(cl_index, directory))
    return

  # Checkout all changes to files in |files|. Deleted files are removed with
  # `git rm`; everything else is checked out from |refactor_branch|.
  deleted_files = [f.AbsoluteLocalPath() for f in files if f.Action() == 'D']
  if deleted_files:
    git.run(*['rm'] + deleted_files)
  modified_files = [f.AbsoluteLocalPath() for f in files if f.Action() != 'D']
  if modified_files:
    git.run(*['checkout', refactor_branch, '--'] + modified_files)

  # Commit changes. The temporary file is created with delete=False so that it
  # can be deleted manually after git has read it rather than automatically
  # when it is closed. It is opened in text mode because the formatted
  # description is text.
  tmp_file = tempfile.NamedTemporaryFile(mode='w', delete=False)
  try:
    with tmp_file:
      tmp_file.write(
          FormatDescriptionOrComment(description, directory, cl_index, num_cls))
    # The file is closed at this point, which lets git open it.
    git.run('commit', '-F', tmp_file.name)
  finally:
    # Remove the file even when writing or committing fails.
    os.remove(tmp_file.name)

  # Upload a CL.
  upload_args = ['-f', '-r', reviewer]
  if cq_dry_run:
    upload_args.append('--cq-dry-run')
  if not comment:
    # Without a comment to publish afterwards, ask the upload itself to send
    # mail.
    upload_args.append('--send-mail')
  if enable_auto_submit:
    upload_args.append('--enable-auto-submit')
  print('Uploading CL for ' + directory + '.')
  cmd_upload(upload_args)
  if comment:
    changelist().AddComment(
        FormatDescriptionOrComment(comment, directory, cl_index, num_cls),
        publish=True)
|
|
|
|
|
|
class ChangeList(object):
  """A candidate CL: a path, the files it contains and who can review it."""

  def __init__(self, path, owners_db, author, files):
    """Stores the constructor arguments; owners are computed on demand.

    Args:
      path: Path this CL stands for (returned by GetPath()).
      owners_db: Object queried through all_possible_owners() and
          files_not_covered_by().
      author: Passed to owners_db.all_possible_owners().
      files: Set of file objects in this CL. Merge() updates it in place.
    """
    self._path = path
    self._owners_db = owners_db
    self._author = author
    self._files = files
    # Filled in lazily by _EnsureOwners().
    self._owners = None

  def _EnsureOwners(self):
    """Computes the owner set unless a non-empty one is already stored.

    An owner qualifies when they alone cover every path of this CL. If the CL
    has no files, the CL's own path is used instead. Asserts that at least one
    owner was found.
    """
    if self._owners:
      return

    self._owners = set()
    paths = [f.LocalPath() for f in self.GetFiles()]
    if not paths:
      paths = [self.GetPath()]
    database = self._owners_db
    for candidate in database.all_possible_owners(paths, self._author).keys():
      uncovered = database.files_not_covered_by(paths, [candidate])
      if len(uncovered) == 0:
        self._owners.add(candidate)
    assert len(self._owners)

  def Merge(self, other):
    """Folds |other| into this CL.

    The owners become the owners common to both CLs (computed before the files
    are combined), then |other|'s files are added to this CL's file set.
    """
    common_owners = self.GetCommonOwners(other)
    self._owners = common_owners
    self._files |= other.GetFiles()

  def GetPath(self):
    """Returns the path given at construction."""
    return self._path

  def GetFiles(self):
    """Returns the set of files in this CL."""
    return self._files

  def GetOwners(self):
    """Returns the owner set, computing it first if needed."""
    self._EnsureOwners()
    return self._owners

  def GetCommonOwners(self, other):
    """Returns the owners shared by this CL and |other|."""
    mine = self.GetOwners()
    theirs = other.GetOwners()
    return mine & theirs

  def HaveCommonOwners(self, other):
    """Returns True if this CL and |other| share at least one owner."""
    return bool(self.GetCommonOwners(other))

  def GetChangeSizeInBytes(self):
    """Returns the sum of both elements of every ChangeSizeInBytes() entry."""
    total = 0
    for affected_file in self._files:
      for change in affected_file.ChangeSizeInBytes():
        total += change[0] + change[1]
    return total
|
|
|
|
|
|
def SplitCLs(owners_database, author, files, max_cl_size=1000):
  """Groups |files| into CLs by merging files upwards into parent directories.

  Every file starts as its own CL candidate. Candidates are then repeatedly
  merged into the candidate for their parent directory as long as both share
  at least one owner. A parent whose size exceeds |max_cl_size| after a merge
  is finalized and no longer takes part in merging.

  Args:
    owners_database: Passed to every ChangeList that is created.
    author: Passed to every ChangeList that is created.
    files: Iterable of file objects providing LocalPath().
    max_cl_size: The target CL size in # of changed bytes.
        TODO(yannic): Use # of changed lines instead.

  Returns:
    A list of ChangeList objects: first the CLs that were finalized because
    they grew past |max_cl_size|, then all remaining candidates.
  """
  candidates = trie.Trie()
  # Enable sorting so dry-run will split the CL the same way the CL is uploaded.
  candidates.enable_sorting()

  # 1. Create one CL candidate for every affected file.
  for f in files:
    path = f.LocalPath()
    candidates[path] = ChangeList(path, owners_database, author, set([f]))

  change_lists = []

  # 2. Try to merge CL in common directories up to a maximum size of
  #    |max_cl_size|.
  # NOTE(review): the original author gives the cost of this step as
  # O( len(files) * max([len(f.path) for f in files]) ); not verified here.
  edited = True
  while edited:
    edited = False

    # 2.1. Iterate over all candidates and merge candidates into the candidate
    #      for their parent directory if the resulting CL doesn't exceed
    #      |max_cl_size|.
    # The loop body adds and removes candidates, so iterate over a snapshot.
    for key, candidate in list(candidates.items()):
      path = ''.join(key)

      # The number of CL candidates in subdirectories is equivalent to the
      # number of nodes with prefix |path| in the Trie.
      # Only try to merge |candidate| with the candidate for the parent
      # directory if there are no more CLs for subdirectories.
      sub_cls = sum(1 for _ in candidates.keys(path)) - 1
      if sub_cls:
        continue

      parent_path = os.path.dirname(path)
      if len(parent_path) < 1:
        # Don't create CLs for more than one top-level directory.
        continue

      if parent_path not in candidates:
        candidates[parent_path] = ChangeList(parent_path, owners_database,
                                             author, set())
      parent_cl = candidates[parent_path]

      if not parent_cl.HaveCommonOwners(candidate):
        # Don't merge if the resulting CL would need more than one reviewer.
        continue

      # Merge |candidate| into the CL for its parent directory and remove
      # candidate.
      edited = True
      del candidates[path]
      parent_cl.Merge(candidate)

      # Add |parent_cl| to list of CLs to submit if the CL is larger than
      # |max_cl_size|.
      # TODO(yannic): Doing it this way, we might end up with CLs of size
      # 2 * max_cl_size if we merged two candidates that just don't exceed
      # the maximal size.
      if parent_cl.GetChangeSizeInBytes() > max_cl_size:
        change_lists.append(parent_cl)
        del candidates[parent_path]

  # 3. Add all remaining candidates to the list of CLs.
  change_lists.extend(remaining for _, remaining in candidates.items())

  return change_lists
|
|
|
|
|
|
def PrintClInfo(cl_index, num_cls, directory, file_paths, description,
                reviewer):
  """Writes a human-readable summary of one CL to stdout.

  The summary consists of the CL index, the path, the reviewer, the formatted
  description (each line prefixed with a space) and the list of files.

  Args:
    cl_index: The index of this CL in the list of CLs to upload.
    num_cls: The total number of CLs that will be uploaded.
    directory: Path to the directory that contains the OWNERS file for which
        to upload a CL.
    file_paths: A list of files in this CL.
    description: The CL description; placeholders are expanded before
        printing.
    reviewer: The reviewer for this CL.
  """
  formatted = FormatDescriptionOrComment(
      description, directory, cl_index, num_cls)
  indented_lines = []
  for line in formatted.splitlines():
    indented_lines.append(' ' + line)
  description_block = '\n' + '\n'.join(indented_lines) + '\n'

  print('CL {}/{}'.format(cl_index, num_cls))
  print('Path: {}'.format(directory))
  print('Reviewers: {}'.format(reviewer))
  print(description_block)
  print('\n'.join(file_paths))
  print()
|
|
|
|
|
|
def _SelectReviewer(possible_owners, used_reviewers):
|
|
"""Select a reviewer from |owners| and adds them to the set of used reviewers.
|
|
|
|
Returns:
|
|
The reviewer.
|
|
"""
|
|
|
|
# It's debatable whether we want to avoid reusing reviewers. It could be
|
|
# easier to ask the smallest possible amount of reviewers to become familiar
|
|
# with the change being split. However, doing so would mean we send all CLs to
|
|
# top-level owners, which might be too much to ask from them.
|
|
# We may revisit this decicion later.
|
|
unused_reviewers = possible_owners.difference(used_reviewers)
|
|
if len(unused_reviewers) < 1:
|
|
unused_reviewers = possible_owners
|
|
# Pick a random reviwer from the set of owners so we don't prefer owners
|
|
# with emails of low lexical order.
|
|
reviewer = random.choice(tuple(unused_reviewers))
|
|
used_reviewers.add(reviewer)
|
|
return reviewer
|
|
|
|
|
|
def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
            cq_dry_run, enable_auto_submit):
  """Splits a branch into smaller branches and uploads CLs.

  Args:
    description_file: File containing the description of uploaded CLs.
    comment_file: File containing the comment of uploaded CLs.
    changelist: The Changelist class.
    cmd_upload: The function associated with the git cl upload command.
    dry_run: Whether this is a dry run (no branches or CLs created).
    cq_dry_run: If CL uploads should also do a cq dry run.
    enable_auto_submit: If CL uploads should also enable auto submit.

  Returns:
    0 in case of success, or if the user declines the confirmation prompt.
    1 in case of error.
  """
  description = AddUploadedByGitClSplitToDescription(ReadFile(description_file))
  comment = ReadFile(comment_file) if comment_file else None

  try:
    EnsureInGitRepository()

    cl = changelist()
    change = cl.GetChange(cl.GetCommonAncestorWithUpstream(), None)
    files = change.AffectedFiles()

    if not files:
      print('Cannot split an empty CL.')
      return 1

    author = git.run('config', 'user.email').strip() or None
    refactor_branch = git.current_branch()
    assert refactor_branch, "Can't run from detached branch."
    refactor_branch_upstream = git.upstream(refactor_branch)
    assert refactor_branch_upstream, \
        "Branch %s must have an upstream." % refactor_branch

    # open() is passed instead of the `file` builtin, which only exists in
    # Python 2; both open a file the same way there.
    owners_database = owners.Database(change.RepositoryRoot(), open, os.path)
    owners_database.load_data_needed_for([f.LocalPath() for f in files])

    change_lists = SplitCLs(owners_database, author, set(files))

    num_cls = len(change_lists)
    if cq_dry_run and num_cls > CL_SPLIT_FORCE_LIMIT:
      print(
          'This will generate "%r" CLs. This many CLs can potentially generate'
          ' too much load on the build infrastructure. Please email'
          ' infra-dev@chromium.org to ensure that this won\'t break anything.'
          ' The infra team reserves the right to cancel your jobs if they are'
          ' overloading the CQ.' % num_cls)
      # raw_input() was renamed to input() in Python 3.
      try:
        ask = raw_input
      except NameError:
        ask = input
      answer = ask('Proceed? (y/n):')
      if answer.lower() != 'y':
        return 0

    reviewers = set()
    for cl_index, split_cl in enumerate(change_lists, 1):
      # Use '/' as a path separator in the branch name and the CL description
      # and comment.
      directory = split_cl.GetPath().replace(os.path.sep, '/')
      cl_files = split_cl.GetFiles()
      file_paths = [f.LocalPath() for f in cl_files]
      reviewer = _SelectReviewer(split_cl.GetOwners(), reviewers)

      if dry_run:
        PrintClInfo(cl_index, num_cls, directory, file_paths, description,
                    reviewer)
      else:
        # Upload only the files that belong to this CL, not every affected
        # file of the original branch.
        UploadCl(cl_index, num_cls, refactor_branch, refactor_branch_upstream,
                 directory, cl_files, description, comment, reviewer,
                 changelist, cmd_upload, cq_dry_run, enable_auto_submit)

    # Go back to the original branch.
    git.run('checkout', refactor_branch)

  except subprocess2.CalledProcessError as cpe:
    sys.stderr.write(cpe.stderr)
    return 1
  return 0
|