Stop using compression for git cache.

Download the git cache directly from the git directory stored in Google Storage (GS) instead of from a zip archive.
Bug: 943696
Change-Id: Ibe473effbf18d5635736c3ca0ab0ef0bbf21be8b
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1575003
Reviewed-by: Andrii Shyshkalov <tandrii@chromium.org>
Commit-Queue: Karen Qian <karenqian@google.com>
changes/03/1575003/24
Karen Qian 6 years ago committed by Commit Bot
parent e49aed39f0
commit 0cbd5a5ec2

@ -376,87 +376,54 @@ class Mirror(object):
""" """
if not self.bootstrap_bucket: if not self.bootstrap_bucket:
return False return False
python_fallback = (
(sys.platform.startswith('win') and
not gclient_utils.FindExecutable('7z')) or
(not gclient_utils.FindExecutable('unzip')) or
('ZIP64_SUPPORT' not in subprocess.check_output(["unzip", "-v"]))
)
gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
gsutil = Gsutil(self.gsutil_exe, boto_path=None) gsutil = Gsutil(self.gsutil_exe, boto_path=None)
# Get the most recent version of the zipfile.
_, ls_out, ls_err = gsutil.check_call('ls', gs_folder) # Get the most recent version of the directory.
# This is determined from the most recent version of a .ready file.
def compare_filenames(a, b): # The .ready file is only uploaded when an entire directory has been
# |a| and |b| look like gs://.../.../9999.zip. They both have the same # uploaded to GS.
# gs://bootstrap_bucket/basedir/ prefix because they come from the same _, ls_out, ls_err = gsutil.check_call('ls', self._gs_path)
# `gsutil ls`.
# This function only compares the numeral parts before .zip. ready_file_pattern = re.compile(r'.*/(\d+).ready$')
regex_pattern = r'/(\d+)\.zip$'
match_a = re.search(regex_pattern, a) objects = set(ls_out.strip().splitlines())
match_b = re.search(regex_pattern, b) ready_dirs = []
if (match_a is not None) and (match_b is not None):
num_a = int(match_a.group(1)) for name in objects:
num_b = int(match_b.group(1)) m = ready_file_pattern.match(name)
return cmp(num_a, num_b) # Given <path>/<number>.ready,
# If it doesn't match the format, fallback to string comparison. # we are interested in <path>/<number> directory
return cmp(a, b)
if m and (name[:-len('.ready')] + '/') in objects:
ls_out_sorted = sorted(ls_out.splitlines(), cmp=compare_filenames) ready_dirs.append((int(m.group(1)), name[:-len('.ready')]))
if not ls_out_sorted:
# This repo is not on Google Storage. if not ready_dirs:
self.print('No bootstrap file for %s found in %s, stderr:\n %s' % self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
(self.mirror_path, self.bootstrap_bucket, (self.mirror_path, self.bootstrap_bucket,
' '.join((ls_err or '').splitlines(True)))) ' '.join((ls_err or '').splitlines(True))))
return False return False
latest_checkout = ls_out_sorted[-1] latest_dir = max(ready_dirs)[1]
# Download zip file to a temporary directory.
try: try:
# create new temporary directory locally
tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath()) tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
self.print('Downloading %s' % latest_checkout) self.RunGit(['init', '--bare'], cwd=tempdir)
self.print('Downloading files in %s/* into %s.' %
(latest_dir, tempdir))
with self.print_duration_of('download'): with self.print_duration_of('download'):
code = gsutil.call('cp', latest_checkout, tempdir) code = gsutil.call('-m', 'cp', '-r', latest_dir + "/*",
tempdir)
if code: if code:
return False return False
filename = os.path.join(tempdir, latest_checkout.split('/')[-1]) except Exception as e:
self.print('Encountered error: %s' % str(e), file=sys.stderr)
# Unpack the file with 7z on Windows, unzip on linux, or fallback. gclient_utils.rmtree(tempdir)
with self.print_duration_of('unzip'):
if not python_fallback:
if sys.platform.startswith('win'):
cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
else:
cmd = ['unzip', filename, '-d', directory]
retcode = subprocess.call(cmd)
else:
try:
with zipfile.ZipFile(filename, 'r') as f:
f.printdir()
f.extractall(directory)
except Exception as e:
self.print('Encountered error: %s' % str(e), file=sys.stderr)
retcode = 1
else:
retcode = 0
finally:
# Clean up the downloaded zipfile.
#
# This is somehow racy on Windows.
# Catching OSError because WindowsError isn't portable and
# pylint complains.
exponential_backoff_retry(
lambda: gclient_utils.rm_file_or_tree(tempdir),
excs=(OSError,),
name='rmtree [%s]' % (tempdir,),
printerr=self.print)
if retcode:
self.print(
'Extracting bootstrap zipfile %s failed.\n'
'Resuming normal operations.' % filename)
return False return False
# delete the old directory
if os.path.exists(directory):
gclient_utils.rmtree(directory)
self.Rename(tempdir, directory)
return True return True
def contains_revision(self, revision): def contains_revision(self, revision):
@ -507,47 +474,45 @@ class Mirror(object):
% os.path.join(self.mirror_path, 'config')) % os.path.join(self.mirror_path, 'config'))
def _ensure_bootstrapped(self, depth, bootstrap, force=False): def _ensure_bootstrapped(self, depth, bootstrap, force=False):
tempdir = None
pack_dir = os.path.join(self.mirror_path, 'objects', 'pack') pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
pack_files = [] pack_files = []
if os.path.isdir(pack_dir): if os.path.isdir(pack_dir):
pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')] pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
self.print('%s has %d .pack files, re-bootstrapping if >%d' % self.print('%s has %d .pack files, re-bootstrapping if >%d' %
(self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT)) (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))
should_bootstrap = (force or should_bootstrap = (force or
not self.exists() or not self.exists() or
len(pack_files) > GC_AUTOPACKLIMIT) len(pack_files) > GC_AUTOPACKLIMIT)
if should_bootstrap:
if self.exists(): if not should_bootstrap:
# Re-bootstrapping an existing mirror; preserve existing fetch spec. if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
self._preserve_fetchspec() logging.warn(
tempdir = tempfile.mkdtemp( 'Shallow fetch requested, but repo cache already exists.')
prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath()) return
bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
if bootstrapped: if self.exists():
# Bootstrap succeeded; delete previous cache, if any. # Re-bootstrapping an existing mirror; preserve existing fetch spec.
gclient_utils.rmtree(self.mirror_path) self._preserve_fetchspec()
elif not self.exists() or not self.supported_project(): else:
# Bootstrap failed due to either os.mkdir(self.mirror_path)
# 1. No previous cache
# 2. Project doesn't have a bootstrap zip file bootstrapped = (not depth and bootstrap and
self.bootstrap_repo(self.mirror_path))
if not bootstrapped:
if not self.exists() or not self.supported_project():
# Bootstrap failed due to:
# 1. No previous cache.
# 2. Project doesn't have a bootstrap folder.
# Start with a bare git dir. # Start with a bare git dir.
self.RunGit(['init', '--bare'], cwd=tempdir) self.RunGit(['init', '--bare'], cwd=self.mirror_path)
else: else:
# Bootstrap failed, previous cache exists; warn and continue. # Bootstrap failed, previous cache exists; warn and continue.
logging.warn( logging.warn(
'Git cache has a lot of pack files (%d). Tried to re-bootstrap ' 'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
'but failed. Continuing with non-optimized repository.' 'but failed. Continuing with non-optimized repository.'
% len(pack_files)) % len(pack_files))
gclient_utils.rmtree(tempdir)
tempdir = None
else:
if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
logging.warn(
'Shallow fetch requested, but repo cache already exists.')
return tempdir
def _fetch(self, rundir, verbose, depth, reset_fetch_config): def _fetch(self, rundir, verbose, depth, reset_fetch_config):
self.config(rundir, reset_fetch_config) self.config(rundir, reset_fetch_config)
@ -583,23 +548,16 @@ class Mirror(object):
if not ignore_lock: if not ignore_lock:
lockfile.lock() lockfile.lock()
tempdir = None
try: try:
tempdir = self._ensure_bootstrapped(depth, bootstrap) self._ensure_bootstrapped(depth, bootstrap)
rundir = tempdir or self.mirror_path self._fetch(self.mirror_path, verbose, depth, reset_fetch_config)
self._fetch(rundir, verbose, depth, reset_fetch_config)
except ClobberNeeded: except ClobberNeeded:
# This is a major failure, we need to clean and force a bootstrap. # This is a major failure, we need to clean and force a bootstrap.
gclient_utils.rmtree(rundir) gclient_utils.rmtree(self.mirror_path)
self.print(GIT_CACHE_CORRUPT_MESSAGE) self.print(GIT_CACHE_CORRUPT_MESSAGE)
tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True) self._ensure_bootstrapped(depth, bootstrap, force=True)
assert tempdir self._fetch(self.mirror_path, verbose, depth, reset_fetch_config)
self._fetch(tempdir, verbose, depth, reset_fetch_config)
finally: finally:
if tempdir:
if os.path.exists(self.mirror_path):
gclient_utils.rmtree(self.mirror_path)
self.Rename(tempdir, self.mirror_path)
if not ignore_lock: if not ignore_lock:
lockfile.unlock() lockfile.unlock()
@ -906,4 +864,4 @@ if __name__ == '__main__':
sys.exit(main(sys.argv[1:])) sys.exit(main(sys.argv[1:]))
except KeyboardInterrupt: except KeyboardInterrupt:
sys.stderr.write('interrupted\n') sys.stderr.write('interrupted\n')
sys.exit(1) sys.exit(1)
Loading…
Cancel
Save