From 0cbd5a5ec22dcba2a999fbb86a1b8ea7f122ec25 Mon Sep 17 00:00:00 2001 From: Karen Qian Date: Mon, 29 Apr 2019 20:14:50 +0000 Subject: [PATCH] Stop using compression for git cache. Change git cache download from GS git directory directly. Bug: 943696 Change-Id: Ibe473effbf18d5635736c3ca0ab0ef0bbf21be8b Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1575003 Reviewed-by: Andrii Shyshkalov Commit-Queue: Karen Qian --- git_cache.py | 174 +++++++++++++++++++-------------------------------- 1 file changed, 66 insertions(+), 108 deletions(-) diff --git a/git_cache.py b/git_cache.py index bd534d085..5bc15d75f 100755 --- a/git_cache.py +++ b/git_cache.py @@ -376,87 +376,54 @@ class Mirror(object): """ if not self.bootstrap_bucket: return False - python_fallback = ( - (sys.platform.startswith('win') and - not gclient_utils.FindExecutable('7z')) or - (not gclient_utils.FindExecutable('unzip')) or - ('ZIP64_SUPPORT' not in subprocess.check_output(["unzip", "-v"])) - ) - - gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir) + gsutil = Gsutil(self.gsutil_exe, boto_path=None) - # Get the most recent version of the zipfile. - _, ls_out, ls_err = gsutil.check_call('ls', gs_folder) - - def compare_filenames(a, b): - # |a| and |b| look like gs://.../.../9999.zip. They both have the same - # gs://bootstrap_bucket/basedir/ prefix because they come from the same - # `gsutil ls`. - # This function only compares the numeral parts before .zip. - regex_pattern = r'/(\d+)\.zip$' - match_a = re.search(regex_pattern, a) - match_b = re.search(regex_pattern, b) - if (match_a is not None) and (match_b is not None): - num_a = int(match_a.group(1)) - num_b = int(match_b.group(1)) - return cmp(num_a, num_b) - # If it doesn't match the format, fallback to string comparison. - return cmp(a, b) - - ls_out_sorted = sorted(ls_out.splitlines(), cmp=compare_filenames) - if not ls_out_sorted: - # This repo is not on Google Storage. + + # Get the most recent version of the directory. + # This is determined from the most recent version of a .ready file. + # The .ready file is only uploaded when an entire directory has been + # uploaded to GS. + _, ls_out, ls_err = gsutil.check_call('ls', self._gs_path) + + ready_file_pattern = re.compile(r'.*/(\d+).ready$') + + objects = set(ls_out.strip().splitlines()) + ready_dirs = [] + + for name in objects: + m = ready_file_pattern.match(name) + # Given /.ready, + # we are interested in / directory + + if m and (name[:-len('.ready')] + '/') in objects: + ready_dirs.append((int(m.group(1)), name[:-len('.ready')])) + + if not ready_dirs: self.print('No bootstrap file for %s found in %s, stderr:\n %s' % (self.mirror_path, self.bootstrap_bucket, - ' '.join((ls_err or '').splitlines(True)))) + ' '.join((ls_err or '').splitlines(True)))) return False - latest_checkout = ls_out_sorted[-1] + latest_dir = max(ready_dirs)[1] - # Download zip file to a temporary directory. try: + # create new temporary directory locally tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath()) - self.print('Downloading %s' % latest_checkout) + self.RunGit(['init', '--bare'], cwd=tempdir) + self.print('Downloading files in %s/* into %s.' % + (latest_dir, tempdir)) with self.print_duration_of('download'): - code = gsutil.call('cp', latest_checkout, tempdir) + code = gsutil.call('-m', 'cp', '-r', latest_dir + "/*", + tempdir) if code: return False - filename = os.path.join(tempdir, latest_checkout.split('/')[-1]) - - # Unpack the file with 7z on Windows, unzip on linux, or fallback. - with self.print_duration_of('unzip'): - if not python_fallback: - if sys.platform.startswith('win'): - cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename] - else: - cmd = ['unzip', filename, '-d', directory] - retcode = subprocess.call(cmd) - else: - try: - with zipfile.ZipFile(filename, 'r') as f: - f.printdir() - f.extractall(directory) - except Exception as e: - self.print('Encountered error: %s' % str(e), file=sys.stderr) - retcode = 1 - else: - retcode = 0 - finally: - # Clean up the downloaded zipfile. - # - # This is somehow racy on Windows. - # Catching OSError because WindowsError isn't portable and - # pylint complains. - exponential_backoff_retry( - lambda: gclient_utils.rm_file_or_tree(tempdir), - excs=(OSError,), - name='rmtree [%s]' % (tempdir,), - printerr=self.print) - - if retcode: - self.print( - 'Extracting bootstrap zipfile %s failed.\n' - 'Resuming normal operations.' % filename) + except Exception as e: + self.print('Encountered error: %s' % str(e), file=sys.stderr) + gclient_utils.rmtree(tempdir) return False + # delete the old directory + if os.path.exists(directory): + gclient_utils.rmtree(directory) + self.Rename(tempdir, directory) return True def contains_revision(self, revision): @@ -507,47 +474,45 @@ class Mirror(object): % os.path.join(self.mirror_path, 'config')) def _ensure_bootstrapped(self, depth, bootstrap, force=False): - tempdir = None pack_dir = os.path.join(self.mirror_path, 'objects', 'pack') pack_files = [] - if os.path.isdir(pack_dir): pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')] self.print('%s has %d .pack files, re-bootstrapping if >%d' % - (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT)) + (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT)) should_bootstrap = (force or not self.exists() or len(pack_files) > GC_AUTOPACKLIMIT) - if should_bootstrap: - if self.exists(): - # Re-bootstrapping an existing mirror; preserve existing fetch spec. - self._preserve_fetchspec() - tempdir = tempfile.mkdtemp( - prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath()) - bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir) - if bootstrapped: - # Bootstrap succeeded; delete previous cache, if any. - gclient_utils.rmtree(self.mirror_path) - elif not self.exists() or not self.supported_project(): - # Bootstrap failed due to either - # 1. No previous cache - # 2. Project doesn't have a bootstrap zip file + + if not should_bootstrap: + if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')): + logging.warn( + 'Shallow fetch requested, but repo cache already exists.') + return + + if self.exists(): + # Re-bootstrapping an existing mirror; preserve existing fetch spec. + self._preserve_fetchspec() + else: + os.mkdir(self.mirror_path) + + bootstrapped = (not depth and bootstrap and + self.bootstrap_repo(self.mirror_path)) + + if not bootstrapped: + if not self.exists() or not self.supported_project(): + # Bootstrap failed due to: + # 1. No previous cache. + # 2. Project doesn't have a bootstrap folder. # Start with a bare git dir. - self.RunGit(['init', '--bare'], cwd=tempdir) + self.RunGit(['init', '--bare'], cwd=self.mirror_path) else: # Bootstrap failed, previous cache exists; warn and continue. logging.warn( 'Git cache has a lot of pack files (%d). Tried to re-bootstrap ' 'but failed. Continuing with non-optimized repository.' % len(pack_files)) - gclient_utils.rmtree(tempdir) - tempdir = None - else: - if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')): - logging.warn( - 'Shallow fetch requested, but repo cache already exists.') - return tempdir def _fetch(self, rundir, verbose, depth, reset_fetch_config): self.config(rundir, reset_fetch_config) @@ -583,23 +548,16 @@ class Mirror(object): if not ignore_lock: lockfile.lock() - tempdir = None try: - tempdir = self._ensure_bootstrapped(depth, bootstrap) - rundir = tempdir or self.mirror_path - self._fetch(rundir, verbose, depth, reset_fetch_config) + self._ensure_bootstrapped(depth, bootstrap) + self._fetch(self.mirror_path, verbose, depth, reset_fetch_config) except ClobberNeeded: # This is a major failure, we need to clean and force a bootstrap. - gclient_utils.rmtree(rundir) + gclient_utils.rmtree(self.mirror_path) self.print(GIT_CACHE_CORRUPT_MESSAGE) - tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True) - assert tempdir - self._fetch(tempdir, verbose, depth, reset_fetch_config) + self._ensure_bootstrapped(depth, bootstrap, force=True) + self._fetch(self.mirror_path, verbose, depth, reset_fetch_config) finally: - if tempdir: - if os.path.exists(self.mirror_path): - gclient_utils.rmtree(self.mirror_path) - self.Rename(tempdir, self.mirror_path) if not ignore_lock: lockfile.unlock() @@ -906,4 +864,4 @@ if __name__ == '__main__': sys.exit(main(sys.argv[1:])) except KeyboardInterrupt: sys.stderr.write('interrupted\n') - sys.exit(1) + sys.exit(1) \ No newline at end of file