From 6fc0c97ab284021b72e3bc962f7fa879ffcad65b Mon Sep 17 00:00:00 2001 From: Josip Sokcevic Date: Mon, 22 Jan 2024 21:15:40 +0000 Subject: [PATCH] [git_cache] Track if git cache is fully initialized git_cache populate can get interrupted midway (e.g. by LUCI CV sending a signal that the build is no longer needed). When that happens, a git mirror may be in a state where some commits are available, but cloning such a repository results in an empty repository. This change ensures that the initial fetch operation finished successfully. R=ddoman@google.com Bug: 1517944 Change-Id: I1ee860860877dbfff7a444b45fe4515fe26b248c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5219032 Commit-Queue: Josip Sokcevic Reviewed-by: Scott Lee --- git_cache.py | 37 ++++++++++++++++++++++++++++--------- tests/git_cache_test.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 9 deletions(-) diff --git a/git_cache.py b/git_cache.py index 204e59195..0c7c92487 100755 --- a/git_cache.py +++ b/git_cache.py @@ -26,6 +26,7 @@ import subcommand GC_AUTOPACKLIMIT = 50 GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.' +INIT_SENTIENT_FILE = ".mirror_init" # gsutil creates many processes and threads. Creating too many gsutil cp # processes may result in running out of resources, and may perform worse due to @@ -136,6 +137,10 @@ class Mirror(object): self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0)) + @property + def _init_sentient_file(self): + return os.path.join(self.mirror_path, INIT_SENTIENT_FILE) + @property def bootstrap_bucket(self): b = os.getenv('OVERRIDE_BOOTSTRAP_BUCKET') @@ -459,6 +464,9 @@ class Mirror(object): # 2. Project doesn't have a bootstrap folder. # Start with a bare git dir. 
self.RunGit(['init', '--bare']) + with open(self._init_sentient_file, 'w'): + # Create sentient file + pass # Set appropriate symbolic-ref remote_info = exponential_backoff_retry( lambda: subprocess.check_output( @@ -523,6 +531,8 @@ class Mirror(object): self.RunGit(['fetch', 'origin', commit], retry=True) except subprocess.CalledProcessError: logging.warning('Fetch of %s failed' % commit) + if os.path.isfile(self._init_sentient_file): + os.remove(self._init_sentient_file) def populate(self, depth=None, @@ -537,20 +547,29 @@ class Mirror(object): depth = 10000 gclient_utils.safe_makedirs(self.GetCachePath()) + def bootstrap(force=False): + self._ensure_bootstrapped(depth, + bootstrap, + reset_fetch_config, + force=force) + self._fetch(verbose, depth, no_fetch_tags, reset_fetch_config) + + def wipe_cache(): + self.print(GIT_CACHE_CORRUPT_MESSAGE) + gclient_utils.rmtree(self.mirror_path) + with lockfile.lock(self.mirror_path, lock_timeout): + if os.path.isfile(self._init_sentient_file): + # Previous bootstrap didn't finish + wipe_cache() + try: - self._ensure_bootstrapped(depth, bootstrap, reset_fetch_config) - self._fetch(verbose, depth, no_fetch_tags, reset_fetch_config) + bootstrap() except ClobberNeeded: # This is a major failure, we need to clean and force a # bootstrap. 
- gclient_utils.rmtree(self.mirror_path) - self.print(GIT_CACHE_CORRUPT_MESSAGE) - self._ensure_bootstrapped(depth, - bootstrap, - reset_fetch_config, - force=True) - self._fetch(verbose, depth, no_fetch_tags, reset_fetch_config) + wipe_cache() + bootstrap(force=True) def update_bootstrap(self, prune=False, gc_aggressive=False): # NOTE: There have been cases where repos were being recursively diff --git a/tests/git_cache_test.py b/tests/git_cache_test.py index cd7851ec3..b9a8d0feb 100755 --- a/tests/git_cache_test.py +++ b/tests/git_cache_test.py @@ -148,6 +148,34 @@ class GitCacheTest(unittest.TestCase): self.assertNotIn(git_cache.GIT_CACHE_CORRUPT_MESSAGE, sys.stdout.getvalue()) + @mock.patch('sys.stdout', StringIO()) + def testBadInit(self): + self.git(['init', '-q']) + with open(os.path.join(self.origin_dir, 'foo'), 'w') as f: + f.write('touched\n') + self.git(['add', 'foo']) + self.git([ + '-c', 'user.name=Test user', '-c', 'user.email=joj@test.com', + 'commit', '-m', 'foo' + ]) + + mirror = git_cache.Mirror(self.origin_dir) + + # Simulate init being interrupted during fetch phase. + with mock.patch.object(mirror, '_fetch'): + mirror.populate() + + # Corrupt message is not expected at this point since it was + # "interrupted". + self.assertNotIn(git_cache.GIT_CACHE_CORRUPT_MESSAGE, + sys.stdout.getvalue()) + + # We call mirror.populate() without _fetch patched. This time, a + # sentient file should prompt cache deletion. + mirror.populate() + self.assertIn(git_cache.GIT_CACHE_CORRUPT_MESSAGE, + sys.stdout.getvalue()) + def _makeGitRepoWithTag(self): self.git(['init', '-q']) with open(os.path.join(self.origin_dir, 'foo'), 'w') as f: