From fa474e8c5e8ec066e90176395bd2343f247010d4 Mon Sep 17 00:00:00 2001 From: Josip Sokcevic Date: Fri, 17 Sep 2021 16:59:49 +0000 Subject: [PATCH] Refactor gsutil and fix concurrecny issue on MacOS This is a partial reland of commit e8ef6259b967f7d76d96fe3039dc7c9614a52d6d, but without actually upgrading gsutil. This refactoring allows easier upgrade of gsutil. R=dpranke@google.com Bug: 1249003 Change-Id: I63433b411e640a9fdb455ce25a51d910be0818a9 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/3166676 Reviewed-by: Dirk Pranke Commit-Queue: Josip Sokcevic --- download_from_google_storage.py | 7 ++--- gsutil.py | 54 ++++++++++++++++++++++----------- tests/gsutil_test.py | 6 ++-- 3 files changed, 43 insertions(+), 24 deletions(-) diff --git a/download_from_google_storage.py b/download_from_google_storage.py index 84bc8e30db..27a67c8dea 100755 --- a/download_from_google_storage.py +++ b/download_from_google_storage.py @@ -79,12 +79,11 @@ class Gsutil(object): VPYTHON3 = ('vpython3.bat' if GetNormalizedPlatform() == 'win32' else 'vpython3') - def __init__(self, path, boto_path=None, version='4.28'): + def __init__(self, path, boto_path=None): if not os.path.exists(path): raise FileNotFoundError('GSUtil not found in %s' % path) self.path = path self.boto_path = boto_path - self.version = version def get_sub_env(self): env = os.environ.copy() @@ -101,12 +100,12 @@ class Gsutil(object): return env def call(self, *args): - cmd = [self.VPYTHON3, self.path, '--force-version', self.version] + cmd = [self.VPYTHON3, self.path] cmd.extend(args) return subprocess2.call(cmd, env=self.get_sub_env()) def check_call(self, *args): - cmd = [self.VPYTHON3, self.path, '--force-version', self.version] + cmd = [self.VPYTHON3, self.path] cmd.extend(args) ((out, err), code) = subprocess2.communicate( cmd, diff --git a/gsutil.py b/gsutil.py index d5423ad7ef..59a0e1466e 100755 --- a/gsutil.py +++ b/gsutil.py @@ -31,11 +31,11 @@ API_URL = 'https://www.googleapis.com/storage/v1/b/pub/o/' THIS_DIR = os.path.dirname(os.path.abspath(__file__)) DEFAULT_BIN_DIR = os.path.join(THIS_DIR, 'external_bin', 'gsutil') -DEFAULT_FALLBACK_GSUTIL = os.path.join( - THIS_DIR, 'third_party', 'gsutil', 'gsutil') IS_WINDOWS = os.name == 'nt' +VERSION = '4.28' + class InvalidGsutilError(Exception): pass @@ -98,7 +98,20 @@ def ensure_gsutil(version, target, clean): return gsutil_bin if not os.path.exists(target): - os.makedirs(target) + try: + os.makedirs(target) + except FileExistsError: + # Another process is prepping workspace, so let's check if gsutil_bin is + # present. If after several checks it's still not, continue with + # downloading gsutil. + delay = 2 # base delay, in seconds + for _ in range(3): # make N attempts + # sleep first as it's not expected to have file ready just yet. + time.sleep(delay) + delay *= 1.5 # next delay increased by that factor + if os.path.isfile(gsutil_bin): + return gsutil_bin + with temporary_directory(target) as instance_dir: # Clean up if we're redownloading a corrupted gsutil. cleanup_path = os.path.join(instance_dir, 'clean') @@ -129,13 +142,18 @@ def ensure_gsutil(version, target, clean): return gsutil_bin -def run_gsutil(force_version, fallback, target, args, clean=False): - if force_version: - gsutil_bin = ensure_gsutil(force_version, target, clean) - else: - gsutil_bin = fallback - disable_update = ['-o', 'GSUtil:software_update_check_period=0'] +def run_gsutil(target, args, clean=False): + gsutil_bin = ensure_gsutil(VERSION, target, clean) + args_opt = ['-o', 'GSUtil:software_update_check_period=0'] + if sys.platform == 'darwin': + # We are experiencing problems with multiprocessing on MacOS where gsutil.py + # may hang. + # This behavior is documented in gsutil codebase, and recommendation is to + # set GSUtil:parallel_process_count=1. + # https://github.com/GoogleCloudPlatform/gsutil/blob/06efc9dc23719fab4fd5fadb506d252bbd3fe0dd/gslib/command.py#L1331 + # https://github.com/GoogleCloudPlatform/gsutil/issues/1100 + args_opt.extend(['-o', 'GSUtil:parallel_process_count=1']) if sys.platform == 'cygwin': # This script requires Windows Python, so invoke with depot_tools' # Python. @@ -147,15 +165,12 @@ def run_gsutil(force_version, fallback, target, args, clean=False): sys.exit(subprocess.call(cmd)) assert sys.platform != 'cygwin' - # Run "gsutil" through "vpython". We need to do this because on GCE instances, - # expectations are made about Python having access to "google-compute-engine" - # and "boto" packages that are not met with non-system Python (e.g., bundles). cmd = [ 'vpython', '-vpython-spec', os.path.join(THIS_DIR, 'gsutil.vpython'), '--', gsutil_bin - ] + disable_update + args + ] + args_opt + args return subprocess.call(cmd, shell=IS_WINDOWS) @@ -163,13 +178,19 @@ def parse_args(): bin_dir = os.environ.get('DEPOT_TOOLS_GSUTIL_BIN_DIR', DEFAULT_BIN_DIR) parser = argparse.ArgumentParser() - parser.add_argument('--force-version', default='4.30') + parser.add_argument('--clean', action='store_true', help='Clear any existing gsutil package, forcing a new download.') - parser.add_argument('--fallback', default=DEFAULT_FALLBACK_GSUTIL) parser.add_argument('--target', default=bin_dir, help='The target directory to download/store a gsutil version in. ' '(default is %(default)s).') + + # These two args exist for backwards-compatibility but are no-ops. + parser.add_argument('--force-version', default=VERSION, + help='(deprecated, this flag has no effect)') + parser.add_argument('--fallback', + help='(deprecated, this flag has no effect)') + parser.add_argument('args', nargs=argparse.REMAINDER) args, extras = parser.parse_known_args() @@ -182,8 +203,7 @@ def parse_args(): def main(): args = parse_args() - return run_gsutil(args.force_version, args.fallback, args.target, args.args, - clean=args.clean) + return run_gsutil(args.target, args.args, clean=args.clean) if __name__ == '__main__': diff --git a/tests/gsutil_test.py b/tests/gsutil_test.py index cd81291046..d9b7bbbd5c 100755 --- a/tests/gsutil_test.py +++ b/tests/gsutil_test.py @@ -72,7 +72,7 @@ class GsutilUnitTests(unittest.TestCase): setattr(subprocess, 'call', self.old_call) def test_download_gsutil(self): - version = '4.2' + version = gsutil.VERSION filename = 'gsutil_%s.zip' % version full_filename = os.path.join(self.tempdir, filename) fake_file = b'This is gsutil.zip' @@ -113,7 +113,7 @@ class GsutilUnitTests(unittest.TestCase): self.assertEqual(self.fake.expectations, []) def test_ensure_gsutil_full(self): - version = '4.2' + version = gsutil.VERSION gsutil_dir = os.path.join(self.tempdir, 'gsutil_%s' % version, 'gsutil') gsutil_bin = os.path.join(gsutil_dir, 'gsutil') gsutil_flag = os.path.join(gsutil_dir, 'install.flag') @@ -137,7 +137,7 @@ class GsutilUnitTests(unittest.TestCase): self.assertEqual(self.fake.expectations, []) def test_ensure_gsutil_short(self): - version = '4.2' + version = gsutil.VERSION gsutil_dir = os.path.join(self.tempdir, 'gsutil_%s' % version, 'gsutil') gsutil_bin = os.path.join(gsutil_dir, 'gsutil') gsutil_flag = os.path.join(gsutil_dir, 'install.flag')