diff --git a/recipe_modules/depot_tools/api.py b/recipe_modules/depot_tools/api.py
index 68b2bbdf2c..6b41e63d9b 100644
--- a/recipe_modules/depot_tools/api.py
+++ b/recipe_modules/depot_tools/api.py
@@ -21,6 +21,8 @@ class DepotToolsApi(recipe_api.RecipeApi):
   def gn_py_path(self):
     return self.package_repo_resource('gn.py')
 
+  # TODO(dnj): Remove this once everything uses the "gsutil" recipe module
+  # version.
   @property
   def gsutil_py_path(self):
     return self.package_repo_resource('gsutil.py')
diff --git a/recipe_modules/gsutil/__init__.py b/recipe_modules/gsutil/__init__.py
new file mode 100644
index 0000000000..240b08c670
--- /dev/null
+++ b/recipe_modules/gsutil/__init__.py
@@ -0,0 +1,4 @@
+DEPS = [
+  'recipe_engine/path',
+  'recipe_engine/python',
+]
diff --git a/recipe_modules/gsutil/api.py b/recipe_modules/gsutil/api.py
new file mode 100644
index 0000000000..dd7e357cbe
--- /dev/null
+++ b/recipe_modules/gsutil/api.py
@@ -0,0 +1,252 @@
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import re
+
+from recipe_engine import recipe_api
+
+class GSUtilApi(recipe_api.RecipeApi):
+  """Recipe module API that runs gsutil commands as build steps."""
+
+  @property
+  def gsutil_py_path(self):
+    """Path of the gsutil.py launcher in this package's repository."""
+    return self.package_repo_resource('gsutil.py')
+
+  def __call__(self, cmd, name=None, use_retry_wrapper=True, version=None,
+               parallel_upload=False, multithreaded=False, **kwargs):
+    """A step to run arbitrary gsutil commands.
+
+    Note that this assumes that gsutil authentication environment variables
+    (AWS_CREDENTIAL_FILE and BOTO_CONFIG) are already set, though if you want to
+    set them to something else you can always do so using the env={} kwarg.
+
+    Note also that gsutil does its own wildcard processing, so wildcards are
+    valid in file-like portions of the cmd. See 'gsutil help wildcards'.
+
+    Arguments:
+      cmd: list of (string) arguments to pass to gsutil.
+           Include gsutil-level options first (see 'gsutil help options').
+      name: the (string) name of the step to use.
+            Defaults to the first non-flag token in the cmd.
+      use_retry_wrapper: if True, run gsutil via gsutil_smart_retry.py.
+      version: if set, passed to gsutil.py as --force-version.
+      parallel_upload: if True, pass
+          -o GSUtil:parallel_composite_upload_threshold=50M.
+      multithreaded: if True, pass -m.
+      kwargs: forwarded unchanged to the python step.
+
+    Returns: the result of the python step.
+
+    Raises:
+      ValueError: if name is not given and cmd contains only flags.
+    """
+    if not name:
+      # next() with a default replaces the Python 2-only generator .next(),
+      # and turns an all-flags cmd into a clear error instead of a bare
+      # StopIteration.
+      name = next((t for t in cmd if not t.startswith('-')), None)
+      if name is None:
+        raise ValueError('no non-flag token in %r; pass name=' % (cmd,))
+    full_name = 'gsutil ' + name
+
+    gsutil_path = self.gsutil_py_path
+    cmd_prefix = []
+
+    if use_retry_wrapper:
+      # We pass the real gsutil_path to the wrapper so it doesn't have to do
+      # brittle path logic.
+      cmd_prefix = ['--', gsutil_path]
+      gsutil_path = self.resource('gsutil_smart_retry.py')
+
+    if version:
+      cmd_prefix.extend(['--force-version', version])
+
+    if parallel_upload:
+      cmd_prefix.extend([
+        '-o',
+        'GSUtil:parallel_composite_upload_threshold=50M'
+      ])
+
+    if multithreaded:
+      cmd_prefix.append('-m')
+
+    if use_retry_wrapper:
+      # The -- argument for the wrapped gsutil.py is escaped as ---- as python
+      # 2.7.3 removes all occurrences of --, not only the first. It is
+      # unescaped in gsutil_smart_retry.py and then passed as -- to gsutil.py.
+      # Note, that 2.7.6 doesn't have this problem, but it doesn't hurt.
+      cmd_prefix.append('----')
+    else:
+      cmd_prefix.append('--')
+
+    # list(cmd) lets callers pass any sequence, not only a list.
+    return self.m.python(full_name, gsutil_path, cmd_prefix + list(cmd),
+                         infra_step=True, **kwargs)
+
+  def upload(self, source, bucket, dest, args=None, link_name='gsutil.upload',
+             metadata=None, unauthenticated_url=False, **kwargs):
+    """Uploads source to gs://<bucket>/<dest> and returns the step result.
+
+    If link_name is truthy, a link to the uploaded object is added to the
+    step presentation under that name.
+    """
+    args = [] if args is None else args[:]
+    # Note that metadata arguments have to be passed before the command cp.
+    metadata_args = self._generate_metadata_args(metadata)
+    full_dest = 'gs://%s/%s' % (bucket, dest)
+    cmd = metadata_args + ['cp'] + args + [source, full_dest]
+    name = kwargs.pop('name', 'upload')
+
+    result = self(cmd, name, **kwargs)
+
+    if link_name:
+      result.presentation.links[link_name] = self._http_url(
+          bucket, dest, unauthenticated_url=unauthenticated_url)
+    return result
+
+  def download(self, bucket, source, dest, args=None, **kwargs):
+    """Downloads gs://<bucket>/<source> to dest; returns the step result."""
+    args = [] if args is None else args[:]
+    full_source = 'gs://%s/%s' % (bucket, source)
+    cmd = ['cp'] + args + [full_source, dest]
+    name = kwargs.pop('name', 'download')
+    return self(cmd, name, **kwargs)
+
+  def download_url(self, url, dest, args=None, **kwargs):
+    """Downloads the object at url to dest; returns the step result.
+
+    url may use any prefix accepted by _normalize_url.
+    """
+    args = args or []
+    url = self._normalize_url(url)
+    cmd = ['cp'] + args + [url, dest]
+    name = kwargs.pop('name', 'download_url')
+    # Return the step result like download() does.
+    return self(cmd, name, **kwargs)
+
+  def cat(self, url, args=None, **kwargs):
+    """Runs 'gsutil cat' on url; returns the step result."""
+    args = args or []
+    url = self._normalize_url(url)
+    cmd = ['cat'] + args + [url]
+    name = kwargs.pop('name', 'cat')
+    return self(cmd, name, **kwargs)
+
+  def copy(self, source_bucket, source, dest_bucket, dest, args=None,
+           link_name='gsutil.copy', metadata=None, unauthenticated_url=False,
+           **kwargs):
+    """Copies an object between buckets; returns the step result.
+
+    If link_name is truthy, a link to the destination object is added to the
+    step presentation under that name.
+    """
+    # Copy so the caller's list is never mutated.
+    args = [] if args is None else args[:]
+    # Metadata (-h) arguments have to be passed before the command cp, as in
+    # upload().
+    metadata_args = self._generate_metadata_args(metadata)
+    full_source = 'gs://%s/%s' % (source_bucket, source)
+    full_dest = 'gs://%s/%s' % (dest_bucket, dest)
+    cmd = metadata_args + ['cp'] + args + [full_source, full_dest]
+    name = kwargs.pop('name', 'copy')
+
+    result = self(cmd, name, **kwargs)
+
+    if link_name:
+      result.presentation.links[link_name] = self._http_url(
+          dest_bucket, dest, unauthenticated_url=unauthenticated_url)
+    return result
+
+  def list(self, url, args=None, **kwargs):
+    """Runs 'gsutil ls' on url; returns the step result."""
+    args = args or []
+    url = self._normalize_url(url)
+    cmd = ['ls'] + args + [url]
+    name = kwargs.pop('name', 'list')
+    return self(cmd, name, **kwargs)
+
+  def signurl(self, private_key_file, bucket, dest, args=None, **kwargs):
+    """Signs gs://<bucket>/<dest> with 'gsutil signurl'."""
+    args = args or []
+    full_source = 'gs://%s/%s' % (bucket, dest)
+    cmd = ['signurl'] + args + [private_key_file, full_source]
+    name = kwargs.pop('name', 'signurl')
+    return self(cmd, name, **kwargs)
+
+  def remove_url(self, url, args=None, **kwargs):
+    """Runs 'gsutil rm' on url; returns the step result."""
+    args = args or []
+    url = self._normalize_url(url)
+    cmd = ['rm'] + args + [url]
+    name = kwargs.pop('name', 'remove')
+    return self(cmd, name, **kwargs)
+
+  def _generate_metadata_args(self, metadata):
+    """Returns the '-h' arguments for a metadata dict, sorted by key.
+
+    A value of None emits the bare field name, with no ':value' suffix.
+    """
+    result = []
+    if metadata:
+      # Iterating sorted keys works on Python 2 and 3 alike (no iteritems, no
+      # tuple-unpacking lambda).
+      for k in sorted(metadata):
+        v = metadata[k]
+        field = self._get_metadata_field(k)
+        param = (field) if v is None else ('%s:%s' % (field, v))
+        result += ['-h', param]
+    return result
+
+  def _normalize_url(self, url):
+    """Rewrites a supported Google Storage URL to its gs:// form.
+
+    Raises AssertionError if url has none of the known prefixes.
+    """
+    gs_prefix = 'gs://'
+    # Prefixes that can be rewritten to gs://.
+    for prefix in (
+        gs_prefix,
+        'https://storage.cloud.google.com/',
+        'https://storage.googleapis.com/',
+        ):
+      if url.startswith(prefix):
+        return gs_prefix + url[len(prefix):]
+    raise AssertionError("%s cannot be normalized" % url)
+
+  @classmethod
+  def _http_url(cls, bucket, dest, unauthenticated_url=False):
+    """Returns the https URL for the object <dest> in <bucket>."""
+    if unauthenticated_url:
+      base = 'https://storage.googleapis.com/%s/%s'
+    else:
+      base = 'https://storage.cloud.google.com/%s/%s'
+    return base % (bucket, dest)
+
+  @staticmethod
+  def _get_metadata_field(name, provider_prefix=None):
+    """Returns: (str) the metadata field to use with Google Storage
+
+    The Google Storage specification for metadata can be found at:
+    https://developers.google.com/storage/docs/gsutil/addlhelp/WorkingWithObjectMetadata
+    """
+    # Already contains custom provider prefix
+    if name.lower().startswith('x-'):
+      return name
+
+    # See if it's innately supported by Google Storage
+    if name in (
+        'Cache-Control',
+        'Content-Disposition',
+        'Content-Encoding',
+        'Content-Language',
+        'Content-MD5',
+        'Content-Type',
+        ):
+      return name
+
+    # Add provider prefix
+    if not provider_prefix:
+      provider_prefix = 'x-goog-meta'
+    return '%s-%s' % (provider_prefix, name)
diff --git
a/recipe_modules/gsutil/example.expected/basic.json b/recipe_modules/gsutil/example.expected/basic.json new file mode 100644 index 0000000000..90b01f796d --- /dev/null +++ b/recipe_modules/gsutil/example.expected/basic.json @@ -0,0 +1,186 @@ +[ + { + "cmd": [ + "python", + "-u", + "RECIPE_MODULE[depot_tools::gsutil]/resources/gsutil_smart_retry.py", + "--", + "RECIPE_PACKAGE_REPO[depot_tools]/gsutil.py", + "----", + "-h", + "Cache-Control:no-cache", + "-h", + "x-goog-meta-Remove-Me", + "-h", + "x-goog-meta-Test-Field:value", + "-h", + "x-custom-field:custom-value", + "cp", + "[TMP_BASE]/boom", + "gs://example/some/random/path/to/boom" + ], + "name": "gsutil upload", + "~followup_annotations": [ + "@@@STEP_LINK@gsutil.upload@https://storage.googleapis.com/example/some/random/path/to/boom@@@" + ] + }, + { + "cmd": [ + "python", + "-u", + "RECIPE_PACKAGE_REPO[depot_tools]/gsutil.py", + "-o", + "GSUtil:parallel_composite_upload_threshold=50M", + "-m", + "--", + "-h", + "Cache-Control:no-cache", + "-h", + "x-goog-meta-Remove-Me", + "-h", + "x-goog-meta-Test-Field:value", + "-h", + "x-custom-field:custom-value", + "cp", + "[TMP_BASE]/boom", + "gs://example/some/random/path/to/boom" + ], + "name": "gsutil upload (2)", + "~followup_annotations": [ + "@@@STEP_LINK@gsutil.upload@https://storage.googleapis.com/example/some/random/path/to/boom@@@" + ] + }, + { + "cmd": [ + "python", + "-u", + "RECIPE_MODULE[depot_tools::gsutil]/resources/gsutil_smart_retry.py", + "--", + "RECIPE_PACKAGE_REPO[depot_tools]/gsutil.py", + "----", + "cp", + "gs://example/some/random/path/**", + "gs://example/staging" + ], + "name": "gsutil cp" + }, + { + "cmd": [ + "python", + "-u", + "RECIPE_MODULE[depot_tools::gsutil]/resources/gsutil_smart_retry.py", + "--", + "RECIPE_PACKAGE_REPO[depot_tools]/gsutil.py", + "--force-version", + "3.25", + "----", + "cp", + "gs://example/some/random/path/**", + "gs://example/staging" + ], + "name": "gsutil cp (2)" + }, + { + "cmd": [ + "python", + "-u", + 
"RECIPE_MODULE[depot_tools::gsutil]/resources/gsutil_smart_retry.py", + "--", + "RECIPE_PACKAGE_REPO[depot_tools]/gsutil.py", + "----", + "cp", + "gs://example/some/random/path/to/boom", + "[TMP_BASE]/boom" + ], + "name": "gsutil gsutil download url" + }, + { + "cmd": [ + "python", + "-u", + "RECIPE_MODULE[depot_tools::gsutil]/resources/gsutil_smart_retry.py", + "--", + "RECIPE_PACKAGE_REPO[depot_tools]/gsutil.py", + "----", + "cp", + "gs://example/staging/to/boom", + "[TMP_BASE]/erang" + ], + "name": "gsutil download" + }, + { + "cmd": [ + "python", + "-u", + "RECIPE_MODULE[depot_tools::gsutil]/resources/gsutil_smart_retry.py", + "--", + "RECIPE_PACKAGE_REPO[depot_tools]/gsutil.py", + "----", + "signurl", + "path/to/key", + "gs://example/some/random/path/to/boom" + ], + "name": "gsutil signed url" + }, + { + "cmd": [ + "python", + "-u", + "RECIPE_MODULE[depot_tools::gsutil]/resources/gsutil_smart_retry.py", + "--", + "RECIPE_PACKAGE_REPO[depot_tools]/gsutil.py", + "----", + "rm", + "gs://example/staging/to/boom" + ], + "name": "gsutil remove" + }, + { + "cmd": [ + "python", + "-u", + "RECIPE_MODULE[depot_tools::gsutil]/resources/gsutil_smart_retry.py", + "--", + "RECIPE_PACKAGE_REPO[depot_tools]/gsutil.py", + "----", + "ls", + "gs://example/foo" + ], + "name": "gsutil list" + }, + { + "cmd": [ + "python", + "-u", + "RECIPE_MODULE[depot_tools::gsutil]/resources/gsutil_smart_retry.py", + "--", + "RECIPE_PACKAGE_REPO[depot_tools]/gsutil.py", + "----", + "cp", + "gs://example/some/random/path/to/boom", + "gs://example/staging/to/boom" + ], + "name": "gsutil copy", + "~followup_annotations": [ + "@@@STEP_LINK@gsutil.copy@https://storage.cloud.google.com/example/staging/to/boom@@@" + ] + }, + { + "cmd": [ + "python", + "-u", + "RECIPE_MODULE[depot_tools::gsutil]/resources/gsutil_smart_retry.py", + "--", + "RECIPE_PACKAGE_REPO[depot_tools]/gsutil.py", + "----", + "cat", + "gs://example/foo" + ], + "name": "gsutil cat" + }, + { + "name": "$result", + "recipe_result": 
null,
+    "status_code": 0
+  }
+]
\ No newline at end of file
diff --git a/recipe_modules/gsutil/example.py b/recipe_modules/gsutil/example.py
new file mode 100644
index 0000000000..8b3e408a91
--- /dev/null
+++ b/recipe_modules/gsutil/example.py
@@ -0,0 +1,77 @@
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+DEPS = [
+  'gsutil',
+  'recipe_engine/path',
+]
+
+
+def RunSteps(api):
+  """Move things around in a loop!"""
+  local_file = api.path['tmp_base'].join('boom')
+  bucket = 'example'
+  cloud_file = 'some/random/path/to/boom'
+
+  api.gsutil.upload(local_file, bucket, cloud_file,
+      metadata={
+        'Test-Field': 'value',
+        'Remove-Me': None,
+        'x-custom-field': 'custom-value',
+        'Cache-Control': 'no-cache',
+      },
+      unauthenticated_url=True)
+
+  # Upload without retry wrapper.
+  api.gsutil.upload(local_file, bucket, cloud_file,
+      metadata={
+        'Test-Field': 'value',
+        'Remove-Me': None,
+        'x-custom-field': 'custom-value',
+        'Cache-Control': 'no-cache',
+      },
+      unauthenticated_url=True,
+      parallel_upload=True,
+      multithreaded=True,
+      use_retry_wrapper=False)
+
+  api.gsutil(['cp',
+              'gs://%s/some/random/path/**' % bucket,
+              'gs://%s/staging' % bucket])
+
+  api.gsutil(['cp',
+              'gs://%s/some/random/path/**' % bucket,
+              'gs://%s/staging' % bucket], version='3.25')
+
+  api.gsutil.download_url(
+      'https://storage.cloud.google.com/' + bucket + '/' + cloud_file,
+      local_file,
+      name='gsutil download url')
+
+  # Non-normalized URL.
+  try:
+    api.gsutil.download_url(
+        'https://someotherservice.localhost',
+        local_file,
+        name='gsutil download url')
+  except AssertionError:
+    pass
+
+  new_cloud_file = 'staging/to/boom'
+  new_local_file = api.path['tmp_base'].join('erang')
+  api.gsutil.download(bucket, new_cloud_file, new_local_file)
+
+  private_key_file = 'path/to/key'
+  _signed_url = api.gsutil.signurl(private_key_file, bucket, cloud_file,
+      name='signed url')
+  api.gsutil.remove_url('gs://%s/%s' % (bucket, new_cloud_file))
+
+  api.gsutil.list('gs://%s/foo' % bucket)
+  api.gsutil.copy(bucket, cloud_file, bucket, new_cloud_file)
+
+  api.gsutil.cat('gs://%s/foo' % bucket)
+
+
+def GenTests(api):
+  yield api.test('basic')
diff --git a/recipe_modules/gsutil/resources/gsutil_smart_retry.py b/recipe_modules/gsutil/resources/gsutil_smart_retry.py
new file mode 100755
index 0000000000..ebc8cb48ee
--- /dev/null
+++ b/recipe_modules/gsutil/resources/gsutil_smart_retry.py
@@ -0,0 +1,84 @@
+#!/usr/bin/python
+#
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Wrapper that does auto-retry for gsutil.
+
+Pass the path to the real gsutil as the first argument.
+
+Deletes ~/.gsutil after failures, which sometimes helps.
+"""
+
+
+import argparse
+import logging
+import os
+import shutil
+import subprocess
+import sys
+
+
+def main(argv):
+  """Runs the wrapped gsutil command, retrying when it fails.
+
+  Args:
+    argv: full argument vector; argv[0] is the program name.
+
+  Returns:
+    0 as soon as one attempt succeeds, otherwise the exit code of the last
+    failed attempt (1 if a retry count below 1 meant nothing was run).
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      'command', metavar='ARG', nargs='+',
+      help='the gsutil command (including the gsutil path) to run')
+  # No nargs=1 on the retry counts: it would make argparse store a one-element
+  # list whenever the flag is given, which range() below cannot consume.
+  parser.add_argument('--soft-retries',
+                      metavar='N', default=2, type=int,
+                      help='number of times to retry')
+  parser.add_argument('--hard-retries',
+                      metavar='N', default=2, type=int,
+                      help='number of times to retry, with deleting trackers ')
+  args = parser.parse_args(argv[1:])
+
+  # The -- argument for the wrapped gsutil.py is escaped as ---- as python
+  # 2.7.3 removes all occurrences of --, not only the first.
+  if '----' in args.command:
+    args.command[args.command.index('----')] = '--'
+
+  cmd = [sys.executable, '-u'] + args.command
+
+  # Defined up front so the final return works even if no attempt is made.
+  retcode = 1
+
+  for hard in range(args.hard_retries):
+    for soft in range(args.soft_retries):
+      retcode = subprocess.call(cmd)
+
+      if retcode == 0:
+        return 0
+
+      logging.warning('Command %s failed with retcode %d, try %d.%d.',
+                      ' '.join(cmd), retcode, hard+1, soft+1)
+
+    # Failed at least once, try deleting the tracker files
+    try:
+      logging.warning('Trying harder: deleting tracker files')
+      gsutil_dir = os.path.join(os.environ['HOME'], '.gsutil')
+      logging.info('Removing %s', gsutil_dir)
+      shutil.rmtree(gsutil_dir)
+    except (KeyError, OSError) as e:
+      # Best effort: HOME may be unset and the directory may not exist.
+      logging.warning('Deleting tracker files failed: %s', e)
+
+  logging.error('Command %s failed %d retries, giving up.',
+                ' '.join(args.command), args.soft_retries*args.hard_retries)
+
+  return retcode
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv))