You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			284 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			284 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
| #!/usr/bin/env python3
 | |
| # Copyright (c) 2024 The Chromium Authors. All rights reserved.
 | |
| # Use of this source code is governed by a BSD-style license that can be
 | |
| # found in the LICENSE file.
 | |
| """Uploads files to Google Storage and output DEPS blob."""
 | |
| 
 | |
| import hashlib
 | |
| import optparse
 | |
| import os
 | |
| import json
 | |
| import tempfile
 | |
| 
 | |
| import re
 | |
| import sys
 | |
| import tarfile
 | |
| 
 | |
| from download_from_google_storage import Gsutil
 | |
| from download_from_google_storage import GSUTIL_DEFAULT_PATH
 | |
| from typing import List
 | |
| 
 | |
# Placeholder handed back instead of a generation number when none could be
# parsed from gsutil's upload output. Adjacent string literals are joined
# verbatim, so the first fragment has to carry the separating space itself.
MISSING_GENERATION_MSG = (
    'missing generation number, please retrieve from Cloud Storage '
    'before saving to DEPS')
 | |
| 
 | |
# Usage text passed to optparse.OptionParser in main().
USAGE_STRING = """%prog [options] target [target2 ...].
Target(s) is the files or directories intended to be uploaded to Google Storage.
If a single target is a directory, it will be compressed and uploaded as a
tar.gz file.
If target is "-", then a list of directories will be taken from standard input.
The list of directories will be compressed together and uploaded as one tar.gz
file.

Example usage
------------
./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
--object-name my_object_name hello_world.txt

./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
--object-name my_object_name my_dir1

./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
--object-name my_object_name my_dir1 my_dir2

Scan the current directory and upload all files larger than 1MB:
find . -name .svn -prune -o -size +1000k -type f -print0 |
./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
--object-name my_object_name -0 -
"""
 | |
| 
 | |
| 
 | |
def get_targets(args: List[str], parser: optparse.OptionParser,
                use_null_terminator: bool) -> List[str]:
    """Get target(s) to upload to GCS.

    Args:
        args: Positional command-line arguments.
        parser: Parser used to report usage errors through parser.error().
        use_null_terminator: When the targets come from standard input, split
            the input on NUL characters instead of on line boundaries.

    Returns:
        `args` itself, unless `args` is exactly ['-'], in which case the
        non-empty entries read from standard input are returned.
    """
    if not args:
        parser.error('Missing target.')

    if len(args) != 1 or args[0] != '-':
        return args

    # Take stdin as a newline or null separated list of files.
    data = sys.stdin.read()
    entries = data.split('\0') if use_null_terminator else data.splitlines()
    # "find ... -print0" terminates its output with a NUL and piped lists may
    # contain blank lines; either would otherwise show up as a '' target.
    targets = [entry for entry in entries if entry]
    if not targets:
        parser.error('Missing target.')
    return targets
 | |
| 
 | |
| 
 | |
def create_archive(dirs: List[str]) -> str:
    """Given a list of directories, compress them all into one tar file.

    Args:
        dirs: Paths to add to the archive, each one via TarFile.add().

    Returns:
        Path of the newly created temporary .tar.gz file. The file is not
        deleted by this function on success.

    Raises:
        Any exception raised while writing the archive; the partially
        written temporary file is removed before it propagates.
    """
    # mkstemp() returns an already-open OS-level descriptor together with the
    # path. tarfile reopens the file by name, so close the descriptor right
    # away instead of leaking it.
    fd, filename = tempfile.mkstemp(suffix='.tar.gz')
    os.close(fd)
    try:
        with tarfile.open(filename, 'w:gz') as tar:
            for d in dirs:
                tar.add(d)
    except BaseException:
        # Do not leave a truncated archive behind in the temp directory.
        os.remove(filename)
        raise
    return filename
 | |
| 
 | |
| 
 | |
def validate_archive_dirs(dirs: List[str]) -> bool:
    """Validate the list of directories.

    Every entry must be a directory located directly below the current
    working directory. An entry that is a symlink is accepted only when it
    resolves to (or inside) one of the other entries.

    Args:
        dirs: Candidate directory names, relative to the current directory.

    Returns:
        True if every entry passes all checks, False otherwise.
    """
    # Names of the directories directly inside cwd. Listed once up front
    # rather than once per entry; an unreadable cwd yields no names.
    cwd_subdirs = next(os.walk('.'), ('.', [], []))[1]

    for d in dirs:
        # We don't allow .. in paths in our archives.
        if d == '..':
            return False
        # We only allow dirs.
        if not os.path.isdir(d):
            return False
        # Symlinks must point to a target inside the dirs.
        if os.path.islink(d):
            target = os.path.realpath(d)
            # The link itself is left out of the candidates: compared with
            # its own resolved path it would always count as "inside".
            roots = [os.path.realpath(other) for other in dirs if other != d]
            # Compare whole path components so that /x/foobar is not taken
            # to be inside /x/foo.
            if not any(target == root or target.startswith(root + os.sep)
                       for root in roots):
                return False
        # We required that the subdirectories we are archiving are all just
        # below cwd.
        if d not in cwd_subdirs:
            return False

    return True
 | |
| 
 | |
| 
 | |
def get_sha256sum(filename: str) -> str:
    """Return the hex-encoded SHA-256 digest of the contents of `filename`."""
    # 1 MiB per read keeps memory use flat regardless of the file size.
    chunk_size = 1024 * 1024
    digest = hashlib.sha256()
    with open(filename, 'rb') as stream:
        # iter() with a sentinel stops at the empty bytes object that read()
        # returns once the end of the file has been reached.
        for block in iter(lambda: stream.read(chunk_size), b''):
            digest.update(block)
    return digest.hexdigest()
 | |
| 
 | |
| 
 | |
def upload_to_google_storage(file: str, base_url: str, object_name: str,
                             gsutil: Gsutil, force: bool, gzip: str,
                             dry_run: bool) -> str:
    """Upload file to GCS and report the generation of the uploaded object.

    Args:
        file: Local path of the file to upload.
        base_url: URL prefix; the destination is '<base_url>/<object_name>'.
        object_name: Name of the destination object.
        gsutil: Wrapper used to run the gsutil commands.
        force: Upload even if an object already exists at the destination.
        gzip: If set, passed to "gsutil cp -z".
        dry_run: Only perform the existence check, do not upload.

    Returns:
        The generation number (as a string of digits) parsed from gsutil's
        status output, or MISSING_GENERATION_MSG if it could not be parsed.
        On a dry run nothing is uploaded and None is returned.

    Raises:
        Exception: if the object already exists and `force` is not set, or
            if the upload command exits with a non-zero code.
    """
    file_url = '%s/%s' % (base_url, object_name)
    # The outcome of the existence probe is ignored when forcing, so only
    # issue the remote call when it can change the result.
    if not force and gsutil.check_call('ls', file_url)[0] == 0:
        # Something is listed at that URL; treat the object as existing only
        # if the detailed listing reports an ETag for it.
        _, out, _ = gsutil.check_call_with_retries('ls', '-L', file_url)
        if re.search(r'ETag:\s+\S+', out):
            raise Exception('File with url %s already exists' % file_url)
    if dry_run:
        return None
    print("Uploading %s as %s" % (file, file_url))
    gsutil_args = ['-h', 'Cache-Control:public, max-age=31536000', 'cp', '-v']
    if gzip:
        gsutil_args.extend(['-z', gzip])
    gsutil_args.extend([file, file_url])
    code, _, err = gsutil.check_call_with_retries(*gsutil_args)
    if code != 0:
        raise Exception(
            code, 'Encountered error on uploading %s to %s\n%s' %
            (file, file_url, err))
    pattern = re.escape(file_url) + r'#(?P<generation>\d+)'
    # The generation number is printed as part of the progress / status info
    # which gsutil outputs to stderr to keep separated from any final output
    # data.
    for line in err.strip().splitlines():
        m = re.search(pattern, line)
        if m:
            return m.group('generation')
    print('Warning: generation number could not be parsed from status '
          f'info: {err}')
    return MISSING_GENERATION_MSG
 | |
| 
 | |
| 
 | |
def construct_deps_blob(bucket: str, object_name: str, file: str,
                        generation: str) -> dict:
    """Output a blob hint that would need be added to a DEPS file.

    Args:
        bucket: Name of the bucket holding the object.
        object_name: Name of the uploaded object.
        file: Local path of the uploaded file; hashed and measured here.
        generation: Generation of the uploaded object. A numeric string is
            stored as an int. Anything else is stored as a human-readable
            hint rather than raising.

    Returns:
        A dict with a single 'path' entry describing the GCS dependency.
    """
    try:
        generation_value = int(generation)
    except (TypeError, ValueError):
        # upload_to_google_storage() yields None on a dry run and
        # MISSING_GENERATION_MSG when gsutil's output could not be parsed.
        # Neither is a number, so surface the hint instead of crashing
        # before the rest of the blob can be printed.
        generation_value = generation or MISSING_GENERATION_MSG
    return {
        'path': {
            'dep_type':
            'gcs',
            'bucket':
            bucket,
            'objects': [{
                'object_name': object_name,
                'sha256sum': get_sha256sum(file),
                'size_bytes': os.path.getsize(file),
                'generation': generation_value,
            }],
        }
    }
 | |
| 
 | |
| 
 | |
def main():
    """Parse the command line, upload the target and print its DEPS blob.

    A single non-directory target is uploaded as-is. A directory, or several
    targets, are validated, packed into one tar.gz archive and that archive
    is uploaded. The resulting DEPS entry is printed as JSON.

    Returns:
        The result of gsutil.call('config') when --config is given, otherwise
        0. Usage errors are reported through parser.error().
    """
    parser = optparse.OptionParser(USAGE_STRING)
    parser.add_option('-b',
                      '--bucket',
                      help='Google Storage bucket to upload to.')
    parser.add_option('-p',
                      '--prefix',
                      help='Prefix that goes before object-name (i.e. in '
                      'between bucket and object name).')
    parser.add_option('-o',
                      '--object-name',
                      help='Optional object name of uploaded tar file. '
                      'If empty, the sha256sum will be the object name.')
    parser.add_option('-d',
                      '--dry-run',
                      action='store_true',
                      help='Check if file already exists on GS without '
                      'uploading it and output DEP blob.')
    parser.add_option('-c',
                      '--config',
                      action='store_true',
                      help='Alias for "gsutil config".  Run this if you want '
                      'to initialize your saved Google Storage '
                      'credentials.  This will create a read-only '
                      'credentials file in ~/.boto.depot_tools.')
    parser.add_option('-e', '--boto', help='Specify a custom boto file.')
    parser.add_option('-f',
                      '--force',
                      action='store_true',
                      help='Force upload even if remote file exists.')
    parser.add_option('-g',
                      '--gsutil_path',
                      default=GSUTIL_DEFAULT_PATH,
                      help='Path to the gsutil script.')
    parser.add_option('-0',
                      '--use_null_terminator',
                      action='store_true',
                      help='Use \\0 instead of \\n when parsing '
                      'the file list from stdin.  This is useful if the input '
                      'is coming from "find ... -print0".')
    parser.add_option('-z',
                      '--gzip',
                      metavar='ext',
                      help='For files which end in <ext> gzip them before '
                      'upload. '
                      'ext is a comma-separated list')
    (options, args) = parser.parse_args()

    # Enumerate our inputs.
    input_filenames = get_targets(args, parser, options.use_null_terminator)

    # Allow uploading the entire directory
    if len(input_filenames) == 1 and input_filenames[0] in ('.', './'):
        input_filenames = next(os.walk('.'))[1]

    # "." expands to nothing when cwd has no subdirectories; report that as
    # a usage error instead of failing on input_filenames[0] below.
    if not input_filenames:
        parser.error('Missing target.')

    if len(input_filenames) > 1 or os.path.isdir(input_filenames[0]):
        if not validate_archive_dirs(input_filenames):
            # parser.error() exits, so nothing after it runs.
            parser.error(
                'Only directories just below cwd are valid entries. '
                'Entries cannot contain .. and entries can not be symlinks. '
                'Entries was %s' % input_filenames)
        file = create_archive(input_filenames)
    else:
        file = input_filenames[0]

    object_name = options.object_name
    if not object_name:
        object_name = get_sha256sum(file)

    if options.prefix:
        object_name = f'{options.prefix}/{object_name}'

    # Make sure we can find a working instance of gsutil. Honour
    # -g/--gsutil_path, which defaults to GSUTIL_DEFAULT_PATH.
    gsutil = None
    if os.path.exists(options.gsutil_path):
        gsutil = Gsutil(options.gsutil_path, boto_path=options.boto)
    else:
        for path in os.environ.get('PATH', '').split(os.pathsep):
            candidate = os.path.join(path, 'gsutil')
            if path and os.path.isfile(candidate):
                gsutil = Gsutil(candidate, boto_path=options.boto)
                # Earlier PATH entries take precedence, so stop at the
                # first hit.
                break
        if not gsutil:
            parser.error('gsutil not found in %s, bad depot_tools checkout?' %
                         options.gsutil_path)

    # Passing in -c/--config will run our copy of GSUtil, then quit.
    if options.config:
        print('===Note from depot_tools===')
        print('If you do not have a project ID, enter "0" when asked for one.')
        print('===End note from depot_tools===')
        print()
        gsutil.check_call('version')
        return gsutil.call('config')

    # Validate with parser.error() rather than assert: asserts are stripped
    # under -O and a missing --bucket would otherwise surface as a TypeError.
    if not options.bucket:
        parser.error('Missing bucket.  Specify bucket with --bucket.')
    if '/' in options.bucket:
        parser.error('Slashes not allowed in bucket name')

    base_url = f'gs://{options.bucket}'

    generation = upload_to_google_storage(file, base_url, object_name, gsutil,
                                          options.force, options.gzip,
                                          options.dry_run)
    print(
        json.dumps(construct_deps_blob(options.bucket, object_name, file,
                                       generation),
                   indent=2))
    return 0
 | |
| 
 | |
| 
 | |
if __name__ == '__main__':
    # Run the tool and turn Ctrl-C into a short message and exit status 1
    # instead of a traceback.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        sys.stderr.write('interrupted\n')
        exit_status = 1
    sys.exit(exit_status)
 |