#!/usr/bin/python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Simple client for the Gerrit REST API.

Example usage:
  ./gerrit_client.py -j /tmp/out.json -f json \
      -u https://chromium.googlesource.com/chromium/src/+log
"""

import argparse
import json
import logging
import os
import sys
import tarfile
import time
import urllib
import urlparse

DEPOT_TOOLS = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir,
                 os.pardir))
sys.path.insert(0, DEPOT_TOOLS)

from gerrit_util import CreateHttpConn, ReadHttpResponse, ReadHttpJsonResponse


def reparse_url(parsed_url, query_params):
  # Return a copy of `parsed_url` whose query string is replaced with the
  # encoded `query_params`.
  return urlparse.ParseResult(
      scheme=parsed_url.scheme,
      netloc=parsed_url.netloc,
      path=parsed_url.path,
      params=parsed_url.params,
      fragment=parsed_url.fragment,
      query=urllib.urlencode(query_params, doseq=True))


def gitiles_get(parsed_url, handler, attempts):
  # Perform a Gitiles request via `handler`, retrying up to `attempts` times
  # with exponential backoff.
  # This insanity is due to the CreateHttpConn interface :(
  host = parsed_url.netloc
  path = parsed_url.path
  if parsed_url.query:
    path += '?%s' % (parsed_url.query, )

  retry_delay_seconds = 1
  attempt = 1
  while True:
    try:
      return handler(CreateHttpConn(host, path))
    except Exception as e:
      if attempt >= attempts:
        raise
      logging.exception('Failed to perform Gitiles operation: %s', e)

    # Retry from previous loop.
    logging.error('Sleeping %d seconds before retry (%d/%d)...',
                  retry_delay_seconds, attempt, attempts)
    time.sleep(retry_delay_seconds)
    retry_delay_seconds *= 2
    attempt += 1


def fetch_log_with_paging(query_params, limit, fetch):
  """Fetches the log, possibly requesting multiple pages to do so.

  Args:
    query_params (dict): Parameters to use in the request.
    limit (int): Minimum total number of commits to fetch across pages.
    fetch (function): Function to use to make the requests.

  Returns:
    Dict with key "log", whose value is a list of commits.
  """
  # The log API returns {'log': [list of commits], 'next': hash}.
  last_result = fetch(query_params)
  commits = last_result['log']
  while last_result.get('next') and len(commits) < limit:
    query_params['s'] = last_result.get('next')
    last_result = fetch(query_params)
    # The first commit in `last_result` is not necessarily the parent of the
    # last commit fetched so far! This is because the log command can be run
    # on a single file object, for example:
    # https://gerrit.googlesource.com/gitiles/+log/1c21279f337da8130/COPYING
    # Even when fetching the log for the whole repository, there can be merge
    # commits.
    commits.extend(last_result['log'])
  # Use the 'next' field (if any) from `last_result`, but the commits
  # aggregated from all the results. This essentially imitates paging with a
  # page size of at least `limit`.
  last_result['log'] = commits
  logging.debug(
      'fetched %d commits, next: %s.', len(commits), last_result.get('next'))
  return last_result
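

# A minimal sketch (not used by the script itself; the fake pages below are
# purely illustrative) of how fetch_log_with_paging() composes with a `fetch`
# callable returning the {'log': [...], 'next': hash} shape described above.
def _example_fetch_log_usage():
  pages = {
      None: {'log': [{'commit': 'aaa'}, {'commit': 'bbb'}], 'next': 'ccc'},
      'ccc': {'log': [{'commit': 'ccc'}]},
  }

  def fake_fetch(query_params):
    # Serve the page keyed by the 's' (start) parameter, like the real API.
    return pages[query_params.get('s')]

  result = fetch_log_with_paging({}, 3, fake_fetch)
  assert [c['commit'] for c in result['log']] == ['aaa', 'bbb', 'ccc']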


def main(arguments):
  parser = create_argparser()
  args = parser.parse_args(arguments)

  if args.extract_to and args.format != 'archive':
    parser.error('--extract-to requires --format=archive')
  if not args.extract_to and args.format == 'archive':
    parser.error('--format=archive requires --extract-to')
  if args.extract_to:
    # Make sure the path is absolute and ends with '/'.
    args.extract_to = os.path.join(os.path.abspath(args.extract_to), '')
    os.makedirs(args.extract_to)

  parsed_url = urlparse.urlparse(args.url)
  if not parsed_url.scheme.startswith('http'):
    parser.error('Invalid URI scheme (expected http or https): %s' % args.url)

  query_params = {}
  if parsed_url.query:
    query_params.update(urlparse.parse_qs(parsed_url.query))
  # Force the format specified on the command line.
  if query_params.get('format'):
    parser.error('URL must not contain format; use the --format command line '
                 'flag instead.')
  query_params['format'] = args.format

  kwargs = {}
  accept_statuses = frozenset(
      [int(s) for s in args.accept_statuses.split(',')])
  if accept_statuses:
    kwargs['accept_statuses'] = accept_statuses

  # Choose the handler.
  if args.format == 'json':
    def handler(conn):
      return ReadHttpJsonResponse(conn, **kwargs)
  elif args.format == 'text':
    # Text fetching packs the text into structured JSON.
    def handler(conn):
      # Wrap in structured JSON for export to the recipe module.
      return {
          'value': ReadHttpResponse(conn, **kwargs).read() or None,
      }
  elif args.format == 'archive':
    # Archive fetching hooks the result up to tarfile extraction. This
    # implementation is able to do a streaming extraction without having to
    # buffer the entire tarfile.
    def handler(conn):
      ret = {
          'extracted': {
              'filecount': 0,
              'bytes': 0,
          },
          'skipped': {
              'filecount': 0,
              'bytes': 0,
              'names': [],
          }
      }
      fileobj = ReadHttpResponse(conn, **kwargs)
      with tarfile.open(mode='r|*', fileobj=fileobj) as tf:
        # Monkeypatch the TarFile object to allow printing messages and
        # collecting stats for each extracted file. extractall makes a single
        # linear pass over the tarfile, which is compatible with
        # ReadHttpResponse; other naive implementations (such as `getmembers`)
        # do random access over the file and would require buffering the
        # whole thing (!!).
        em = tf._extract_member

        def _extract_member(tarinfo, targetpath):
          if not os.path.abspath(targetpath).startswith(args.extract_to):
            print 'Skipping %s' % (tarinfo.name,)
            ret['skipped']['filecount'] += 1
            ret['skipped']['bytes'] += tarinfo.size
            ret['skipped']['names'].append(tarinfo.name)
            return
          print 'Extracting %s' % (tarinfo.name,)
          ret['extracted']['filecount'] += 1
          ret['extracted']['bytes'] += tarinfo.size
          return em(tarinfo, targetpath)

        tf._extract_member = _extract_member
        tf.extractall(args.extract_to)
      return ret

  if args.log_start:
    query_params['s'] = args.log_start

  def fetch(query_params):
    parsed_url_with_query = reparse_url(parsed_url, query_params)
    result = gitiles_get(parsed_url_with_query, handler, args.attempts)
    if not args.quiet:
      logging.info('Read from %s: %s', parsed_url_with_query.geturl(), result)
    return result

  if args.log_limit:
    if args.format != 'json':
      parser.error('--log-limit works with the json format only')
    result = fetch_log_with_paging(query_params, args.log_limit, fetch)
  else:
    # Either this is not a log request, or we don't care about paging, so
    # just return whatever is fetched the first time.
    result = fetch(query_params)

  with open(args.json_file, 'w') as json_file:
    json.dump(result, json_file)
  return 0
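

# A minimal sketch (not used by the script; POSIX paths assumed, names purely
# illustrative) of the prefix check the archive handler relies on: because
# args.extract_to is normalized to end with '/', a startswith() test on the
# absolute target path rejects tar entries that would land outside the
# extraction directory (e.g. via '..' components or sibling prefixes such as
# '/tmp/out-other').
def _example_extract_guard():
  extract_to = os.path.join(os.path.abspath('/tmp/out'), '')  # '/tmp/out/'
  inside = os.path.abspath(os.path.join(extract_to, 'a/b'))
  escaped = os.path.abspath(os.path.join(extract_to, '../evil'))
  assert inside.startswith(extract_to)
  assert not escaped.startswith(extract_to)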


def create_argparser():
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '-j', '--json-file',
      help='Path to the json file for output.')
  parser.add_argument(
      '--extract-to',
      help='Local path to extract the archive url to. Must not exist.')
  parser.add_argument(
      '-f', '--format', required=True, choices=('json', 'text', 'archive'))
  parser.add_argument(
      '-u', '--url', required=True,
      help='Url of gitiles. For example, '
           'https://chromium.googlesource.com/chromium/src/+refs. '
           'Insert a/ after the domain for authenticated access.')
  parser.add_argument(
      '-a', '--attempts', type=int, default=1,
      help='The number of attempts to make (with exponential backoff) before '
           'failing. If several requests are made, this applies to each '
           'request separately.')
  parser.add_argument(
      '-q', '--quiet', action='store_true',
      help='Suppress logging of file contents.')
  parser.add_argument(
      '--log-limit', type=int, default=None,
      help='Follow gitiles pages to fetch at least this many commits. By '
           'default, a single page with a server-chosen number of commits is '
           'fetched. Only for https://<host>/<repo>/+log/... gitiles '
           'requests.')
  parser.add_argument(
      '--log-start',
      help='If given, continue fetching the log by paging from this commit '
           'hash. This value can typically be taken from the json result of '
           'a previous log call, which returns the next page start commit as '
           'the "next" key. Only for https://<host>/<repo>/+log/... gitiles '
           'requests.')
  parser.add_argument(
      '--accept-statuses', type=str, default='200',
      help='Comma-separated list of status codes to accept as "successful" '
           'HTTP responses.')
  return parser


if __name__ == '__main__':
  logging.basicConfig()
  logging.getLogger().setLevel(logging.INFO)
  sys.exit(main(sys.argv[1:]))
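
# Example invocations (illustrative; refs, paths, and hashes are
# placeholders):
#
#   Fetch one page of the log as JSON:
#     ./gerrit_client.py -f json -j /tmp/log1.json \
#         -u https://chromium.googlesource.com/chromium/src/+log
#
#   Follow paging until at least 500 commits are fetched; if more pages
#   remain, the output JSON carries a "next" key that can seed a later,
#   resumed call via --log-start <next-hash>:
#     ./gerrit_client.py -f json -j /tmp/log2.json --log-limit 500 \
#         -u https://chromium.googlesource.com/chromium/src/+log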