From 9c0620120980e4c247ff8325ee8bbdcd4d9576e0 Mon Sep 17 00:00:00 2001 From: Bruce Dawson Date: Thu, 2 May 2019 19:20:28 +0000 Subject: [PATCH] Fix multiple confusingly escaped regex strings Python (prior to 3.8) treats meaningless string escape sequences as if they were a slash followed by the character. That is, '\w' == '\\w'. Python 3.8 rejects this, and it's confusing. This change fixes seven of these regex strings found in depot_tools (through a regex search, natch). Most of the fixes don't actually change the value of the strings, and this was manually verified: >>> '(/c(/.*/\+)?)?/(\d+)(/(\d+)?/?)?$' == r'(/c(/.*/\+)?)?/(\d+)(/(\d+)?/?)?$' True >>> '#\s*OWNERS_STATUS\s+=\s+(.+)$' == r'#\s*OWNERS_STATUS\s+=\s+(.+)$' True >>> 'COM\d' == r'COM\d' True >>> '^\s+Change-Id:\s*(\S+)$' == r'^\s+Change-Id:\s*(\S+)$' True >>> 'ETag:\s+([a-z0-9]{32})' == r'ETag:\s+([a-z0-9]{32})' True Two exceptions were the regex expressions in filter_demo_output.py and scm.py. These were turned into raw strings despite this changing the value of the string passed to re. This works because re supports the \x, \d, \w, \t, and other escape sequences needed to make this work. TL;DR - use raw strings for regex to avoid melting your brain. If bulk changing regex strings to raw watch out for double-slashes. Bug: 958138 Change-Id: Ic45264cfc63e8bae9cfcffe2cd88a57c2d3dcdae Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1590534 Commit-Queue: Bruce Dawson Reviewed-by: Dirk Pranke --- git_cl.py | 2 +- man/src/filter_demo_output.py | 2 +- owners.py | 2 +- patch.py | 2 +- scm.py | 2 +- testing_support/gerrit_test_case.py | 2 +- upload_to_google_storage.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/git_cl.py b/git_cl.py index 85b212c7e..a92cafe50 100755 --- a/git_cl.py +++ b/git_cl.py @@ -2467,7 +2467,7 @@ class _GerritChangelistImpl(_ChangelistCodereviewBase): part = parsed_url.fragment else: part = parsed_url.path - match = re.match('(/c(/.*/\+)?)?/(\d+)(/(\d+)?/?)?$', part) + match = re.match(r'(/c(/.*/\+)?)?/(\d+)(/(\d+)?/?)?$', part) if match: return _ParsedIssueNumberArgument( issue=int(match.group(3)), diff --git a/man/src/filter_demo_output.py b/man/src/filter_demo_output.py index 92816db7c..5a506f61e 100755 --- a/man/src/filter_demo_output.py +++ b/man/src/filter_demo_output.py @@ -66,7 +66,7 @@ def main(): backend = sys.argv[1] output = sys.stdin.read().rstrip() - callout_re = re.compile('\x1b\[(\d+)c\n') + callout_re = re.compile(r'\x1b\[(\d+)c\n') callouts = collections.defaultdict(int) for i, line in enumerate(output.splitlines(True)): m = callout_re.match(line) diff --git a/owners.py b/owners.py index 1efd5560b..973b2ab5b 100644 --- a/owners.py +++ b/owners.py @@ -308,7 +308,7 @@ class Database(object): line = line.strip() if line.startswith('#'): if is_toplevel: - m = re.match('#\s*OWNERS_STATUS\s+=\s+(.+)$', line) + m = re.match(r'#\s*OWNERS_STATUS\s+=\s+(.+)$', line) if m: self._status_file = m.group(1).strip() continue diff --git a/patch.py b/patch.py index 1bc608c9f..63209bb1f 100644 --- a/patch.py +++ b/patch.py @@ -60,7 +60,7 @@ class FilePatchBase(object): if filename == 'CON': raise UnsupportedPatchFormat( filename, 'Filename can\'t be \'CON\'.') - if re.match('COM\d', filename): + if re.match(r'COM\d', filename): raise UnsupportedPatchFormat( filename, 'Filename can\'t be \'%s\'.' % filename) return filename diff --git a/scm.py b/scm.py index b30b14cfd..9b3a7b850 100644 --- a/scm.py +++ b/scm.py @@ -147,7 +147,7 @@ class GIT(object): # 3-way merges can cause the status can be 'MMM' instead of 'M'. This # can happen when the user has 2 local branches and he diffs between # these 2 branches instead diffing to upstream. - m = re.match('^(\w)+\t(.+)$', statusline) + m = re.match(r'^(\w)+\t(.+)$', statusline) if not m: raise gclient_utils.Error( 'status currently unsupported: %s' % statusline) diff --git a/testing_support/gerrit_test_case.py b/testing_support/gerrit_test_case.py index bc95694f2..ce93170ff 100644 --- a/testing_support/gerrit_test_case.py +++ b/testing_support/gerrit_test_case.py @@ -77,7 +77,7 @@ class GerritTestCase(unittest.TestCase): """ COMMIT_RE = re.compile(r'^commit ([0-9a-fA-F]{40})$') - CHANGEID_RE = re.compile('^\s+Change-Id:\s*(\S+)$') + CHANGEID_RE = re.compile(r'^\s+Change-Id:\s*(\S+)$') DEVNULL = open(os.devnull, 'w') TEST_USERNAME = 'test-username' TEST_EMAIL = 'test-username@test.org' diff --git a/upload_to_google_storage.py b/upload_to_google_storage.py index 031214d6a..2f5ffc7d1 100755 --- a/upload_to_google_storage.py +++ b/upload_to_google_storage.py @@ -77,7 +77,7 @@ def _upload_worker( if gsutil.check_call('ls', file_url)[0] == 0 and not force: # File exists, check MD5 hash. _, out, _ = gsutil.check_call_with_retries('ls', '-L', file_url) - etag_match = re.search('ETag:\s+([a-z0-9]{32})', out) + etag_match = re.search(r'ETag:\s+([a-z0-9]{32})', out) if etag_match: remote_md5 = etag_match.group(1) # Calculate the MD5 checksum to match it to Google Storage's ETag.