From 7e16cf303221bbcf81d632924e19ddc888da9c3b Mon Sep 17 00:00:00 2001 From: nick Date: Fri, 16 Sep 2016 16:05:05 -0700 Subject: [PATCH] owners.py: partial fix for owners-check perf regression fnmatch.fnmatch seems to fall off a performance cliff once you start cycling through more patterns than can fit in its internal cache. BUG=642793 Review-Url: https://codereview.chromium.org/2293233002 --- owners.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/owners.py b/owners.py index 78c7cca9c..b97405926 100644 --- a/owners.py +++ b/owners.py @@ -118,6 +118,9 @@ class Database(object): # Mapping reviewers to the preceding comment per file in the OWNERS files. self.comments = {} + # Cache of compiled regexes for _fnmatch() + self._fnmatch_cache = {} + # Set of paths that stop us from looking above them for owners. # (This is implicitly true for the root directory). self._stop_looking = set(['']) @@ -197,13 +200,13 @@ class Database(object): dirpath = self.os_path.dirname(dirpath) def _should_stop_looking(self, objname): - return any(fnmatch.fnmatch(objname, stop_looking) + return any(self._fnmatch(objname, stop_looking) for stop_looking in self._stop_looking) def _owners_for(self, objname): obj_owners = set() for owned_path, path_owners in self._paths_to_owners.iteritems(): - if fnmatch.fnmatch(objname, owned_path): + if self._fnmatch(objname, owned_path): obj_owners |= path_owners return obj_owners @@ -339,6 +342,14 @@ class Database(object): distance += 1 return all_possible_owners + def _fnmatch(self, filename, pattern): + """Same as fnmatch.fnmatch(), but internally caches the compiled regexes.""" + matcher = self._fnmatch_cache.get(pattern) + if matcher is None: + matcher = re.compile(fnmatch.translate(pattern)).match + self._fnmatch_cache[pattern] = matcher + return matcher(filename) + @staticmethod def total_costs_by_owner(all_possible_owners, dirs): # We want to minimize both the number of reviewers and the distance