From 1a54c22b73062d56e44c4e415ee933f3cc663d08 Mon Sep 17 00:00:00 2001 From: "dpranke@chromium.org" Date: Tue, 18 Dec 2012 21:18:43 +0000 Subject: [PATCH] Rework the owner-suggesting algorithm. It turns out that we were weighting all possible owners equally, and picking the last one out of the list. Given the way we traversed owners files, and given that we got rid of the "set noparent"s, this meant that we were always suggesting Ben for just about everything. This change implements a much smarter algorithm that attempts to balance number of reviewers and closeness to the files under review. The unit tests added show specific examples and explanations for why things are chosen the way they are. R=maruel@chromium.org BUG=76727 Review URL: https://chromiumcodereview.appspot.com/11567052 git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@173784 0039d316-1c4b-4281-b951-d872f2087c98 --- owners.py | 102 +++++++++++++------------- tests/owners_unittest.py | 150 +++++++++++++++++++++++++++++++-------- 2 files changed, 172 insertions(+), 80 deletions(-) diff --git a/owners.py b/owners.py index 1b1775f08..0f9d72336 100644 --- a/owners.py +++ b/owners.py @@ -49,6 +49,7 @@ Examples for all of these combinations can be found in tests/owners_unittest.py. """ import collections +import random import re @@ -252,60 +253,59 @@ class Database(object): ('%s is not a "set" directive, "*", ' 'or an email address: "%s"' % (line_type, directive))) - def _covering_set_of_owners_for(self, files): - # Get the set of directories from the files. - dirs = set() - for f in files: - dirs.add(self._enclosing_dir_with_owners(f)) - - - owned_dirs = {} - dir_owners = {} - + dirs_remaining = set(self._enclosing_dir_with_owners(f) for f in files) + all_possible_owners = self._all_possible_owners(dirs_remaining) + suggested_owners = set() + while dirs_remaining: + owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining) + suggested_owners.add(owner) + for dirname, _ in all_possible_owners[owner]: + dirs_remaining.remove(dirname) + return suggested_owners + + def _all_possible_owners(self, dirs): + + """Returns a list of (potential owner, distance-from-dir) tuples; a + distance of 1 is the lowest/closest possible distance (which makes the + subsequent math easier).""" + + all_possible_owners = {} for current_dir in dirs: - # Get the list of owners for each directory. - current_owners = set() dirname = current_dir - while dirname in self.owners_for: - current_owners |= self.owners_for[dirname] + distance = 1 + while True: + for owner in self.owners_for.get(dirname, []): + all_possible_owners.setdefault(owner, []) + all_possible_owners[owner].append((current_dir, distance)) if self._stop_looking(dirname): break - prev_parent = dirname dirname = self.os_path.dirname(dirname) - if prev_parent == dirname: - break - - # Map each directory to a list of its owners. - dir_owners[current_dir] = current_owners - - # Add the directory to the list of each owner. - for owner in current_owners: - owned_dirs.setdefault(owner, set()).add(current_dir) - - final_owners = set() - while dirs: - # Find the owner that has the most directories. - max_count = 0 - max_owner = None - owner_count = {} - for dirname in dirs: - for owner in dir_owners[dirname]: - count = owner_count.get(owner, 0) + 1 - owner_count[owner] = count - if count >= max_count: - max_owner = owner - max_count = count - - # If no more directories have OWNERS, we're done. - if not max_owner: - break - - final_owners.add(max_owner) - - # Remove all directories owned by the current owner from the remaining - # list. - for dirname in owned_dirs[max_owner]: - dirs.discard(dirname) - - return final_owners + distance += 1 + return all_possible_owners + + @staticmethod + def lowest_cost_owner(all_possible_owners, dirs): + # We want to minimize both the number of reviewers and the distance + # from the files/dirs needing reviews. The "pow(X, 1.75)" below is + # an arbitrarily-selected scaling factor that seems to work well - it + # will select one reviewer in the parent directory over three reviewers + # in subdirs, but not one reviewer over just two. + total_costs_by_owner = {} + for owner in all_possible_owners: + total_distance = 0 + num_directories_owned = 0 + for dirname, distance in all_possible_owners[owner]: + if dirname in dirs: + total_distance += distance + num_directories_owned += 1 + if num_directories_owned: + total_costs_by_owner[owner] = (total_distance / + pow(num_directories_owned, 1.75)) + + # Return the lowest cost owner. In the case of a tie, pick one randomly. + lowest_cost = min(total_costs_by_owner.itervalues()) + lowest_cost_owners = filter( + lambda owner: total_costs_by_owner[owner] == lowest_cost, + total_costs_by_owner) + return random.Random().choice(lowest_cost_owners) diff --git a/tests/owners_unittest.py b/tests/owners_unittest.py index 34272a133..462386d9a 100755 --- a/tests/owners_unittest.py +++ b/tests/owners_unittest.py @@ -58,7 +58,7 @@ def test_repo(): }) -class OwnersDatabaseTest(unittest.TestCase): +class _BaseTestCase(unittest.TestCase): def setUp(self): self.repo = test_repo() self.files = self.repo.files @@ -73,6 +73,8 @@ class OwnersDatabaseTest(unittest.TestCase): glob = glob or self.glob return owners.Database(root, fopen, os_path, glob) + +class OwnersDatabaseTest(_BaseTestCase): def test_constructor(self): self.assertNotEquals(self.db(), None) @@ -205,16 +207,41 @@ class OwnersDatabaseTest(unittest.TestCase): self.assertRaises(owners.SyntaxErrorInOwnersFile, self.db().directories_not_covered_by, ['DEPS'], [brett]) - def assert_reviewers_for(self, files, expected_reviewers): + def assert_syntax_error(self, owners_file_contents): + db = self.db() + self.files['/foo/OWNERS'] = owners_file_contents + self.files['/foo/DEPS'] = '' + try: + db.reviewers_for(['foo/DEPS']) + self.fail() # pragma: no cover + except owners.SyntaxErrorInOwnersFile, e: + self.assertTrue(str(e).startswith('/foo/OWNERS:1')) + + def test_syntax_error__unknown_token(self): + self.assert_syntax_error('{}\n') + + def test_syntax_error__unknown_set(self): + self.assert_syntax_error('set myfatherisbillgates\n') + + def test_syntax_error__bad_email(self): + self.assert_syntax_error('ben\n') + + +class ReviewersForTest(_BaseTestCase): + def assert_reviewers_for(self, files, *potential_suggested_reviewers): db = self.db() - self.assertEquals(db.reviewers_for(set(files)), set(expected_reviewers)) + suggested_reviewers = db.reviewers_for(set(files)) + self.assertTrue(suggested_reviewers in + [set(suggestion) for suggestion in potential_suggested_reviewers]) def test_reviewers_for__basic_functionality(self): self.assert_reviewers_for(['chrome/gpu/gpu_channel.h'], - [brett]) + [ken]) def test_reviewers_for__set_noparent_works(self): - self.assert_reviewers_for(['content/content.gyp'], [darin]) + self.assert_reviewers_for(['content/content.gyp'], + [john], + [darin]) def test_reviewers_for__valid_inputs(self): db = self.db() @@ -236,15 +263,16 @@ class OwnersDatabaseTest(unittest.TestCase): self.assert_reviewers_for([ 'chrome/gpu/gpu_channel.h', 'content/baz/froboz.h', - 'chrome/renderer/gpu/gpu_channel_host.h'], [brett]) + 'chrome/renderer/gpu/gpu_channel_host.h'], + [brett]) def test_reviewers_for__two_owners(self): self.assert_reviewers_for([ 'chrome/gpu/gpu_channel.h', 'content/content.gyp', 'content/baz/froboz.h', - 'content/views/pie.h' - ], [john, brett]) + 'content/views/pie.h'], + [ken, john]) def test_reviewers_for__all_files(self): self.assert_reviewers_for([ @@ -254,32 +282,96 @@ class OwnersDatabaseTest(unittest.TestCase): 'content/content.gyp', 'content/bar/foo.cc', 'content/baz/froboz.h', - 'content/views/pie.h'], [john, brett]) + 'content/views/pie.h'], + [peter, ken, john]) def test_reviewers_for__per_file_owners_file(self): self.files['/content/baz/OWNERS'] = owners_file(lines=[ 'per-file ugly.*=tom@example.com']) - self.assert_reviewers_for(['content/baz/OWNERS'], [darin]) - - def assert_syntax_error(self, owners_file_contents): - db = self.db() - self.files['/foo/OWNERS'] = owners_file_contents - self.files['/foo/DEPS'] = '' - try: - db.reviewers_for(['foo/DEPS']) - self.fail() # pragma: no cover - except owners.SyntaxErrorInOwnersFile, e: - self.assertTrue(str(e).startswith('/foo/OWNERS:1')) - - def test_syntax_error__unknown_token(self): - self.assert_syntax_error('{}\n') - - def test_syntax_error__unknown_set(self): - self.assert_syntax_error('set myfatherisbillgates\n') - - def test_syntax_error__bad_email(self): - self.assert_syntax_error('ben\n') + self.assert_reviewers_for(['content/baz/OWNERS'], + [john], + [darin]) + def test_reviewers_for__per_file(self): + self.files['/content/baz/OWNERS'] = owners_file(lines=[ + 'per-file ugly.*=tom@example.com']) + self.assert_reviewers_for(['content/baz/ugly.cc'], + [tom]) + + +class LowestCostOwnersTest(_BaseTestCase): + # Keep the data in the test_lowest_cost_owner* methods as consistent with + # test_repo() where possible to minimize confusion. + + def check(self, possible_owners, dirs, *possible_lowest_cost_owners): + suggested_owner = owners.Database.lowest_cost_owner(possible_owners, dirs) + self.assertTrue(suggested_owner in possible_lowest_cost_owners) + + def test_one_dir_with_owner(self): + # brett is the only immediate owner for stuff in baz; john is also + # an owner, but further removed. We should always get brett. + self.check({brett: [('content/baz', 1)], + john: [('content/baz', 2)]}, + ['content/baz'], + brett) + + # john and darin are owners for content; the suggestion could be either. + def test_one_dir_with_two_owners(self): + self.check({john: [('content', 1)], + darin: [('content', 1)]}, + ['content'], + john, darin) + + def test_one_dir_with_two_owners_in_parent(self): + # As long as the distance is the same, it shouldn't matter (brett isn't + # listed in this case). + self.check({john: [('content/baz', 2)], + darin: [('content/baz', 2)]}, + ['content/baz'], + john, darin) + + def test_two_dirs_two_owners(self): + # If they both match both dirs, they should be treated equally. + self.check({john: [('content/baz', 2), ('content/bar', 2)], + darin: [('content/baz', 2), ('content/bar', 2)]}, + ['content/baz', 'content/bar'], + john, darin) + + # Here brett is better since he's closer for one of the two dirs. + self.check({brett: [('content/baz', 1), ('content/views', 1)], + darin: [('content/baz', 2), ('content/views', 1)]}, + ['content/baz', 'content/views'], + brett) + + def test_hierarchy(self): + # the choices in these tests are more arbitrary value judgements; + # also, here we drift away from test_repo() to cover more cases. + + # Here ben isn't picked, even though he can review both; we prefer + # closer reviewers. + self.check({ben: [('chrome/gpu', 2), ('chrome/renderer', 2)], + ken: [('chrome/gpu', 1)], + peter: [('chrome/renderer', 1)]}, + ['chrome/gpu', 'chrome/renderer'], + ken, peter) + + # Here we always pick ben since he can review either dir as well as + # the others but can review both (giving us fewer total reviewers). + self.check({ben: [('chrome/gpu', 1), ('chrome/renderer', 1)], + ken: [('chrome/gpu', 1)], + peter: [('chrome/renderer', 1)]}, + ['chrome/gpu', 'chrome/renderer'], + ben) + + # However, three reviewers is too many, so ben gets this one. + self.check({ben: [('chrome/gpu', 2), ('chrome/renderer', 2), + ('chrome/browser', 2)], + ken: [('chrome/gpu', 1)], + peter: [('chrome/renderer', 1)], + brett: [('chrome/browser', 1)]}, + ['chrome/gpu', 'chrome/renderer', + 'chrome/browser'], + ben) if __name__ == '__main__': unittest.main()