From 055a0dee074ee3b09b007da170e6e8ce365ea2aa Mon Sep 17 00:00:00 2001 From: "dpranke@chromium.org" Date: Wed, 19 Dec 2012 20:13:33 +0000 Subject: [PATCH] Try again to land the improved owners algorithm. Initially landed in r173784, reverted in r173808 R=maruel@chromium.org BUG=76727 Review URL: https://chromiumcodereview.appspot.com/11645009 git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@173979 0039d316-1c4b-4281-b951-d872f2087c98 --- owners.py | 103 +++++++++++++------------- tests/owners_unittest.py | 156 +++++++++++++++++++++++++++++++-------- 2 files changed, 179 insertions(+), 80 deletions(-) diff --git a/owners.py b/owners.py index 1b1775f08..a863a7853 100644 --- a/owners.py +++ b/owners.py @@ -49,6 +49,7 @@ Examples for all of these combinations can be found in tests/owners_unittest.py. """ import collections +import random import re @@ -252,60 +253,60 @@ class Database(object): ('%s is not a "set" directive, "*", ' 'or an email address: "%s"' % (line_type, directive))) - def _covering_set_of_owners_for(self, files): - # Get the set of directories from the files. - dirs = set() - for f in files: - dirs.add(self._enclosing_dir_with_owners(f)) - - - owned_dirs = {} - dir_owners = {} - + dirs_remaining = set(self._enclosing_dir_with_owners(f) for f in files) + all_possible_owners = self._all_possible_owners(dirs_remaining) + suggested_owners = set() + while dirs_remaining: + owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining) + suggested_owners.add(owner) + for dirname, _ in all_possible_owners[owner]: + dirs_remaining.remove(dirname) + return suggested_owners + + def _all_possible_owners(self, dirs): + """Returns a list of (potential owner, distance-from-dir) tuples; a + distance of 1 is the lowest/closest possible distance (which makes the + subsequent math easier).""" + all_possible_owners = {} for current_dir in dirs: - # Get the list of owners for each directory. - current_owners = set() dirname = current_dir - while dirname in self.owners_for: - current_owners |= self.owners_for[dirname] + distance = 1 + while True: + for owner in self.owners_for.get(dirname, []): + all_possible_owners.setdefault(owner, []) + # It's possible the same owner might match a directory from + # multiple files, and we only want the closest entry. + if not any(current_dir == el[0] for el in all_possible_owners[owner]): + all_possible_owners[owner].append((current_dir, distance)) if self._stop_looking(dirname): break - prev_parent = dirname dirname = self.os_path.dirname(dirname) - if prev_parent == dirname: - break - - # Map each directory to a list of its owners. - dir_owners[current_dir] = current_owners - - # Add the directory to the list of each owner. - for owner in current_owners: - owned_dirs.setdefault(owner, set()).add(current_dir) - - final_owners = set() - while dirs: - # Find the owner that has the most directories. - max_count = 0 - max_owner = None - owner_count = {} - for dirname in dirs: - for owner in dir_owners[dirname]: - count = owner_count.get(owner, 0) + 1 - owner_count[owner] = count - if count >= max_count: - max_owner = owner - max_count = count - - # If no more directories have OWNERS, we're done. - if not max_owner: - break - - final_owners.add(max_owner) - - # Remove all directories owned by the current owner from the remaining - # list. - for dirname in owned_dirs[max_owner]: - dirs.discard(dirname) - - return final_owners + distance += 1 + return all_possible_owners + + @staticmethod + def lowest_cost_owner(all_possible_owners, dirs): + # We want to minimize both the number of reviewers and the distance + # from the files/dirs needing reviews. The "pow(X, 1.75)" below is + # an arbitrarily-selected scaling factor that seems to work well - it + # will select one reviewer in the parent directory over three reviewers + # in subdirs, but not one reviewer over just two. + total_costs_by_owner = {} + for owner in all_possible_owners: + total_distance = 0 + num_directories_owned = 0 + for dirname, distance in all_possible_owners[owner]: + if dirname in dirs: + total_distance += distance + num_directories_owned += 1 + if num_directories_owned: + total_costs_by_owner[owner] = (total_distance / + pow(num_directories_owned, 1.75)) + + # Return the lowest cost owner. In the case of a tie, pick one randomly. + lowest_cost = min(total_costs_by_owner.itervalues()) + lowest_cost_owners = filter( + lambda owner: total_costs_by_owner[owner] == lowest_cost, + total_costs_by_owner) + return random.Random().choice(lowest_cost_owners) diff --git a/tests/owners_unittest.py b/tests/owners_unittest.py index 34272a133..6ad598e4b 100755 --- a/tests/owners_unittest.py +++ b/tests/owners_unittest.py @@ -40,6 +40,8 @@ def test_repo(): '/OWNERS': owners_file(owners.EVERYONE), '/base/vlog.h': '', '/chrome/OWNERS': owners_file(ben, brett), + '/chrome/browser/OWNERS': owners_file(brett), + '/chrome/browser/defaults.h': '', '/chrome/gpu/OWNERS': owners_file(ken), '/chrome/gpu/gpu_channel.h': '', '/chrome/renderer/OWNERS': owners_file(peter), @@ -58,7 +60,7 @@ def test_repo(): }) -class OwnersDatabaseTest(unittest.TestCase): +class _BaseTestCase(unittest.TestCase): def setUp(self): self.repo = test_repo() self.files = self.repo.files @@ -73,6 +75,8 @@ class OwnersDatabaseTest(unittest.TestCase): glob = glob or self.glob return owners.Database(root, fopen, os_path, glob) + +class OwnersDatabaseTest(_BaseTestCase): def test_constructor(self): self.assertNotEquals(self.db(), None) @@ -205,16 +209,41 @@ class OwnersDatabaseTest(unittest.TestCase): self.assertRaises(owners.SyntaxErrorInOwnersFile, self.db().directories_not_covered_by, ['DEPS'], [brett]) - def assert_reviewers_for(self, files, expected_reviewers): + def assert_syntax_error(self, owners_file_contents): + db = self.db() + self.files['/foo/OWNERS'] = owners_file_contents + self.files['/foo/DEPS'] = '' + try: + db.reviewers_for(['foo/DEPS']) + self.fail() # pragma: no cover + except owners.SyntaxErrorInOwnersFile, e: + self.assertTrue(str(e).startswith('/foo/OWNERS:1')) + + def test_syntax_error__unknown_token(self): + self.assert_syntax_error('{}\n') + + def test_syntax_error__unknown_set(self): + self.assert_syntax_error('set myfatherisbillgates\n') + + def test_syntax_error__bad_email(self): + self.assert_syntax_error('ben\n') + + +class ReviewersForTest(_BaseTestCase): + def assert_reviewers_for(self, files, *potential_suggested_reviewers): db = self.db() - self.assertEquals(db.reviewers_for(set(files)), set(expected_reviewers)) + suggested_reviewers = db.reviewers_for(set(files)) + self.assertTrue(suggested_reviewers in + [set(suggestion) for suggestion in potential_suggested_reviewers]) def test_reviewers_for__basic_functionality(self): self.assert_reviewers_for(['chrome/gpu/gpu_channel.h'], - [brett]) + [ken]) def test_reviewers_for__set_noparent_works(self): - self.assert_reviewers_for(['content/content.gyp'], [darin]) + self.assert_reviewers_for(['content/content.gyp'], + [john], + [darin]) def test_reviewers_for__valid_inputs(self): db = self.db() @@ -236,15 +265,16 @@ class OwnersDatabaseTest(unittest.TestCase): self.assert_reviewers_for([ 'chrome/gpu/gpu_channel.h', 'content/baz/froboz.h', - 'chrome/renderer/gpu/gpu_channel_host.h'], [brett]) + 'chrome/renderer/gpu/gpu_channel_host.h'], + [brett]) def test_reviewers_for__two_owners(self): self.assert_reviewers_for([ 'chrome/gpu/gpu_channel.h', 'content/content.gyp', 'content/baz/froboz.h', - 'content/views/pie.h' - ], [john, brett]) + 'content/views/pie.h'], + [ken, john]) def test_reviewers_for__all_files(self): self.assert_reviewers_for([ @@ -254,32 +284,100 @@ class OwnersDatabaseTest(unittest.TestCase): 'content/content.gyp', 'content/bar/foo.cc', 'content/baz/froboz.h', - 'content/views/pie.h'], [john, brett]) + 'content/views/pie.h'], + [peter, ken, john]) def test_reviewers_for__per_file_owners_file(self): self.files['/content/baz/OWNERS'] = owners_file(lines=[ 'per-file ugly.*=tom@example.com']) - self.assert_reviewers_for(['content/baz/OWNERS'], [darin]) - - def assert_syntax_error(self, owners_file_contents): - db = self.db() - self.files['/foo/OWNERS'] = owners_file_contents - self.files['/foo/DEPS'] = '' - try: - db.reviewers_for(['foo/DEPS']) - self.fail() # pragma: no cover - except owners.SyntaxErrorInOwnersFile, e: - self.assertTrue(str(e).startswith('/foo/OWNERS:1')) - - def test_syntax_error__unknown_token(self): - self.assert_syntax_error('{}\n') - - def test_syntax_error__unknown_set(self): - self.assert_syntax_error('set myfatherisbillgates\n') - - def test_syntax_error__bad_email(self): - self.assert_syntax_error('ben\n') + self.assert_reviewers_for(['content/baz/OWNERS'], + [john], + [darin]) + def test_reviewers_for__per_file(self): + self.files['/content/baz/OWNERS'] = owners_file(lines=[ + 'per-file ugly.*=tom@example.com']) + self.assert_reviewers_for(['content/baz/ugly.cc'], + [tom]) + + def test_reviewers_for__two_nested_dirs(self): + # The same owner is listed in two directories (one above the other) + self.assert_reviewers_for(['chrome/browser/defaults.h'], + [brett]) + +class LowestCostOwnersTest(_BaseTestCase): + # Keep the data in the test_lowest_cost_owner* methods as consistent with + # test_repo() where possible to minimize confusion. + + def check(self, possible_owners, dirs, *possible_lowest_cost_owners): + suggested_owner = owners.Database.lowest_cost_owner(possible_owners, dirs) + self.assertTrue(suggested_owner in possible_lowest_cost_owners) + + def test_one_dir_with_owner(self): + # brett is the only immediate owner for stuff in baz; john is also + # an owner, but further removed. We should always get brett. + self.check({brett: [('content/baz', 1)], + john: [('content/baz', 2)]}, + ['content/baz'], + brett) + + # john and darin are owners for content; the suggestion could be either. + def test_one_dir_with_two_owners(self): + self.check({john: [('content', 1)], + darin: [('content', 1)]}, + ['content'], + john, darin) + + def test_one_dir_with_two_owners_in_parent(self): + # As long as the distance is the same, it shouldn't matter (brett isn't + # listed in this case). + self.check({john: [('content/baz', 2)], + darin: [('content/baz', 2)]}, + ['content/baz'], + john, darin) + + def test_two_dirs_two_owners(self): + # If they both match both dirs, they should be treated equally. + self.check({john: [('content/baz', 2), ('content/bar', 2)], + darin: [('content/baz', 2), ('content/bar', 2)]}, + ['content/baz', 'content/bar'], + john, darin) + + # Here brett is better since he's closer for one of the two dirs. + self.check({brett: [('content/baz', 1), ('content/views', 1)], + darin: [('content/baz', 2), ('content/views', 1)]}, + ['content/baz', 'content/views'], + brett) + + def test_hierarchy(self): + # the choices in these tests are more arbitrary value judgements; + # also, here we drift away from test_repo() to cover more cases. + + # Here ben isn't picked, even though he can review both; we prefer + # closer reviewers. + self.check({ben: [('chrome/gpu', 2), ('chrome/renderer', 2)], + ken: [('chrome/gpu', 1)], + peter: [('chrome/renderer', 1)]}, + ['chrome/gpu', 'chrome/renderer'], + ken, peter) + + # Here we always pick ben since he can review either dir as well as + # the others but can review both (giving us fewer total reviewers). + self.check({ben: [('chrome/gpu', 1), ('chrome/renderer', 1)], + ken: [('chrome/gpu', 1)], + peter: [('chrome/renderer', 1)]}, + ['chrome/gpu', 'chrome/renderer'], + ben) + + # However, three reviewers is too many, so ben gets this one. + self.check({ben: [('chrome/gpu', 2), ('chrome/renderer', 2), + ('chrome/browser', 2)], + ken: [('chrome/gpu', 1)], + peter: [('chrome/renderer', 1)], + brett: [('chrome/browser', 1)]}, + ['chrome/gpu', 'chrome/renderer', + 'chrome/browser'], + ben) if __name__ == '__main__': unittest.main()