Use hashlib for truly deterministic hashes

It turns out Python's built-in hash() function uses a random seed
that changes per process, so branch names based on that hash
aren't actually deterministic. This changes the hashing algorithm
to a stable hash (SHA-1 via hashlib) so we get consistent results
across runs.

Bug: 389069356
Change-Id: I1d3241b922005a7bff4d8621dc79dc4551bf264e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/6258544
Reviewed-by: Josip Sokcevic <sokcevic@chromium.org>
Commit-Queue: Devon Loehr <dloehr@google.com>
changes/44/6258544/3
Devon Loehr 2 weeks ago committed by LUCI CQ
parent ed3da5161f
commit 65aa0a04ff

@@ -6,6 +6,7 @@
 import collections
 import dataclasses
+import hashlib
 import os
 import re
 import tempfile
@@ -33,14 +34,17 @@ def EmitWarning(msg: str):
 print("Warning: ", msg)
-def HashList(lst: List[Any]) -> int:
+def HashList(lst: List[Any]) -> str:
 """
 Hash a list, returning a positive integer. Lists with identical elements
 should have the same hash, regardless of order.
 """
-# Python refuses to hash lists directly because they're mutable
-tup = tuple(sorted(lst))
-return abs(hash(tup))
+# We need a bytes-like object for hashlib algorithms
+byts = bytes().join(
+(action + file).encode() for action, file in sorted(lst))
+# No security implication: we just need a deterministic output
+hashed = hashlib.sha1(byts)
+return hashed.hexdigest()[:10]
 FilesAndOwnersDirectory = collections.namedtuple("FilesAndOwnersDirectory",
 "files owners_directories")
@@ -115,7 +119,7 @@ def CreateBranchName(prefix: str, files: List[Tuple[str, str]]) -> str:
 common_path = "None"
 # Replace path delimiter with underscore in common_path.
 common_path = common_path.replace(os.path.sep, '_')
-return f"{prefix}_{HashList(files):020}_{common_path}_split"
+return f"{prefix}_{HashList(files)}_{common_path}_split"
 def CreateBranchForOneCL(prefix: str, files: List[Tuple[str, str]],

Loading…
Cancel
Save