From 65aa0a04ff7fd6f1fa35e160f0d1bb8236570b92 Mon Sep 17 00:00:00 2001 From: Devon Loehr Date: Fri, 14 Feb 2025 10:13:01 -0800 Subject: [PATCH] Use hashlib for truly deterministic hashes It turns out Python's built-in hash function uses a random seed that changes per-process, so branch names based on that hash aren't actually deterministic. This changes the hashing algorithm to use a stable hash so we can get consistent results. Bug: 389069356 Change-Id: I1d3241b922005a7bff4d8621dc79dc4551bf264e Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/6258544 Reviewed-by: Josip Sokcevic Commit-Queue: Devon Loehr --- split_cl.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/split_cl.py b/split_cl.py index f544805f0..24e07d991 100644 --- a/split_cl.py +++ b/split_cl.py @@ -6,6 +6,7 @@ import collections import dataclasses +import hashlib import os import re import tempfile @@ -33,14 +34,17 @@ def EmitWarning(msg: str): print("Warning: ", msg) -def HashList(lst: List[Any]) -> int: +def HashList(lst: List[Any]) -> str: """ Hash a list, returning a positive integer. Lists with identical elements should have the same hash, regardless of order. """ - # Python refuses to hash lists directly because they're mutable - tup = tuple(sorted(lst)) - return abs(hash(tup)) + # We need a bytes-like object for hashlib algorithms + byts = bytes().join( + (action + file).encode() for action, file in sorted(lst)) + # No security implication: we just need a deterministic output + hashed = hashlib.sha1(byts) + return hashed.hexdigest()[:10] FilesAndOwnersDirectory = collections.namedtuple("FilesAndOwnersDirectory", "files owners_directories") @@ -115,7 +119,7 @@ def CreateBranchName(prefix: str, files: List[Tuple[str, str]]) -> str: common_path = "None" # Replace path delimiter with underscore in common_path. 
common_path = common_path.replace(os.path.sep, '_') - return f"{prefix}_{HashList(files):020}_{common_path}_split" + return f"{prefix}_{HashList(files)}_{common_path}_split" def CreateBranchForOneCL(prefix: str, files: List[Tuple[str, str]],