diff --git a/fastdeploy/multimodal/hasher.py b/fastdeploy/multimodal/hasher.py
index 6d2fc4f9b91..8793107d3de 100644
--- a/fastdeploy/multimodal/hasher.py
+++ b/fastdeploy/multimodal/hasher.py
@@ -25,5 +25,13 @@ class MultimodalHasher:
     @classmethod
     def hash_features(cls, obj: object) -> str:
         if isinstance(obj, np.ndarray):
-            return hashlib.sha256((obj.tobytes())).hexdigest()
+            # Encode shape and dtype into the hash to avoid collisions between
+            # arrays that share the same raw bytes but differ in layout, e.g.
+            # a (6,4) vs (4,6) array, or float32 vs int32 reinterpretation.
+            # The header and the payload are fed to the hasher separately:
+            # the digest is the same as hashing their concatenation, but the
+            # (potentially large) feature buffer is not copied a second time.
+            digest = hashlib.sha256(f"{obj.shape}|{obj.dtype}|".encode())
+            digest.update(obj.tobytes())
+            return digest.hexdigest()
         return hashlib.sha256((pickle.dumps(obj))).hexdigest()
diff --git a/tests/multimodal/test_hasher.py b/tests/multimodal/test_hasher.py
index a89ff2cf13a..ea6368449dc 100644
--- a/tests/multimodal/test_hasher.py
+++ b/tests/multimodal/test_hasher.py
@@ -26,9 +26,23 @@ def test_hash_features_ndarray(self):
         """Test hash features with numpy ndarray"""
         arr = np.random.randint(low=0, high=255, size=(28, 28), dtype=np.uint8)
         arr_hash = MultimodalHasher.hash_features(arr)
-        target_hash = hashlib.sha256((arr.tobytes())).hexdigest()
+        header = f"{arr.shape}|{arr.dtype}|".encode()
+        target_hash = hashlib.sha256(header + arr.tobytes()).hexdigest()
         assert arr_hash == target_hash, f"Ndarray hash mismatch: {arr_hash} != {target_hash}"
 
+    def test_hash_features_ndarray_shape_sensitivity(self):
+        """Arrays with same bytes but different shapes must produce different hashes"""
+        base = np.arange(24, dtype=np.float32)
+        a = base.reshape(6, 4)
+        b = base.reshape(4, 6)
+        assert MultimodalHasher.hash_features(a) != MultimodalHasher.hash_features(b)
+
+    def test_hash_features_ndarray_dtype_sensitivity(self):
+        """Arrays with same shape and same raw bytes but different dtypes must produce different hashes"""
+        a = np.zeros((4, 4), dtype=np.float32)
+        b = np.zeros((4, 4), dtype=np.int32)
+        assert MultimodalHasher.hash_features(a) != MultimodalHasher.hash_features(b)
+
     def test_hash_features_object(self):
         """Test hash features with unsupported object type"""
         obj = {"key": "value"}