Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion fastdeploy/multimodal/hasher.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,9 @@ class MultimodalHasher:
@classmethod
def hash_features(cls, obj: object) -> str:
    """Return the SHA-256 hex digest identifying a multimodal feature object.

    NumPy arrays are hashed from their shape, dtype and raw bytes; every
    other object is hashed from its pickle serialization.

    Args:
        obj: Feature to hash: a ``numpy.ndarray`` or any picklable object.

    Returns:
        The hexadecimal SHA-256 digest as a string.

    Raises:
        Whatever ``pickle.dumps`` raises when a non-array ``obj`` cannot be
        pickled.
    """
    if isinstance(obj, np.ndarray):
        # Encode shape and dtype into the hash to avoid collisions between
        # arrays that share the same raw bytes but differ in layout, e.g.
        # a (6,4) vs (4,6) array, or float32 vs uint8 reinterpretation.
        header = f"{obj.shape}|{obj.dtype}|".encode()
        return hashlib.sha256(header + obj.tobytes()).hexdigest()
    # Fallback for non-array objects: the digest follows the pickle byte
    # stream, so it is only as stable as pickle's output for that object.
    return hashlib.sha256(pickle.dumps(obj)).hexdigest()
16 changes: 15 additions & 1 deletion tests/multimodal/test_hasher.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,23 @@ def test_hash_features_ndarray(self):
"""Test hash features with numpy ndarray"""
arr = np.random.randint(low=0, high=255, size=(28, 28), dtype=np.uint8)
arr_hash = MultimodalHasher.hash_features(arr)
target_hash = hashlib.sha256((arr.tobytes())).hexdigest()
header = f"{arr.shape}|{arr.dtype}|".encode()
target_hash = hashlib.sha256(header + arr.tobytes()).hexdigest()
assert arr_hash == target_hash, f"Ndarray hash mismatch: {arr_hash} != {target_hash}"

def test_hash_features_ndarray_shape_sensitivity(self):
    """Identical raw bytes viewed under different shapes must not hash alike."""
    flat = np.arange(24, dtype=np.float32)
    # Both reshapes expose the same 24 float32 values; only the shape differs.
    digest_6x4 = MultimodalHasher.hash_features(flat.reshape(6, 4))
    digest_4x6 = MultimodalHasher.hash_features(flat.reshape(4, 6))
    assert digest_6x4 != digest_4x6

def test_hash_features_ndarray_dtype_sensitivity(self):
    """Equal shapes with different dtypes must not hash alike."""
    dims = (4, 4)
    # Hash one all-zero array per dtype, keeping the shape fixed.
    digests = [
        MultimodalHasher.hash_features(np.zeros(dims, dtype=kind))
        for kind in (np.float32, np.float64)
    ]
    assert digests[0] != digests[1]
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 建议 可补充空数组边界测试

当前测试覆盖了 shape 和 dtype 差异场景,建议补充以下边界情况:

def test_hash_features_ndarray_empty(self):
    """Empty arrays should produce valid hashes that still depend on shape."""
    a = np.zeros((0,), dtype=np.float32)
    b = np.zeros((0, 2), dtype=np.float32)
    hash_a = MultimodalHasher.hash_features(a)
    hash_b = MultimodalHasher.hash_features(b)
    # Each digest must be a well-formed SHA-256 hex string even though the
    # arrays contain no elements.
    for digest in (hash_a, hash_b):
        assert len(digest) == 64
        assert all(ch in "0123456789abcdef" for ch in digest)
    # Empty arrays with different shapes must still hash differently.
    assert hash_a != hash_b

由于这两个空数组的 tobytes() 都返回空字节串,该测试可确保哈希仅凭 shape/dtype 头部即可区分不同形状的空数组,并且 hash_features 在空数组上能正常返回有效哈希。


def test_hash_features_object(self):
"""Test hash features with unsupported object type"""
obj = {"key": "value"}
Expand Down
Loading