Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions awscrt/checksums.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,84 @@ def crc64nvme(input: bytes, previous_crc64nvme: int = 0) -> int:
Returns an unsigned 64-bit integer.
"""
return _awscrt.checksums_crc64nvme(input, previous_crc64nvme)


def combine_crc32(crc32_result1: int, crc32_result2: int, data_length2: int) -> int:
    """
    Merge the CRC32 (Ethernet, gzip) checksums of two adjacent data blocks.

    The result equals the CRC32 of the first block followed immediately by the
    second block, but is derived from the two existing checksums so the data
    itself never has to be read again:

        combine_crc32(CRC32(A), CRC32(B), len(B)) == CRC32(A || B)

    Args:
        crc32_result1: CRC32 checksum of the first (leading) data block.
        crc32_result2: CRC32 checksum of the second (trailing) data block.
        data_length2: Number of bytes in the data block that produced
            crc32_result2. This is the length of the checksummed data, NOT
            the size of the checksum value (which is always 4 bytes).

    Returns:
        The CRC32 checksum of the two data blocks concatenated.

    Raises:
        ValueError: If the native binding rejects an argument
            (for example, a negative value).
    """
    combined = _awscrt.checksums_crc32_combine(crc32_result1, crc32_result2, data_length2)
    return combined


def combine_crc32c(crc32c_result1: int, crc32c_result2: int, data_length2: int) -> int:
    """
    Merge the CRC32C (Castagnoli, iSCSI) checksums of two adjacent data blocks.

    The result equals the CRC32C of the first block followed immediately by the
    second block, but is derived from the two existing checksums so the data
    itself never has to be read again:

        combine_crc32c(CRC32C(A), CRC32C(B), len(B)) == CRC32C(A || B)

    Args:
        crc32c_result1: CRC32C checksum of the first (leading) data block.
        crc32c_result2: CRC32C checksum of the second (trailing) data block.
        data_length2: Number of bytes in the data block that produced
            crc32c_result2. This is the length of the checksummed data, NOT
            the size of the checksum value (which is always 4 bytes).

    Returns:
        The CRC32C checksum of the two data blocks concatenated.

    Raises:
        ValueError: If the native binding rejects an argument
            (for example, a negative value).
    """
    combined = _awscrt.checksums_crc32c_combine(crc32c_result1, crc32c_result2, data_length2)
    return combined


def combine_crc64nvme(crc64nvme_result1: int, crc64nvme_result2: int, data_length2: int) -> int:
    """
    Merge the CRC64-NVME (CRC64-Rocksoft) checksums of two adjacent data blocks.

    The result equals the CRC64-NVME of the first block followed immediately by
    the second block, but is derived from the two existing checksums so the data
    itself never has to be read again:

        combine_crc64nvme(CRC64_NVME(A), CRC64_NVME(B), len(B)) == CRC64_NVME(A || B)

    Args:
        crc64nvme_result1: CRC64-NVME checksum of the first (leading) data block.
        crc64nvme_result2: CRC64-NVME checksum of the second (trailing) data block.
        data_length2: Number of bytes in the data block that produced
            crc64nvme_result2. This is the length of the checksummed data, NOT
            the size of the checksum value (which is always 8 bytes).

    Returns:
        The CRC64-NVME checksum of the two data blocks concatenated.

    Raises:
        ValueError: If the native binding rejects an argument
            (for example, a negative value).
    """
    combined = _awscrt.checksums_crc64nvme_combine(crc64nvme_result1, crc64nvme_result2, data_length2)
    return combined
2 changes: 1 addition & 1 deletion crt/aws-lc
2 changes: 1 addition & 1 deletion crt/s2n
Submodule s2n updated from f6ca8f to 1c9844
3 changes: 3 additions & 0 deletions source/checksums.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,8 @@
/* Checksum computation bindings (implemented in crc.c). */
PyObject *aws_py_checksums_crc32(PyObject *self, PyObject *args);
PyObject *aws_py_checksums_crc32c(PyObject *self, PyObject *args);
PyObject *aws_py_checksums_crc64nvme(PyObject *self, PyObject *args);

/* Checksum combine bindings (implemented in crc.c).
 * Each expects `args` to be the tuple (crc1, crc2, len2), where len2 is the byte length of the
 * data that produced crc2. Returns a Python int, or NULL with a Python exception set
 * (ValueError when an argument cannot be converted to the expected unsigned integer). */
PyObject *aws_py_checksums_crc32_combine(PyObject *self, PyObject *args);
PyObject *aws_py_checksums_crc32c_combine(PyObject *self, PyObject *args);
PyObject *aws_py_checksums_crc64nvme_combine(PyObject *self, PyObject *args);

#endif /* AWS_CRT_PYTHON_CHECKSUMS_H */
105 changes: 105 additions & 0 deletions source/crc.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,108 @@ PyObject *aws_py_checksums_crc64nvme(PyObject *self, PyObject *args) {
}
return py_result;
}

/**
 * Python binding for combining two CRC32 checksums.
 *
 * Expects `args` to be the tuple (crc1, crc2, len2):
 *   crc1 - CRC32 of the first data block (must fit in an unsigned 32-bit integer)
 *   crc2 - CRC32 of the second data block (must fit in an unsigned 32-bit integer)
 *   len2 - length in bytes of the data that produced crc2 (unsigned 64-bit integer)
 *
 * Returns a new Python int holding the combined CRC32, or NULL with a Python
 * exception set. Any conversion failure (negative value, value too large,
 * non-integer object) is reported as ValueError.
 */
PyObject *aws_py_checksums_crc32_combine(PyObject *self, PyObject *args) {
    (void)self;
    PyObject *py_crc1;
    PyObject *py_crc2;
    PyObject *py_len2;

    if (!PyArg_ParseTuple(args, "OOO", &py_crc1, &py_crc2, &py_len2)) {
        return NULL;
    }

    /* `unsigned long` is wider than 32 bits on LP64 platforms. Keep the full-width value and
     * range-check it, rather than letting an assignment to uint32_t silently drop the high bits. */
    unsigned long crc1_wide = PyLong_AsUnsignedLong(py_crc1);
    if ((crc1_wide == (unsigned long)-1 && PyErr_Occurred()) || (uint64_t)crc1_wide > (uint64_t)UINT32_MAX) {
        /* PyErr_Clear() is a no-op when the failure is only the range check */
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "crc32_result1 is not a valid unsigned 32-bit integer");
        return NULL;
    }

    unsigned long crc2_wide = PyLong_AsUnsignedLong(py_crc2);
    if ((crc2_wide == (unsigned long)-1 && PyErr_Occurred()) || (uint64_t)crc2_wide > (uint64_t)UINT32_MAX) {
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "crc32_result2 is not a valid unsigned 32-bit integer");
        return NULL;
    }

    /* (uint64_t)-1 is a legal length, so it only signals failure when an exception is pending */
    uint64_t len2 = PyLong_AsUnsignedLongLong(py_len2);
    if (len2 == (uint64_t)-1 && PyErr_Occurred()) {
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "data_length2 is not a valid unsigned 64-bit integer");
        return NULL;
    }

    /* Both values were verified to fit in 32 bits, so the narrowing casts are lossless */
    uint32_t result = aws_checksums_crc32_combine((uint32_t)crc1_wide, (uint32_t)crc2_wide, len2);
    return PyLong_FromUnsignedLong(result);
}

/**
 * Python binding for combining two CRC32C checksums.
 *
 * Expects `args` to be the tuple (crc1, crc2, len2):
 *   crc1 - CRC32C of the first data block (must fit in an unsigned 32-bit integer)
 *   crc2 - CRC32C of the second data block (must fit in an unsigned 32-bit integer)
 *   len2 - length in bytes of the data that produced crc2 (unsigned 64-bit integer)
 *
 * Returns a new Python int holding the combined CRC32C, or NULL with a Python
 * exception set. Any conversion failure (negative value, value too large,
 * non-integer object) is reported as ValueError.
 */
PyObject *aws_py_checksums_crc32c_combine(PyObject *self, PyObject *args) {
    (void)self;
    PyObject *py_crc1;
    PyObject *py_crc2;
    PyObject *py_len2;

    if (!PyArg_ParseTuple(args, "OOO", &py_crc1, &py_crc2, &py_len2)) {
        return NULL;
    }

    /* `unsigned long` is wider than 32 bits on LP64 platforms. Keep the full-width value and
     * range-check it, rather than letting an assignment to uint32_t silently drop the high bits. */
    unsigned long crc1_wide = PyLong_AsUnsignedLong(py_crc1);
    if ((crc1_wide == (unsigned long)-1 && PyErr_Occurred()) || (uint64_t)crc1_wide > (uint64_t)UINT32_MAX) {
        /* PyErr_Clear() is a no-op when the failure is only the range check */
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "crc32c_result1 is not a valid unsigned 32-bit integer");
        return NULL;
    }

    unsigned long crc2_wide = PyLong_AsUnsignedLong(py_crc2);
    if ((crc2_wide == (unsigned long)-1 && PyErr_Occurred()) || (uint64_t)crc2_wide > (uint64_t)UINT32_MAX) {
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "crc32c_result2 is not a valid unsigned 32-bit integer");
        return NULL;
    }

    /* (uint64_t)-1 is a legal length, so it only signals failure when an exception is pending */
    uint64_t len2 = PyLong_AsUnsignedLongLong(py_len2);
    if (len2 == (uint64_t)-1 && PyErr_Occurred()) {
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "data_length2 is not a valid unsigned 64-bit integer");
        return NULL;
    }

    /* Both values were verified to fit in 32 bits, so the narrowing casts are lossless */
    uint32_t result = aws_checksums_crc32c_combine((uint32_t)crc1_wide, (uint32_t)crc2_wide, len2);
    return PyLong_FromUnsignedLong(result);
}

/**
 * Python binding for combining two CRC64-NVME checksums.
 *
 * Expects `args` to be the tuple (crc1, crc2, len2), each convertible to an
 * unsigned 64-bit integer; len2 is the byte length of the data that produced crc2.
 *
 * Returns a new Python int holding the combined CRC64-NVME, or NULL with a Python
 * exception set. Conversion failures are reported as ValueError.
 */
PyObject *aws_py_checksums_crc64nvme_combine(PyObject *self, PyObject *args) {
    (void)self;

    PyObject *first_crc_obj;
    PyObject *second_crc_obj;
    PyObject *second_len_obj;
    if (!PyArg_ParseTuple(args, "OOO", &first_crc_obj, &second_crc_obj, &second_len_obj)) {
        return NULL;
    }

    /* UINT64_MAX is a legitimate value, so it only signals failure when an exception is pending */
    uint64_t first_crc = PyLong_AsUnsignedLongLong(first_crc_obj);
    if (first_crc == UINT64_MAX && PyErr_Occurred() != NULL) {
        /* Replace whatever the conversion raised with a uniform ValueError */
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "crc64nvme_result1 is not a valid unsigned 64-bit integer");
        return NULL;
    }

    uint64_t second_crc = PyLong_AsUnsignedLongLong(second_crc_obj);
    if (second_crc == UINT64_MAX && PyErr_Occurred() != NULL) {
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "crc64nvme_result2 is not a valid unsigned 64-bit integer");
        return NULL;
    }

    uint64_t second_len = PyLong_AsUnsignedLongLong(second_len_obj);
    if (second_len == UINT64_MAX && PyErr_Occurred() != NULL) {
        PyErr_Clear();
        PyErr_SetString(PyExc_ValueError, "data_length2 is not a valid unsigned 64-bit integer");
        return NULL;
    }

    return PyLong_FromUnsignedLongLong(aws_checksums_crc64nvme_combine(first_crc, second_crc, second_len));
}
3 changes: 3 additions & 0 deletions source/module.c
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,9 @@ static PyMethodDef s_module_methods[] = {
AWS_PY_METHOD_DEF(checksums_crc32, METH_VARARGS),
AWS_PY_METHOD_DEF(checksums_crc32c, METH_VARARGS),
AWS_PY_METHOD_DEF(checksums_crc64nvme, METH_VARARGS),
AWS_PY_METHOD_DEF(checksums_crc32_combine, METH_VARARGS),
AWS_PY_METHOD_DEF(checksums_crc32c_combine, METH_VARARGS),
AWS_PY_METHOD_DEF(checksums_crc64nvme_combine, METH_VARARGS),

/* HTTP */
AWS_PY_METHOD_DEF(http_connection_close, METH_VARARGS),
Expand Down
81 changes: 81 additions & 0 deletions test/test_checksums.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,87 @@ def test_crc64nvme_huge_buffer(self):
val = checksums.crc64nvme(huge_buffer)
self.assertEqual(0x2645c28052b1fbb0, val)

def _test_combine_helper(self, checksum_fn, combine_fn):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reading this helper brought me joy.

"""Helper method to test checksum combine functions with various scenarios."""

# Test 1: Basic combine of two blocks
data1 = b"Hello, "
data2 = b"World!"

crc1 = checksum_fn(data1)
crc2 = checksum_fn(data2)
combined = combine_fn(crc1, crc2, len(data2))
expected = checksum_fn(data1 + data2)

self.assertEqual(expected, combined)

# Test 2: Empty second block
data1 = b"Hello, World!"
data2 = b""

crc1 = checksum_fn(data1)
crc2 = checksum_fn(data2)
combined = combine_fn(crc1, crc2, len(data2))

self.assertEqual(crc1, combined)

# Test 3: Multiple blocks
data1 = b"The quick "
data2 = b"brown fox "
data3 = b"jumps over the lazy dog"

crc1 = checksum_fn(data1)
crc2 = checksum_fn(data2)
crc3 = checksum_fn(data3)

combined_12 = combine_fn(crc1, crc2, len(data2))
combined_123 = combine_fn(combined_12, crc3, len(data3))
expected = checksum_fn(data1 + data2 + data3)

self.assertEqual(expected, combined_123)

# Test 4: Large blocks
data1 = bytes(1024)
data2 = bytes(range(256)) * 4

crc1 = checksum_fn(data1)
crc2 = checksum_fn(data2)
combined = combine_fn(crc1, crc2, len(data2))
expected = checksum_fn(data1 + data2)

self.assertEqual(expected, combined)

def test_crc32_combine(self):
    """combine_crc32 must agree with crc32 computed over the concatenated data."""
    self._test_combine_helper(checksum_fn=checksums.crc32, combine_fn=checksums.combine_crc32)

def test_crc32c_combine(self):
    """combine_crc32c must agree with crc32c computed over the concatenated data."""
    self._test_combine_helper(checksum_fn=checksums.crc32c, combine_fn=checksums.combine_crc32c)

def test_crc64nvme_combine(self):
    """combine_crc64nvme must agree with crc64nvme computed over the concatenated data."""
    self._test_combine_helper(checksum_fn=checksums.crc64nvme, combine_fn=checksums.combine_crc64nvme)

def test_combine_invalid_inputs(self):
    """Combine functions must reject a negative value in any argument position.

    Every algorithm is checked with -1 as the first checksum, the second checksum,
    and the data length; each call must raise ValueError with a message containing
    "not a valid unsigned". A call with all-zero arguments must succeed and
    return an int.
    """
    combine_fns = [checksums.combine_crc32, checksums.combine_crc32c, checksums.combine_crc64nvme]

    # One invalid call per argument position, so every validation branch is exercised
    invalid_calls = [(-1, 0, 0), (0, -1, 0), (0, 0, -1)]

    for combine_fn in combine_fns:
        for bad_args in invalid_calls:
            # subTest reports which function/argument combination failed
            with self.subTest(combine_fn=combine_fn.__name__, args=bad_args):
                with self.assertRaises(ValueError) as context:
                    combine_fn(*bad_args)
                self.assertIn("not a valid unsigned", str(context.exception))

    # Valid inputs must not raise and must produce an integer
    for combine_fn in combine_fns:
        with self.subTest(combine_fn=combine_fn.__name__, args=(0, 0, 0)):
            result = combine_fn(0, 0, 0)
            self.assertIsInstance(result, int)


# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()