diff --git a/awscrt/checksums.py b/awscrt/checksums.py
index 06a0005b1..abade6e61 100644
--- a/awscrt/checksums.py
+++ b/awscrt/checksums.py
@@ -30,3 +30,96 @@ def crc64nvme(input: bytes, previous_crc64nvme: int = 0) -> int:
     Returns an unsigned 64-bit integer.
     """
     return _awscrt.checksums_crc64nvme(input, previous_crc64nvme)
+
+
+def combine_crc32(crc32_result1: int, crc32_result2: int, data_length2: int) -> int:
+    """
+    Combine two CRC32 (Ethernet, gzip) checksums computed over separate data blocks.
+
+    This is equivalent to computing the CRC32 of the concatenated data blocks without
+    having to re-scan the data.
+
+    Given:
+        crc1 = CRC32(data_block_A)
+        crc2 = CRC32(data_block_B)
+
+    This function computes:
+        result = CRC32(data_block_A || data_block_B)
+
+    Args:
+        crc32_result1: The CRC32 checksum of the first data block
+        crc32_result2: The CRC32 checksum of the second data block
+        data_length2: The length (in bytes) of the original data that produced crc32_result2.
+                      This is NOT the size of the checksum (which is always 4 bytes),
+                      but rather the size of the data block that was checksummed.
+
+    Returns:
+        The combined CRC32 checksum as if computed over the concatenated data
+
+    Raises:
+        ValueError: If either checksum is not a valid unsigned 32-bit integer, or if
+            data_length2 is not a valid unsigned 64-bit integer.
+    """
+    return _awscrt.checksums_crc32_combine(crc32_result1, crc32_result2, data_length2)
+
+
+def combine_crc32c(crc32c_result1: int, crc32c_result2: int, data_length2: int) -> int:
+    """
+    Combine two CRC32C (Castagnoli, iSCSI) checksums computed over separate data blocks.
+
+    This is equivalent to computing the CRC32C of the concatenated data blocks without
+    having to re-scan the data.
+
+    Given:
+        crc1 = CRC32C(data_block_A)
+        crc2 = CRC32C(data_block_B)
+
+    This function computes:
+        result = CRC32C(data_block_A || data_block_B)
+
+    Args:
+        crc32c_result1: The CRC32C checksum of the first data block
+        crc32c_result2: The CRC32C checksum of the second data block
+        data_length2: The length (in bytes) of the original data that produced crc32c_result2.
+                      This is NOT the size of the checksum (which is always 4 bytes),
+                      but rather the size of the data block that was checksummed.
+
+    Returns:
+        The combined CRC32C checksum as if computed over the concatenated data
+
+    Raises:
+        ValueError: If either checksum is not a valid unsigned 32-bit integer, or if
+            data_length2 is not a valid unsigned 64-bit integer.
+    """
+    return _awscrt.checksums_crc32c_combine(crc32c_result1, crc32c_result2, data_length2)
+
+
+def combine_crc64nvme(crc64nvme_result1: int, crc64nvme_result2: int, data_length2: int) -> int:
+    """
+    Combine two CRC64-NVME (CRC64-Rocksoft) checksums computed over separate data blocks.
+
+    This is equivalent to computing the CRC64-NVME of the concatenated data blocks without
+    having to re-scan the data.
+
+    Given:
+        crc1 = CRC64_NVME(data_block_A)
+        crc2 = CRC64_NVME(data_block_B)
+
+    This function computes:
+        result = CRC64_NVME(data_block_A || data_block_B)
+
+    Args:
+        crc64nvme_result1: The CRC64-NVME checksum of the first data block
+        crc64nvme_result2: The CRC64-NVME checksum of the second data block
+        data_length2: The length (in bytes) of the original data that produced crc64nvme_result2.
+                      This is NOT the size of the checksum (which is always 8 bytes),
+                      but rather the size of the data block that was checksummed.
+
+    Returns:
+        The combined CRC64-NVME checksum as if computed over the concatenated data
+
+    Raises:
+        ValueError: If either checksum or data_length2 is not a valid unsigned
+            64-bit integer.
+    """
+    return _awscrt.checksums_crc64nvme_combine(crc64nvme_result1, crc64nvme_result2, data_length2)
diff --git a/crt/aws-checksums b/crt/aws-checksums
index 9978ba2c3..270b15acc 160000
--- a/crt/aws-checksums
+++ b/crt/aws-checksums
@@ -1 +1 @@
-Subproject commit 9978ba2c33a7a259c1a6bd0f62abe26827d03b85
+Subproject commit 270b15acc1b2125340ec1c6dda6cc3c28ef0fa44
diff --git a/crt/aws-lc b/crt/aws-lc
index b5e2f866e..c23b2ae88 160000
--- a/crt/aws-lc
+++ b/crt/aws-lc
@@ -1 +1 @@
-Subproject commit b5e2f866efc0c7f90fcb6781281ea31063efbd96
+Subproject commit c23b2ae88deec89091d8eeb37178fbef24c96919
diff --git a/crt/s2n b/crt/s2n
index f6ca8f094..1c9844722 160000
--- a/crt/s2n
+++ b/crt/s2n
@@ -1 +1 @@
-Subproject commit f6ca8f0941851af4a05739c4a4b426970e953317
+Subproject commit 1c9844722d9cc3beca5551c0f9bfed8aabbc22ed
diff --git a/source/checksums.h b/source/checksums.h
index 824d743b2..cb6464f3e 100644
--- a/source/checksums.h
+++ b/source/checksums.h
@@ -9,5 +9,8 @@
 PyObject *aws_py_checksums_crc32(PyObject *self, PyObject *args);
 PyObject *aws_py_checksums_crc32c(PyObject *self, PyObject *args);
 PyObject *aws_py_checksums_crc64nvme(PyObject *self, PyObject *args);
+PyObject *aws_py_checksums_crc32_combine(PyObject *self, PyObject *args);
+PyObject *aws_py_checksums_crc32c_combine(PyObject *self, PyObject *args);
+PyObject *aws_py_checksums_crc64nvme_combine(PyObject *self, PyObject *args);
 
 #endif /* AWS_CRT_PYTHON_CHECKSUMS_H */
diff --git a/source/crc.c b/source/crc.c
index b02fc4759..b7a67cf19 100644
--- a/source/crc.c
+++ b/source/crc.c
@@ -104,3 +104,126 @@ PyObject *aws_py_checksums_crc64nvme(PyObject *self, PyObject *args) {
     }
     return py_result;
 }
+
+/**
+ * Python binding for aws_checksums_crc32_combine().
+ * Args: (crc32_result1, crc32_result2, data_length2).
+ * Raises ValueError unless the checksums are unsigned 32-bit and the length is an unsigned 64-bit integer.
+ */
+PyObject *aws_py_checksums_crc32_combine(PyObject *self, PyObject *args) {
+    (void)self;
+    PyObject *py_crc1;
+    PyObject *py_crc2;
+    PyObject *py_len2;
+
+    if (!PyArg_ParseTuple(args, "OOO", &py_crc1, &py_crc2, &py_len2)) {
+        return NULL;
+    }
+
+    /* Parse as 64-bit and range-check explicitly: `unsigned long` is 64 bits wide on LP64 platforms, so
+     * PyLong_AsUnsignedLong() would accept values above UINT32_MAX and silently truncate them. */
+    uint64_t crc1 = PyLong_AsUnsignedLongLong(py_crc1);
+    if ((crc1 == (uint64_t)-1 && PyErr_Occurred()) || crc1 > UINT32_MAX) {
+        PyErr_Clear();
+        PyErr_SetString(PyExc_ValueError, "crc32_result1 is not a valid unsigned 32-bit integer");
+        return NULL;
+    }
+
+    uint64_t crc2 = PyLong_AsUnsignedLongLong(py_crc2);
+    if ((crc2 == (uint64_t)-1 && PyErr_Occurred()) || crc2 > UINT32_MAX) {
+        PyErr_Clear();
+        PyErr_SetString(PyExc_ValueError, "crc32_result2 is not a valid unsigned 32-bit integer");
+        return NULL;
+    }
+
+    uint64_t len2 = PyLong_AsUnsignedLongLong(py_len2);
+    if (len2 == (uint64_t)-1 && PyErr_Occurred()) {
+        PyErr_Clear();
+        PyErr_SetString(PyExc_ValueError, "data_length2 is not a valid unsigned 64-bit integer");
+        return NULL;
+    }
+
+    uint32_t result = aws_checksums_crc32_combine((uint32_t)crc1, (uint32_t)crc2, len2);
+    return PyLong_FromUnsignedLong(result);
+}
+
+/**
+ * Python binding for aws_checksums_crc32c_combine().
+ * Args: (crc32c_result1, crc32c_result2, data_length2).
+ * Raises ValueError unless the checksums are unsigned 32-bit and the length is an unsigned 64-bit integer.
+ */
+PyObject *aws_py_checksums_crc32c_combine(PyObject *self, PyObject *args) {
+    (void)self;
+    PyObject *py_crc1;
+    PyObject *py_crc2;
+    PyObject *py_len2;
+
+    if (!PyArg_ParseTuple(args, "OOO", &py_crc1, &py_crc2, &py_len2)) {
+        return NULL;
+    }
+
+    /* Parse as 64-bit and range-check so values above UINT32_MAX are rejected instead of truncated. */
+    uint64_t crc1 = PyLong_AsUnsignedLongLong(py_crc1);
+    if ((crc1 == (uint64_t)-1 && PyErr_Occurred()) || crc1 > UINT32_MAX) {
+        PyErr_Clear();
+        PyErr_SetString(PyExc_ValueError, "crc32c_result1 is not a valid unsigned 32-bit integer");
+        return NULL;
+    }
+
+    uint64_t crc2 = PyLong_AsUnsignedLongLong(py_crc2);
+    if ((crc2 == (uint64_t)-1 && PyErr_Occurred()) || crc2 > UINT32_MAX) {
+        PyErr_Clear();
+        PyErr_SetString(PyExc_ValueError, "crc32c_result2 is not a valid unsigned 32-bit integer");
+        return NULL;
+    }
+
+    uint64_t len2 = PyLong_AsUnsignedLongLong(py_len2);
+    if (len2 == (uint64_t)-1 && PyErr_Occurred()) {
+        PyErr_Clear();
+        PyErr_SetString(PyExc_ValueError, "data_length2 is not a valid unsigned 64-bit integer");
+        return NULL;
+    }
+
+    uint32_t result = aws_checksums_crc32c_combine((uint32_t)crc1, (uint32_t)crc2, len2);
+    return PyLong_FromUnsignedLong(result);
+}
+
+/**
+ * Python binding for aws_checksums_crc64nvme_combine().
+ * Args: (crc64nvme_result1, crc64nvme_result2, data_length2).
+ * Raises ValueError unless every argument is a valid unsigned 64-bit integer.
+ */
+PyObject *aws_py_checksums_crc64nvme_combine(PyObject *self, PyObject *args) {
+    (void)self;
+    PyObject *py_crc1;
+    PyObject *py_crc2;
+    PyObject *py_len2;
+
+    if (!PyArg_ParseTuple(args, "OOO", &py_crc1, &py_crc2, &py_len2)) {
+        return NULL;
+    }
+
+    uint64_t crc1 = PyLong_AsUnsignedLongLong(py_crc1);
+    if (crc1 == (uint64_t)-1 && PyErr_Occurred()) {
+        PyErr_Clear();
+        PyErr_SetString(PyExc_ValueError, "crc64nvme_result1 is not a valid unsigned 64-bit integer");
+        return NULL;
+    }
+
+    uint64_t crc2 = PyLong_AsUnsignedLongLong(py_crc2);
+    if (crc2 == (uint64_t)-1 && PyErr_Occurred()) {
+        PyErr_Clear();
+        PyErr_SetString(PyExc_ValueError, "crc64nvme_result2 is not a valid unsigned 64-bit integer");
+        return NULL;
+    }
+
+    uint64_t len2 = PyLong_AsUnsignedLongLong(py_len2);
+    if (len2 == (uint64_t)-1 && PyErr_Occurred()) {
+        PyErr_Clear();
+        PyErr_SetString(PyExc_ValueError, "data_length2 is not a valid unsigned 64-bit integer");
+        return NULL;
+    }
+
+    uint64_t result = aws_checksums_crc64nvme_combine(crc1, crc2, len2);
+    return PyLong_FromUnsignedLongLong(result);
+}
diff --git a/source/module.c b/source/module.c
index 6da50b3d8..84b5673d2 100644
--- a/source/module.c
+++ b/source/module.c
@@ -859,6 +859,9 @@ static PyMethodDef s_module_methods[] = {
     AWS_PY_METHOD_DEF(checksums_crc32, METH_VARARGS),
     AWS_PY_METHOD_DEF(checksums_crc32c, METH_VARARGS),
     AWS_PY_METHOD_DEF(checksums_crc64nvme, METH_VARARGS),
+    AWS_PY_METHOD_DEF(checksums_crc32_combine, METH_VARARGS),
+    AWS_PY_METHOD_DEF(checksums_crc32c_combine, METH_VARARGS),
+    AWS_PY_METHOD_DEF(checksums_crc64nvme_combine, METH_VARARGS),
 
     /* HTTP */
     AWS_PY_METHOD_DEF(http_connection_close, METH_VARARGS),
diff --git a/test/test_checksums.py b/test/test_checksums.py
index 5d46581b8..91890ceb4 100644
--- a/test/test_checksums.py
+++ b/test/test_checksums.py
@@ -137,6 +137,97 @@ def test_crc64nvme_huge_buffer(self):
         val = checksums.crc64nvme(huge_buffer)
         self.assertEqual(0x2645c28052b1fbb0, val)
 
+    def _test_combine_helper(self, checksum_fn, combine_fn):
+        """Helper method to test checksum combine functions with various scenarios."""
+
+        # Test 1: Basic combine of two blocks
+        data1 = b"Hello, "
+        data2 = b"World!"
+
+        crc1 = checksum_fn(data1)
+        crc2 = checksum_fn(data2)
+        combined = combine_fn(crc1, crc2, len(data2))
+        expected = checksum_fn(data1 + data2)
+
+        self.assertEqual(expected, combined)
+
+        # Test 2: Empty second block
+        data1 = b"Hello, World!"
+        data2 = b""
+
+        crc1 = checksum_fn(data1)
+        crc2 = checksum_fn(data2)
+        combined = combine_fn(crc1, crc2, len(data2))
+
+        self.assertEqual(crc1, combined)
+
+        # Test 3: Multiple blocks
+        data1 = b"The quick "
+        data2 = b"brown fox "
+        data3 = b"jumps over the lazy dog"
+
+        crc1 = checksum_fn(data1)
+        crc2 = checksum_fn(data2)
+        crc3 = checksum_fn(data3)
+
+        combined_12 = combine_fn(crc1, crc2, len(data2))
+        combined_123 = combine_fn(combined_12, crc3, len(data3))
+        expected = checksum_fn(data1 + data2 + data3)
+
+        self.assertEqual(expected, combined_123)
+
+        # Test 4: Large blocks
+        data1 = bytes(1024)
+        data2 = bytes(range(256)) * 4
+
+        crc1 = checksum_fn(data1)
+        crc2 = checksum_fn(data2)
+        combined = combine_fn(crc1, crc2, len(data2))
+        expected = checksum_fn(data1 + data2)
+
+        self.assertEqual(expected, combined)
+
+    def test_crc32_combine(self):
+        """Test CRC32 combine function."""
+        self._test_combine_helper(checksums.crc32, checksums.combine_crc32)
+
+    def test_crc32c_combine(self):
+        """Test CRC32C combine function."""
+        self._test_combine_helper(checksums.crc32c, checksums.combine_crc32c)
+
+    def test_crc64nvme_combine(self):
+        """Test CRC64-NVME combine function."""
+        self._test_combine_helper(checksums.crc64nvme, checksums.combine_crc64nvme)
+
+    def test_combine_invalid_inputs(self):
+        """Test that combine functions raise ValueError for invalid inputs."""
+        # Test invalid values (should fail for all algorithms)
+        for combine_fn in [checksums.combine_crc32, checksums.combine_crc32c, checksums.combine_crc64nvme]:
+            with self.assertRaises(ValueError) as context:
+                combine_fn(-1, 0, 0)
+            self.assertIn("not a valid unsigned", str(context.exception))
+
+            with self.assertRaises(ValueError) as context:
+                combine_fn(0, 0, -1)
+            self.assertIn("not a valid unsigned", str(context.exception))
+
+        # 32-bit checksums above 0xFFFFFFFF must be rejected rather than silently truncated
+        for combine_fn in [checksums.combine_crc32, checksums.combine_crc32c]:
+            with self.assertRaises(ValueError) as context:
+                combine_fn(0x100000000, 0, 0)
+            self.assertIn("not a valid unsigned 32-bit", str(context.exception))
+
+            with self.assertRaises(ValueError) as context:
+                combine_fn(0, 0x100000000, 0)
+            self.assertIn("not a valid unsigned 32-bit", str(context.exception))
+
+        # Test that valid inputs don't raise exceptions
+        for combine_fn in [checksums.combine_crc32, checksums.combine_crc32c, checksums.combine_crc64nvme]:
+            # This should not raise any exception
+            result = combine_fn(0, 0, 0)
+            # Result should be an integer
+            self.assertIsInstance(result, int)
+
 
 if __name__ == '__main__':
     unittest.main()