From caf47427b8c1f098691c0a0665de16cef93e2996 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 7 Feb 2026 19:58:34 +0800 Subject: [PATCH 01/10] adjust collection size --- benchmarks/cpython_benchmark/fory_benchmark.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/cpython_benchmark/fory_benchmark.py b/benchmarks/cpython_benchmark/fory_benchmark.py index 48859b5142..0fcd14af46 100644 --- a/benchmarks/cpython_benchmark/fory_benchmark.py +++ b/benchmarks/cpython_benchmark/fory_benchmark.py @@ -150,13 +150,13 @@ ], 60, ) -LARGE_TUPLE = tuple(range(2**20 + 1)) -LARGE_FLOAT_TUPLE = tuple([random.random() * 10000 for _ in range(2**20 + 1)]) -LARGE_BOOLEAN_TUPLE = tuple([bool(random.random() > 0.5) for _ in range(2**20 + 1)]) +LARGE_TUPLE = tuple(range(2**8 + 1)) +LARGE_FLOAT_TUPLE = tuple([random.random() * 10000 for _ in range(2**8 + 1)]) +LARGE_BOOLEAN_TUPLE = tuple([bool(random.random() > 0.5) for _ in range(2**8 + 1)]) LIST = [[list(range(10)), list(range(10))] for _ in range(10)] -LARGE_LIST = [i for i in range(2**20 + 1)] +LARGE_LIST = [i for i in range(2**8 + 1)] def mutate_dict(orig_dict, random_source): From e038f795fb8b6dfa23cc8f91cff6a878babf8a45 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 7 Feb 2026 23:33:06 +0800 Subject: [PATCH 02/10] optimize pyfory perf --- cpp/fory/python/pyfory.cc | 569 ++++++++++++++++++++ cpp/fory/python/pyfory.h | 50 +- python/pyfory/collection.pxi | 448 ++++++++++++--- python/pyfory/includes/libserialization.pxd | 67 ++- python/pyfory/serialization.pyx | 60 ++- 5 files changed, 1109 insertions(+), 85 deletions(-) diff --git a/cpp/fory/python/pyfory.cc b/cpp/fory/python/pyfory.cc index 04117a7b54..81e46b8a93 100644 --- a/cpp/fory/python/pyfory.cc +++ b/cpp/fory/python/pyfory.cc @@ -19,6 +19,11 @@ #include "fory/python/pyfory.h" +#include +#include + +#include "fory/util/string_util.h" + static PyObject **py_sequence_get_items(PyObject *collection) { if (PyList_CheckExact(collection)) { return ((PyListObject *)collection)->ob_item; @@ -28,6 +33,156 @@ static PyObject **py_sequence_get_items(PyObject *collection) { return nullptr; } +static bool ensure_readable_size(fory::Buffer *buffer, Py_ssize_t size, + Py_ssize_t element_size) { + uint64_t readable = + static_cast(buffer->size() - buffer->reader_index()); + uint64_t expected = static_cast(size) * element_size; + return readable >= expected; +} + +static bool py_parse_int64(PyObject *obj, int64_t *out) { + if (!PyLong_CheckExact(obj)) { + return false; + } + + auto *long_obj = reinterpret_cast(obj); + Py_ssize_t size = Py_SIZE(long_obj); + if (size == 0) { + *out = 0; + return true; + } + if (size == 1) { + *out = static_cast(long_obj->ob_digit[0]); + return true; + } + if (size == -1) { + *out = -static_cast(long_obj->ob_digit[0]); + return true; + } + if (size == 2 || size == -2) { + int64_t value = static_cast(long_obj->ob_digit[0]); + value |= static_cast(long_obj->ob_digit[1]) << PyLong_SHIFT; + if (size < 0) { + value = -value; + } + *out = value; + return true; + } + + int overflow = 0; + long long value = PyLong_AsLongLongAndOverflow(obj, &overflow); + if (overflow != 0) { + PyErr_Clear(); + return false; + } + if (value == -1 && PyErr_Occurred()) { + PyErr_Clear(); + return false; + } + *out = static_cast(value); + return true; +} + +static bool py_write_string_to_buffer(PyObject *obj, fory::Buffer *buffer) { + if (!PyUnicode_CheckExact(obj)) { + return false; + } + Py_ssize_t length = PyUnicode_GET_LENGTH(obj); + int kind = PyUnicode_KIND(obj); + const void *str_data = PyUnicode_DATA(obj); + uint64_t header = 0; + Py_ssize_t byte_size = 0; + if (kind == PyUnicode_1BYTE_KIND) { + byte_size = length; + header = static_cast(length) << 2; + } else if (kind == PyUnicode_2BYTE_KIND) { + byte_size = length << 1; + header = (static_cast(length) << 3) | 1; + } else { + str_data = PyUnicode_AsUTF8AndSize(obj, &length); + if (str_data == nullptr) { + PyErr_Clear(); + return false; + } + byte_size = length; + header = (static_cast(byte_size) << 2) | 2; + } + if (byte_size < 0 || + static_cast(byte_size) > std::numeric_limits::max()) { + return false; + } + uint32_t payload_size = static_cast(byte_size); + buffer->write_var_uint64(header); + if (payload_size > 0) { + buffer->write_bytes(str_data, payload_size); + } + return true; +} + +static bool py_read_string_from_buffer(fory::Buffer *buffer, PyObject **out) { + if (out == nullptr) { + return false; + } + fory::Error error; + uint64_t header = buffer->read_var_uint64(error); + if (FORY_PREDICT_FALSE(!error.ok())) { + return false; + } + uint32_t payload_size = static_cast(header >> 2); + if (FORY_PREDICT_FALSE(payload_size > buffer->remaining_size())) { + return false; + } + + uint32_t reader_index = buffer->reader_index(); + const char *bytes = reinterpret_cast(buffer->data() + reader_index); + uint32_t encoding = static_cast(header & 0b11); + PyObject *value = nullptr; + if (encoding == 0) { + value = PyUnicode_DecodeLatin1(bytes, payload_size, "strict"); + } else if (encoding == 1) { + if (FORY_PREDICT_FALSE((payload_size & 1) != 0)) { + return false; + } + if (fory::utf16_has_surrogate_pairs( + reinterpret_cast(bytes), payload_size >> 1)) { + int utf16_le = -1; + value = PyUnicode_DecodeUTF16(bytes, payload_size, nullptr, &utf16_le); + } else { + value = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, bytes, + payload_size >> 1); + } + } else { + value = PyUnicode_DecodeUTF8(bytes, payload_size, "strict"); + } + if (value == nullptr) { + return false; + } + buffer->reader_index(reader_index + payload_size); + *out = value; + return true; +} + +static bool read_var_int64_from_buffer(fory::Buffer *buffer, fory::Error *error, + int64_t *out) { + if (out == nullptr) { + return false; + } + if (error == nullptr) { + return false; + } + int64_t value = buffer->read_var_int64(*error); + if (FORY_PREDICT_FALSE(!error->ok())) { + return false; + } + *out = value; + return true; +} + +static void write_var_int64_to_buffer(fory::Buffer *buffer, int64_t value) { + buffer->write_var_int64(value); +} + namespace fory { int Fory_PyBooleanSequenceWriteToBuffer(PyObject *collection, Buffer *buffer, Py_ssize_t start_index) { @@ -58,4 +213,418 @@ int Fory_PyFloatSequenceWriteToBuffer(PyObject *collection, Buffer *buffer, } return 0; } + +int Fory_PyInt64SequenceWriteVarintToBuffer(PyObject *collection, + Buffer *buffer) { + PyObject **items = py_sequence_get_items(collection); + if (items == nullptr) { + return -1; + } + Py_ssize_t size = Py_SIZE(collection); + if (size < 0) { + return -1; + } + for (Py_ssize_t i = 0; i < size; i++) { + int64_t value; + if (!py_parse_int64(items[i], &value)) { + return -1; + } + buffer->write_var_int64(value); + } + return 0; +} + +int Fory_PyStringSequenceWriteToBuffer(PyObject *collection, Buffer *buffer) { + PyObject **items = py_sequence_get_items(collection); + if (items == nullptr) { + return -1; + } + Py_ssize_t size = Py_SIZE(collection); + for (Py_ssize_t i = 0; i < size; i++) { + if (!py_write_string_to_buffer(items[i], buffer)) { + return -1; + } + } + return 0; +} + +int Fory_PyDetectSequenceNoNullExactTypeKind(PyObject *collection) { + PyObject **items = py_sequence_get_items(collection); + if (items == nullptr) { + return kForyPySequenceValueNone; + } + Py_ssize_t size = Py_SIZE(collection); + if (size == 0) { + return kForyPySequenceValueNone; + } + + PyObject *first = items[0]; + int kind = kForyPySequenceValueNone; + if (PyUnicode_CheckExact(first)) { + kind = kForyPySequenceValueString; + } else if (PyLong_CheckExact(first)) { + if (first == Py_True || first == Py_False) { + kind = kForyPySequenceValueBool; + } else { + kind = kForyPySequenceValueInt64; + } + } else if (PyFloat_CheckExact(first)) { + kind = kForyPySequenceValueFloat64; + } else { + return kForyPySequenceValueNone; + } + + for (Py_ssize_t i = 1; i < size; i++) { + PyObject *item = items[i]; + if (kind == kForyPySequenceValueString) { + if (!PyUnicode_CheckExact(item)) { + return kForyPySequenceValueNone; + } + continue; + } + if (kind == kForyPySequenceValueBool) { + if (item != Py_True && item != Py_False) { + return kForyPySequenceValueNone; + } + continue; + } + if (kind == kForyPySequenceValueFloat64) { + if (!PyFloat_CheckExact(item)) { + return kForyPySequenceValueNone; + } + continue; + } + if (!PyLong_CheckExact(item) || item == Py_True || item == Py_False) { + return kForyPySequenceValueNone; + } + } + return kind; +} + +int Fory_PyDetectStringKeyMapValueKind(PyObject *map) { + if (!PyDict_CheckExact(map)) { + return kForyPyStringMapValueNone; + } + Py_ssize_t pos = 0; + PyObject *key = nullptr; + PyObject *value = nullptr; + int value_kind = kForyPyStringMapValueNone; + while (PyDict_Next(map, &pos, &key, &value)) { + if (!PyUnicode_CheckExact(key)) { + return kForyPyStringMapValueNone; + } + int current_kind = kForyPyStringMapValueNone; + if (PyLong_CheckExact(value)) { + int64_t int64_value; + if (!py_parse_int64(value, &int64_value)) { + return kForyPyStringMapValueNone; + } + current_kind = kForyPyStringMapValueInt64; + } else if (PyUnicode_CheckExact(value)) { + current_kind = kForyPyStringMapValueString; + } else { + return kForyPyStringMapValueNone; + } + if (value_kind == kForyPyStringMapValueNone) { + value_kind = current_kind; + } else if (value_kind != current_kind) { + return kForyPyStringMapValueNone; + } + } + return value_kind; +} + +int Fory_PyStringInt64MapWriteChunkToBuffer(PyObject *map, Py_ssize_t *pos, + Py_ssize_t chunk_size, + Buffer *buffer) { + if (!PyDict_CheckExact(map) || pos == nullptr || chunk_size < 0) { + return -1; + } + PyObject *key = nullptr; + PyObject *value = nullptr; + for (Py_ssize_t i = 0; i < chunk_size; i++) { + if (!PyDict_Next(map, pos, &key, &value)) { + return -1; + } + if (!py_write_string_to_buffer(key, buffer)) { + return -1; + } + int64_t int64_value; + if (!py_parse_int64(value, &int64_value)) { + return -1; + } + write_var_int64_to_buffer(buffer, int64_value); + } + return 0; +} + +int Fory_PyStringStringMapWriteChunkToBuffer(PyObject *map, Py_ssize_t *pos, + Py_ssize_t chunk_size, + Buffer *buffer) { + if (!PyDict_CheckExact(map) || pos == nullptr || chunk_size < 0) { + return -1; + } + PyObject *key = nullptr; + PyObject *value = nullptr; + for (Py_ssize_t i = 0; i < chunk_size; i++) { + if (!PyDict_Next(map, pos, &key, &value)) { + return -1; + } + if (!py_write_string_to_buffer(key, buffer) || + !py_write_string_to_buffer(value, buffer)) { + return -1; + } + } + return 0; +} + +static void set_map_contiguous_chunk_write_result( + Py_ssize_t chunk_size, int has_next, PyObject *next_key, PyObject *next_value, + Py_ssize_t *written_chunk_size, int *has_next_out, int64_t *next_key_addr, + int64_t *next_value_addr) { + if (written_chunk_size != nullptr) { + *written_chunk_size = chunk_size; + } + if (has_next_out != nullptr) { + *has_next_out = has_next; + } + if (next_key_addr != nullptr) { + *next_key_addr = has_next == 0 + ? 0 + : static_cast( + reinterpret_cast(next_key)); + } + if (next_value_addr != nullptr) { + *next_value_addr = has_next == 0 + ? 0 + : static_cast( + reinterpret_cast(next_value)); + } +} + +int Fory_PyStringInt64MapWriteContiguousChunkToBuffer( + PyObject *map, Py_ssize_t *pos, PyObject *first_key, PyObject *first_value, + Py_ssize_t max_chunk_size, Buffer *buffer, Py_ssize_t *written_chunk_size, + int *has_next, int64_t *next_key_addr, int64_t *next_value_addr) { + if (!PyDict_CheckExact(map) || pos == nullptr || max_chunk_size <= 0 || + buffer == nullptr || first_key == nullptr || first_value == nullptr) { + return -1; + } + if (!PyUnicode_CheckExact(first_key) || !PyLong_CheckExact(first_value) || + first_value == Py_True || first_value == Py_False) { + return -1; + } + PyObject *current_key = first_key; + PyObject *current_value = first_value; + Py_ssize_t chunk_size = 0; + while (true) { + if (!py_write_string_to_buffer(current_key, buffer)) { + return -1; + } + int64_t int64_value; + if (!py_parse_int64(current_value, &int64_value)) { + return -1; + } + write_var_int64_to_buffer(buffer, int64_value); + chunk_size += 1; + if (chunk_size >= max_chunk_size) { + int iter_has_next = PyDict_Next(map, pos, ¤t_key, ¤t_value) + ? 1 + : 0; + set_map_contiguous_chunk_write_result( + chunk_size, iter_has_next, current_key, current_value, + written_chunk_size, has_next, next_key_addr, next_value_addr); + return 0; + } + if (!PyDict_Next(map, pos, ¤t_key, ¤t_value)) { + set_map_contiguous_chunk_write_result( + chunk_size, 0, nullptr, nullptr, written_chunk_size, has_next, + next_key_addr, next_value_addr); + return 0; + } + if (!PyUnicode_CheckExact(current_key) || !PyLong_CheckExact(current_value) || + current_value == Py_True || current_value == Py_False) { + set_map_contiguous_chunk_write_result( + chunk_size, 1, current_key, current_value, written_chunk_size, has_next, + next_key_addr, next_value_addr); + return 0; + } + } +} + +int Fory_PyStringStringMapWriteContiguousChunkToBuffer( + PyObject *map, Py_ssize_t *pos, PyObject *first_key, PyObject *first_value, + Py_ssize_t max_chunk_size, Buffer *buffer, Py_ssize_t *written_chunk_size, + int *has_next, int64_t *next_key_addr, int64_t *next_value_addr) { + if (!PyDict_CheckExact(map) || pos == nullptr || max_chunk_size <= 0 || + buffer == nullptr || first_key == nullptr || first_value == nullptr) { + return -1; + } + if (!PyUnicode_CheckExact(first_key) || !PyUnicode_CheckExact(first_value)) { + return -1; + } + PyObject *current_key = first_key; + PyObject *current_value = first_value; + Py_ssize_t chunk_size = 0; + while (true) { + if (!py_write_string_to_buffer(current_key, buffer) || + !py_write_string_to_buffer(current_value, buffer)) { + return -1; + } + chunk_size += 1; + if (chunk_size >= max_chunk_size) { + int iter_has_next = PyDict_Next(map, pos, ¤t_key, ¤t_value) + ? 1 + : 0; + set_map_contiguous_chunk_write_result( + chunk_size, iter_has_next, current_key, current_value, + written_chunk_size, has_next, next_key_addr, next_value_addr); + return 0; + } + if (!PyDict_Next(map, pos, ¤t_key, ¤t_value)) { + set_map_contiguous_chunk_write_result( + chunk_size, 0, nullptr, nullptr, written_chunk_size, has_next, + next_key_addr, next_value_addr); + return 0; + } + if (!PyUnicode_CheckExact(current_key) || !PyUnicode_CheckExact(current_value)) { + set_map_contiguous_chunk_write_result( + chunk_size, 1, current_key, current_value, written_chunk_size, has_next, + next_key_addr, next_value_addr); + return 0; + } + } +} + +int Fory_PyStringInt64MapReadChunkFromBuffer(PyObject *map, + Py_ssize_t chunk_size, + Buffer *buffer) { + if (!PyDict_CheckExact(map) || chunk_size < 0) { + return -1; + } + Error error; + for (Py_ssize_t i = 0; i < chunk_size; i++) { + PyObject *key = nullptr; + if (!py_read_string_from_buffer(buffer, &key)) { + return -1; + } + int64_t value; + if (!read_var_int64_from_buffer(buffer, &error, &value)) { + Py_DECREF(key); + return -1; + } + PyObject *py_value = PyLong_FromLongLong(value); + if (py_value == nullptr) { + Py_DECREF(key); + return -1; + } + int status = PyDict_SetItem(map, key, py_value); + Py_DECREF(key); + Py_DECREF(py_value); + if (status != 0) { + return -1; + } + } + return 0; +} + +int Fory_PyStringStringMapReadChunkFromBuffer(PyObject *map, + Py_ssize_t chunk_size, + Buffer *buffer) { + if (!PyDict_CheckExact(map) || chunk_size < 0) { + return -1; + } + for (Py_ssize_t i = 0; i < chunk_size; i++) { + PyObject *key = nullptr; + PyObject *value = nullptr; + if (!py_read_string_from_buffer(buffer, &key)) { + return -1; + } + if (!py_read_string_from_buffer(buffer, &value)) { + Py_DECREF(key); + return -1; + } + int status = PyDict_SetItem(map, key, value); + Py_DECREF(key); + Py_DECREF(value); + if (status != 0) { + return -1; + } + } + return 0; +} + +int Fory_PyBooleanSequenceReadFromBuffer(PyObject *collection, Buffer *buffer, + Py_ssize_t size) { + if (Py_SIZE(collection) != size) { + return -1; + } + PyObject **items = py_sequence_get_items(collection); + if (items == nullptr) { + return -1; + } + if (!ensure_readable_size(buffer, size, sizeof(bool))) { + return -1; + } + uint32_t reader_index = buffer->reader_index(); + uint8_t *data = buffer->data(); + for (Py_ssize_t i = 0; i < size; i++) { + PyObject *value = data[reader_index + i] == 0 ? Py_False : Py_True; + Py_INCREF(value); + items[i] = value; + } + buffer->increase_reader_index(static_cast(size)); + return 0; +} + +int Fory_PyFloatSequenceReadFromBuffer(PyObject *collection, Buffer *buffer, + Py_ssize_t size) { + if (Py_SIZE(collection) != size) { + return -1; + } + PyObject **items = py_sequence_get_items(collection); + if (items == nullptr) { + return -1; + } + if (!ensure_readable_size(buffer, size, sizeof(double))) { + return -1; + } + Error error; + for (Py_ssize_t i = 0; i < size; i++) { + double value = buffer->read_double(error); + if (FORY_PREDICT_FALSE(!error.ok())) { + return -1; + } + PyObject *obj = PyFloat_FromDouble(value); + if (obj == nullptr) { + return -1; + } + items[i] = obj; + } + return 0; +} + +int Fory_PyInt64SequenceReadVarintFromBuffer(PyObject *collection, + Buffer *buffer, Py_ssize_t size) { + if (Py_SIZE(collection) != size) { + return -1; + } + PyObject **items = py_sequence_get_items(collection); + if (items == nullptr) { + return -1; + } + Error error; + for (Py_ssize_t i = 0; i < size; i++) { + int64_t value = buffer->read_var_int64(error); + if (FORY_PREDICT_FALSE(!error.ok())) { + return -1; + } + PyObject *obj = PyLong_FromLongLong(value); + if (obj == nullptr) { + return -1; + } + items[i] = obj; + } + return 0; +} } // namespace fory diff --git a/cpp/fory/python/pyfory.h b/cpp/fory/python/pyfory.h index 467a86970f..e102834a23 100644 --- a/cpp/fory/python/pyfory.h +++ b/cpp/fory/python/pyfory.h @@ -26,4 +26,52 @@ int Fory_PyBooleanSequenceWriteToBuffer(PyObject *collection, Buffer *buffer, Py_ssize_t start_index); int Fory_PyFloatSequenceWriteToBuffer(PyObject *collection, Buffer *buffer, Py_ssize_t start_index); -} // namespace fory \ No newline at end of file +int Fory_PyInt64SequenceWriteVarintToBuffer(PyObject *collection, + Buffer *buffer); +int Fory_PyStringSequenceWriteToBuffer(PyObject *collection, Buffer *buffer); + +enum ForyPyStringMapValueKind : int { + kForyPyStringMapValueNone = 0, + kForyPyStringMapValueInt64 = 1, + kForyPyStringMapValueString = 2, +}; + +enum ForyPySequenceValueKind : int { + kForyPySequenceValueNone = 0, + kForyPySequenceValueString = 1, + kForyPySequenceValueInt64 = 2, + kForyPySequenceValueBool = 3, + kForyPySequenceValueFloat64 = 4, +}; + +int Fory_PyDetectSequenceNoNullExactTypeKind(PyObject *collection); + +int Fory_PyDetectStringKeyMapValueKind(PyObject *map); +int Fory_PyStringInt64MapWriteChunkToBuffer(PyObject *map, Py_ssize_t *pos, + Py_ssize_t chunk_size, + Buffer *buffer); +int Fory_PyStringStringMapWriteChunkToBuffer(PyObject *map, Py_ssize_t *pos, + Py_ssize_t chunk_size, + Buffer *buffer); +int Fory_PyStringInt64MapWriteContiguousChunkToBuffer( + PyObject *map, Py_ssize_t *pos, PyObject *first_key, PyObject *first_value, + Py_ssize_t max_chunk_size, Buffer *buffer, Py_ssize_t *written_chunk_size, + int *has_next, int64_t *next_key_addr, int64_t *next_value_addr); +int Fory_PyStringStringMapWriteContiguousChunkToBuffer( + PyObject *map, Py_ssize_t *pos, PyObject *first_key, PyObject *first_value, + Py_ssize_t max_chunk_size, Buffer *buffer, Py_ssize_t *written_chunk_size, + int *has_next, int64_t *next_key_addr, int64_t *next_value_addr); +int Fory_PyStringInt64MapReadChunkFromBuffer(PyObject *map, + Py_ssize_t chunk_size, + Buffer *buffer); +int Fory_PyStringStringMapReadChunkFromBuffer(PyObject *map, + Py_ssize_t chunk_size, + Buffer *buffer); + +int Fory_PyBooleanSequenceReadFromBuffer(PyObject *collection, Buffer *buffer, + Py_ssize_t size); +int Fory_PyFloatSequenceReadFromBuffer(PyObject *collection, Buffer *buffer, + Py_ssize_t size); +int Fory_PyInt64SequenceReadVarintFromBuffer(PyObject *collection, + Buffer *buffer, Py_ssize_t size); +} // namespace fory diff --git a/python/pyfory/collection.pxi b/python/pyfory/collection.pxi index df10f18588..b0782d025a 100644 --- a/python/pyfory/collection.pxi +++ b/python/pyfory/collection.pxi @@ -40,11 +40,19 @@ cdef class CollectionSerializer(Serializer): cdef int8_t elem_tracking_ref cdef elem_type cdef TypeInfo elem_type_info + cdef TypeInfo list_type_info + cdef TypeInfo tuple_type_info + cdef TypeInfo set_type_info + cdef TypeInfo dict_type_info def __init__(self, fory, type_, elem_serializer=None, elem_tracking_ref=None): super().__init__(fory, type_) self.type_resolver = fory.type_resolver self.ref_resolver = fory.ref_resolver + self.list_type_info = None + self.tuple_type_info = None + self.set_type_info = None + self.dict_type_info = None self.elem_serializer = elem_serializer if elem_serializer is None: self.elem_type = None @@ -57,30 +65,91 @@ cdef class CollectionSerializer(Serializer): if elem_tracking_ref is not None: self.elem_tracking_ref = (1 if elem_tracking_ref else 0) + cdef inline TypeInfo _get_type_info_fast(self, type cls): + if cls is list: + if self.list_type_info is None: + self.list_type_info = self.type_resolver.get_type_info(list) + return self.list_type_info + if cls is tuple: + if self.tuple_type_info is None: + self.tuple_type_info = self.type_resolver.get_type_info(tuple) + return self.tuple_type_info + if cls is set: + if self.set_type_info is None: + self.set_type_info = self.type_resolver.get_type_info(set) + return self.set_type_info + if cls is dict: + if self.dict_type_info is None: + self.dict_type_info = self.type_resolver.get_type_info(dict) + return self.dict_type_info + return self.type_resolver.get_type_info(cls) + cdef inline pair[int8_t, int64_t] write_header(self, Buffer buffer, value): cdef int8_t collect_flag = COLL_DEFAULT_FLAG elem_type = self.elem_type cdef TypeInfo elem_type_info = self.elem_type_info cdef c_bool has_null = False cdef c_bool has_same_type = True + cdef int fast_kind = kForyPySequenceValueNone + cdef type value_type = type(value) + cdef Py_ssize_t seq_size + cdef Py_ssize_t i + cdef object s if elem_type is None: - for s in value: - if not has_null and s is None: - has_null = True - continue - if elem_type is None: - elem_type = type(s) - elif has_same_type and type(s) is not elem_type: - has_same_type = False + if value_type is list or value_type is tuple: + fast_kind = Fory_PyDetectSequenceNoNullExactTypeKind(value) + if fast_kind == kForyPySequenceValueString: + elem_type = str + elif fast_kind == kForyPySequenceValueInt64: + elem_type = int + elif fast_kind == kForyPySequenceValueBool: + elem_type = bool + elif fast_kind == kForyPySequenceValueFloat64: + elem_type = float + if fast_kind == kForyPySequenceValueNone: + if value_type is list or value_type is tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_type is list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if not has_null and s is None: + has_null = True + continue + if elem_type is None: + elem_type = type(s) + elif has_same_type and type(s) is not elem_type: + has_same_type = False + else: + for s in value: + if not has_null and s is None: + has_null = True + continue + if elem_type is None: + elem_type = type(s) + elif has_same_type and type(s) is not elem_type: + has_same_type = False if has_same_type: collect_flag |= COLL_IS_SAME_TYPE elem_type_info = self.type_resolver.get_type_info(elem_type) else: collect_flag |= COLL_IS_DECL_ELEMENT_TYPE | COLL_IS_SAME_TYPE - for s in value: - if s is None: - has_null = True - break + if value_type is list or value_type is tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_type is list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if s is None: + has_null = True + break + else: + for s in value: + if s is None: + has_null = True + break if has_null: collect_flag |= COLL_HAS_NULL if self.fory.track_ref: @@ -111,6 +180,12 @@ cdef class CollectionSerializer(Serializer): cdef serializer = type(elem_type_info.serializer) cdef c_bool tracking_ref cdef c_bool has_null + cdef type cls + cdef type value_type = type(value) + cdef object s + cdef TypeInfo typeinfo + cdef Py_ssize_t i + cdef Py_ssize_t seq_size if (collect_flag & COLL_IS_SAME_TYPE) != 0: if (collect_flag & COLL_HAS_NULL) == 0: if elem_type is str: @@ -134,34 +209,63 @@ cdef class CollectionSerializer(Serializer): tracking_ref = (collect_flag & COLL_TRACKING_REF) != 0 has_null = (collect_flag & COLL_HAS_NULL) != 0 if tracking_ref: - # When ref tracking is enabled, write with ref handling - for s in value: - cls = type(s) - if cls is str: - buffer.write_int16(NOT_NULL_STRING_FLAG) - buffer.write_string(s) - elif cls is int: - buffer.write_int16(NOT_NULL_INT64_FLAG) - buffer.write_varint64(s) - elif cls is bool: - buffer.write_int16(NOT_NULL_BOOL_FLAG) - buffer.write_bool(s) - elif cls is float: - buffer.write_int16(NOT_NULL_FLOAT64_FLAG) - buffer.write_double(s) - else: - if not ref_resolver.write_ref_or_null(buffer, s): - typeinfo = type_resolver.get_type_info(cls) - type_resolver.write_type_info(buffer, typeinfo) - if is_py: - typeinfo.serializer.write(buffer, s) - else: - typeinfo.serializer.xwrite(buffer, s) + if value_type is list or value_type is tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_type is list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + cls = type(s) + if cls is str: + buffer.write_int16(NOT_NULL_STRING_FLAG) + buffer.write_string(s) + elif cls is int: + buffer.write_int16(NOT_NULL_INT64_FLAG) + buffer.write_varint64(s) + elif cls is bool: + buffer.write_int16(NOT_NULL_BOOL_FLAG) + buffer.write_bool(s) + elif cls is float: + buffer.write_int16(NOT_NULL_FLOAT64_FLAG) + buffer.write_double(s) + else: + if not ref_resolver.write_ref_or_null(buffer, s): + typeinfo = self._get_type_info_fast(cls) + type_resolver.write_type_info(buffer, typeinfo) + if is_py: + typeinfo.serializer.write(buffer, s) + else: + typeinfo.serializer.xwrite(buffer, s) + else: + # When ref tracking is enabled, write with ref handling + for s in value: + cls = type(s) + if cls is str: + buffer.write_int16(NOT_NULL_STRING_FLAG) + buffer.write_string(s) + elif cls is int: + buffer.write_int16(NOT_NULL_INT64_FLAG) + buffer.write_varint64(s) + elif cls is bool: + buffer.write_int16(NOT_NULL_BOOL_FLAG) + buffer.write_bool(s) + elif cls is float: + buffer.write_int16(NOT_NULL_FLOAT64_FLAG) + buffer.write_double(s) + else: + if not ref_resolver.write_ref_or_null(buffer, s): + typeinfo = self._get_type_info_fast(cls) + type_resolver.write_type_info(buffer, typeinfo) + if is_py: + typeinfo.serializer.write(buffer, s) + else: + typeinfo.serializer.xwrite(buffer, s) elif not has_null: # When ref tracking is disabled and no nulls, write type info directly for s in value: cls = type(s) - typeinfo = type_resolver.get_type_info(cls) + typeinfo = self._get_type_info_fast(cls) type_resolver.write_type_info(buffer, typeinfo) if is_py: typeinfo.serializer.write(buffer, s) @@ -175,7 +279,7 @@ cdef class CollectionSerializer(Serializer): else: buffer.write_int8(NOT_NULL_VALUE_FLAG) cls = type(s) - typeinfo = type_resolver.get_type_info(cls) + typeinfo = self._get_type_info_fast(cls) type_resolver.write_type_info(buffer, typeinfo) if is_py: typeinfo.serializer.write(buffer, s) @@ -183,6 +287,10 @@ cdef class CollectionSerializer(Serializer): typeinfo.serializer.xwrite(buffer, s) cdef inline _write_string(self, Buffer buffer, value): + value_type = type(value) + if value_type is list or value_type is tuple: + if Fory_PyStringSequenceWriteToBuffer(value, &buffer.c_buffer) == 0: + return for s in value: buffer.write_string(s) @@ -191,10 +299,18 @@ cdef class CollectionSerializer(Serializer): self._add_element(collection_, i, buffer.read_string()) cdef inline _write_int(self, Buffer buffer, value): + value_type = type(value) + if (value_type is list or value_type is tuple) and ( + Fory_PyInt64SequenceWriteVarintToBuffer(value, &buffer.c_buffer) == 0): + return for s in value: buffer.write_varint64(s) cdef inline _read_int(self, Buffer buffer, int64_t len_, object collection_): + collection_type = type(collection_) + if collection_type is list or collection_type is tuple: + if Fory_PyInt64SequenceReadVarintFromBuffer(collection_, &buffer.c_buffer, len_) == 0: + return for i in range(len_): self._add_element(collection_, i, buffer.read_varint64()) @@ -212,6 +328,10 @@ cdef class CollectionSerializer(Serializer): buffer.write_bool(s) cdef inline _read_bool(self, Buffer buffer, int64_t len_, object collection_): + collection_type = type(collection_) + if collection_type is list or collection_type is tuple: + if Fory_PyBooleanSequenceReadFromBuffer(collection_, &buffer.c_buffer, len_) == 0: + return for i in range(len_): self._add_element(collection_, i, buffer.read_bool()) @@ -229,6 +349,10 @@ cdef class CollectionSerializer(Serializer): buffer.write_double(s) cdef inline _read_float(self, Buffer buffer, int64_t len_, object collection_): + collection_type = type(collection_) + if collection_type is list or collection_type is tuple: + if Fory_PyFloatSequenceReadFromBuffer(collection_, &buffer.c_buffer, len_) == 0: + return for i in range(len_): self._add_element(collection_, i, buffer.read_double()) @@ -297,15 +421,38 @@ cdef class CollectionSerializer(Serializer): cpdef _write_same_type_ref(self, Buffer buffer, value, TypeInfo typeinfo): cdef MapRefResolver ref_resolver = self.ref_resolver - cdef TypeResolver type_resolver = self.type_resolver + cdef type value_type = type(value) + cdef object s + cdef Py_ssize_t i + cdef Py_ssize_t seq_size if not self.fory.xlang: - for s in value: - if not ref_resolver.write_ref_or_null(buffer, s): - typeinfo.serializer.write(buffer, s) + if value_type is list or value_type is tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_type is list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if not ref_resolver.write_ref_or_null(buffer, s): + typeinfo.serializer.write(buffer, s) + else: + for s in value: + if not ref_resolver.write_ref_or_null(buffer, s): + typeinfo.serializer.write(buffer, s) else: - for s in value: - if not ref_resolver.write_ref_or_null(buffer, s): - typeinfo.serializer.xwrite(buffer, s) + if value_type is list or value_type is tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_type is list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if not ref_resolver.write_ref_or_null(buffer, s): + typeinfo.serializer.xwrite(buffer, s) + else: + for s in value: + if not ref_resolver.write_ref_or_null(buffer, s): + typeinfo.serializer.xwrite(buffer, s) cpdef _read_same_type_ref(self, Buffer buffer, int64_t len_, object collection_, TypeInfo typeinfo): cdef MapRefResolver ref_resolver = self.ref_resolver @@ -313,9 +460,9 @@ cdef class CollectionSerializer(Serializer): cdef c_bool is_py = not self.fory.xlang self.fory.inc_depth() for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id(buffer) + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) if ref_id < NOT_NULL_VALUE_FLAG: - obj = ref_resolver.get_read_object() + obj = ref_resolver.read_object else: if is_py: obj = typeinfo.serializer.read(buffer) @@ -427,30 +574,67 @@ cdef inline get_next_element( ): cdef int32_t ref_id cdef TypeInfo typeinfo - ref_id = ref_resolver.try_preserve_ref_id(buffer) + cdef object o + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) if ref_id < NOT_NULL_VALUE_FLAG: - return ref_resolver.get_read_object() - # indicates that the object is first read. - typeinfo = type_resolver.read_type_info(buffer) - cdef uint8_t type_id = typeinfo.type_id + return ref_resolver.read_object + cdef uint8_t type_id + cdef int32_t type_id_offset + if ref_id == NOT_NULL_VALUE_FLAG: + type_id = buffer.read_uint8() + if type_id == TypeId.STRING: + return buffer.read_string() + elif type_id == TypeId.VARINT32: + return buffer.read_varint32() + elif type_id == TypeId.VARINT64: + return buffer.read_varint64() + elif type_id == TypeId.BOOL: + return buffer.read_bool() + elif type_id == TypeId.FLOAT64: + return buffer.read_double() + if not is_namespaced_type(type_id): + typeinfo = type_resolver.get_type_info_by_id(type_id) + else: + type_id_offset = buffer.get_reader_index() - 1 + buffer.set_reader_index(type_id_offset) + typeinfo = type_resolver.read_type_info(buffer) + else: + type_id = buffer.read_uint8() + if type_id == TypeId.STRING: + o = buffer.read_string() + ref_resolver.set_read_object(ref_id, o) + return o + elif type_id == TypeId.VARINT32: + o = buffer.read_varint32() + ref_resolver.set_read_object(ref_id, o) + return o + elif type_id == TypeId.VARINT64: + o = buffer.read_varint64() + ref_resolver.set_read_object(ref_id, o) + return o + elif type_id == TypeId.BOOL: + o = buffer.read_bool() + ref_resolver.set_read_object(ref_id, o) + return o + elif type_id == TypeId.FLOAT64: + o = buffer.read_double() + ref_resolver.set_read_object(ref_id, o) + return o + if not is_namespaced_type(type_id): + typeinfo = type_resolver.get_type_info_by_id(type_id) + else: + type_id_offset = buffer.get_reader_index() - 1 + buffer.set_reader_index(type_id_offset) + typeinfo = type_resolver.read_type_info(buffer) # Note that all read operations in fast paths of list/tuple/set/dict/sub_dict # must match corresponding writing operations. Otherwise, ref tracking will # error. - if type_id == TypeId.STRING: - return buffer.read_string() - elif type_id == TypeId.VARINT32: - return buffer.read_varint64() - elif type_id == TypeId.BOOL: - return buffer.read_bool() - elif type_id == TypeId.FLOAT64: - return buffer.read_double() + if is_py: + o = typeinfo.serializer.read(buffer) else: - if is_py: - o = typeinfo.serializer.read(buffer) - else: - o = typeinfo.serializer.xread(buffer) - ref_resolver.set_read_object(ref_id, o) - return o + o = typeinfo.serializer.xread(buffer) + ref_resolver.set_read_object(ref_id, o) + return o @cython.final @@ -601,9 +785,9 @@ cdef class SetSerializer(CollectionSerializer): if tracking_ref: # When ref tracking is enabled, read with ref handling for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id(buffer) + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) if ref_id < NOT_NULL_VALUE_FLAG: - instance.add(ref_resolver.get_read_object()) + instance.add(ref_resolver.read_object) continue # indicates that the object is first read. typeinfo = type_resolver.read_type_info(buffer) @@ -734,12 +918,54 @@ cdef class MapSerializer(Serializer): if value_tracking_ref is not None: self.value_tracking_ref = (1 if value_tracking_ref and fory.track_ref else 0) + cdef inline c_bool _write_homogeneous_string_key_map_fast_path( + self, Buffer buffer, dict obj, int32_t length): + if self.key_serializer is not None or self.value_serializer is not None: + return False + cdef int value_kind = Fory_PyDetectStringKeyMapValueKind(obj) + if value_kind == kForyPyStringMapValueNone: + return False + cdef TypeResolver type_resolver = self.type_resolver + cdef TypeInfo key_type_info = type_resolver.get_type_info(str) + cdef TypeInfo value_type_info + if value_kind == kForyPyStringMapValueInt64: + value_type_info = type_resolver.get_type_info(int) + else: + value_type_info = type_resolver.get_type_info(str) + cdef Py_ssize_t pos = 0 + cdef int32_t remaining = length + cdef int32_t chunk_size + cdef int status + while remaining > 0: + chunk_size = remaining + if chunk_size > MAX_CHUNK_SIZE: + chunk_size = MAX_CHUNK_SIZE + # Homogeneous string key/value chunks do not track refs and do not use declared types. + buffer.write_uint8(0) + buffer.write_uint8(chunk_size) + type_resolver.write_type_info(buffer, key_type_info) + type_resolver.write_type_info(buffer, value_type_info) + if value_kind == kForyPyStringMapValueInt64: + status = Fory_PyStringInt64MapWriteChunkToBuffer( + obj, &pos, chunk_size, &buffer.c_buffer + ) + else: + status = Fory_PyStringStringMapWriteChunkToBuffer( + obj, &pos, chunk_size, &buffer.c_buffer + ) + if status != 0: + raise RuntimeError("c++ fast path failed for homogeneous string-key map") + remaining -= chunk_size + return True + cpdef inline write(self, Buffer buffer, o): cdef dict obj = o cdef int32_t length = len(obj) buffer.write_var_uint32(length) if length == 0: return + if self._write_homogeneous_string_key_map_fast_path(buffer, obj, length): + return cdef int64_t key_addr, value_addr cdef Py_ssize_t pos = 0 cdef Fory fory = self.fory @@ -750,9 +976,12 @@ cdef class MapSerializer(Serializer): cdef type key_cls, value_cls, key_serializer_type, value_serializer_type cdef TypeInfo key_type_info, value_type_info cdef int32_t chunk_size_offset, chunk_header, chunk_size + cdef Py_ssize_t fast_chunk_size + cdef int64_t next_key_addr, next_value_addr cdef c_bool key_write_ref, value_write_ref cdef int has_next = PyDict_Next(obj, &pos, &key_addr, &value_addr) cdef c_bool is_py = not self.fory.xlang + cdef int status while has_next != 0: key = int2obj(key_addr) Py_INCREF(key) @@ -794,11 +1023,6 @@ cdef class MapSerializer(Serializer): value_serializer.write(buffer, value) else: value_serializer.xwrite(buffer, value) - if not self.ref_resolver.write_ref_or_null(buffer, value): - if is_py: - value_serializer.write(buffer, value) - else: - value_serializer.xwrite(buffer, value) else: buffer.write_int8(NULL_KEY_VALUE_DECL_TYPE) if is_py: @@ -852,6 +1076,47 @@ cdef class MapSerializer(Serializer): buffer.put_int8(chunk_size_offset - 1, chunk_header) key_serializer_type = type(key_serializer) value_serializer_type = type(value_serializer) + if (not key_write_ref and not value_write_ref and key_cls is str and + (value_serializer_type is Int64Serializer or + value_serializer_type is StringSerializer)): + if value_serializer_type is Int64Serializer: + status = Fory_PyStringInt64MapWriteContiguousChunkToBuffer( + obj, + &pos, + key, + value, + MAX_CHUNK_SIZE, + &buffer.c_buffer, + &fast_chunk_size, + &has_next, + &next_key_addr, + &next_value_addr, + ) + else: + status = Fory_PyStringStringMapWriteContiguousChunkToBuffer( + obj, + &pos, + key, + value, + MAX_CHUNK_SIZE, + &buffer.c_buffer, + &fast_chunk_size, + &has_next, + &next_key_addr, + &next_value_addr, + ) + if status != 0: + raise RuntimeError( + "c++ fast path failed for contiguous string-key map write chunk" + ) + chunk_size = fast_chunk_size + if has_next != 0: + key_addr = next_key_addr + value_addr = next_value_addr + key_serializer = self.key_serializer + value_serializer = self.value_serializer + buffer.put_int8(chunk_size_offset, chunk_size) + continue chunk_size = 0 while True: if (key is None or value is None or @@ -912,6 +1177,8 @@ cdef class MapSerializer(Serializer): cdef int32_t size = buffer.read_var_uint32() cdef dict map_ = _PyDict_NewPresized(size) ref_resolver.reference(map_) + if size == 0: + return map_ cdef int32_t ref_id cdef TypeInfo key_type_info, value_type_info cdef int32_t chunk_header = 0 @@ -923,6 +1190,7 @@ cdef class MapSerializer(Serializer): cdef c_bool key_is_declared_type, value_is_declared_type cdef type key_serializer_type, value_serializer_type cdef int32_t chunk_size + cdef int status cdef c_bool is_py = not self.fory.xlang self.fory.inc_depth() while size > 0: @@ -936,9 +1204,9 @@ cdef class MapSerializer(Serializer): track_key_ref = (chunk_header & TRACKING_KEY_REF) != 0 if (chunk_header & KEY_DECL_TYPE) != 0: if track_key_ref: - ref_id = ref_resolver.try_preserve_ref_id(buffer) + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) if ref_id < NOT_NULL_VALUE_FLAG: - key = ref_resolver.get_read_object() + key = ref_resolver.read_object else: if is_py: key = key_serializer.read(buffer) @@ -961,9 +1229,9 @@ cdef class MapSerializer(Serializer): track_value_ref = (chunk_header & TRACKING_VALUE_REF) != 0 if (chunk_header & VALUE_DECL_TYPE) != 0: if track_value_ref: - ref_id = ref_resolver.try_preserve_ref_id(buffer) + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) if ref_id < NOT_NULL_VALUE_FLAG: - value = ref_resolver.get_read_object() + value = ref_resolver.read_object else: if is_py: value = ( value_serializer).read(buffer) @@ -1000,11 +1268,31 @@ cdef class MapSerializer(Serializer): value_serializer = type_resolver.read_type_info(buffer).serializer key_serializer_type = type(key_serializer) value_serializer_type = type(value_serializer) + if (not track_key_ref and not track_value_ref and + key_serializer_type is StringSerializer and + (value_serializer_type is StringSerializer or + value_serializer_type is Int64Serializer)): + if value_serializer_type is Int64Serializer: + status = Fory_PyStringInt64MapReadChunkFromBuffer( + map_, chunk_size, &buffer.c_buffer + ) + else: + status = Fory_PyStringStringMapReadChunkFromBuffer( + map_, chunk_size, &buffer.c_buffer + ) + if status != 0: + raise RuntimeError( + "c++ fast path failed for homogeneous string-key map read" + ) + size -= chunk_size + if size != 0: + chunk_header = buffer.read_uint8() + continue for i in range(chunk_size): if track_key_ref: - ref_id = ref_resolver.try_preserve_ref_id(buffer) + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) if ref_id < NOT_NULL_VALUE_FLAG: - key = ref_resolver.get_read_object() + key = ref_resolver.read_object else: if is_py: key = key_serializer.read(buffer) @@ -1028,9 +1316,9 @@ cdef class MapSerializer(Serializer): else: key = fory.xread_no_ref(buffer, serializer=key_serializer) if track_value_ref: - ref_id = ref_resolver.try_preserve_ref_id(buffer) + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) if ref_id < NOT_NULL_VALUE_FLAG: - value = ref_resolver.get_read_object() + value = ref_resolver.read_object else: if is_py: value = value_serializer.read(buffer) diff --git a/python/pyfory/includes/libserialization.pxd b/python/pyfory/includes/libserialization.pxd index db25654acc..82f6cf82bb 100644 --- a/python/pyfory/includes/libserialization.pxd +++ b/python/pyfory/includes/libserialization.pxd @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from libc.stdint cimport int32_t, uint8_t +from libc.stdint cimport int32_t, int64_t, uint8_t from libcpp cimport bool as c_bool from pyfory.includes.libutil cimport CBuffer @@ -92,5 +92,70 @@ cdef extern from "fory/type/type.h" namespace "fory" nogil: cdef c_bool is_type_share_meta(TypeId type_id) cdef extern from "fory/python/pyfory.h" namespace "fory": + cdef enum ForyPyStringMapValueKind: + kForyPyStringMapValueNone + kForyPyStringMapValueInt64 + kForyPyStringMapValueString + + cdef enum ForyPySequenceValueKind: + kForyPySequenceValueNone + kForyPySequenceValueString + kForyPySequenceValueInt64 + kForyPySequenceValueBool + kForyPySequenceValueFloat64 + int Fory_PyBooleanSequenceWriteToBuffer(object collection, CBuffer *buffer, Py_ssize_t start_index) int Fory_PyFloatSequenceWriteToBuffer(object collection, CBuffer *buffer, Py_ssize_t start_index) + int Fory_PyInt64SequenceWriteVarintToBuffer(object collection, CBuffer *buffer) + int Fory_PyStringSequenceWriteToBuffer(object collection, CBuffer *buffer) + int Fory_PyDetectStringKeyMapValueKind(object map) + int Fory_PyStringInt64MapWriteChunkToBuffer( + object map, + Py_ssize_t *pos, + Py_ssize_t chunk_size, + CBuffer *buffer, + ) + int Fory_PyStringStringMapWriteChunkToBuffer( + object map, + Py_ssize_t *pos, + Py_ssize_t chunk_size, + CBuffer *buffer, + ) + int Fory_PyStringInt64MapWriteContiguousChunkToBuffer( + object map, + Py_ssize_t *pos, + object first_key, + object first_value, + Py_ssize_t max_chunk_size, + CBuffer *buffer, + Py_ssize_t *written_chunk_size, + int *has_next, + int64_t *next_key_addr, + int64_t *next_value_addr, + ) + int Fory_PyStringStringMapWriteContiguousChunkToBuffer( + object map, + Py_ssize_t *pos, + object first_key, + object first_value, + Py_ssize_t max_chunk_size, + CBuffer *buffer, + Py_ssize_t *written_chunk_size, + int *has_next, + int64_t *next_key_addr, + int64_t *next_value_addr, + ) + int Fory_PyStringInt64MapReadChunkFromBuffer( + object map, + Py_ssize_t chunk_size, + CBuffer *buffer, + ) + int Fory_PyStringStringMapReadChunkFromBuffer( + object map, + Py_ssize_t chunk_size, + CBuffer *buffer, + ) + int Fory_PyDetectSequenceNoNullExactTypeKind(object collection) + int Fory_PyBooleanSequenceReadFromBuffer(object collection, CBuffer *buffer, Py_ssize_t size) + int Fory_PyFloatSequenceReadFromBuffer(object collection, CBuffer *buffer, Py_ssize_t size) + int Fory_PyInt64SequenceReadVarintFromBuffer(object collection, CBuffer *buffer, Py_ssize_t size) diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx index 154c4cc8f5..235464456f 100644 --- a/python/pyfory/serialization.pyx +++ b/python/pyfory/serialization.pyx @@ -37,7 +37,21 @@ from pyfory.policy import DeserializationPolicy, DEFAULT_POLICY from pyfory.includes.libserialization cimport \ (TypeId, TypeRegistrationKind, get_type_registration_kind, is_namespaced_type, is_type_share_meta, - Fory_PyBooleanSequenceWriteToBuffer, Fory_PyFloatSequenceWriteToBuffer) + Fory_PyBooleanSequenceWriteToBuffer, Fory_PyFloatSequenceWriteToBuffer, + Fory_PyInt64SequenceWriteVarintToBuffer, + Fory_PyStringSequenceWriteToBuffer, + Fory_PyBooleanSequenceReadFromBuffer, Fory_PyFloatSequenceReadFromBuffer, + Fory_PyInt64SequenceReadVarintFromBuffer, + Fory_PyDetectStringKeyMapValueKind, Fory_PyStringInt64MapWriteChunkToBuffer, + Fory_PyStringStringMapWriteChunkToBuffer, + Fory_PyStringInt64MapWriteContiguousChunkToBuffer, + Fory_PyStringStringMapWriteContiguousChunkToBuffer, + Fory_PyStringInt64MapReadChunkFromBuffer, + Fory_PyStringStringMapReadChunkFromBuffer, + Fory_PyDetectSequenceNoNullExactTypeKind, + kForyPyStringMapValueNone, kForyPyStringMapValueInt64, kForyPyStringMapValueString, + kForyPySequenceValueNone, kForyPySequenceValueString, kForyPySequenceValueInt64, + kForyPySequenceValueBool, kForyPySequenceValueFloat64) from libc.stdint cimport int8_t, int16_t, int32_t, int64_t, uint64_t from libc.stdint cimport * @@ -45,8 +59,8 @@ from libcpp.vector cimport vector from cpython cimport PyObject from cpython.dict cimport PyDict_Next from cpython.ref cimport * -from cpython.list cimport PyList_New, PyList_SET_ITEM -from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM +from cpython.list cimport PyList_New, PyList_SET_ITEM, PyList_GET_ITEM +from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM, PyTuple_GET_ITEM from libcpp cimport bool as c_bool from libcpp.utility cimport pair from cython.operator cimport dereference as deref @@ -136,7 +150,28 @@ cdef class MapRefResolver: buffer.write_int8(NULL_FLAG) return True cdef uint64_t object_id = obj + cdef int32_t existing_size = self.written_objects.size() cdef int32_t next_id + cdef int32_t i + cdef PyObject *written_obj + cdef uint64_t written_object_id + if self.written_objects_id.size() == 0: + if existing_size < WRITE_REF_LINEAR_SCAN_LIMIT: + for i in range(existing_size): + written_obj = self.written_objects[i] + if written_obj == obj: + buffer.write_int8(REF_FLAG) + buffer.write_var_uint32( i) + return True + self.written_objects.push_back( obj) + Py_INCREF(obj) + buffer.write_int8(REF_VALUE_FLAG) + return False + self.written_objects_id.reserve(self.written_objects.size() * 2) + for i in range(existing_size): + written_obj = self.written_objects[i] + written_object_id = written_obj + self.written_objects_id[written_object_id] = i cdef flat_hash_map[uint64_t, int32_t].iterator it = \ self.written_objects_id.find(object_id) if it == self.written_objects_id.end(): @@ -206,6 +241,24 @@ cdef class MapRefResolver: self.read_ref_ids.push_back(-1) return head_flag + cpdef inline int32_t try_preserve_ref_id_no_stub(self, Buffer buffer): + if not self.track_ref: + return buffer.read_int8() + head_flag = buffer.read_int8() + cdef int32_t ref_id + cdef PyObject *obj + if head_flag == REF_FLAG: + ref_id = buffer.read_var_uint32() + assert 0 <= ref_id < self.read_objects.size(), f"Invalid ref id {ref_id}, current size {self.read_objects.size()}" + obj = self.read_objects[ref_id] + assert obj != NULL, f"Invalid ref id {ref_id}, current size {self.read_objects.size()}" + self.read_object = obj + return head_flag + self.read_object = None + if head_flag == REF_VALUE_FLAG: + return self.preserve_ref_id() + return head_flag + cpdef inline int32_t last_preserved_ref_id(self): cdef int32_t length = self.read_ref_ids.size() assert length > 0 @@ -282,6 +335,7 @@ cdef int32_t NOT_NULL_FLOAT64_FLAG = fmod.NOT_NULL_FLOAT64_FLAG cdef int32_t NOT_NULL_BOOL_FLAG = fmod.NOT_NULL_BOOL_FLAG cdef int32_t NOT_NULL_STRING_FLAG = fmod.NOT_NULL_STRING_FLAG cdef int32_t SMALL_STRING_THRESHOLD = fmod.SMALL_STRING_THRESHOLD +cdef int32_t WRITE_REF_LINEAR_SCAN_LIMIT = 3 cdef inline uint64_t _mix64(uint64_t x): From fcb112aad204a66dd107dce2d59ee641f266b346 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sun, 8 Feb 2026 10:01:54 +0800 Subject: [PATCH 03/10] optimize perf --- cpp/fory/python/pyfory.cc | 173 ++++++++-- cpp/fory/python/pyfory.h | 6 + python/pyfory/collection.pxi | 364 +++++++++++++++++--- python/pyfory/includes/libserialization.pxd | 8 + python/pyfory/serialization.pyx | 66 ++-- 5 files changed, 511 insertions(+), 106 deletions(-) diff --git a/cpp/fory/python/pyfory.cc b/cpp/fory/python/pyfory.cc index 81e46b8a93..848bf3a18a 100644 --- a/cpp/fory/python/pyfory.cc +++ b/cpp/fory/python/pyfory.cc @@ -135,7 +135,8 @@ static bool py_read_string_from_buffer(fory::Buffer *buffer, PyObject **out) { } uint32_t reader_index = buffer->reader_index(); - const char *bytes = reinterpret_cast(buffer->data() + reader_index); + const char *bytes = + reinterpret_cast(buffer->data() + reader_index); uint32_t encoding = static_cast(header & 0b11); PyObject *value = nullptr; if (encoding == 0) { @@ -224,13 +225,27 @@ int Fory_PyInt64SequenceWriteVarintToBuffer(PyObject *collection, if (size < 0) { return -1; } + uint64_t max_write_bytes = static_cast(size) * 9; + if (max_write_bytes > std::numeric_limits::max()) { + return -1; + } + buffer->grow(static_cast(max_write_bytes)); + uint32_t writer_index = buffer->writer_index(); for (Py_ssize_t i = 0; i < size; i++) { int64_t value; if (!py_parse_int64(items[i], &value)) { return -1; } - buffer->write_var_int64(value); + uint64_t zigzag = (static_cast(value) << 1) ^ + static_cast(value >> 63); + if (FORY_PREDICT_TRUE(zigzag < 0x80)) { + buffer->unsafe_put_byte(writer_index, static_cast(zigzag)); + writer_index += 1; + } else { + writer_index += buffer->put_var_uint64(writer_index, zigzag); + } } + buffer->writer_index(writer_index); return 0; } @@ -301,6 +316,56 @@ int Fory_PyDetectSequenceNoNullExactTypeKind(PyObject *collection) { return kind; } +int Fory_PyDetectSequenceTypeAndNull(PyObject *collection, int *has_null, + int *has_same_type, + int64_t *element_type_addr) { + if (has_null == nullptr || has_same_type == nullptr || + element_type_addr == nullptr) { + return -1; + } + PyObject **items = py_sequence_get_items(collection); + if (items == nullptr) { + return -1; + } + Py_ssize_t size = Py_SIZE(collection); + *has_null = 0; + *has_same_type = 1; + *element_type_addr = 0; + PyTypeObject *element_type = nullptr; + for (Py_ssize_t i = 0; i < size; i++) { + PyObject *item = items[i]; + if (item == Py_None) { + *has_null = 1; + continue; + } + PyTypeObject *current_type = Py_TYPE(item); + if (element_type == nullptr) { + element_type = current_type; + } else if (*has_same_type != 0 && current_type != element_type) { + *has_same_type = 0; + } + } + if (element_type != nullptr) { + *element_type_addr = + static_cast(reinterpret_cast(element_type)); + } + return 0; +} + +int Fory_PySequenceHasNull(PyObject *collection) { + PyObject **items = py_sequence_get_items(collection); + if (items == nullptr) { + return -1; + } + Py_ssize_t size = Py_SIZE(collection); + for (Py_ssize_t i = 0; i < size; i++) { + if (items[i] == Py_None) { + return 1; + } + } + return 0; +} + int Fory_PyDetectStringKeyMapValueKind(PyObject *map) { if (!PyDict_CheckExact(map)) { return kForyPyStringMapValueNone; @@ -379,9 +444,9 @@ int Fory_PyStringStringMapWriteChunkToBuffer(PyObject *map, Py_ssize_t *pos, } static void set_map_contiguous_chunk_write_result( - Py_ssize_t chunk_size, int has_next, PyObject *next_key, PyObject *next_value, - Py_ssize_t *written_chunk_size, int *has_next_out, int64_t *next_key_addr, - int64_t *next_value_addr) { + Py_ssize_t chunk_size, int has_next, PyObject *next_key, + PyObject *next_value, Py_ssize_t *written_chunk_size, int *has_next_out, + int64_t *next_key_addr, int64_t *next_value_addr) { if (written_chunk_size != nullptr) { *written_chunk_size = chunk_size; } @@ -389,16 +454,16 @@ static void set_map_contiguous_chunk_write_result( *has_next_out = has_next; } if (next_key_addr != nullptr) { - *next_key_addr = has_next == 0 - ? 0 - : static_cast( - reinterpret_cast(next_key)); + *next_key_addr = + has_next == 0 + ? 0 + : static_cast(reinterpret_cast(next_key)); } if (next_value_addr != nullptr) { - *next_value_addr = has_next == 0 - ? 0 - : static_cast( - reinterpret_cast(next_value)); + *next_value_addr = + has_next == 0 + ? 0 + : static_cast(reinterpret_cast(next_value)); } } @@ -428,25 +493,25 @@ int Fory_PyStringInt64MapWriteContiguousChunkToBuffer( write_var_int64_to_buffer(buffer, int64_value); chunk_size += 1; if (chunk_size >= max_chunk_size) { - int iter_has_next = PyDict_Next(map, pos, ¤t_key, ¤t_value) - ? 1 - : 0; + int iter_has_next = + PyDict_Next(map, pos, ¤t_key, ¤t_value) ? 1 : 0; set_map_contiguous_chunk_write_result( chunk_size, iter_has_next, current_key, current_value, written_chunk_size, has_next, next_key_addr, next_value_addr); return 0; } if (!PyDict_Next(map, pos, ¤t_key, ¤t_value)) { - set_map_contiguous_chunk_write_result( - chunk_size, 0, nullptr, nullptr, written_chunk_size, has_next, - next_key_addr, next_value_addr); + set_map_contiguous_chunk_write_result(chunk_size, 0, nullptr, nullptr, + written_chunk_size, has_next, + next_key_addr, next_value_addr); return 0; } - if (!PyUnicode_CheckExact(current_key) || !PyLong_CheckExact(current_value) || - current_value == Py_True || current_value == Py_False) { + if (!PyUnicode_CheckExact(current_key) || + !PyLong_CheckExact(current_value) || current_value == Py_True || + current_value == Py_False) { set_map_contiguous_chunk_write_result( - chunk_size, 1, current_key, current_value, written_chunk_size, has_next, - next_key_addr, next_value_addr); + chunk_size, 1, current_key, current_value, written_chunk_size, + has_next, next_key_addr, next_value_addr); return 0; } } @@ -473,24 +538,24 @@ int Fory_PyStringStringMapWriteContiguousChunkToBuffer( } chunk_size += 1; if (chunk_size >= max_chunk_size) { - int iter_has_next = PyDict_Next(map, pos, ¤t_key, ¤t_value) - ? 1 - : 0; + int iter_has_next = + PyDict_Next(map, pos, ¤t_key, ¤t_value) ? 1 : 0; set_map_contiguous_chunk_write_result( chunk_size, iter_has_next, current_key, current_value, written_chunk_size, has_next, next_key_addr, next_value_addr); return 0; } if (!PyDict_Next(map, pos, ¤t_key, ¤t_value)) { - set_map_contiguous_chunk_write_result( - chunk_size, 0, nullptr, nullptr, written_chunk_size, has_next, - next_key_addr, next_value_addr); + set_map_contiguous_chunk_write_result(chunk_size, 0, nullptr, nullptr, + written_chunk_size, has_next, + next_key_addr, next_value_addr); return 0; } - if (!PyUnicode_CheckExact(current_key) || !PyUnicode_CheckExact(current_value)) { + if (!PyUnicode_CheckExact(current_key) || + !PyUnicode_CheckExact(current_value)) { set_map_contiguous_chunk_write_result( - chunk_size, 1, current_key, current_value, written_chunk_size, has_next, - next_key_addr, next_value_addr); + chunk_size, 1, current_key, current_value, written_chunk_size, + has_next, next_key_addr, next_value_addr); return 0; } } @@ -613,18 +678,58 @@ int Fory_PyInt64SequenceReadVarintFromBuffer(PyObject *collection, if (items == nullptr) { return -1; } + if (size < 0) { + return -1; + } + uint32_t reader_index = buffer->reader_index(); + uint8_t *data = buffer->data(); + uint32_t buffer_size = buffer->size(); Error error; for (Py_ssize_t i = 0; i < size; i++) { - int64_t value = buffer->read_var_int64(error); - if (FORY_PREDICT_FALSE(!error.ok())) { + if (FORY_PREDICT_FALSE(reader_index >= buffer_size)) { return -1; } + int64_t value; + uint8_t first = data[reader_index]; + if (FORY_PREDICT_TRUE((first & 0x80) == 0)) { + uint64_t zigzag = first; + value = static_cast((zigzag >> 1) ^ + -static_cast(zigzag & 1)); + reader_index += 1; + } else { + buffer->reader_index(reader_index); + value = buffer->read_var_int64(error); + if (FORY_PREDICT_FALSE(!error.ok())) { + return -1; + } + reader_index = buffer->reader_index(); + } PyObject *obj = PyLong_FromLongLong(value); if (obj == nullptr) { return -1; } items[i] = obj; } + buffer->reader_index(reader_index); + return 0; +} + +int Fory_PyStringSequenceReadFromBuffer(PyObject *collection, Buffer *buffer, + Py_ssize_t size) { + if (Py_SIZE(collection) != size) { + return -1; + } + PyObject **items = py_sequence_get_items(collection); + if (items == nullptr) { + return -1; + } + for (Py_ssize_t i = 0; i < size; i++) { + PyObject *value = nullptr; + if (!py_read_string_from_buffer(buffer, &value)) { + return -1; + } + items[i] = value; + } return 0; } } // namespace fory diff --git a/cpp/fory/python/pyfory.h b/cpp/fory/python/pyfory.h index e102834a23..09107209bc 100644 --- a/cpp/fory/python/pyfory.h +++ b/cpp/fory/python/pyfory.h @@ -45,6 +45,10 @@ enum ForyPySequenceValueKind : int { }; int Fory_PyDetectSequenceNoNullExactTypeKind(PyObject *collection); +int Fory_PyDetectSequenceTypeAndNull(PyObject *collection, int *has_null, + int *has_same_type, + int64_t *element_type_addr); +int Fory_PySequenceHasNull(PyObject *collection); int Fory_PyDetectStringKeyMapValueKind(PyObject *map); int Fory_PyStringInt64MapWriteChunkToBuffer(PyObject *map, Py_ssize_t *pos, @@ -74,4 +78,6 @@ int Fory_PyFloatSequenceReadFromBuffer(PyObject *collection, Buffer *buffer, Py_ssize_t size); int Fory_PyInt64SequenceReadVarintFromBuffer(PyObject *collection, Buffer *buffer, Py_ssize_t size); +int Fory_PyStringSequenceReadFromBuffer(PyObject *collection, Buffer *buffer, + Py_ssize_t size); } // namespace fory diff --git a/python/pyfory/collection.pxi b/python/pyfory/collection.pxi index b0782d025a..38cd8c33ba 100644 --- a/python/pyfory/collection.pxi +++ b/python/pyfory/collection.pxi @@ -94,6 +94,9 @@ cdef class CollectionSerializer(Serializer): cdef type value_type = type(value) cdef Py_ssize_t seq_size cdef Py_ssize_t i + cdef int has_null_i + cdef int has_same_type_i + cdef int64_t elem_type_addr cdef object s if elem_type is None: if value_type is list or value_type is tuple: @@ -108,19 +111,54 @@ cdef class CollectionSerializer(Serializer): elem_type = float if fast_kind == kForyPySequenceValueNone: if value_type is list or value_type is tuple: - seq_size = Py_SIZE(value) - for i in range(seq_size): - if value_type is list: - s = PyList_GET_ITEM(value, i) - else: - s = PyTuple_GET_ITEM(value, i) - if not has_null and s is None: - has_null = True - continue - if elem_type is None: - elem_type = type(s) - elif has_same_type and type(s) is not elem_type: - has_same_type = False + has_null_i = 0 + has_same_type_i = 1 + elem_type_addr = 0 + if Fory_PyDetectSequenceTypeAndNull( + value, &has_null_i, &has_same_type_i, &elem_type_addr) == 0: + has_null = has_null_i != 0 + has_same_type = has_same_type_i != 0 + if elem_type_addr != 0: + if elem_type_addr == list: + elem_type = list + elif elem_type_addr == tuple: + elem_type = tuple + elif elem_type_addr == set: + elem_type = set + elif elem_type_addr == dict: + elem_type = dict + elif elem_type_addr == str: + elem_type = str + elif elem_type_addr == int: + elem_type = int + elif elem_type_addr == bool: + elem_type = bool + elif elem_type_addr == float: + elem_type = float + else: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_type is list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if s is not None: + elem_type = type(s) + break + else: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_type is list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if not has_null and s is None: + has_null = True + continue + if elem_type is None: + elem_type = type(s) + elif has_same_type and type(s) is not elem_type: + has_same_type = False else: for s in value: if not has_null and s is None: @@ -136,15 +174,19 @@ cdef class CollectionSerializer(Serializer): else: collect_flag |= COLL_IS_DECL_ELEMENT_TYPE | COLL_IS_SAME_TYPE if value_type is list or value_type is tuple: - seq_size = Py_SIZE(value) - for i in range(seq_size): - if value_type is list: - s = PyList_GET_ITEM(value, i) - else: - s = PyTuple_GET_ITEM(value, i) - if s is None: - has_null = True - break + has_null_i = Fory_PySequenceHasNull(value) + if has_null_i >= 0: + has_null = has_null_i != 0 + else: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_type is list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if s is None: + has_null = True + break else: for s in value: if s is None: @@ -295,6 +337,10 @@ cdef class CollectionSerializer(Serializer): buffer.write_string(s) cdef inline _read_string(self, Buffer buffer, int64_t len_, object collection_): + collection_type = type(collection_) + if collection_type is list or collection_type is tuple: + if Fory_PyStringSequenceReadFromBuffer(collection_, &buffer.c_buffer, len_) == 0: + return for i in range(len_): self._add_element(collection_, i, buffer.read_string()) @@ -421,55 +467,291 @@ cdef class CollectionSerializer(Serializer): cpdef _write_same_type_ref(self, Buffer buffer, value, TypeInfo typeinfo): cdef MapRefResolver ref_resolver = self.ref_resolver + cdef Serializer item_serializer = typeinfo.serializer + cdef type item_cls = typeinfo.cls + cdef ListSerializer list_serializer + cdef TupleSerializer tuple_serializer + cdef SetSerializer set_serializer + cdef MapSerializer map_serializer cdef type value_type = type(value) cdef object s cdef Py_ssize_t i cdef Py_ssize_t seq_size - if not self.fory.xlang: - if value_type is list or value_type is tuple: + cdef c_bool value_is_list = value_type is list + cdef c_bool value_is_tuple = value_type is tuple + cdef c_bool is_py = not self.fory.xlang + if item_cls is list: + list_serializer = item_serializer + if is_py: + if value_is_list or value_is_tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_is_list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if not ref_resolver.write_ref_or_null(buffer, s): + list_serializer.write(buffer, s) + else: + for s in value: + if not ref_resolver.write_ref_or_null(buffer, s): + list_serializer.write(buffer, s) + else: + if value_is_list or value_is_tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_is_list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if not ref_resolver.write_ref_or_null(buffer, s): + list_serializer.xwrite(buffer, s) + else: + for s in value: + if not ref_resolver.write_ref_or_null(buffer, s): + list_serializer.xwrite(buffer, s) + return + if item_cls is tuple: + tuple_serializer = item_serializer + if is_py: + if value_is_list or value_is_tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_is_list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if not ref_resolver.write_ref_or_null(buffer, s): + tuple_serializer.write(buffer, s) + else: + for s in value: + if not ref_resolver.write_ref_or_null(buffer, s): + tuple_serializer.write(buffer, s) + else: + if value_is_list or value_is_tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_is_list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if not ref_resolver.write_ref_or_null(buffer, s): + tuple_serializer.xwrite(buffer, s) + else: + for s in value: + if not ref_resolver.write_ref_or_null(buffer, s): + tuple_serializer.xwrite(buffer, s) + return + if item_cls is set: + set_serializer = item_serializer + if is_py: + if value_is_list or value_is_tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_is_list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if not ref_resolver.write_ref_or_null(buffer, s): + set_serializer.write(buffer, s) + else: + for s in value: + if not ref_resolver.write_ref_or_null(buffer, s): + set_serializer.write(buffer, s) + else: + if value_is_list or value_is_tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_is_list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if not ref_resolver.write_ref_or_null(buffer, s): + set_serializer.xwrite(buffer, s) + else: + for s in value: + if not ref_resolver.write_ref_or_null(buffer, s): + set_serializer.xwrite(buffer, s) + return + if item_cls is dict: + map_serializer = item_serializer + if is_py: + if value_is_list or value_is_tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_is_list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if not ref_resolver.write_ref_or_null(buffer, s): + map_serializer.write(buffer, s) + else: + for s in value: + if not ref_resolver.write_ref_or_null(buffer, s): + map_serializer.write(buffer, s) + else: + if value_is_list or value_is_tuple: + seq_size = Py_SIZE(value) + for i in range(seq_size): + if value_is_list: + s = PyList_GET_ITEM(value, i) + else: + s = PyTuple_GET_ITEM(value, i) + if not ref_resolver.write_ref_or_null(buffer, s): + map_serializer.xwrite(buffer, s) + else: + for s in value: + if not ref_resolver.write_ref_or_null(buffer, s): + map_serializer.xwrite(buffer, s) + return + if is_py: + if value_is_list or value_is_tuple: seq_size = Py_SIZE(value) for i in range(seq_size): - if value_type is list: + if value_is_list: s = PyList_GET_ITEM(value, i) else: s = PyTuple_GET_ITEM(value, i) if not ref_resolver.write_ref_or_null(buffer, s): - typeinfo.serializer.write(buffer, s) + item_serializer.write(buffer, s) else: for s in value: if not ref_resolver.write_ref_or_null(buffer, s): - typeinfo.serializer.write(buffer, s) + item_serializer.write(buffer, s) else: - if value_type is list or value_type is tuple: + if value_is_list or value_is_tuple: seq_size = Py_SIZE(value) for i in range(seq_size): - if value_type is list: + if value_is_list: s = PyList_GET_ITEM(value, i) else: s = PyTuple_GET_ITEM(value, i) if not ref_resolver.write_ref_or_null(buffer, s): - typeinfo.serializer.xwrite(buffer, s) + item_serializer.xwrite(buffer, s) else: for s in value: if not ref_resolver.write_ref_or_null(buffer, s): - typeinfo.serializer.xwrite(buffer, s) + item_serializer.xwrite(buffer, s) cpdef _read_same_type_ref(self, Buffer buffer, int64_t len_, object collection_, TypeInfo typeinfo): cdef MapRefResolver ref_resolver = self.ref_resolver - cdef TypeResolver type_resolver = self.type_resolver + cdef Serializer item_serializer = typeinfo.serializer + cdef type item_cls = typeinfo.cls + cdef ListSerializer list_serializer + cdef TupleSerializer tuple_serializer + cdef SetSerializer set_serializer + cdef MapSerializer map_serializer cdef c_bool is_py = not self.fory.xlang + cdef int32_t ref_id + cdef object obj + cdef int64_t i self.fory.inc_depth() - for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) - if ref_id < NOT_NULL_VALUE_FLAG: - obj = ref_resolver.read_object + if item_cls is list: + list_serializer = item_serializer + if is_py: + for i in range(len_): + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + if ref_id < NOT_NULL_VALUE_FLAG: + obj = ref_resolver.read_object + else: + obj = list_serializer.read(buffer) + ref_resolver.set_read_object(ref_id, obj) + self._add_element(collection_, i, obj) + else: + for i in range(len_): + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + if ref_id < NOT_NULL_VALUE_FLAG: + obj = ref_resolver.read_object + else: + obj = list_serializer.xread(buffer) + ref_resolver.set_read_object(ref_id, obj) + self._add_element(collection_, i, obj) + self.fory.dec_depth() + return + if item_cls is tuple: + tuple_serializer = item_serializer + if is_py: + for i in range(len_): + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + if ref_id < NOT_NULL_VALUE_FLAG: + obj = ref_resolver.read_object + else: + obj = tuple_serializer.read(buffer) + ref_resolver.set_read_object(ref_id, obj) + self._add_element(collection_, i, obj) + else: + for i in range(len_): + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + if ref_id < NOT_NULL_VALUE_FLAG: + obj = ref_resolver.read_object + else: + obj = tuple_serializer.xread(buffer) + ref_resolver.set_read_object(ref_id, obj) + self._add_element(collection_, i, obj) + self.fory.dec_depth() + return + if item_cls is set: + set_serializer = item_serializer + if is_py: + for i in range(len_): + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + if ref_id < NOT_NULL_VALUE_FLAG: + obj = ref_resolver.read_object + else: + obj = set_serializer.read(buffer) + ref_resolver.set_read_object(ref_id, obj) + self._add_element(collection_, i, obj) + else: + for i in range(len_): + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + if ref_id < NOT_NULL_VALUE_FLAG: + obj = ref_resolver.read_object + else: + obj = set_serializer.xread(buffer) + ref_resolver.set_read_object(ref_id, obj) + self._add_element(collection_, i, obj) + self.fory.dec_depth() + return + if item_cls is dict: + map_serializer = item_serializer + if is_py: + for i in range(len_): + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + if ref_id < NOT_NULL_VALUE_FLAG: + obj = ref_resolver.read_object + else: + obj = map_serializer.read(buffer) + ref_resolver.set_read_object(ref_id, obj) + self._add_element(collection_, i, obj) else: - if is_py: - obj = typeinfo.serializer.read(buffer) + for i in range(len_): + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + if ref_id < NOT_NULL_VALUE_FLAG: + obj = ref_resolver.read_object + else: + obj = map_serializer.xread(buffer) + ref_resolver.set_read_object(ref_id, obj) + self._add_element(collection_, i, obj) + self.fory.dec_depth() + return + if is_py: + for i in range(len_): + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + if ref_id < NOT_NULL_VALUE_FLAG: + obj = ref_resolver.read_object else: - obj = typeinfo.serializer.xread(buffer) - ref_resolver.set_read_object(ref_id, obj) - self._add_element(collection_, i, obj) + obj = item_serializer.read(buffer) + ref_resolver.set_read_object(ref_id, obj) + self._add_element(collection_, i, obj) + else: + for i in range(len_): + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + if ref_id < NOT_NULL_VALUE_FLAG: + obj = ref_resolver.read_object + else: + obj = item_serializer.xread(buffer) + ref_resolver.set_read_object(ref_id, obj) + self._add_element(collection_, i, obj) self.fory.dec_depth() cpdef _add_element(self, object collection_, int64_t index, object element): diff --git a/python/pyfory/includes/libserialization.pxd b/python/pyfory/includes/libserialization.pxd index 82f6cf82bb..0558d8de3a 100644 --- a/python/pyfory/includes/libserialization.pxd +++ b/python/pyfory/includes/libserialization.pxd @@ -156,6 +156,14 @@ cdef extern from "fory/python/pyfory.h" namespace "fory": CBuffer *buffer, ) int Fory_PyDetectSequenceNoNullExactTypeKind(object collection) + int Fory_PyDetectSequenceTypeAndNull( + object collection, + int *has_null, + int *has_same_type, + int64_t *element_type_addr, + ) + int Fory_PySequenceHasNull(object collection) int Fory_PyBooleanSequenceReadFromBuffer(object collection, CBuffer *buffer, Py_ssize_t size) int Fory_PyFloatSequenceReadFromBuffer(object collection, CBuffer *buffer, Py_ssize_t size) int Fory_PyInt64SequenceReadVarintFromBuffer(object collection, CBuffer *buffer, Py_ssize_t size) + int Fory_PyStringSequenceReadFromBuffer(object collection, CBuffer *buffer, Py_ssize_t size) diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx index 235464456f..3b1de07ca2 100644 --- a/python/pyfory/serialization.pyx +++ b/python/pyfory/serialization.pyx @@ -27,7 +27,6 @@ import time import warnings from typing import TypeVar, Union, Iterable -from pyfory.buffer import get_bit, set_bit, clear_bit from pyfory import _fory as fmod from pyfory._fory import _ENABLE_TYPE_REGISTRATION_FORCIBLY from pyfory.lib import mmh3 @@ -42,6 +41,7 @@ from pyfory.includes.libserialization cimport \ Fory_PyStringSequenceWriteToBuffer, Fory_PyBooleanSequenceReadFromBuffer, Fory_PyFloatSequenceReadFromBuffer, Fory_PyInt64SequenceReadVarintFromBuffer, + Fory_PyStringSequenceReadFromBuffer, Fory_PyDetectStringKeyMapValueKind, Fory_PyStringInt64MapWriteChunkToBuffer, Fory_PyStringStringMapWriteChunkToBuffer, Fory_PyStringInt64MapWriteContiguousChunkToBuffer, @@ -49,6 +49,8 @@ from pyfory.includes.libserialization cimport \ Fory_PyStringInt64MapReadChunkFromBuffer, Fory_PyStringStringMapReadChunkFromBuffer, Fory_PyDetectSequenceNoNullExactTypeKind, + Fory_PyDetectSequenceTypeAndNull, + Fory_PySequenceHasNull, kForyPyStringMapValueNone, kForyPyStringMapValueInt64, kForyPyStringMapValueString, kForyPySequenceValueNone, kForyPySequenceValueString, kForyPySequenceValueInt64, kForyPySequenceValueBool, kForyPySequenceValueFloat64) @@ -64,8 +66,9 @@ from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM, PyTuple_GET_ITEM from libcpp cimport bool as c_bool from libcpp.utility cimport pair from cython.operator cimport dereference as deref -from pyfory.buffer cimport Buffer +from pyfory.buffer cimport Buffer, get_address from pyfory.includes.libabsl cimport flat_hash_map +from pyfory.includes.libutil cimport CBuffer from pyfory.meta.metastring import MetaStringDecoder try: @@ -335,7 +338,7 @@ cdef int32_t NOT_NULL_FLOAT64_FLAG = fmod.NOT_NULL_FLOAT64_FLAG cdef int32_t NOT_NULL_BOOL_FLAG = fmod.NOT_NULL_BOOL_FLAG cdef int32_t NOT_NULL_STRING_FLAG = fmod.NOT_NULL_STRING_FLAG cdef int32_t SMALL_STRING_THRESHOLD = fmod.SMALL_STRING_THRESHOLD -cdef int32_t WRITE_REF_LINEAR_SCAN_LIMIT = 3 +cdef int32_t WRITE_REF_LINEAR_SCAN_LIMIT = 2 cdef inline uint64_t _mix64(uint64_t x): @@ -347,6 +350,19 @@ cdef inline uint64_t _mix64(uint64_t x): return x +cdef inline Buffer _wrap_bytes_no_copy(bytes data): + cdef Buffer buffer = Buffer.__new__(Buffer) + cdef Py_ssize_t length = len(data) + cdef uint8_t* address = NULL + if length > 0: + address = get_address(data) + buffer.data = data + buffer.c_buffer = CBuffer(address, length, False) + buffer.c_buffer.reader_index(0) + buffer.c_buffer.writer_index(0) + return buffer + + cdef inline int64_t _hash_small_metastring(int64_t v1, int64_t v2, int32_t length, @@ -1360,25 +1376,14 @@ cdef class Fory: if buffer is None: self.buffer.set_writer_index(0) buffer = self.buffer - cdef int32_t mask_index = buffer.get_writer_index() - # 1byte used for bit mask - buffer.grow(1) - buffer.set_writer_index(mask_index + 1) + cdef uint8_t mask = 0 if obj is None: - set_bit(buffer, mask_index, 0) - else: - clear_bit(buffer, mask_index, 0) - + mask |= 0b1 if self.xlang: - # set reader as x_lang. - set_bit(buffer, mask_index, 1) - else: - # set reader as native. - clear_bit(buffer, mask_index, 1) + mask |= 0b10 if self.buffer_callback is not None: - set_bit(buffer, mask_index, 2) - else: - clear_bit(buffer, mask_index, 2) + mask |= 0b100 + buffer.write_uint8(mask) cdef int32_t start_offset if not self.xlang: self.write_ref(buffer, obj) @@ -1492,7 +1497,7 @@ cdef class Fory: """ try: if type(buffer) == bytes: - buffer = Buffer(buffer) + buffer = _wrap_bytes_no_copy(buffer) return self._deserialize(buffer, buffers, unsupported_objects) finally: self.reset_read() @@ -1503,12 +1508,11 @@ cdef class Fory: self.depth += 1 if unsupported_objects is not None: self._unsupported_objects = iter(unsupported_objects) - cdef int32_t reader_index = buffer.get_reader_index() - buffer.set_reader_index(reader_index + 1) - if get_bit(buffer, reader_index, 0): + cdef uint8_t mask = buffer.read_uint8() + if (mask & 0b1) != 0: return None - cdef c_bool is_target_x_lang = get_bit(buffer, reader_index, 1) - self.is_peer_out_of_band_enabled = get_bit(buffer, reader_index, 2) + cdef c_bool is_target_x_lang = (mask & 0b10) != 0 + self.is_peer_out_of_band_enabled = (mask & 0b100) != 0 if self.is_peer_out_of_band_enabled: assert buffers is not None, ( "buffers shouldn't be null when the serialized stream is " @@ -1530,9 +1534,9 @@ cdef class Fory: cpdef inline read_ref(self, Buffer buffer): cdef MapRefResolver ref_resolver = self.ref_resolver - cdef int32_t ref_id = ref_resolver.try_preserve_ref_id(buffer) + cdef int32_t ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) if ref_id < NOT_NULL_VALUE_FLAG: - return ref_resolver.get_read_object() + return ref_resolver.read_object # indicates that the object is first read. cdef TypeInfo typeinfo = self.type_resolver.read_type_info(buffer) cls = typeinfo.cls @@ -1572,7 +1576,7 @@ cdef class Fory: cdef int32_t ref_id if serializer is None or serializer.need_to_write_ref: ref_resolver = self.ref_resolver - ref_id = ref_resolver.try_preserve_ref_id(buffer) + ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) # indicates that the object is first read. if ref_id >= NOT_NULL_VALUE_FLAG: # Don't push -1 here - try_preserve_ref_id already pushed ref_id @@ -1580,7 +1584,7 @@ cdef class Fory: ref_resolver.set_read_object(ref_id, o) return o else: - return ref_resolver.get_read_object() + return ref_resolver.read_object cdef int8_t head_flag = buffer.read_int8() if head_flag == NULL_FLAG: return None @@ -1688,9 +1692,9 @@ cdef class Fory: cpdef inline read_ref_pyobject(self, Buffer buffer): cdef MapRefResolver ref_resolver = self.ref_resolver - cdef int32_t ref_id = ref_resolver.try_preserve_ref_id(buffer) + cdef int32_t ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) if ref_id < NOT_NULL_VALUE_FLAG: - return ref_resolver.get_read_object() + return ref_resolver.read_object # indicates that the object is first read. cdef TypeInfo typeinfo = self.type_resolver.read_type_info(buffer) self.inc_depth() From 97a373ca899dd5be5fe37cf2a9414f4673a89279 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 14 Feb 2026 12:14:28 +0800 Subject: [PATCH 04/10] fix(cpp): prevent readable-size underflow in pyfory fast path --- cpp/fory/python/pyfory.cc | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/cpp/fory/python/pyfory.cc b/cpp/fory/python/pyfory.cc index 848bf3a18a..41be582e12 100644 --- a/cpp/fory/python/pyfory.cc +++ b/cpp/fory/python/pyfory.cc @@ -35,9 +35,23 @@ static PyObject **py_sequence_get_items(PyObject *collection) { static bool ensure_readable_size(fory::Buffer *buffer, Py_ssize_t size, Py_ssize_t element_size) { - uint64_t readable = - static_cast(buffer->size() - buffer->reader_index()); - uint64_t expected = static_cast(size) * element_size; + if (size < 0 || element_size < 0) { + return false; + } + uint32_t buffer_size = buffer->size(); + uint32_t reader_index = buffer->reader_index(); + if (FORY_PREDICT_FALSE(reader_index > buffer_size)) { + return false; + } + uint64_t element_size_u64 = static_cast(element_size); + uint64_t size_u64 = static_cast(size); + if (FORY_PREDICT_FALSE(element_size_u64 != 0 && + size_u64 > std::numeric_limits::max() / + element_size_u64)) { + return false; + } + uint64_t readable = static_cast(buffer_size - reader_index); + uint64_t expected = size_u64 * element_size_u64; return readable >= expected; } From 18fb3d9cd5bb32409245abe43fdf0acbb201699b Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 14 Feb 2026 12:37:39 +0800 Subject: [PATCH 05/10] fix(cpp): guard int fast path for python 3.13 --- cpp/fory/python/pyfory.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/fory/python/pyfory.cc b/cpp/fory/python/pyfory.cc index 41be582e12..48e6905039 100644 --- a/cpp/fory/python/pyfory.cc +++ b/cpp/fory/python/pyfory.cc @@ -60,6 +60,7 @@ static bool py_parse_int64(PyObject *obj, int64_t *out) { return false; } +#if PY_VERSION_HEX < 0x030D0000 auto *long_obj = reinterpret_cast(obj); Py_ssize_t size = Py_SIZE(long_obj); if (size == 0) { @@ -83,6 +84,7 @@ static bool py_parse_int64(PyObject *obj, int64_t *out) { *out = value; return true; } +#endif int overflow = 0; long long value = PyLong_AsLongLongAndOverflow(obj, &overflow); From ee2e152436448e48494c7549cb5a50784c3a05bb Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 14 Feb 2026 12:45:48 +0800 Subject: [PATCH 06/10] fix(cpp): skip py long internals on limited api --- cpp/fory/python/pyfory.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/fory/python/pyfory.cc b/cpp/fory/python/pyfory.cc index 48e6905039..79b0ee4b35 100644 --- a/cpp/fory/python/pyfory.cc +++ b/cpp/fory/python/pyfory.cc @@ -60,7 +60,7 @@ static bool py_parse_int64(PyObject *obj, int64_t *out) { return false; } -#if PY_VERSION_HEX < 0x030D0000 +#if PY_VERSION_HEX < 0x030D0000 && !defined(Py_LIMITED_API) auto *long_obj = reinterpret_cast(obj); Py_ssize_t size = Py_SIZE(long_obj); if (size == 0) { From 99b4982e742502ec3de2da79c66bef414bb27b87 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Sat, 14 Feb 2026 12:47:49 +0800 Subject: [PATCH 07/10] fix(cpp): avoid direct py long internals in pyfory --- cpp/fory/python/pyfory.cc | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/cpp/fory/python/pyfory.cc b/cpp/fory/python/pyfory.cc index 79b0ee4b35..6f9292dbd4 100644 --- a/cpp/fory/python/pyfory.cc +++ b/cpp/fory/python/pyfory.cc @@ -60,32 +60,6 @@ static bool py_parse_int64(PyObject *obj, int64_t *out) { return false; } -#if PY_VERSION_HEX < 0x030D0000 && !defined(Py_LIMITED_API) - auto *long_obj = reinterpret_cast(obj); - Py_ssize_t size = Py_SIZE(long_obj); - if (size == 0) { - *out = 0; - return true; - } - if (size == 1) { - *out = static_cast(long_obj->ob_digit[0]); - return true; - } - if (size == -1) { - *out = -static_cast(long_obj->ob_digit[0]); - return true; - } - if (size == 2 || size == -2) { - int64_t value = static_cast(long_obj->ob_digit[0]); - value |= static_cast(long_obj->ob_digit[1]) << PyLong_SHIFT; - if (size < 0) { - value = -value; - } - *out = value; - return true; - } -#endif - int overflow = 0; long long value = PyLong_AsLongLongAndOverflow(obj, &overflow); if (overflow != 0) { From 540ef0f50a4bd3c5704ca80925fce8851eac66e6 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 17 Feb 2026 21:23:51 +0800 Subject: [PATCH 08/10] refactor(python): remove try_preserve_ref_id_no_stub path --- python/pyfory/collection.pxi | 32 +++++++++++------------ python/pyfory/serialization.pyx | 46 +++------------------------------ 2 files changed, 19 insertions(+), 59 deletions(-) diff --git a/python/pyfory/collection.pxi b/python/pyfory/collection.pxi index 38cd8c33ba..b2219cad66 100644 --- a/python/pyfory/collection.pxi +++ b/python/pyfory/collection.pxi @@ -650,7 +650,7 @@ cdef class CollectionSerializer(Serializer): list_serializer = item_serializer if is_py: for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: obj = ref_resolver.read_object else: @@ -659,7 +659,7 @@ cdef class CollectionSerializer(Serializer): self._add_element(collection_, i, obj) else: for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: obj = ref_resolver.read_object else: @@ -672,7 +672,7 @@ cdef class CollectionSerializer(Serializer): tuple_serializer = item_serializer if is_py: for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: obj = ref_resolver.read_object else: @@ -681,7 +681,7 @@ cdef class CollectionSerializer(Serializer): self._add_element(collection_, i, obj) else: for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: obj = ref_resolver.read_object else: @@ -694,7 +694,7 @@ cdef class CollectionSerializer(Serializer): set_serializer = item_serializer if is_py: for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: obj = ref_resolver.read_object else: @@ -703,7 +703,7 @@ cdef class CollectionSerializer(Serializer): self._add_element(collection_, i, obj) else: for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: obj = ref_resolver.read_object else: @@ -716,7 +716,7 @@ cdef class CollectionSerializer(Serializer): map_serializer = item_serializer if is_py: for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: obj = ref_resolver.read_object else: @@ -725,7 +725,7 @@ cdef class CollectionSerializer(Serializer): self._add_element(collection_, i, obj) else: for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: obj = ref_resolver.read_object else: @@ -736,7 +736,7 @@ cdef class CollectionSerializer(Serializer): return if is_py: for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: obj = ref_resolver.read_object else: @@ -745,7 +745,7 @@ cdef class CollectionSerializer(Serializer): self._add_element(collection_, i, obj) else: for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: obj = ref_resolver.read_object else: @@ -857,7 +857,7 @@ cdef inline get_next_element( cdef int32_t ref_id cdef TypeInfo typeinfo cdef object o - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: return ref_resolver.read_object cdef uint8_t type_id @@ -1067,7 +1067,7 @@ cdef class SetSerializer(CollectionSerializer): if tracking_ref: # When ref tracking is enabled, read with ref handling for i in range(len_): - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: instance.add(ref_resolver.read_object) continue @@ -1486,7 +1486,7 @@ cdef class MapSerializer(Serializer): track_key_ref = (chunk_header & TRACKING_KEY_REF) != 0 if (chunk_header & KEY_DECL_TYPE) != 0: if track_key_ref: - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: key = ref_resolver.read_object else: @@ -1511,7 +1511,7 @@ cdef class MapSerializer(Serializer): track_value_ref = (chunk_header & TRACKING_VALUE_REF) != 0 if (chunk_header & VALUE_DECL_TYPE) != 0: if track_value_ref: - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: value = ref_resolver.read_object else: @@ -1572,7 +1572,7 @@ cdef class MapSerializer(Serializer): continue for i in range(chunk_size): if track_key_ref: - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: key = ref_resolver.read_object else: @@ -1598,7 +1598,7 @@ cdef class MapSerializer(Serializer): else: key = fory.xread_no_ref(buffer, serializer=key_serializer) if track_value_ref: - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: value = ref_resolver.read_object else: diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx index 3b1de07ca2..8a33c01ec7 100644 --- a/python/pyfory/serialization.pyx +++ b/python/pyfory/serialization.pyx @@ -153,28 +153,7 @@ cdef class MapRefResolver: buffer.write_int8(NULL_FLAG) return True cdef uint64_t object_id = obj - cdef int32_t existing_size = self.written_objects.size() cdef int32_t next_id - cdef int32_t i - cdef PyObject *written_obj - cdef uint64_t written_object_id - if self.written_objects_id.size() == 0: - if existing_size < WRITE_REF_LINEAR_SCAN_LIMIT: - for i in range(existing_size): - written_obj = self.written_objects[i] - if written_obj == obj: - buffer.write_int8(REF_FLAG) - buffer.write_var_uint32( i) - return True - self.written_objects.push_back( obj) - Py_INCREF(obj) - buffer.write_int8(REF_VALUE_FLAG) - return False - self.written_objects_id.reserve(self.written_objects.size() * 2) - for i in range(existing_size): - written_obj = self.written_objects[i] - written_object_id = written_obj - self.written_objects_id[written_object_id] = i cdef flat_hash_map[uint64_t, int32_t].iterator it = \ self.written_objects_id.find(object_id) if it == self.written_objects_id.end(): @@ -244,24 +223,6 @@ cdef class MapRefResolver: self.read_ref_ids.push_back(-1) return head_flag - cpdef inline int32_t try_preserve_ref_id_no_stub(self, Buffer buffer): - if not self.track_ref: - return buffer.read_int8() - head_flag = buffer.read_int8() - cdef int32_t ref_id - cdef PyObject *obj - if head_flag == REF_FLAG: - ref_id = buffer.read_var_uint32() - assert 0 <= ref_id < self.read_objects.size(), f"Invalid ref id {ref_id}, current size {self.read_objects.size()}" - obj = self.read_objects[ref_id] - assert obj != NULL, f"Invalid ref id {ref_id}, current size {self.read_objects.size()}" - self.read_object = obj - return head_flag - self.read_object = None - if head_flag == REF_VALUE_FLAG: - return self.preserve_ref_id() - return head_flag - cpdef inline int32_t last_preserved_ref_id(self): cdef int32_t length = self.read_ref_ids.size() assert length > 0 @@ -338,7 +299,6 @@ cdef int32_t NOT_NULL_FLOAT64_FLAG = fmod.NOT_NULL_FLOAT64_FLAG cdef int32_t NOT_NULL_BOOL_FLAG = fmod.NOT_NULL_BOOL_FLAG cdef int32_t NOT_NULL_STRING_FLAG = fmod.NOT_NULL_STRING_FLAG cdef int32_t SMALL_STRING_THRESHOLD = fmod.SMALL_STRING_THRESHOLD -cdef int32_t WRITE_REF_LINEAR_SCAN_LIMIT = 2 cdef inline uint64_t _mix64(uint64_t x): @@ -1534,7 +1494,7 @@ cdef class Fory: cpdef inline read_ref(self, Buffer buffer): cdef MapRefResolver ref_resolver = self.ref_resolver - cdef int32_t ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + cdef int32_t ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: return ref_resolver.read_object # indicates that the object is first read. @@ -1576,7 +1536,7 @@ cdef class Fory: cdef int32_t ref_id if serializer is None or serializer.need_to_write_ref: ref_resolver = self.ref_resolver - ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + ref_id = ref_resolver.try_preserve_ref_id(buffer) # indicates that the object is first read. if ref_id >= NOT_NULL_VALUE_FLAG: # Don't push -1 here - try_preserve_ref_id already pushed ref_id @@ -1692,7 +1652,7 @@ cdef class Fory: cpdef inline read_ref_pyobject(self, Buffer buffer): cdef MapRefResolver ref_resolver = self.ref_resolver - cdef int32_t ref_id = ref_resolver.try_preserve_ref_id_no_stub(buffer) + cdef int32_t ref_id = ref_resolver.try_preserve_ref_id(buffer) if ref_id < NOT_NULL_VALUE_FLAG: return ref_resolver.read_object # indicates that the object is first read. From 5a25773cfb139ae9a0c0f5e9f5a7aeed4ed50b10 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 17 Feb 2026 21:36:04 +0800 Subject: [PATCH 09/10] refactor(python): remove collection typeinfo cache fields --- python/pyfory/collection.pxi | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/python/pyfory/collection.pxi b/python/pyfory/collection.pxi index b2219cad66..4771143f85 100644 --- a/python/pyfory/collection.pxi +++ b/python/pyfory/collection.pxi @@ -40,19 +40,11 @@ cdef class CollectionSerializer(Serializer): cdef int8_t elem_tracking_ref cdef elem_type cdef TypeInfo elem_type_info - cdef TypeInfo list_type_info - cdef TypeInfo tuple_type_info - cdef TypeInfo set_type_info - cdef TypeInfo dict_type_info def __init__(self, fory, type_, elem_serializer=None, elem_tracking_ref=None): super().__init__(fory, type_) self.type_resolver = fory.type_resolver self.ref_resolver = fory.ref_resolver - self.list_type_info = None - self.tuple_type_info = None - self.set_type_info = None - self.dict_type_info = None self.elem_serializer = elem_serializer if elem_serializer is None: self.elem_type = None @@ -66,22 +58,6 @@ cdef class CollectionSerializer(Serializer): self.elem_tracking_ref = (1 if elem_tracking_ref else 0) cdef inline TypeInfo _get_type_info_fast(self, type cls): - if cls is list: - if self.list_type_info is None: - self.list_type_info = self.type_resolver.get_type_info(list) - return self.list_type_info - if cls is tuple: - if self.tuple_type_info is None: - self.tuple_type_info = self.type_resolver.get_type_info(tuple) - return self.tuple_type_info - if cls is set: - if self.set_type_info is None: - self.set_type_info = self.type_resolver.get_type_info(set) - return self.set_type_info - if cls is dict: - if self.dict_type_info is None: - self.dict_type_info = self.type_resolver.get_type_info(dict) - return self.dict_type_info return self.type_resolver.get_type_info(cls) cdef inline pair[int8_t, int64_t] write_header(self, Buffer buffer, value): From e62454e5c34c8d607966416989513f96ed346870 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 18 Feb 2026 17:27:00 +0800 Subject: [PATCH 10/10] clean code --- cpp/fory/util/buffer.h | 40 +++++++++++++++++++++++++++++++++ python/pyfory/collection.pxi | 12 ++++++++++ python/pyfory/resolver.py | 15 ++++++++++--- python/pyfory/serialization.pyx | 31 +++++++++++++++++++++---- 4 files changed, 91 insertions(+), 7 deletions(-) diff --git a/cpp/fory/util/buffer.h b/cpp/fory/util/buffer.h index 0631d989bf..86b7b4adf1 100644 --- a/cpp/fory/util/buffer.h +++ b/cpp/fory/util/buffer.h @@ -300,18 +300,38 @@ class Buffer { /// Slow path for get_var_uint32 when not enough bytes for bulk read. uint32_t get_var_uint32_slow(uint32_t offset, uint32_t *read_bytes_length) { uint32_t position = offset; + if (FORY_PREDICT_FALSE(position >= size_)) { + *read_bytes_length = 0; + return 0; + } int b = data_[position++]; uint32_t result = b & 0x7F; if ((b & 0x80) != 0) { + if (FORY_PREDICT_FALSE(position >= size_)) { + *read_bytes_length = 0; + return 0; + } b = data_[position++]; result |= (b & 0x7F) << 7; if ((b & 0x80) != 0) { + if (FORY_PREDICT_FALSE(position >= size_)) { + *read_bytes_length = 0; + return 0; + } b = data_[position++]; result |= (b & 0x7F) << 14; if ((b & 0x80) != 0) { + if (FORY_PREDICT_FALSE(position >= size_)) { + *read_bytes_length = 0; + return 0; + } b = data_[position++]; result |= (b & 0x7F) << 21; if ((b & 0x80) != 0) { + if (FORY_PREDICT_FALSE(position >= size_)) { + *read_bytes_length = 0; + return 0; + } b = data_[position++]; result |= (b & 0x7F) << 28; } @@ -442,6 +462,10 @@ class Buffer { uint64_t result = 0; int shift = 0; for (int i = 0; i < 8; ++i) { + if (FORY_PREDICT_FALSE(position >= size_)) { + *read_bytes_length = 0; + return 0; + } uint8_t b = data_[position++]; result |= static_cast(b & 0x7F) << shift; if ((b & 0x80) == 0) { @@ -450,6 +474,10 @@ class Buffer { } shift += 7; } + if (FORY_PREDICT_FALSE(position >= size_)) { + *read_bytes_length = 0; + return 0; + } uint8_t last = data_[position++]; result |= static_cast(last) << 56; *read_bytes_length = position - offset; @@ -830,6 +858,10 @@ class Buffer { } uint32_t read_bytes = 0; uint32_t value = get_var_uint32(reader_index_, &read_bytes); + if (FORY_PREDICT_FALSE(read_bytes == 0)) { + error.set_buffer_out_of_bound(reader_index_, 1, size_); + return 0; + } increase_reader_index(read_bytes); return value; } @@ -843,6 +875,10 @@ class Buffer { } uint32_t read_bytes = 0; uint32_t raw = get_var_uint32(reader_index_, &read_bytes); + if (FORY_PREDICT_FALSE(read_bytes == 0)) { + error.set_buffer_out_of_bound(reader_index_, 1, size_); + return 0; + } increase_reader_index(read_bytes); return static_cast((raw >> 1) ^ (~(raw & 1) + 1)); } @@ -855,6 +891,10 @@ class Buffer { } uint32_t read_bytes = 0; uint64_t value = get_var_uint64(reader_index_, &read_bytes); + if (FORY_PREDICT_FALSE(read_bytes == 0)) { + error.set_buffer_out_of_bound(reader_index_, 1, size_); + return 0; + } increase_reader_index(read_bytes); return value; } diff --git a/python/pyfory/collection.pxi b/python/pyfory/collection.pxi index 4771143f85..92147ea8e7 100644 --- a/python/pyfory/collection.pxi +++ b/python/pyfory/collection.pxi @@ -317,6 +317,9 @@ cdef class CollectionSerializer(Serializer): if collection_type is list or collection_type is tuple: if Fory_PyStringSequenceReadFromBuffer(collection_, &buffer.c_buffer, len_) == 0: return + raise RuntimeError( + "c++ fast path failed for homogeneous string sequence read" + ) for i in range(len_): self._add_element(collection_, i, buffer.read_string()) @@ -333,6 +336,9 @@ cdef class CollectionSerializer(Serializer): if collection_type is list or collection_type is tuple: if Fory_PyInt64SequenceReadVarintFromBuffer(collection_, &buffer.c_buffer, len_) == 0: return + raise RuntimeError( + "c++ fast path failed for homogeneous int64 sequence read" + ) for i in range(len_): self._add_element(collection_, i, buffer.read_varint64()) @@ -354,6 +360,9 @@ cdef class CollectionSerializer(Serializer): if collection_type is list or collection_type is tuple: if Fory_PyBooleanSequenceReadFromBuffer(collection_, &buffer.c_buffer, len_) == 0: return + raise RuntimeError( + "c++ fast path failed for homogeneous bool sequence read" + ) for i in range(len_): self._add_element(collection_, i, buffer.read_bool()) @@ -375,6 +384,9 @@ cdef class CollectionSerializer(Serializer): if collection_type is list or collection_type is tuple: if Fory_PyFloatSequenceReadFromBuffer(collection_, &buffer.c_buffer, len_) == 0: return + raise RuntimeError( + "c++ fast path failed for homogeneous float64 sequence read" + ) for i in range(len_): self._add_element(collection_, i, buffer.read_double()) diff --git a/python/pyfory/resolver.py b/python/pyfory/resolver.py index 480fabfe02..c44b3f6a89 100644 --- a/python/pyfory/resolver.py +++ b/python/pyfory/resolver.py @@ -167,6 +167,8 @@ def write_ref_or_null(self, buffer, obj): def read_ref_or_null(self, buffer): head_flag = buffer.read_int8() + if head_flag != NULL_FLAG and head_flag != NOT_NULL_VALUE_FLAG and head_flag != REF_VALUE_FLAG and head_flag != REF_FLAG: + raise ValueError(f"Invalid ref flag {head_flag}") if head_flag == REF_FLAG: # read reference id and get object from reference resolver ref_id = buffer.read_var_uint32() @@ -184,6 +186,8 @@ def preserve_ref_id(self) -> int: def try_preserve_ref_id(self, buffer) -> int: head_flag = buffer.read_int8() + if head_flag != NULL_FLAG and head_flag != NOT_NULL_VALUE_FLAG and head_flag != REF_VALUE_FLAG and head_flag != REF_FLAG: + raise ValueError(f"Invalid ref flag {head_flag}") if head_flag == REF_FLAG: # read reference id and get object from reference resolver ref_id = buffer.read_var_uint32() @@ -192,9 +196,10 @@ def try_preserve_ref_id(self, buffer) -> int: self.read_object = None if head_flag == REF_VALUE_FLAG: return self.preserve_ref_id() - # For NOT_NULL_VALUE_FLAG, push -1 to read_ref_ids so reference() knows - # this object is not referenceable (it's a value type, not a reference type) - self.read_ref_ids.append(-1) + if head_flag == NOT_NULL_VALUE_FLAG: + # For NOT_NULL_VALUE_FLAG, push -1 to read_ref_ids so reference() knows + # this object is not referenceable (it's a value type, not a reference type) + self.read_ref_ids.append(-1) # `head_flag` except `REF_FLAG` can be used as stub reference id because we use # `refId >= NOT_NULL_VALUE_FLAG` to read data. return head_flag @@ -203,6 +208,8 @@ def last_preserved_ref_id(self) -> int: return self.read_ref_ids[-1] def reference(self, obj): + if len(self.read_ref_ids) == 0: + raise RuntimeError("No preserved ref id") ref_id = self.read_ref_ids.pop() # When NOT_NULL_VALUE_FLAG was read instead of REF_VALUE_FLAG, # -1 is pushed to read_ref_ids. This means the object is a value type @@ -214,6 +221,8 @@ def reference(self, obj): def get_read_object(self, id_=None): if id_ is None: return self.read_object + if id_ < 0 or id_ >= len(self.read_objects): + raise RuntimeError(f"Ref id {id_} invalid, current size {len(self.read_objects)}") return self.read_objects[id_] def set_read_object(self, id_, obj): diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx index 8a33c01ec7..bbe7e3005a 100644 --- a/python/pyfory/serialization.pyx +++ b/python/pyfory/serialization.pyx @@ -172,6 +172,8 @@ cdef class MapRefResolver: cpdef inline int8_t read_ref_or_null(self, Buffer buffer): cdef int8_t head_flag = buffer.read_int8() if not self.track_ref: + if head_flag != NULL_FLAG and head_flag != NOT_NULL_VALUE_FLAG: + raise ValueError(f"Invalid ref flag {head_flag}") return head_flag cdef int32_t ref_id cdef PyObject * obj @@ -185,6 +187,12 @@ cdef class MapRefResolver: return REF_FLAG else: self.read_object = None + if ( + head_flag != NULL_FLAG + and head_flag != NOT_NULL_VALUE_FLAG + and head_flag != REF_VALUE_FLAG + ): + raise ValueError(f"Invalid ref flag {head_flag}") return head_flag cpdef inline int32_t preserve_ref_id(self): @@ -218,10 +226,14 @@ cdef class MapRefResolver: self.read_object = None if head_flag == REF_VALUE_FLAG: return self.preserve_ref_id() - # For NOT_NULL_VALUE_FLAG, push -1 to read_ref_ids so reference() knows - # this object is not referenceable (it's a value type, not a reference type) - self.read_ref_ids.push_back(-1) - return head_flag + if head_flag == NOT_NULL_VALUE_FLAG: + # For NOT_NULL_VALUE_FLAG, push -1 to read_ref_ids so reference() knows + # this object is not referenceable (it's a value type, not a reference type) + self.read_ref_ids.push_back(-1) + return head_flag + if head_flag == NULL_FLAG: + return head_flag + raise ValueError(f"Invalid ref flag {head_flag}") cpdef inline int32_t last_preserved_ref_id(self): cdef int32_t length = self.read_ref_ids.size() @@ -911,6 +923,7 @@ cdef class MetaContext: cpdef inline read_shared_type_info_with_type_id(self, Buffer buffer, uint8_t type_id): """Read shared type info when type_id is already consumed.""" cdef uint32_t user_type_id = NO_USER_TYPE_ID + cdef int32_t read_type_infos_size cdef TypeRegistrationKind reg_kind = get_type_registration_kind(type_id) cdef c_bool share_meta = is_type_share_meta(type_id) if reg_kind == TypeRegistrationKind.BY_ID and not share_meta: @@ -926,9 +939,19 @@ cdef class MetaContext: if is_ref: # Reference to previously read type + read_type_infos_size = len(self._read_type_infos) + if index < 0 or index >= read_type_infos_size: + raise ValueError( + f"Invalid shared type index {index}, size {read_type_infos_size}" + ) return self._read_type_infos[index] else: # New type - read TypeDef inline and build TypeInfo + read_type_infos_size = len(self._read_type_infos) + if index != read_type_infos_size: + raise ValueError( + f"Invalid shared type define index {index}, expected {read_type_infos_size}" + ) type_info = self.type_resolver._read_and_build_type_info(buffer) self._read_type_infos.append(type_info) return type_info