From 8c7d5c317dc3faad891caff0ed72d934dca7eb55 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Thu, 12 Feb 2026 11:34:11 +0500
Subject: [PATCH 01/23] feat(python): add bfloat16 and bfloat16_array support

- Add BFloat16 Cython type with IEEE 754 compliant conversions
- Add BFloat16Array class backed by array.array('H')
- Implement serializers for scalar and array types
- Register types in type resolver (TypeId.BFLOAT16 = 18, TypeId.BFLOAT16_ARRAY = 54)
- Add buffer read/write methods for bfloat16
- Add codegen support for bfloat16
- Add row format support (with temporary float16 mapping until C++ support)
- Add comprehensive test suite with 11 test cases covering all edge cases
- Follow existing float16 implementation patterns

Fixes #3289
---
 python/pyfory/__init__.py            |  16 ++++
 python/pyfory/bfloat16.pyx           | 112 +++++++++++++++++++++++
 python/pyfory/bfloat16_array.py      |  77 ++++++++++++++++
 python/pyfory/buffer.pxd             |   4 +
 python/pyfory/buffer.pyx             |   8 ++
 python/pyfory/codegen.py             |   3 +
 python/pyfory/format/__init__.py     |   1 +
 python/pyfory/format/schema.pxi      |   7 +-
 python/pyfory/format/schema.py       |   3 +
 python/pyfory/includes/libformat.pxd |   4 +
 python/pyfory/primitive.pxi          |  14 +++
 python/pyfory/registry.py            |  16 ++++
 python/pyfory/serialization.pyx      |  18 ++++
 python/pyfory/serializer.py          |  44 +++++++++
 python/pyfory/tests/test_bfloat16.py | 128 +++++++++++++++++++++++++++
 python/pyfory/types.py               |   3 +
 16 files changed, 457 insertions(+), 1 deletion(-)
 create mode 100644 python/pyfory/bfloat16.pyx
 create mode 100644 python/pyfory/bfloat16_array.py
 create mode 100644 python/pyfory/tests/test_bfloat16.py

diff --git a/python/pyfory/__init__.py b/python/pyfory/__init__.py
index a80b447ddf..9cb2898ad3 100644
--- a/python/pyfory/__init__.py
+++ b/python/pyfory/__init__.py
@@ -51,6 +51,7 @@
     TaggedUint64Serializer,
     Float32Serializer,
     Float64Serializer,
+    BFloat16Serializer,
     StringSerializer,
     DateSerializer,
     TimestampSerializer,
@@ -89,6 +90,8 @@
     tagged_uint64,
     float32,
     float64,
+    bfloat16,
+    bfloat16_array,
     int8_array,
     uint8_array,
     int16_array,
@@ -119,6 +122,14 @@
 from pyfory.policy import DeserializationPolicy  # noqa: F401 # pylint: disable=unused-import
 from pyfory.buffer import Buffer  # noqa: F401 # pylint: disable=unused-import
 
+# BFloat16 support
+try:
+    from pyfory.bfloat16 import BFloat16  # noqa: F401
+    from pyfory.bfloat16_array import BFloat16Array  # noqa: F401
+except ImportError:
+    # Cython extensions not built yet
+    pass
+
 __version__ = "0.16.0.dev0"
 
 __all__ = [
@@ -152,6 +163,10 @@
     "tagged_uint64",
     "float32",
     "float64",
+    "BFloat16",
+    "BFloat16Array",
+    "bfloat16",
+    "bfloat16_array",
     "int8_array",
     "uint8_array",
     "int16_array",
@@ -194,6 +209,7 @@
     "TaggedUint64Serializer",
     "Float32Serializer",
     "Float64Serializer",
+    "BFloat16Serializer",
     "StringSerializer",
     "DateSerializer",
     "TimestampSerializer",
diff --git a/python/pyfory/bfloat16.pyx b/python/pyfory/bfloat16.pyx
new file mode 100644
index 0000000000..feabbf2332
--- /dev/null
+++ b/python/pyfory/bfloat16.pyx
@@ -0,0 +1,112 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level = 3
+
+from libc.stdint cimport uint16_t, uint32_t
+
+cdef inline uint16_t float32_to_bfloat16_bits(float value) nogil:
+    # Convert float32 to bfloat16 bits with round-to-nearest, ties-to-even.
+    cdef uint32_t f32_bits = (<uint32_t*>&value)[0]
+    if (f32_bits & 0x7FFFFFFF) > 0x7F800000:
+        # NaN must not be rounded: a payload held only in the low 16 bits
+        # would collapse to inf, and an all-ones mantissa would carry into
+        # the exponent/sign and become zero. Truncate and set the quiet bit
+        # (0x0040) so the result is always a NaN with the original sign.
+        return <uint16_t>((f32_bits >> 16) | 0x0040)
+    # Adding 0x7FFF plus the lowest kept bit rounds halfway cases to even; a
+    # mantissa carry bumps the exponent, so values past max finite become inf.
+    # No uint32 overflow: non-NaN inputs are at most 0xFF800000 here.
+    return <uint16_t>((f32_bits + 0x7FFF + ((f32_bits >> 16) & 1)) >> 16)
+
+cdef inline float bfloat16_bits_to_float32(uint16_t bits) nogil:
+    cdef uint32_t f32_bits = <uint32_t>bits << 16
+    return (<float*>&f32_bits)[0]
+
+
+cdef class BFloat16:
+    cdef uint16_t _bits
+    
+    def __init__(self, value):
+        if isinstance(value, BFloat16):
+            self._bits = (<BFloat16>value)._bits
+        else:
+            self._bits = float32_to_bfloat16_bits(<float>float(value))
+    
+    @staticmethod
+    def from_bits(uint16_t bits):
+        cdef BFloat16 bf16 = BFloat16.__new__(BFloat16)
+        bf16._bits = bits
+        return bf16
+    
+    def to_bits(self):
+        return self._bits
+    
+    def to_float32(self):
+        return bfloat16_bits_to_float32(self._bits)
+    
+    def __float__(self):
+        return float(self.to_float32())
+    
+    def __repr__(self):
+        return f"BFloat16({self.to_float32()})"
+    
+    def __str__(self):
+        return str(self.to_float32())
+    
+    def __eq__(self, other):
+        if isinstance(other, BFloat16):
+            if self.is_nan() or (<BFloat16>other).is_nan():
+                return False
+            if self.is_zero() and (<BFloat16>other).is_zero():
+                return True
+            return self._bits == (<BFloat16>other)._bits
+        return False
+    
+    def __hash__(self):
+        return hash(0 if self.is_zero() else self._bits)  # __eq__ treats +0.0 == -0.0, so both zeros must share one hash
+    
+    def is_nan(self):
+        cdef uint16_t exp = (self._bits >> 7) & 0xFF
+        cdef uint16_t mant = self._bits & 0x7F
+        return exp == 0xFF and mant != 0
+    
+    def is_inf(self):
+        cdef uint16_t exp = (self._bits >> 7) & 0xFF
+        cdef uint16_t mant = self._bits & 0x7F
+        return exp == 0xFF and mant == 0
+    
+    def is_zero(self):
+        return (self._bits & 0x7FFF) == 0
+    
+    def is_finite(self):
+        cdef uint16_t exp = (self._bits >> 7) & 0xFF
+        return exp != 0xFF
+    
+    def is_normal(self):
+        cdef uint16_t exp = (self._bits >> 7) & 0xFF
+        return exp != 0 and exp != 0xFF
+    
+    def is_subnormal(self):
+        cdef uint16_t exp = (self._bits >> 7) & 0xFF
+        cdef uint16_t mant = self._bits & 0x7F
+        return exp == 0 and mant != 0
+    
+    def signbit(self):
+        return (self._bits & 0x8000) != 0
diff --git a/python/pyfory/bfloat16_array.py b/python/pyfory/bfloat16_array.py
new file mode 100644
index 0000000000..06d6658407
--- /dev/null
+++ b/python/pyfory/bfloat16_array.py
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import array
+
+from pyfory.bfloat16 import BFloat16
+
+
+class BFloat16Array:
+    def __init__(self, values=None):
+        if values is None:
+            self._data = array.array('H')
+        else:
+            self._data = array.array('H', [BFloat16(v).to_bits() if not isinstance(v, BFloat16) else v.to_bits() for v in values])
+    
+    def __len__(self):
+        return len(self._data)
+    
+    def __getitem__(self, index):
+        return BFloat16.from_bits(self._data[index])
+    
+    def __setitem__(self, index, value):
+        if isinstance(value, BFloat16):
+            self._data[index] = value.to_bits()
+        else:
+            self._data[index] = BFloat16(value).to_bits()
+    
+    def __iter__(self):
+        for bits in self._data:
+            yield BFloat16.from_bits(bits)
+    
+    def __repr__(self):
+        return f"BFloat16Array([{', '.join(str(bf16) for bf16 in self)}])"
+    
+    def __eq__(self, other):
+        if not isinstance(other, BFloat16Array):
+            return False
+        return self._data == other._data
+    
+    def append(self, value):
+        if isinstance(value, BFloat16):
+            self._data.append(value.to_bits())
+        else:
+            self._data.append(BFloat16(value).to_bits())
+    
+    def extend(self, values):
+        for value in values:
+            self.append(value)
+    
+    @property
+    def itemsize(self):
+        return 2
+    
+    def tobytes(self):
+        return self._data.tobytes()
+    
+    @classmethod
+    def frombytes(cls, data):
+        arr = cls()
+        arr._data = array.array('H')
+        arr._data.frombytes(data)
+        return arr
+
diff --git a/python/pyfory/buffer.pxd b/python/pyfory/buffer.pxd
index d6f02f133b..3b3fc77cd1 100644
--- a/python/pyfory/buffer.pxd
+++ b/python/pyfory/buffer.pxd
@@ -128,6 +128,8 @@ cdef class Buffer:
 
     cpdef inline write_float64(self, double value)
 
+    cpdef inline write_bfloat16(self, uint16_t value)
+
     cpdef inline skip(self, int32_t length)
 
     cpdef inline c_bool read_bool(self)
@@ -158,6 +160,8 @@ cdef class Buffer:
 
     cpdef inline double read_float64(self)
 
+    cpdef inline uint16_t read_bfloat16(self)
+
     cpdef inline write_varint64(self, int64_t v)
 
     cpdef inline write_var_uint64(self, int64_t v)
diff --git a/python/pyfory/buffer.pyx b/python/pyfory/buffer.pyx
index 3f8e0935c6..827ca0256a 100644
--- a/python/pyfory/buffer.pyx
+++ b/python/pyfory/buffer.pyx
@@ -244,6 +244,14 @@ cdef class Buffer:
     cpdef inline write_float64(self, double value):
         self.c_buffer.write_double(value)
 
+    cpdef inline write_bfloat16(self, uint16_t value):
+        self.c_buffer.write_uint16(value)
+
+    cpdef inline uint16_t read_bfloat16(self):
+        cdef uint16_t value = self.c_buffer.read_uint16(self._error)
+        self._raise_if_error()
+        return value
+
     cpdef put_buffer(self, uint32_t offset, v, int32_t src_index, int32_t length):
         if length == 0:  # access an emtpy buffer may raise out-of-bound exception.
             return
diff --git a/python/pyfory/codegen.py b/python/pyfory/codegen.py
index 820484de7c..6b9d72d684 100644
--- a/python/pyfory/codegen.py
+++ b/python/pyfory/codegen.py
@@ -58,6 +58,7 @@
         "write_nullable_pyfloat64",
         "read_nullable_pyfloat64",
     ),
+    "bfloat16": ("write_bfloat16", "read_bfloat16", "write_nullable_bfloat16", "read_nullable_bfloat16"),
 }
 
 
@@ -144,6 +145,8 @@ def compile_function(
         context["read_nullable_pyfloat64"] = serialization.read_nullable_pyfloat64
         context["write_nullable_pystr"] = serialization.write_nullable_pystr
         context["read_nullable_pystr"] = serialization.read_nullable_pystr
+        context["write_nullable_bfloat16"] = serialization.write_nullable_bfloat16
+        context["read_nullable_bfloat16"] = serialization.read_nullable_bfloat16
     stmts = [f"{ident(statement)}" for statement in stmts]
     # Sanitize the function name to ensure it is valid Python syntax
     sanitized_function_name = _sanitize_function_name(function_name)
diff --git a/python/pyfory/format/__init__.py b/python/pyfory/format/__init__.py
index 6c9fb205d8..d2732299af 100644
--- a/python/pyfory/format/__init__.py
+++ b/python/pyfory/format/__init__.py
@@ -36,6 +36,7 @@
         int32,
         int64,
         float16,
+        bfloat16,
         float32,
         float64,
         utf8,
diff --git a/python/pyfory/format/schema.pxi b/python/pyfory/format/schema.pxi
index 84c859cf12..7c9edf2759 100644
--- a/python/pyfory/format/schema.pxi
+++ b/python/pyfory/format/schema.pxi
@@ -34,7 +34,7 @@ from pyfory.includes.libformat cimport (
     CListType, CListTypePtr, CMapType, CMapTypePtr, CStructType, CStructTypePtr,
     CFixedWidthType, CDecimalType, CTypeId,
     boolean as c_boolean, int8 as c_int8, int16 as c_int16, int32 as c_int32,
-    int64 as c_int64, float16 as c_float16, float32 as c_float32,
+    int64 as c_int64, float16 as c_float16, bfloat16 as c_bfloat16, float32 as c_float32,
     float64 as c_float64, utf8 as c_utf8, binary as c_binary,
     duration as c_duration, timestamp as c_timestamp, date32 as c_date32,
     decimal as c_decimal, fory_list, struct_, fory_map, fory_field, fory_schema,
@@ -42,6 +42,7 @@ from pyfory.includes.libformat cimport (
 )
 
 
+
 # Create Python-accessible TypeId enum
 # The CTypeId enum from libformat.pxd is only accessible from Cython
 class TypeId:
@@ -417,6 +418,10 @@ def float16():
     """Create a 16-bit floating point type."""
     return DataType.wrap(c_float16())
 
+def bfloat16():
+    """Create a 16-bit brain floating point type."""
+    return DataType.wrap(c_bfloat16())
+
 def float32():
     """Create a 32-bit floating point type."""
     return DataType.wrap(c_float32())
diff --git a/python/pyfory/format/schema.py b/python/pyfory/format/schema.py
index 18baa20378..2abc9287a2 100644
--- a/python/pyfory/format/schema.py
+++ b/python/pyfory/format/schema.py
@@ -56,6 +56,8 @@ def arrow_type_to_fory_type_id(arrow_type):
     # Floating point types
     if pa_types.is_float16(arrow_type):
         return 17  # FLOAT16
+    if hasattr(pa_types, 'is_bfloat16') and pa_types.is_bfloat16(arrow_type):
+        return 18  # BFLOAT16
     if pa_types.is_float32(arrow_type):
         return 19  # FLOAT32
     if pa_types.is_float64(arrow_type):
@@ -116,6 +118,7 @@ def fory_type_id_to_arrow_type(type_id, precision=None, scale=None, list_type=No
         4: pa.int32(),  # INT32
         6: pa.int64(),  # INT64
         17: pa.float16(),  # FLOAT16
+        18: pa.float16(),  # BFLOAT16 (Arrow doesn't have native bfloat16, map to float16)
         19: pa.float32(),  # FLOAT32
         20: pa.float64(),  # FLOAT64
         21: pa.utf8(),  # STRING
diff --git a/python/pyfory/includes/libformat.pxd b/python/pyfory/includes/libformat.pxd
index 240cb0f44a..67cfd7289d 100755
--- a/python/pyfory/includes/libformat.pxd
+++ b/python/pyfory/includes/libformat.pxd
@@ -136,6 +136,9 @@ cdef extern from "fory/row/schema.h" namespace "fory::row" nogil:
     cdef cppclass CFloat16Type" fory::row::Float16Type"(CFixedWidthType):
         pass
 
+    cdef cppclass CBFloat16Type" fory::row::BFloat16Type"(CFixedWidthType):
+        pass
+
     cdef cppclass CFloat32Type" fory::row::Float32Type"(CFixedWidthType):
         pass
 
@@ -223,6 +226,7 @@ cdef extern from "fory/row/schema.h" namespace "fory::row" nogil:
     shared_ptr[CDataType] int32" fory::row::int32"()
     shared_ptr[CDataType] int64" fory::row::int64"()
     shared_ptr[CDataType] float16" fory::row::float16"()
+    shared_ptr[CDataType] bfloat16" fory::row::bfloat16"()
     shared_ptr[CDataType] float32" fory::row::float32"()
     shared_ptr[CDataType] float64" fory::row::float64"()
     shared_ptr[CDataType] utf8" fory::row::utf8"()
diff --git a/python/pyfory/primitive.pxi b/python/pyfory/primitive.pxi
index 829c9edb34..ea825712ad 100644
--- a/python/pyfory/primitive.pxi
+++ b/python/pyfory/primitive.pxi
@@ -204,6 +204,20 @@ cdef class Float64Serializer(XlangCompatibleSerializer):
         return buffer.read_double()
 
 
+@cython.final
+cdef class BFloat16Serializer(XlangCompatibleSerializer):
+    cpdef inline write(self, Buffer buffer, value):
+        from pyfory.bfloat16 import BFloat16
+        if isinstance(value, BFloat16):
+            buffer.write_bfloat16((<BFloat16>value).to_bits())
+        else:
+            buffer.write_bfloat16(BFloat16(value).to_bits())
+
+    cpdef inline read(self, Buffer buffer):
+        from pyfory.bfloat16 import BFloat16
+        return BFloat16.from_bits(buffer.read_bfloat16())
+
+
 @cython.final
 cdef class StringSerializer(XlangCompatibleSerializer):
     def __init__(self, fory, type_, track_ref=False):
diff --git a/python/pyfory/registry.py b/python/pyfory/registry.py
index 21038de295..23a25505a1 100644
--- a/python/pyfory/registry.py
+++ b/python/pyfory/registry.py
@@ -55,6 +55,7 @@
     TaggedUint64Serializer,
     Float32Serializer,
     Float64Serializer,
+    BFloat16Serializer,
     StringSerializer,
     DateSerializer,
     TimestampSerializer,
@@ -320,6 +321,13 @@ def _initialize_common(self):
             serializer=Float64Serializer,
         )
         register(float, type_id=TypeId.FLOAT64, serializer=Float64Serializer)
+        # BFloat16
+        from pyfory.bfloat16 import BFloat16
+        register(
+            BFloat16,
+            type_id=TypeId.BFLOAT16,
+            serializer=BFloat16Serializer,
+        )
         register(str, type_id=TypeId.STRING, serializer=StringSerializer)
         # TODO(chaokunyang) DURATION DECIMAL
         register(datetime.datetime, type_id=TypeId.TIMESTAMP, serializer=TimestampSerializer)
@@ -331,6 +339,14 @@ def _initialize_common(self):
                 type_id=typeid,
                 serializer=PyArraySerializer(self.fory, ftype, typeid),
             )
+        # Register BFloat16Array
+        from pyfory.bfloat16_array import BFloat16Array
+        from pyfory.serializer import BFloat16ArraySerializer
+        register(
+            BFloat16Array,
+            type_id=TypeId.BFLOAT16_ARRAY,
+            serializer=BFloat16ArraySerializer(self.fory, BFloat16Array, TypeId.BFLOAT16_ARRAY),
+            )
         if np:
             # overwrite pyarray  with same type id.
             # if pyarray are needed, one must annotate that value with XXXArrayType
diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx
index 154c4cc8f5..12ee7c54bd 100644
--- a/python/pyfory/serialization.pyx
+++ b/python/pyfory/serialization.pyx
@@ -1790,6 +1790,24 @@ cpdef inline read_nullable_pystr(Buffer buffer):
     else:
         return None
 
+cpdef inline write_nullable_bfloat16(Buffer buffer, value):
+    if value is None:
+        buffer.write_int8(NULL_FLAG)
+    else:
+        buffer.write_int8(NOT_NULL_VALUE_FLAG)
+        from pyfory.bfloat16 import BFloat16
+        if isinstance(value, BFloat16):
+            buffer.write_bfloat16((<BFloat16>value).to_bits())
+        else:
+            buffer.write_bfloat16(BFloat16(value).to_bits())
+
+cpdef inline read_nullable_bfloat16(Buffer buffer):
+    if buffer.read_int8() == NOT_NULL_VALUE_FLAG:
+        from pyfory.bfloat16 import BFloat16
+        return BFloat16.from_bits(buffer.read_bfloat16())
+    else:
+        return None
+
 
 cdef class Serializer:
     """
diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py
index 00b6a44ab3..b27739594f 100644
--- a/python/pyfory/serializer.py
+++ b/python/pyfory/serializer.py
@@ -65,6 +65,7 @@
         TaggedUint64Serializer,
         Float32Serializer,
         Float64Serializer,
+        BFloat16Serializer,
         StringSerializer,
         DateSerializer,
         TimestampSerializer,
@@ -284,6 +285,7 @@ def xread(self, buffer):
         TypeId.UINT64_ARRAY: "Q",
         TypeId.FLOAT32_ARRAY: "f",
         TypeId.FLOAT64_ARRAY: "d",
+        TypeId.BFLOAT16_ARRAY: "H",  # bfloat16 uses 'H' typecode (uint16)
     }
 )
 
@@ -418,6 +420,48 @@ def read(self, buffer):
         return self._serializer.read(buffer)
 
 
+class BFloat16ArraySerializer(XlangCompatibleSerializer):
+    def __init__(self, fory, ftype, type_id: int):
+        super().__init__(fory, ftype)
+        self.type_id = type_id
+        self.itemsize = 2
+    
+    def xwrite(self, buffer, value):
+        from pyfory.bfloat16_array import BFloat16Array
+        if isinstance(value, BFloat16Array):
+            arr_data = value._data
+        elif isinstance(value, array.array) and value.typecode == 'H':
+            arr_data = value
+        else:
+            arr_data = BFloat16Array(value)._data
+        nbytes = len(arr_data) * 2
+        buffer.write_var_uint32(nbytes)
+        if nbytes > 0:
+            if is_little_endian:
+                buffer.write_buffer(arr_data)
+            else:
+                swapped = array.array('H', arr_data)
+                swapped.byteswap()
+                buffer.write_buffer(swapped)
+    
+    def xread(self, buffer):
+        from pyfory.bfloat16_array import BFloat16Array
+        data = buffer.read_bytes_and_size()
+        arr = array.array('H', [])
+        arr.frombytes(data)
+        if not is_little_endian:
+            arr.byteswap()
+        bf16_arr = BFloat16Array.__new__(BFloat16Array)
+        bf16_arr._data = arr
+        return bf16_arr
+    
+    def write(self, buffer, value):
+        self.xwrite(buffer, value)
+    
+    def read(self, buffer):
+        return self.xread(buffer)
+
+
 if np:
     _np_dtypes_dict = (
         {
diff --git a/python/pyfory/tests/test_bfloat16.py b/python/pyfory/tests/test_bfloat16.py
new file mode 100644
index 0000000000..3fa3672374
--- /dev/null
+++ b/python/pyfory/tests/test_bfloat16.py
@@ -0,0 +1,128 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import math
+import pytest
+
+import pyfory
+from pyfory import Fory
+from pyfory.bfloat16 import BFloat16
+from pyfory.bfloat16_array import BFloat16Array
+from pyfory.types import TypeId
+
+
+def ser_de(fory, value):
+    data = fory.serialize(value)
+    return fory.deserialize(data)
+
+
+def test_bfloat16_basic():
+    bf16 = BFloat16(3.14)
+    assert isinstance(bf16, BFloat16)
+    assert bf16.to_float32() == pytest.approx(3.14, abs=0.01)
+    bits = bf16.to_bits()
+    assert BFloat16.from_bits(bits).to_bits() == bits
+
+
+def test_bfloat16_special_values():
+    assert BFloat16(float('nan')).is_nan()
+    assert BFloat16(float('inf')).is_inf()
+    assert BFloat16(float('-inf')).is_inf()
+    assert BFloat16(0.0).is_zero()
+    assert BFloat16(1.0).is_finite()
+    assert not BFloat16(1.0).is_nan()
+    assert not BFloat16(1.0).is_inf()
+
+
+def test_bfloat16_conversion():
+    assert BFloat16(0.0).to_float32() == 0.0
+    assert BFloat16(1.0).to_float32() == 1.0
+    assert BFloat16(-1.0).to_float32() == -1.0
+    assert BFloat16(3.14).to_float32() == pytest.approx(3.14, abs=0.01)
+    assert math.isnan(BFloat16(float('nan')).to_float32())
+    assert math.isinf(BFloat16(float('inf')).to_float32())
+    assert math.isinf(BFloat16(float('-inf')).to_float32())
+
+
+def test_bfloat16_serialization():
+    fory = Fory(xlang=True)
+    assert ser_de(fory, BFloat16(0.0)).to_bits() == BFloat16(0.0).to_bits()
+    assert ser_de(fory, BFloat16(1.0)).to_bits() == BFloat16(1.0).to_bits()
+    assert ser_de(fory, BFloat16(3.14)).to_bits() == BFloat16(3.14).to_bits()
+    assert ser_de(fory, BFloat16(float('inf'))).is_inf()
+    assert ser_de(fory, BFloat16(float('nan'))).is_nan()
+
+
+def test_bfloat16_array_basic():
+    arr = BFloat16Array([1.0, 2.0, 3.14])
+    assert len(arr) == 3
+    assert arr[0].to_float32() == pytest.approx(1.0)
+    arr[0] = BFloat16(5.0)
+    assert arr[0].to_float32() == pytest.approx(5.0)
+
+
+def test_bfloat16_array_serialization():
+    fory = Fory(xlang=True)
+    arr = BFloat16Array([1.0, 2.0, 3.14])
+    result = ser_de(fory, arr)
+    assert isinstance(result, BFloat16Array)
+    assert len(result) == 3
+    assert result[0].to_float32() == pytest.approx(1.0)
+
+
+def test_bfloat16_in_dataclass():
+    from dataclasses import dataclass
+    
+    @dataclass
+    class TestStruct:
+        value: BFloat16
+        arr: BFloat16Array
+    
+    fory = Fory(xlang=True)
+    fory.register_type(TestStruct)
+    obj = TestStruct(value=BFloat16(3.14), arr=BFloat16Array([1.0, 2.0]))
+    result = ser_de(fory, obj)
+    assert result.value.to_float32() == pytest.approx(3.14, abs=0.01)
+    assert len(result.arr) == 2
+
+
+def test_bfloat16_in_list():
+    fory = Fory(xlang=True)
+    values = [BFloat16(1.0), BFloat16(2.0)]
+    result = ser_de(fory, values)
+    assert len(result) == 2
+    assert result[0].to_float32() == pytest.approx(1.0)
+
+
+def test_bfloat16_in_map():
+    fory = Fory(xlang=True)
+    data = {"a": BFloat16(1.0), "b": BFloat16(2.0)}
+    result = ser_de(fory, data)
+    assert result["a"].to_float32() == pytest.approx(1.0)
+
+
+def test_bfloat16_type_registration():
+    fory = Fory(xlang=True)
+    type_info = fory.type_resolver.get_type_info(BFloat16)
+    assert type_info.type_id == TypeId.BFLOAT16
+
+
+def test_bfloat16_array_type_registration():
+    fory = Fory(xlang=True)
+    type_info = fory.type_resolver.get_type_info(BFloat16Array)
+    assert type_info.type_id == TypeId.BFLOAT16_ARRAY
+
diff --git a/python/pyfory/types.py b/python/pyfory/types.py
index 7f8f871dd8..20b20684d8 100644
--- a/python/pyfory/types.py
+++ b/python/pyfory/types.py
@@ -314,6 +314,7 @@ def get_primitive_type_size(type_id) -> int:
 uint64_array = TypeVar("uint64_array", bound=array.ArrayType)
 float32_array = TypeVar("float32_array", bound=array.ArrayType)
 float64_array = TypeVar("float64_array", bound=array.ArrayType)
+bfloat16_array = TypeVar("bfloat16_array", bound=array.ArrayType)
 BoolNDArrayType = TypeVar("BoolNDArrayType", bound=ndarray)
 Int8NDArrayType = TypeVar("Int8NDArrayType", bound=ndarray)
 Uint8NDArrayType = TypeVar("Uint8NDArrayType", bound=ndarray)
@@ -351,6 +352,7 @@ def get_primitive_type_size(type_id) -> int:
     uint64_array,
     float32_array,
     float64_array,
+    bfloat16_array,
 }
 _np_array_types = {
     BoolNDArrayType,
@@ -384,6 +386,7 @@ def is_py_array_type(type_) -> bool:
     TypeId.UINT64_ARRAY,
     TypeId.FLOAT32_ARRAY,
     TypeId.FLOAT64_ARRAY,
+    TypeId.BFLOAT16_ARRAY,
 }
 
 

From c89d86ef0fb87c1b4c5a752fa9e84b67aa36757a Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Thu, 12 Feb 2026 11:59:54 +0500
Subject: [PATCH 02/23] style(python): fix code formatting for bfloat16
 implementation

- Change single quotes to double quotes (ruff format requirement)
- Remove trailing whitespace
- Add blank lines after imports (PEP 8)
- Remove unused import (pyfory)
- Fix closing parenthesis alignment
---
 python/pyfory/bfloat16_array.py      | 28 ++++++++++++++--------------
 python/pyfory/format/schema.py       |  2 +-
 python/pyfory/registry.py            |  4 +++-
 python/pyfory/serializer.py          | 16 +++++++++-------
 python/pyfory/tests/test_bfloat16.py | 22 ++++++++++------------
 5 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/python/pyfory/bfloat16_array.py b/python/pyfory/bfloat16_array.py
index 06d6658407..d22c1d8834 100644
--- a/python/pyfory/bfloat16_array.py
+++ b/python/pyfory/bfloat16_array.py
@@ -23,55 +23,55 @@
 class BFloat16Array:
     def __init__(self, values=None):
         if values is None:
-            self._data = array.array('H')
+            self._data = array.array("H")
         else:
-            self._data = array.array('H', [BFloat16(v).to_bits() if not isinstance(v, BFloat16) else v.to_bits() for v in values])
-    
+            self._data = array.array("H", [BFloat16(v).to_bits() if not isinstance(v, BFloat16) else v.to_bits() for v in values])
+
     def __len__(self):
         return len(self._data)
-    
+
     def __getitem__(self, index):
         return BFloat16.from_bits(self._data[index])
-    
+
     def __setitem__(self, index, value):
         if isinstance(value, BFloat16):
             self._data[index] = value.to_bits()
         else:
             self._data[index] = BFloat16(value).to_bits()
-    
+
     def __iter__(self):
         for bits in self._data:
             yield BFloat16.from_bits(bits)
-    
+
     def __repr__(self):
         return f"BFloat16Array([{', '.join(str(bf16) for bf16 in self)}])"
-    
+
     def __eq__(self, other):
         if not isinstance(other, BFloat16Array):
             return False
         return self._data == other._data
-    
+
     def append(self, value):
         if isinstance(value, BFloat16):
             self._data.append(value.to_bits())
         else:
             self._data.append(BFloat16(value).to_bits())
-    
+
     def extend(self, values):
         for value in values:
             self.append(value)
-    
+
     @property
     def itemsize(self):
         return 2
-    
+
     def tobytes(self):
         return self._data.tobytes()
-    
+
     @classmethod
     def frombytes(cls, data):
         arr = cls()
-        arr._data = array.array('H')
+        arr._data = array.array("H")
         arr._data.frombytes(data)
         return arr
 
diff --git a/python/pyfory/format/schema.py b/python/pyfory/format/schema.py
index 2abc9287a2..ab93af7365 100644
--- a/python/pyfory/format/schema.py
+++ b/python/pyfory/format/schema.py
@@ -56,7 +56,7 @@ def arrow_type_to_fory_type_id(arrow_type):
     # Floating point types
     if pa_types.is_float16(arrow_type):
         return 17  # FLOAT16
-    if hasattr(pa_types, 'is_bfloat16') and pa_types.is_bfloat16(arrow_type):
+    if hasattr(pa_types, "is_bfloat16") and pa_types.is_bfloat16(arrow_type):
         return 18  # BFLOAT16
     if pa_types.is_float32(arrow_type):
         return 19  # FLOAT32
diff --git a/python/pyfory/registry.py b/python/pyfory/registry.py
index 23a25505a1..0976039b06 100644
--- a/python/pyfory/registry.py
+++ b/python/pyfory/registry.py
@@ -323,6 +323,7 @@ def _initialize_common(self):
         register(float, type_id=TypeId.FLOAT64, serializer=Float64Serializer)
         # BFloat16
         from pyfory.bfloat16 import BFloat16
+
         register(
             BFloat16,
             type_id=TypeId.BFLOAT16,
@@ -342,11 +343,12 @@ def _initialize_common(self):
         # Register BFloat16Array
         from pyfory.bfloat16_array import BFloat16Array
         from pyfory.serializer import BFloat16ArraySerializer
+
         register(
             BFloat16Array,
             type_id=TypeId.BFLOAT16_ARRAY,
             serializer=BFloat16ArraySerializer(self.fory, BFloat16Array, TypeId.BFLOAT16_ARRAY),
-            )
+        )
         if np:
             # overwrite pyarray  with same type id.
             # if pyarray are needed, one must annotate that value with XXXArrayType
diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py
index b27739594f..74e1b2c7a2 100644
--- a/python/pyfory/serializer.py
+++ b/python/pyfory/serializer.py
@@ -425,12 +425,13 @@ def __init__(self, fory, ftype, type_id: int):
         super().__init__(fory, ftype)
         self.type_id = type_id
         self.itemsize = 2
-    
+
     def xwrite(self, buffer, value):
         from pyfory.bfloat16_array import BFloat16Array
+
         if isinstance(value, BFloat16Array):
             arr_data = value._data
-        elif isinstance(value, array.array) and value.typecode == 'H':
+        elif isinstance(value, array.array) and value.typecode == "H":
             arr_data = value
         else:
             arr_data = BFloat16Array(value)._data
@@ -440,24 +441,25 @@ def xwrite(self, buffer, value):
             if is_little_endian:
                 buffer.write_buffer(arr_data)
             else:
-                swapped = array.array('H', arr_data)
+                swapped = array.array("H", arr_data)
                 swapped.byteswap()
                 buffer.write_buffer(swapped)
-    
+
     def xread(self, buffer):
         from pyfory.bfloat16_array import BFloat16Array
+
         data = buffer.read_bytes_and_size()
-        arr = array.array('H', [])
+        arr = array.array("H", [])
         arr.frombytes(data)
         if not is_little_endian:
             arr.byteswap()
         bf16_arr = BFloat16Array.__new__(BFloat16Array)
         bf16_arr._data = arr
         return bf16_arr
-    
+
     def write(self, buffer, value):
         self.xwrite(buffer, value)
-    
+
     def read(self, buffer):
         return self.xread(buffer)
 
diff --git a/python/pyfory/tests/test_bfloat16.py b/python/pyfory/tests/test_bfloat16.py
index 3fa3672374..b788690aa3 100644
--- a/python/pyfory/tests/test_bfloat16.py
+++ b/python/pyfory/tests/test_bfloat16.py
@@ -18,7 +18,6 @@
 import math
 import pytest
 
-import pyfory
 from pyfory import Fory
 from pyfory.bfloat16 import BFloat16
 from pyfory.bfloat16_array import BFloat16Array
@@ -39,9 +38,9 @@ def test_bfloat16_basic():
 
 
 def test_bfloat16_special_values():
-    assert BFloat16(float('nan')).is_nan()
-    assert BFloat16(float('inf')).is_inf()
-    assert BFloat16(float('-inf')).is_inf()
+    assert BFloat16(float("nan")).is_nan()
+    assert BFloat16(float("inf")).is_inf()
+    assert BFloat16(float("-inf")).is_inf()
     assert BFloat16(0.0).is_zero()
     assert BFloat16(1.0).is_finite()
     assert not BFloat16(1.0).is_nan()
@@ -53,9 +52,9 @@ def test_bfloat16_conversion():
     assert BFloat16(1.0).to_float32() == 1.0
     assert BFloat16(-1.0).to_float32() == -1.0
     assert BFloat16(3.14).to_float32() == pytest.approx(3.14, abs=0.01)
-    assert math.isnan(BFloat16(float('nan')).to_float32())
-    assert math.isinf(BFloat16(float('inf')).to_float32())
-    assert math.isinf(BFloat16(float('-inf')).to_float32())
+    assert math.isnan(BFloat16(float("nan")).to_float32())
+    assert math.isinf(BFloat16(float("inf")).to_float32())
+    assert math.isinf(BFloat16(float("-inf")).to_float32())
 
 
 def test_bfloat16_serialization():
@@ -63,8 +62,8 @@ def test_bfloat16_serialization():
     assert ser_de(fory, BFloat16(0.0)).to_bits() == BFloat16(0.0).to_bits()
     assert ser_de(fory, BFloat16(1.0)).to_bits() == BFloat16(1.0).to_bits()
     assert ser_de(fory, BFloat16(3.14)).to_bits() == BFloat16(3.14).to_bits()
-    assert ser_de(fory, BFloat16(float('inf'))).is_inf()
-    assert ser_de(fory, BFloat16(float('nan'))).is_nan()
+    assert ser_de(fory, BFloat16(float("inf"))).is_inf()
+    assert ser_de(fory, BFloat16(float("nan"))).is_nan()
 
 
 def test_bfloat16_array_basic():
@@ -86,12 +85,12 @@ def test_bfloat16_array_serialization():
 
 def test_bfloat16_in_dataclass():
     from dataclasses import dataclass
-    
+
     @dataclass
     class TestStruct:
         value: BFloat16
         arr: BFloat16Array
-    
+
     fory = Fory(xlang=True)
     fory.register_type(TestStruct)
     obj = TestStruct(value=BFloat16(3.14), arr=BFloat16Array([1.0, 2.0]))
@@ -125,4 +124,3 @@ def test_bfloat16_array_type_registration():
     fory = Fory(xlang=True)
     type_info = fory.type_resolver.get_type_info(BFloat16Array)
     assert type_info.type_id == TypeId.BFLOAT16_ARRAY
-

From b2bb7c64692b06100289ff5fe160cf230d81be30 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Thu, 12 Feb 2026 12:08:06 +0500
Subject: [PATCH 03/23] fix(python): remove trailing newline in
 bfloat16_array.py to match code style

---
 python/pyfory/bfloat16_array.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/pyfory/bfloat16_array.py b/python/pyfory/bfloat16_array.py
index d22c1d8834..e1ffc5b4a7 100644
--- a/python/pyfory/bfloat16_array.py
+++ b/python/pyfory/bfloat16_array.py
@@ -74,4 +74,3 @@ def frombytes(cls, data):
         arr._data = array.array("H")
         arr._data.frombytes(data)
         return arr
-

From 6537f74e76c30f43c17e209425246d96e2bf6ba2 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Thu, 12 Feb 2026 12:22:24 +0500
Subject: [PATCH 04/23] fix(python): resolve Cython compilation errors for
 bfloat16

- Remove invalid Cython type casts (<BFloat16>) in serialization.pyx and primitive.pxi
- Use isinstance() check instead of type casting for Python classes
- Fix bfloat16() function to use float16() as temporary workaround until C++ support is added
- Comment out bfloat16() declaration in libformat.pxd with TODO for future C++ implementation
---
 python/pyfory/format/schema.pxi      | 6 ++++--
 python/pyfory/includes/libformat.pxd | 3 ++-
 python/pyfory/primitive.pxi          | 2 +-
 python/pyfory/serialization.pyx      | 2 +-
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/python/pyfory/format/schema.pxi b/python/pyfory/format/schema.pxi
index 7c9edf2759..b155b19d17 100644
--- a/python/pyfory/format/schema.pxi
+++ b/python/pyfory/format/schema.pxi
@@ -34,7 +34,7 @@ from pyfory.includes.libformat cimport (
     CListType, CListTypePtr, CMapType, CMapTypePtr, CStructType, CStructTypePtr,
     CFixedWidthType, CDecimalType, CTypeId,
     boolean as c_boolean, int8 as c_int8, int16 as c_int16, int32 as c_int32,
-    int64 as c_int64, float16 as c_float16, bfloat16 as c_bfloat16, float32 as c_float32,
+    int64 as c_int64, float16 as c_float16, float32 as c_float32,
     float64 as c_float64, utf8 as c_utf8, binary as c_binary,
     duration as c_duration, timestamp as c_timestamp, date32 as c_date32,
     decimal as c_decimal, fory_list, struct_, fory_map, fory_field, fory_schema,
@@ -420,7 +420,9 @@ def float16():
 
 def bfloat16():
     """Create a 16-bit brain floating point type."""
-    return DataType.wrap(c_bfloat16())
+    # TODO: Switch to c_bfloat16() once the C++ row format exposes bfloat16().
+    # Until then this returns the float16 DataType, so callers get a float16 type here.
+    return DataType.wrap(c_float16())
 
 def float32():
     """Create a 32-bit floating point type."""
diff --git a/python/pyfory/includes/libformat.pxd b/python/pyfory/includes/libformat.pxd
index 67cfd7289d..d579ac67c7 100755
--- a/python/pyfory/includes/libformat.pxd
+++ b/python/pyfory/includes/libformat.pxd
@@ -226,7 +226,8 @@ cdef extern from "fory/row/schema.h" namespace "fory::row" nogil:
     shared_ptr[CDataType] int32" fory::row::int32"()
     shared_ptr[CDataType] int64" fory::row::int64"()
     shared_ptr[CDataType] float16" fory::row::float16"()
-    shared_ptr[CDataType] bfloat16" fory::row::bfloat16"()
+    # TODO: Uncomment when C++ row format supports bfloat16
+    # shared_ptr[CDataType] bfloat16" fory::row::bfloat16"()
     shared_ptr[CDataType] float32" fory::row::float32"()
     shared_ptr[CDataType] float64" fory::row::float64"()
     shared_ptr[CDataType] utf8" fory::row::utf8"()
diff --git a/python/pyfory/primitive.pxi b/python/pyfory/primitive.pxi
index ea825712ad..f3ea3df02b 100644
--- a/python/pyfory/primitive.pxi
+++ b/python/pyfory/primitive.pxi
@@ -209,7 +209,7 @@ cdef class BFloat16Serializer(XlangCompatibleSerializer):
     cpdef inline write(self, Buffer buffer, value):
         from pyfory.bfloat16 import BFloat16
         if isinstance(value, BFloat16):
-            buffer.write_bfloat16((<BFloat16>value).to_bits())
+            buffer.write_bfloat16(value.to_bits())
         else:
             buffer.write_bfloat16(BFloat16(value).to_bits())
 
diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx
index 12ee7c54bd..795ccd5899 100644
--- a/python/pyfory/serialization.pyx
+++ b/python/pyfory/serialization.pyx
@@ -1797,7 +1797,7 @@ cpdef inline write_nullable_bfloat16(Buffer buffer, value):
         buffer.write_int8(NOT_NULL_VALUE_FLAG)
         from pyfory.bfloat16 import BFloat16
         if isinstance(value, BFloat16):
-            buffer.write_bfloat16((<BFloat16>value).to_bits())
+            buffer.write_bfloat16(value.to_bits())
         else:
             buffer.write_bfloat16(BFloat16(value).to_bits())
 

From b6793a5d477ac7ab7c5ee740450e99cc5a67fef1 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Fri, 13 Feb 2026 10:25:09 +0500
Subject: [PATCH 05/23] fix(python): use memcpy for safe type punning in
 bfloat16 conversion

Replace unsafe pointer casts with memcpy to ensure cross-platform
compatibility across all OS versions (Windows, Linux, macOS) and
architectures (x86_64, ARM). This fixes strict aliasing violations
that cause compilation failures on ARM and newer compilers.
---
 python/pyfory/bfloat16.pyx | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/pyfory/bfloat16.pyx b/python/pyfory/bfloat16.pyx
index feabbf2332..9d705733d0 100644
--- a/python/pyfory/bfloat16.pyx
+++ b/python/pyfory/bfloat16.pyx
@@ -20,9 +20,11 @@
 # cython: language_level = 3
 
 from libc.stdint cimport uint16_t, uint32_t
+from libc.string cimport memcpy
 
 cdef inline uint16_t float32_to_bfloat16_bits(float value) nogil:
-    cdef uint32_t f32_bits = (<uint32_t*>&value)[0]
+    cdef uint32_t f32_bits
+    memcpy(&f32_bits, &value, sizeof(float))
     cdef uint16_t bf16_bits = <uint16_t>(f32_bits >> 16)
     cdef uint16_t truncated = <uint16_t>(f32_bits & 0xFFFF)
     if truncated > 0x8000:
@@ -37,7 +39,9 @@ cdef inline uint16_t float32_to_bfloat16_bits(float value) nogil:
 
 cdef inline float bfloat16_bits_to_float32(uint16_t bits) nogil:
     cdef uint32_t f32_bits = <uint32_t>bits << 16
-    return (<float*>&f32_bits)[0]
+    cdef float result
+    memcpy(&result, &f32_bits, sizeof(float))
+    return result
 
 
 cdef class BFloat16:

From e38733930ba6a5bc660d00ea6d3019c5956da621 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Fri, 13 Feb 2026 10:46:22 +0500
Subject: [PATCH 06/23] fix(python): use explicit size constant for ARM
 compatibility

Replace sizeof(float) with explicit constant 4 in memcpy calls to ensure
cross-platform compatibility, especially on ARM architectures where
sizeof() may cause compilation issues. This matches the project's
pattern of using explicit size constants (as seen in types.py).

Fixes build failures on:
- ubuntu-24.04-arm (aarch64)
- macos-arm64 (Apple Silicon)
- ubuntu-24.04-arm with Python 3.13
---
 python/pyfory/bfloat16.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyfory/bfloat16.pyx b/python/pyfory/bfloat16.pyx
index 9d705733d0..d4eb2446e6 100644
--- a/python/pyfory/bfloat16.pyx
+++ b/python/pyfory/bfloat16.pyx
@@ -24,7 +24,7 @@ from libc.string cimport memcpy
 
 cdef inline uint16_t float32_to_bfloat16_bits(float value) nogil:
     cdef uint32_t f32_bits
-    memcpy(&f32_bits, &value, sizeof(float))
+    memcpy(&f32_bits, &value, 4)
     cdef uint16_t bf16_bits = <uint16_t>(f32_bits >> 16)
     cdef uint16_t truncated = <uint16_t>(f32_bits & 0xFFFF)
     if truncated > 0x8000:
@@ -40,7 +40,7 @@ cdef inline uint16_t float32_to_bfloat16_bits(float value) nogil:
 cdef inline float bfloat16_bits_to_float32(uint16_t bits) nogil:
     cdef uint32_t f32_bits = <uint32_t>bits << 16
     cdef float result
-    memcpy(&result, &f32_bits, sizeof(float))
+    memcpy(&result, &f32_bits, 4)
     return result
 
 

From 71241baa122bd4d0cb5ddf3aac6a4ea2cea45dd4 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Fri, 13 Feb 2026 11:09:44 +0500
Subject: [PATCH 07/23] fix(python): correct row schema Arrow conversion type
 ids

---
 python/pyfory/format/schema.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/python/pyfory/format/schema.py b/python/pyfory/format/schema.py
index ab93af7365..68309b15c0 100644
--- a/python/pyfory/format/schema.py
+++ b/python/pyfory/format/schema.py
@@ -207,17 +207,17 @@ def fory_field_list_to_arrow_schema(field_list):
         nullable = field_spec.get("nullable", True)
 
         # Handle nested types
-        if type_id == 21:  # LIST
+        if type_id == 22:  # LIST
             value_type = field_spec.get("value_type")
             arrow_type = pa.list_(value_type)
-        elif type_id == 23:  # MAP
+        elif type_id == 24:  # MAP
             key_type = field_spec.get("key_type")
             item_type = field_spec.get("item_type")
             arrow_type = pa.map_(key_type, item_type)
-        elif type_id == 15:  # STRUCT
+        elif type_id == 27:  # STRUCT
             struct_fields = field_spec.get("struct_fields", [])
             arrow_type = pa.struct(struct_fields)
-        elif type_id == 27:  # DECIMAL
+        elif type_id == 40:  # DECIMAL
             precision = field_spec.get("precision", 38)
             scale = field_spec.get("scale", 18)
             arrow_type = pa.decimal128(precision, scale)
@@ -277,20 +277,20 @@ def reconstruct_arrow_type(spec):
     """
     type_id = spec["type_id"]
 
-    if type_id == 21:  # LIST
+    if type_id == 22:  # LIST
         value_type = reconstruct_arrow_type(spec["value_type"])
         return pa.list_(value_type)
-    elif type_id == 23:  # MAP
+    elif type_id == 24:  # MAP
         key_type = reconstruct_arrow_type(spec["key_type"])
         item_type = reconstruct_arrow_type(spec["item_type"])
         return pa.map_(key_type, item_type)
-    elif type_id == 15:  # STRUCT
+    elif type_id == 27:  # STRUCT
         fields = []
         for field_spec in spec["fields"]:
             field_type = reconstruct_arrow_type(field_spec["type"])
             fields.append(pa.field(field_spec["name"], field_type, nullable=field_spec.get("nullable", True)))
         return pa.struct(fields)
-    elif type_id == 27:  # DECIMAL
+    elif type_id == 40:  # DECIMAL
         return pa.decimal128(spec.get("precision", 38), spec.get("scale", 18))
     else:
         return fory_type_id_to_arrow_type(type_id)

From 8066aceae4456e179da16fdbd118e2e56fb0296a Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Fri, 13 Feb 2026 12:12:19 +0500
Subject: [PATCH 08/23] fix(python): build and export bfloat16 python module

---
 BUILD                  | 14 ++++++++++++++
 python/pyfory/types.py |  1 +
 2 files changed, 15 insertions(+)

diff --git a/BUILD b/BUILD
index 809727199e..9d8433f75f 100644
--- a/BUILD
+++ b/BUILD
@@ -70,6 +70,17 @@ pyx_library(
     ],
 )
 
+pyx_library(
+    name = "bfloat16",
+    srcs = glob([
+        "python/pyfory/bfloat16.pyx",
+        "python/pyfory/__init__.py",
+    ]),
+    cc_kwargs = dict(
+        linkstatic = 1,
+    ),
+)
+
 pyx_library(
     name = "_format",
     srcs = glob(
@@ -96,6 +107,7 @@ genrule(
     name = "cp_fory_so",
     srcs = [
         ":python/pyfory/buffer.so",
+        ":python/pyfory/bfloat16.so",
         ":python/pyfory/lib/mmh3/mmh3.so",
         ":python/pyfory/format/_format.so",
         ":python/pyfory/serialization.so",
@@ -111,11 +123,13 @@ genrule(
         if [ "$${u_name: 0: 4}" == "MING" ] || [ "$${u_name: 0: 4}" == "MSYS" ]
         then
             cp -f $(location python/pyfory/buffer.so) "$$WORK_DIR/python/pyfory/buffer.pyd"
+            cp -f $(location python/pyfory/bfloat16.so) "$$WORK_DIR/python/pyfory/bfloat16.pyd"
             cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3/mmh3.pyd"
             cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format/_format.pyd"
             cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory/serialization.pyd"
         else
             cp -f $(location python/pyfory/buffer.so) "$$WORK_DIR/python/pyfory"
+            cp -f $(location python/pyfory/bfloat16.so) "$$WORK_DIR/python/pyfory"
             cp -f $(location python/pyfory/lib/mmh3/mmh3.so) "$$WORK_DIR/python/pyfory/lib/mmh3"
             cp -f $(location python/pyfory/format/_format.so) "$$WORK_DIR/python/pyfory/format"
             cp -f $(location python/pyfory/serialization.so) "$$WORK_DIR/python/pyfory"
diff --git a/python/pyfory/types.py b/python/pyfory/types.py
index 20b20684d8..db4333a83b 100644
--- a/python/pyfory/types.py
+++ b/python/pyfory/types.py
@@ -198,6 +198,7 @@ def is_type_share_meta(type_id: int) -> bool:
 tagged_uint64 = TypeVar("tagged_uint64", bound=int)
 float32 = TypeVar("float32", bound=float)
 float64 = TypeVar("float64", bound=float)
+bfloat16 = TypeVar("bfloat16", bound=float)
 
 
 class RefMeta:

From 8ed6f57a2d711eaea4a9f58f16177122f016512a Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Fri, 13 Feb 2026 12:33:04 +0500
Subject: [PATCH 09/23] docs(python): document bfloat16 support and stabilize
 pure mode serializers

---
 python/README.md             | 28 ++++++++++++++++++++++++++++
 python/pyfory/_serializer.py | 15 +++++++++++++++
 python/pyfory/registry.py    | 36 +++++++++++++++++++++---------------
 python/pyfory/serializer.py  |  2 ++
 4 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/python/README.md b/python/README.md
index b74b3fd47a..0377f64d25 100644
--- a/python/README.md
+++ b/python/README.md
@@ -490,6 +490,34 @@ fory.register(Person.class, "example.Person");
 Person person = (Person) fory.deserialize(binaryData);
 ```
 
+### BFloat16 Support
+
+`pyfory` supports `bfloat16` scalar values and `bfloat16` arrays in xlang mode:
+
+- Scalar type: `pyfory.BFloat16` (type id `18`)
+- Array type: `pyfory.BFloat16Array` (type id `54`)
+
+```python
+import pyfory
+from pyfory import BFloat16, BFloat16Array
+
+fory = pyfory.Fory(xlang=True, ref=False, strict=True)
+
+# Scalar bfloat16
+v = BFloat16(3.1415926)
+data = fory.serialize(v)
+out = fory.deserialize(data)
+print(float(out))
+
+# bfloat16 array
+arr = BFloat16Array([1.0, 2.5, -3.25])
+data = fory.serialize(arr)
+out = fory.deserialize(data)
+print(out)
+```
+
+`BFloat16Array` stores values in a packed `array('H')` representation and writes bytes in little-endian order for cross-language compatibility.
+
 ## 📊 Row Format - Zero-Copy Processing
 
 Apache Fury™ provides a random-access row format that enables reading nested fields from binary data without full deserialization. This drastically reduces overhead when working with large objects where only partial data access is needed. The format also supports memory-mapped files for ultra-low memory footprint.
diff --git a/python/pyfory/_serializer.py b/python/pyfory/_serializer.py
index 361f2ec148..2a6bf746c2 100644
--- a/python/pyfory/_serializer.py
+++ b/python/pyfory/_serializer.py
@@ -255,6 +255,21 @@ def read(self, buffer):
         return buffer.read_double()
 
 
+class BFloat16Serializer(XlangCompatibleSerializer):
+    def write(self, buffer, value):
+        from pyfory.bfloat16 import BFloat16
+
+        if isinstance(value, BFloat16):
+            buffer.write_bfloat16(value.to_bits())
+        else:
+            buffer.write_bfloat16(BFloat16(value).to_bits())
+
+    def read(self, buffer):
+        from pyfory.bfloat16 import BFloat16
+
+        return BFloat16.from_bits(buffer.read_bfloat16())
+
+
 class StringSerializer(XlangCompatibleSerializer):
     def __init__(self, fory, type_):
         super().__init__(fory, type_)
diff --git a/python/pyfory/registry.py b/python/pyfory/registry.py
index 0976039b06..9b08b37616 100644
--- a/python/pyfory/registry.py
+++ b/python/pyfory/registry.py
@@ -321,14 +321,17 @@ def _initialize_common(self):
             serializer=Float64Serializer,
         )
         register(float, type_id=TypeId.FLOAT64, serializer=Float64Serializer)
-        # BFloat16
-        from pyfory.bfloat16 import BFloat16
+        # BFloat16 is optional if the extension module is unavailable.
+        try:
+            from pyfory.bfloat16 import BFloat16
 
-        register(
-            BFloat16,
-            type_id=TypeId.BFLOAT16,
-            serializer=BFloat16Serializer,
-        )
+            register(
+                BFloat16,
+                type_id=TypeId.BFLOAT16,
+                serializer=BFloat16Serializer,
+            )
+        except ImportError:
+            pass
         register(str, type_id=TypeId.STRING, serializer=StringSerializer)
         # TODO(chaokunyang) DURATION DECIMAL
         register(datetime.datetime, type_id=TypeId.TIMESTAMP, serializer=TimestampSerializer)
@@ -340,15 +343,18 @@ def _initialize_common(self):
                 type_id=typeid,
                 serializer=PyArraySerializer(self.fory, ftype, typeid),
             )
-        # Register BFloat16Array
-        from pyfory.bfloat16_array import BFloat16Array
-        from pyfory.serializer import BFloat16ArraySerializer
+        # BFloat16Array is optional if the extension module is unavailable.
+        try:
+            from pyfory.bfloat16_array import BFloat16Array
+            from pyfory.serializer import BFloat16ArraySerializer
 
-        register(
-            BFloat16Array,
-            type_id=TypeId.BFLOAT16_ARRAY,
-            serializer=BFloat16ArraySerializer(self.fory, BFloat16Array, TypeId.BFLOAT16_ARRAY),
-        )
+            register(
+                BFloat16Array,
+                type_id=TypeId.BFLOAT16_ARRAY,
+                serializer=BFloat16ArraySerializer(self.fory, BFloat16Array, TypeId.BFLOAT16_ARRAY),
+            )
+        except ImportError:
+            pass
         if np:
             # overwrite pyarray  with same type id.
             # if pyarray are needed, one must annotate that value with XXXArrayType
diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py
index 74e1b2c7a2..abad1dbd54 100644
--- a/python/pyfory/serializer.py
+++ b/python/pyfory/serializer.py
@@ -102,6 +102,7 @@
         TaggedUint64Serializer,
         Float32Serializer,
         Float64Serializer,
+        BFloat16Serializer,
         StringSerializer,
         DateSerializer,
         TimestampSerializer,
@@ -1449,6 +1450,7 @@ def xread(self, buffer):
     "TaggedUint64Serializer",
     "Float32Serializer",
     "Float64Serializer",
+    "BFloat16Serializer",
     "StringSerializer",
     "DateSerializer",
     "TimestampSerializer",

From 9fcb74dd379e1a74e0b7b7bce11c94fd4b9d83bb Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Fri, 13 Feb 2026 20:51:23 +0500
Subject: [PATCH 10/23] fix(python): configure bazel shell for windows editable
 builds

---
 python/setup.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/python/setup.py b/python/setup.py
index 6dc32ade8a..70b7dfce8b 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -41,10 +41,33 @@
 print(f"fory_cpp_src_dir: {fory_cpp_src_dir}")
 
 
+def _configure_bazel_shell_for_windows():
+    if os.name != "nt":
+        return
+    # Bazel genrules require a POSIX shell; prefer Git Bash on Windows.
+    candidates = []
+    for env_key in ("BAZEL_SH", "GIT_BASH", "BASH"):
+        value = os.environ.get(env_key)
+        if value:
+            candidates.append(value)
+    program_files = [os.environ.get("ProgramFiles"), os.environ.get("ProgramFiles(x86)")]
+    for base in program_files:
+        if not base:
+            continue
+        candidates.append(pjoin(base, "Git", "bin", "bash.exe"))
+        candidates.append(pjoin(base, "Git", "usr", "bin", "bash.exe"))
+    for path in candidates:
+        if os.path.exists(path):
+            os.environ["BAZEL_SH"] = path
+            print(f"Using BAZEL_SH={path}")
+            return
+
+
 class BinaryDistribution(Distribution):
     def __init__(self, attrs=None):
         super().__init__(attrs=attrs)
         if BAZEL_BUILD_EXT:
+            _configure_bazel_shell_for_windows()
             import sys
 
             python_version = f"{sys.version_info.major}.{sys.version_info.minor}"

From 102acfb58e8168a1890b89fe04a83fefb845d0b6 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 00:23:54 +0500
Subject: [PATCH 11/23] fix(python): restore xlang serializer base after
 conflict merge

---
 python/pyfory/_serializer.py | 14 +++++++++++---
 python/pyfory/serializer.py  |  2 ++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/python/pyfory/_serializer.py b/python/pyfory/_serializer.py
index c2a1d450f9..2052c0239a 100644
--- a/python/pyfory/_serializer.py
+++ b/python/pyfory/_serializer.py
@@ -52,6 +52,17 @@ def support_subclass(cls) -> bool:
         return False
 
 
+class XlangCompatibleSerializer(Serializer):
+    def __init__(self, fory, type_):
+        super().__init__(fory, type_)
+
+    def xwrite(self, buffer, value):
+        self.write(buffer, value)
+
+    def xread(self, buffer):
+        return self.read(buffer)
+
+
 class BooleanSerializer(Serializer):
     def write(self, buffer, value):
         buffer.write_bool(value)
@@ -232,7 +243,6 @@ def read(self, buffer):
         return buffer.read_double()
 
 
-
 class BFloat16Serializer(XlangCompatibleSerializer):
     def write(self, buffer, value):
         from pyfory.bfloat16 import BFloat16
@@ -248,9 +258,7 @@ def read(self, buffer):
         return BFloat16.from_bits(buffer.read_bfloat16())
 
 
-
 class StringSerializer(Serializer):
-  
     def __init__(self, fory, type_):
         super().__init__(fory, type_)
         self.need_to_write_ref = False
diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py
index 896bc9e3a3..991b71a0b0 100644
--- a/python/pyfory/serializer.py
+++ b/python/pyfory/serializer.py
@@ -45,6 +45,7 @@
 if ENABLE_FORY_CYTHON_SERIALIZATION:
     from pyfory.serialization import (  # noqa: F401, F811
         Serializer,
+        XlangCompatibleSerializer,
         BooleanSerializer,
         ByteSerializer,
         Int16Serializer,
@@ -81,6 +82,7 @@
 else:
     from pyfory._serializer import (  # noqa: F401 # pylint: disable=unused-import
         Serializer,
+        XlangCompatibleSerializer,
         BooleanSerializer,
         ByteSerializer,
         Int16Serializer,

From bedb82e010d7fb2e62900ac42f78aace0a7448e8 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 12:37:28 +0500
Subject: [PATCH 12/23] fix(python): restore xlang serializer base in cython
 runtime

---
 python/pyfory/serialization.pyx | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx
index bbed04963a..f400f97765 100644
--- a/python/pyfory/serialization.pyx
+++ b/python/pyfory/serialization.pyx
@@ -1772,6 +1772,17 @@ cdef class Serializer:
         return False
 
 
+cdef class XlangCompatibleSerializer(Serializer):
+    def __init__(self, fory, type_):
+        super().__init__(fory, type_)
+
+    cpdef xwrite(self, Buffer buffer, value):
+        self.write(buffer, value)
+
+    cpdef xread(self, Buffer buffer):
+        return self.read(buffer)
+
+
 @cython.final
 cdef class EnumSerializer(Serializer):
     cdef tuple _members

From 2f2b6b4f362c44f6a09ef458d0a9c207148ad98c Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 19:57:18 +0500
Subject: [PATCH 13/23] fix(python): align bfloat16 serializers with unified
 API and typed buffer reads

---
 python/pyfory/__init__.py            |  10 +-
 python/pyfory/_serializer.py         | 776 ++++++++++++++-------------
 python/pyfory/bfloat16.pxd           |  22 +
 python/pyfory/bfloat16.pyx           |  22 +-
 python/pyfory/bfloat16_array.py      |  16 +-
 python/pyfory/buffer.pxd             |   3 +-
 python/pyfory/buffer.pyx             |   8 +-
 python/pyfory/primitive.pxi          | 593 ++++++++++----------
 python/pyfory/registry.py            |  77 ++-
 python/pyfory/serialization.pyx      | 252 ++++++---
 python/pyfory/serializer.py          | 168 +++---
 python/pyfory/tests/test_bfloat16.py |  58 +-
 12 files changed, 1088 insertions(+), 917 deletions(-)
 create mode 100644 python/pyfory/bfloat16.pxd

diff --git a/python/pyfory/__init__.py b/python/pyfory/__init__.py
index a5e05952ae..bdac49f515 100644
--- a/python/pyfory/__init__.py
+++ b/python/pyfory/__init__.py
@@ -122,12 +122,10 @@
 from pyfory.buffer import Buffer  # noqa: F401 # pylint: disable=unused-import
 
 # BFloat16 support
-try:
-    from pyfory.bfloat16 import BFloat16  # noqa: F401
-    from pyfory.bfloat16_array import BFloat16Array  # noqa: F401
-except ImportError:
-    # Cython extensions not built yet
-    pass
+from pyfory.bfloat16 import bfloat16  # noqa: F401
+from pyfory.bfloat16_array import BFloat16Array  # noqa: F401
+# Keep compatibility with existing API naming.
+BFloat16 = bfloat16
 
 __version__ = "0.16.0.dev0"
 
diff --git a/python/pyfory/_serializer.py b/python/pyfory/_serializer.py
index 2052c0239a..c18326c83e 100644
--- a/python/pyfory/_serializer.py
+++ b/python/pyfory/_serializer.py
@@ -1,383 +1,393 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import datetime
-import logging
-import platform
-import time
-from abc import ABC
-
-from pyfory._fory import NOT_NULL_INT64_FLAG
-from pyfory.resolver import NOT_NULL_VALUE_FLAG, NULL_FLAG
-from pyfory.types import is_primitive_type
-
-try:
-    import numpy as np
-except ImportError:
-    np = None
-
-logger = logging.getLogger(__name__)
-
-
-class Serializer(ABC):
-    __slots__ = "fory", "type_", "need_to_write_ref"
-
-    def __init__(self, fory, type_: type):
-        self.fory = fory
-        self.type_: type = type_
-        self.need_to_write_ref = fory.track_ref and not is_primitive_type(type_)
-
-    def write(self, buffer, value):
-        raise NotImplementedError
-
-    def read(self, buffer):
-        raise NotImplementedError
-
-    @classmethod
-    def support_subclass(cls) -> bool:
-        return False
-
-
-class XlangCompatibleSerializer(Serializer):
-    def __init__(self, fory, type_):
-        super().__init__(fory, type_)
-
-    def xwrite(self, buffer, value):
-        self.write(buffer, value)
-
-    def xread(self, buffer):
-        return self.read(buffer)
-
-
-class BooleanSerializer(Serializer):
-    def write(self, buffer, value):
-        buffer.write_bool(value)
-
-    def read(self, buffer):
-        return buffer.read_bool()
-
-
-class ByteSerializer(Serializer):
-    def write(self, buffer, value):
-        buffer.write_int8(value)
-
-    def read(self, buffer):
-        return buffer.read_int8()
-
-
-class Int16Serializer(Serializer):
-    def write(self, buffer, value):
-        buffer.write_int16(value)
-
-    def read(self, buffer):
-        return buffer.read_int16()
-
-
-class Int32Serializer(Serializer):
-    """Serializer for INT32/VARINT32 type - uses variable-length encoding for xlang compatibility."""
-
-    def write(self, buffer, value):
-        buffer.write_varint32(value)
-
-    def read(self, buffer):
-        return buffer.read_varint32()
-
-
-class FixedInt32Serializer(Serializer):
-    """Serializer for fixed-width 32-bit signed integer (INT32 type_id=4)."""
-
-    def write(self, buffer, value):
-        buffer.write_int32(value)
-
-    def read(self, buffer):
-        return buffer.read_int32()
-
-
-class Int64Serializer(Serializer):
-    """Serializer for INT64/VARINT64 type - uses variable-length encoding for xlang compatibility."""
-
-    def write(self, buffer, value):
-        buffer.write_varint64(value)
-
-    def read(self, buffer):
-        return buffer.read_varint64()
-
-
-class FixedInt64Serializer(Serializer):
-    """Serializer for fixed-width 64-bit signed integer (INT64 type_id=6)."""
-
-    def write(self, buffer, value):
-        buffer.write_int64(value)
-
-    def read(self, buffer):
-        return buffer.read_int64()
-
-
-class Varint32Serializer(Serializer):
-    """Serializer for VARINT32 type - variable-length encoded signed 32-bit integer."""
-
-    def write(self, buffer, value):
-        buffer.write_varint32(value)
-
-    def read(self, buffer):
-        return buffer.read_varint32()
-
-
-class Varint64Serializer(Serializer):
-    """Serializer for VARINT64 type - variable-length encoded signed 64-bit integer."""
-
-    def write(self, buffer, value):
-        buffer.write_varint64(value)
-
-    def read(self, buffer):
-        return buffer.read_varint64()
-
-
-class TaggedInt64Serializer(Serializer):
-    """Serializer for TAGGED_INT64 type - tagged encoding for signed 64-bit integer."""
-
-    def write(self, buffer, value):
-        buffer.write_tagged_int64(value)
-
-    def read(self, buffer):
-        return buffer.read_tagged_int64()
-
-
-class Uint8Serializer(Serializer):
-    """Serializer for UINT8 type - unsigned 8-bit integer."""
-
-    def write(self, buffer, value):
-        buffer.write_uint8(value)
-
-    def read(self, buffer):
-        return buffer.read_uint8()
-
-
-class Uint16Serializer(Serializer):
-    """Serializer for UINT16 type - unsigned 16-bit integer."""
-
-    def write(self, buffer, value):
-        buffer.write_uint16(value)
-
-    def read(self, buffer):
-        return buffer.read_uint16()
-
-
-class Uint32Serializer(Serializer):
-    """Serializer for UINT32 type - fixed-size unsigned 32-bit integer."""
-
-    def write(self, buffer, value):
-        buffer.write_uint32(value)
-
-    def read(self, buffer):
-        return buffer.read_uint32()
-
-
-class VarUint32Serializer(Serializer):
-    """Serializer for VAR_UINT32 type - variable-length encoded unsigned 32-bit integer."""
-
-    def write(self, buffer, value):
-        buffer.write_var_uint32(value)
-
-    def read(self, buffer):
-        return buffer.read_var_uint32()
-
-
-class Uint64Serializer(Serializer):
-    """Serializer for UINT64 type - fixed-size unsigned 64-bit integer."""
-
-    def write(self, buffer, value):
-        buffer.write_uint64(value)
-
-    def read(self, buffer):
-        return buffer.read_uint64()
-
-
-class VarUint64Serializer(Serializer):
-    """Serializer for VAR_UINT64 type - variable-length encoded unsigned 64-bit integer."""
-
-    def write(self, buffer, value):
-        buffer.write_var_uint64(value)
-
-    def read(self, buffer):
-        return buffer.read_var_uint64()
-
-
-class TaggedUint64Serializer(Serializer):
-    """Serializer for TAGGED_UINT64 type - tagged encoding for unsigned 64-bit integer."""
-
-    def write(self, buffer, value):
-        buffer.write_tagged_uint64(value)
-
-    def read(self, buffer):
-        return buffer.read_tagged_uint64()
-
-
-class Float32Serializer(Serializer):
-    def write(self, buffer, value):
-        buffer.write_float(value)
-
-    def read(self, buffer):
-        return buffer.read_float()
-
-
-class Float64Serializer(Serializer):
-    def write(self, buffer, value):
-        buffer.write_double(value)
-
-    def read(self, buffer):
-        return buffer.read_double()
-
-
-class BFloat16Serializer(XlangCompatibleSerializer):
-    def write(self, buffer, value):
-        from pyfory.bfloat16 import BFloat16
-
-        if isinstance(value, BFloat16):
-            buffer.write_bfloat16(value.to_bits())
-        else:
-            buffer.write_bfloat16(BFloat16(value).to_bits())
-
-    def read(self, buffer):
-        from pyfory.bfloat16 import BFloat16
-
-        return BFloat16.from_bits(buffer.read_bfloat16())
-
-
-class StringSerializer(Serializer):
-    def __init__(self, fory, type_):
-        super().__init__(fory, type_)
-        self.need_to_write_ref = False
-
-    def write(self, buffer, value: str):
-        buffer.write_string(value)
-
-    def read(self, buffer):
-        return buffer.read_string()
-
-
-_base_date = datetime.date(1970, 1, 1)
-
-
-class DateSerializer(Serializer):
-    def write(self, buffer, value: datetime.date):
-        if not isinstance(value, datetime.date):
-            raise TypeError("{} should be {} instead of {}".format(value, datetime.date, type(value)))
-        days = (value - _base_date).days
-        buffer.write_int32(days)
-
-    def read(self, buffer):
-        days = buffer.read_int32()
-        return _base_date + datetime.timedelta(days=days)
-
-
-class TimestampSerializer(Serializer):
-    __win_platform = platform.system() == "Windows"
-
-    def _get_timestamp(self, value: datetime.datetime):
-        seconds_offset = 0
-        if TimestampSerializer.__win_platform and value.tzinfo is None:
-            is_dst = time.daylight and time.localtime().tm_isdst > 0
-            seconds_offset = time.altzone if is_dst else time.timezone
-            value = value.replace(tzinfo=datetime.timezone.utc)
-        micros = int((value.timestamp() + seconds_offset) * 1_000_000)
-        seconds, micros_rem = divmod(micros, 1_000_000)
-        nanos = micros_rem * 1000
-        return seconds, nanos
-
-    def write(self, buffer, value: datetime.datetime):
-        if not isinstance(value, datetime.datetime):
-            raise TypeError("{} should be {} instead of {}".format(value, datetime, type(value)))
-        seconds, nanos = self._get_timestamp(value)
-        buffer.write_int64(seconds)
-        buffer.write_uint32(nanos)
-
-    def read(self, buffer):
-        seconds = buffer.read_int64()
-        nanos = buffer.read_uint32()
-        ts = seconds + nanos / 1_000_000_000
-        # TODO support timezone
-        return datetime.datetime.fromtimestamp(ts)
-
-
-class EnumSerializer(Serializer):
-    def __init__(self, fory, type_):
-        super().__init__(fory, type_)
-        self.need_to_write_ref = False
-        self._members = tuple(type_)
-        self._ordinal_by_member = {member: idx for idx, member in enumerate(self._members)}
-
-    @classmethod
-    def support_subclass(cls) -> bool:
-        return True
-
-    def write(self, buffer, value):
-        buffer.write_var_uint32(self._ordinal_by_member[value])
-
-    def read(self, buffer):
-        ordinal = buffer.read_var_uint32()
-        return self._members[ordinal]
-
-
-class SliceSerializer(Serializer):
-    def write(self, buffer, value: slice):
-        start, stop, step = value.start, value.stop, value.step
-        if type(start) is int:
-            # TODO support varint128
-            buffer.write_int16(NOT_NULL_INT64_FLAG)
-            buffer.write_varint64(start)
-        else:
-            if start is None:
-                buffer.write_int8(NULL_FLAG)
-            else:
-                buffer.write_int8(NOT_NULL_VALUE_FLAG)
-                self.fory.write_no_ref(buffer, start)
-        if type(stop) is int:
-            # TODO support varint128
-            buffer.write_int16(NOT_NULL_INT64_FLAG)
-            buffer.write_varint64(stop)
-        else:
-            if stop is None:
-                buffer.write_int8(NULL_FLAG)
-            else:
-                buffer.write_int8(NOT_NULL_VALUE_FLAG)
-                self.fory.write_no_ref(buffer, stop)
-        if type(step) is int:
-            # TODO support varint128
-            buffer.write_int16(NOT_NULL_INT64_FLAG)
-            buffer.write_varint64(step)
-        else:
-            if step is None:
-                buffer.write_int8(NULL_FLAG)
-            else:
-                buffer.write_int8(NOT_NULL_VALUE_FLAG)
-                self.fory.write_no_ref(buffer, step)
-
-    def read(self, buffer):
-        if buffer.read_int8() == NULL_FLAG:
-            start = None
-        else:
-            start = self.fory.read_no_ref(buffer)
-        if buffer.read_int8() == NULL_FLAG:
-            stop = None
-        else:
-            stop = self.fory.read_no_ref(buffer)
-        if buffer.read_int8() == NULL_FLAG:
-            step = None
-        else:
-            step = self.fory.read_no_ref(buffer)
-        return slice(start, stop, step)
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import datetime
+import logging
+import platform
+import time
+from abc import ABC
+
+from pyfory._fory import NOT_NULL_INT64_FLAG
+from pyfory.resolver import NOT_NULL_VALUE_FLAG, NULL_FLAG
+from pyfory.types import is_primitive_type
+
+try:
+    import numpy as np
+except ImportError:
+    np = None
+
+logger = logging.getLogger(__name__)
+
+
+class Serializer(ABC):  # common base of the serializers defined in this module
+    __slots__ = "fory", "type_", "need_to_write_ref"
+
+    def __init__(self, fory, type_: type):
+        self.fory = fory
+        self.type_: type = type_
+        self.need_to_write_ref = fory.track_ref and not is_primitive_type(type_)  # primitives are never ref-tracked
+
+    def write(self, buffer, value):  # encode ``value`` into ``buffer``; subclasses override
+        raise NotImplementedError
+
+    def read(self, buffer):  # decode one value from ``buffer``; subclasses override
+        raise NotImplementedError
+
+    def xwrite(self, buffer, value):  # x-prefixed (xlang) variant; delegates to ``write`` by default
+        self.write(buffer, value)
+
+    def xread(self, buffer):  # x-prefixed (xlang) variant; delegates to ``read`` by default
+        return self.read(buffer)
+
+    @classmethod
+    def support_subclass(cls) -> bool:  # False unless a subclass overrides it
+        return False
+
+
+class BooleanSerializer(Serializer):
+    def write(self, buffer, value):
+        buffer.write_bool(value)
+
+    def read(self, buffer):
+        return buffer.read_bool()
+
+
+class ByteSerializer(Serializer):
+    def write(self, buffer, value):
+        buffer.write_int8(value)
+
+    def read(self, buffer):
+        return buffer.read_int8()
+
+
+class Int16Serializer(Serializer):
+    def write(self, buffer, value):
+        buffer.write_int16(value)
+
+    def read(self, buffer):
+        return buffer.read_int16()
+
+
+class Int32Serializer(Serializer):
+    """Serializer for INT32/VARINT32 type - uses variable-length encoding for xlang compatibility."""
+
+    def write(self, buffer, value):
+        buffer.write_varint32(value)
+
+    def read(self, buffer):
+        return buffer.read_varint32()
+
+
+class FixedInt32Serializer(Serializer):
+    """Serializer for fixed-width 32-bit signed integer (INT32 type_id=4)."""
+
+    def write(self, buffer, value):
+        buffer.write_int32(value)
+
+    def read(self, buffer):
+        return buffer.read_int32()
+
+
+class Int64Serializer(Serializer):
+    """Serializer for INT64/VARINT64 type - uses variable-length encoding for xlang compatibility."""
+
+    def write(self, buffer, value):
+        buffer.write_varint64(value)
+
+    def read(self, buffer):
+        return buffer.read_varint64()
+
+    # The x-prefixed entry points use exactly the same varint64 encoding as
+    # the plain ones, so they are bound to the same functions instead of
+    # repeating the bodies.
+    xwrite = write
+    xread = read
+
+
+class FixedInt64Serializer(Serializer):
+    """Serializer for fixed-width 64-bit signed integer (INT64 type_id=6)."""
+
+    def write(self, buffer, value):
+        buffer.write_int64(value)
+
+    def read(self, buffer):
+        return buffer.read_int64()
+
+
+class Varint32Serializer(Serializer):
+    """Serializer for VARINT32 type - variable-length encoded signed 32-bit integer."""
+
+    def write(self, buffer, value):
+        buffer.write_varint32(value)
+
+    def read(self, buffer):
+        return buffer.read_varint32()
+
+
+class Varint64Serializer(Serializer):
+    """Serializer for VARINT64 type - variable-length encoded signed 64-bit integer."""
+
+    def write(self, buffer, value):
+        buffer.write_varint64(value)
+
+    def read(self, buffer):
+        return buffer.read_varint64()
+
+
+class TaggedInt64Serializer(Serializer):
+    """Serializer for TAGGED_INT64 type - tagged encoding for signed 64-bit integer."""
+
+    def write(self, buffer, value):
+        buffer.write_tagged_int64(value)
+
+    def read(self, buffer):
+        return buffer.read_tagged_int64()
+
+
+class Uint8Serializer(Serializer):
+    """Serializer for UINT8 type - unsigned 8-bit integer."""
+
+    def write(self, buffer, value):
+        buffer.write_uint8(value)
+
+    def read(self, buffer):
+        return buffer.read_uint8()
+
+
+class Uint16Serializer(Serializer):
+    """Serializer for UINT16 type - unsigned 16-bit integer."""
+
+    def write(self, buffer, value):
+        buffer.write_uint16(value)
+
+    def read(self, buffer):
+        return buffer.read_uint16()
+
+
+class Uint32Serializer(Serializer):
+    """Serializer for UINT32 type - fixed-size unsigned 32-bit integer."""
+
+    def write(self, buffer, value):
+        buffer.write_uint32(value)
+
+    def read(self, buffer):
+        return buffer.read_uint32()
+
+
+class VarUint32Serializer(Serializer):
+    """Serializer for VAR_UINT32 type - variable-length encoded unsigned 32-bit integer."""
+
+    def write(self, buffer, value):
+        buffer.write_var_uint32(value)
+
+    def read(self, buffer):
+        return buffer.read_var_uint32()
+
+
+class Uint64Serializer(Serializer):
+    """Serializer for UINT64 type - fixed-size unsigned 64-bit integer."""
+
+    def write(self, buffer, value):
+        buffer.write_uint64(value)
+
+    def read(self, buffer):
+        return buffer.read_uint64()
+
+
+class VarUint64Serializer(Serializer):
+    """Serializer for VAR_UINT64 type - variable-length encoded unsigned 64-bit integer."""
+
+    def write(self, buffer, value):
+        buffer.write_var_uint64(value)
+
+    def read(self, buffer):
+        return buffer.read_var_uint64()
+
+
+class TaggedUint64Serializer(Serializer):
+    """Serializer for TAGGED_UINT64 type - tagged encoding for unsigned 64-bit integer."""
+
+    def write(self, buffer, value):
+        buffer.write_tagged_uint64(value)
+
+    def read(self, buffer):
+        return buffer.read_tagged_uint64()
+
+
+class Float32Serializer(Serializer):
+    def write(self, buffer, value):
+        buffer.write_float(value)
+
+    def read(self, buffer):
+        return buffer.read_float()
+
+
+class Float64Serializer(Serializer):
+    def write(self, buffer, value):
+        buffer.write_double(value)
+
+    def read(self, buffer):
+        return buffer.read_double()
+
+
+class BFloat16Serializer(Serializer):
+    """Serializer for bfloat16 scalars; non-bfloat16 inputs are converted before writing."""
+
+    def write(self, buffer, value):
+        from pyfory.bfloat16 import bfloat16  # deferred: compiled extension module
+
+        scalar = value if isinstance(value, bfloat16) else bfloat16(value)
+        buffer.write_bfloat16(scalar.to_bits())
+
+    def read(self, buffer):
+        return buffer.read_bfloat16()  # Buffer already returns a bfloat16 instance
+
+
+class StringSerializer(Serializer):
+    def __init__(self, fory, type_):
+        super().__init__(fory, type_)
+        self.need_to_write_ref = False
+
+    def write(self, buffer, value: str):
+        buffer.write_string(value)
+
+    def read(self, buffer):
+        return buffer.read_string()
+
+
+_base_date = datetime.date(1970, 1, 1)
+
+
+class DateSerializer(Serializer):
+    """Serializer for ``datetime.date``, encoded as int32 days relative to ``_base_date``."""
+
+    def write(self, buffer, value: datetime.date):
+        if not isinstance(value, datetime.date):
+            raise TypeError("{} should be {} instead of {}".format(value, datetime.date, type(value)))
+        buffer.write_int32((value - _base_date).days)
+
+    def read(self, buffer):
+        return _base_date + datetime.timedelta(days=buffer.read_int32())
+
+
+class TimestampSerializer(Serializer):
+    """Serializer for ``datetime.datetime``: int64 epoch seconds followed by uint32 nanoseconds."""
+    __win_platform = platform.system() == "Windows"
+
+    def _get_timestamp(self, value: datetime.datetime):
+        seconds_offset = 0
+        if TimestampSerializer.__win_platform and value.tzinfo is None:
+            # Naive value on Windows: read its fields as UTC, then add the local zone offset by hand.
+            is_dst = time.daylight and time.localtime().tm_isdst > 0
+            seconds_offset = time.altzone if is_dst else time.timezone
+            value = value.replace(tzinfo=datetime.timezone.utc)
+        micros = int((value.timestamp() + seconds_offset) * 1_000_000)
+        seconds, micros_rem = divmod(micros, 1_000_000)
+        return seconds, micros_rem * 1000
+
+    def write(self, buffer, value: datetime.datetime):
+        if not isinstance(value, datetime.datetime):
+            raise TypeError("{} should be {} instead of {}".format(value, datetime.datetime, type(value)))
+        seconds, nanos = self._get_timestamp(value)
+        buffer.write_int64(seconds)
+        buffer.write_uint32(nanos)
+
+    def read(self, buffer):
+        seconds = buffer.read_int64()
+        nanos = buffer.read_uint32()
+        # TODO support timezone
+        return datetime.datetime.fromtimestamp(seconds + nanos / 1_000_000_000)
+
+
+class EnumSerializer(Serializer):
+    """Enum serializer: ``write`` sends the member name, ``xwrite`` sends the member ordinal."""
+    def __init__(self, fory, type_):
+        super().__init__(fory, type_)
+        self.need_to_write_ref = False
+        self._members = tuple(type_)  # declaration order defines each member's ordinal
+
+    @classmethod
+    def support_subclass(cls) -> bool:
+        return True
+
+    def write(self, buffer, value):
+        buffer.write_string(value.name)
+
+    def read(self, buffer):
+        return getattr(self.type_, buffer.read_string())
+
+    def xwrite(self, buffer, value):
+        buffer.write_var_uint32(self._members.index(value))  # ordinal, not ``value.value``
+
+    def xread(self, buffer):
+        return self._members[buffer.read_var_uint32()]
+
+
+class SliceSerializer(Serializer):
+    """Serializer for builtin ``slice`` objects.
+
+    Only the plain ``write``/``read`` pair is implemented; the x-prefixed
+    ``xwrite``/``xread`` entry points raise ``NotImplementedError``.
+    """
+
+    def write(self, buffer, value: slice):
+        """Write ``start``, ``stop`` and ``step``, in that order, with one shared layout."""
+        for bound in (value.start, value.stop, value.step):
+            self._write_bound(buffer, bound)
+
+    def _write_bound(self, buffer, bound):
+        """Write a single slice bound.
+
+        An exact ``int`` is written as an int16 header plus a varint64 payload,
+        ``None`` as a lone null flag, and anything else as a not-null flag
+        followed by the generic ``write_no_ref`` encoding.
+        """
+        if type(bound) is int:
+            # TODO support varint128
+            buffer.write_int16(NOT_NULL_INT64_FLAG)
+            buffer.write_varint64(bound)
+        elif bound is None:
+            buffer.write_int8(NULL_FLAG)
+        else:
+            buffer.write_int8(NOT_NULL_VALUE_FLAG)
+            self.fory.write_no_ref(buffer, bound)
+
+    def read(self, buffer):
+        """Read the three bounds back in write order and rebuild the slice."""
+        start = self._read_bound(buffer)
+        stop = self._read_bound(buffer)
+        step = self._read_bound(buffer)
+        return slice(start, stop, step)
+
+    def _read_bound(self, buffer):
+        """Read one bound: ``None`` for a null flag, otherwise a generic value.
+
+        NOTE(review): the int fast path in ``_write_bound`` emits an int16 yet a
+        single int8 is consumed here; presumably the second byte of
+        ``NOT_NULL_INT64_FLAG`` is the type tag ``read_no_ref`` expects - confirm.
+        """
+        if buffer.read_int8() == NULL_FLAG:
+            return None
+        return self.fory.read_no_ref(buffer)
+
+    def xwrite(self, buffer, value):
+        # Cross-language encoding of slices is not implemented.
+        raise NotImplementedError
+
+    def xread(self, buffer):
+        # Cross-language decoding of slices is not implemented.
+        raise NotImplementedError
diff --git a/python/pyfory/bfloat16.pxd b/python/pyfory/bfloat16.pxd
new file mode 100644
index 0000000000..8e6fb20aa8
--- /dev/null
+++ b/python/pyfory/bfloat16.pxd
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from libc.stdint cimport uint16_t
+
+cdef class bfloat16:
+    cdef uint16_t _bits
+
diff --git a/python/pyfory/bfloat16.pyx b/python/pyfory/bfloat16.pyx
index d4eb2446e6..f111e59481 100644
--- a/python/pyfory/bfloat16.pyx
+++ b/python/pyfory/bfloat16.pyx
@@ -44,18 +44,18 @@ cdef inline float bfloat16_bits_to_float32(uint16_t bits) nogil:
     return result
 
 
-cdef class BFloat16:
+cdef class bfloat16:
     cdef uint16_t _bits
     
     def __init__(self, value):
-        if isinstance(value, BFloat16):
-            self._bits = (<BFloat16>value)._bits
+        if isinstance(value, bfloat16):
+            self._bits = (<bfloat16>value)._bits
         else:
             self._bits = float32_to_bfloat16_bits(<float>float(value))
     
     @staticmethod
     def from_bits(uint16_t bits):
-        cdef BFloat16 bf16 = BFloat16.__new__(BFloat16)
+        cdef bfloat16 bf16 = bfloat16.__new__(bfloat16)
         bf16._bits = bits
         return bf16
     
@@ -69,18 +69,18 @@ cdef class BFloat16:
         return float(self.to_float32())
     
     def __repr__(self):
-        return f"BFloat16({self.to_float32()})"
+        return f"bfloat16({self.to_float32()})"
     
     def __str__(self):
         return str(self.to_float32())
     
     def __eq__(self, other):
-        if isinstance(other, BFloat16):
-            if self.is_nan() or (<BFloat16>other).is_nan():
+        if isinstance(other, bfloat16):
+            if self.is_nan() or (<bfloat16>other).is_nan():
                 return False
-            if self.is_zero() and (<BFloat16>other).is_zero():
+            if self.is_zero() and (<bfloat16>other).is_zero():
                 return True
-            return self._bits == (<BFloat16>other)._bits
+            return self._bits == (<bfloat16>other)._bits
         return False
     
     def __hash__(self):
@@ -114,3 +114,7 @@ cdef class BFloat16:
     
     def signbit(self):
         return (self._bits & 0x8000) != 0
+
+
+# Backward-compatible alias for existing user code.
+BFloat16 = bfloat16
diff --git a/python/pyfory/bfloat16_array.py b/python/pyfory/bfloat16_array.py
index e1ffc5b4a7..ad3b3a3772 100644
--- a/python/pyfory/bfloat16_array.py
+++ b/python/pyfory/bfloat16_array.py
@@ -17,7 +17,7 @@
 
 import array
 
-from pyfory.bfloat16 import BFloat16
+from pyfory.bfloat16 import bfloat16
 
 
 class BFloat16Array:
@@ -25,23 +25,23 @@ def __init__(self, values=None):
         if values is None:
             self._data = array.array("H")
         else:
-            self._data = array.array("H", [BFloat16(v).to_bits() if not isinstance(v, BFloat16) else v.to_bits() for v in values])
+            self._data = array.array("H", [bfloat16(v).to_bits() if not isinstance(v, bfloat16) else v.to_bits() for v in values])
 
     def __len__(self):
         return len(self._data)
 
     def __getitem__(self, index):
-        return BFloat16.from_bits(self._data[index])
+        return bfloat16.from_bits(self._data[index])
 
     def __setitem__(self, index, value):
-        if isinstance(value, BFloat16):
+        if isinstance(value, bfloat16):
             self._data[index] = value.to_bits()
         else:
-            self._data[index] = BFloat16(value).to_bits()
+            self._data[index] = bfloat16(value).to_bits()
 
     def __iter__(self):
         for bits in self._data:
-            yield BFloat16.from_bits(bits)
+            yield bfloat16.from_bits(bits)
 
     def __repr__(self):
         return f"BFloat16Array([{', '.join(str(bf16) for bf16 in self)}])"
@@ -52,10 +52,10 @@ def __eq__(self, other):
         return self._data == other._data
 
     def append(self, value):
-        if isinstance(value, BFloat16):
+        if isinstance(value, bfloat16):
             self._data.append(value.to_bits())
         else:
-            self._data.append(BFloat16(value).to_bits())
+            self._data.append(bfloat16(value).to_bits())
 
     def extend(self, values):
         for value in values:
diff --git a/python/pyfory/buffer.pxd b/python/pyfory/buffer.pxd
index 3b3fc77cd1..e63b19df12 100644
--- a/python/pyfory/buffer.pxd
+++ b/python/pyfory/buffer.pxd
@@ -25,6 +25,7 @@ from libc.stdint cimport *
 from libcpp cimport bool as c_bool
 from libcpp.memory cimport shared_ptr
 from pyfory.includes.libutil cimport CBuffer, CError
+from pyfory.bfloat16 cimport bfloat16
 
 
 cdef class Buffer:
@@ -160,7 +161,7 @@ cdef class Buffer:
 
     cpdef inline double read_float64(self)
 
-    cpdef inline uint16_t read_bfloat16(self)
+    cpdef inline bfloat16 read_bfloat16(self)
 
     cpdef inline write_varint64(self, int64_t v)
 
diff --git a/python/pyfory/buffer.pyx b/python/pyfory/buffer.pyx
index 827ca0256a..eee1dc8307 100644
--- a/python/pyfory/buffer.pyx
+++ b/python/pyfory/buffer.pyx
@@ -30,6 +30,7 @@ from cython.operator cimport dereference as deref
 from libcpp.string cimport string as c_string
 from libc.stdint cimport *
 from libcpp cimport bool as c_bool
+from pyfory.bfloat16 cimport bfloat16
 from pyfory.includes.libutil cimport(
     CBuffer, allocate_buffer, get_bit as c_get_bit, set_bit as c_set_bit, clear_bit as c_clear_bit,
     set_bit_to as c_set_bit_to, CError, CErrorCode, CResultVoidError, utf16_has_surrogate_pairs
@@ -247,10 +248,13 @@ cdef class Buffer:
     cpdef inline write_bfloat16(self, uint16_t value):
         self.c_buffer.write_uint16(value)
 
-    cpdef inline uint16_t read_bfloat16(self):
+    cpdef inline bfloat16 read_bfloat16(self):
         cdef uint16_t value = self.c_buffer.read_uint16(self._error)
+        cdef bfloat16 result
         self._raise_if_error()
-        return value
+        result = bfloat16.__new__(bfloat16)
+        result._bits = value
+        return result
 
     cpdef put_buffer(self, uint32_t offset, v, int32_t src_index, int32_t length):
         if length == 0:  # access an emtpy buffer may raise out-of-bound exception.
diff --git a/python/pyfory/primitive.pxi b/python/pyfory/primitive.pxi
index 2a58945143..51fe941f14 100644
--- a/python/pyfory/primitive.pxi
+++ b/python/pyfory/primitive.pxi
@@ -1,295 +1,298 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-@cython.final
-cdef class BooleanSerializer(Serializer):
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_bool(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_bool()
-
-
-@cython.final
-cdef class ByteSerializer(Serializer):
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_int8(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_int8()
-
-
-@cython.final
-cdef class Int16Serializer(Serializer):
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_int16(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_int16()
-
-
-@cython.final
-cdef class Int32Serializer(Serializer):
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_varint32(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_varint32()
-
-
-@cython.final
-cdef class Int64Serializer(Serializer):
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_varint64(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_varint64()
-
-
-@cython.final
-cdef class FixedInt32Serializer(Serializer):
-    """Serializer for fixed-width 32-bit signed integer (INT32 type_id=4)."""
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_int32(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_int32()
-
-
-@cython.final
-cdef class FixedInt64Serializer(Serializer):
-    """Serializer for fixed-width 64-bit signed integer (INT64 type_id=6)."""
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_int64(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_int64()
-
-
-@cython.final
-cdef class Varint32Serializer(Serializer):
-    """Serializer for VARINT32 type - variable-length encoded signed 32-bit integer."""
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_varint32(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_varint32()
-
-
-@cython.final
-cdef class Varint64Serializer(Serializer):
-    """Serializer for VARINT64 type - variable-length encoded signed 64-bit integer."""
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_varint64(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_varint64()
-
-
-@cython.final
-cdef class TaggedInt64Serializer(Serializer):
-    """Serializer for TAGGED_INT64 type - tagged encoding for signed 64-bit integer."""
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_tagged_int64(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_tagged_int64()
-
-
-@cython.final
-cdef class Uint8Serializer(Serializer):
-    """Serializer for UINT8 type - unsigned 8-bit integer."""
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_uint8(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_uint8()
-
-
-@cython.final
-cdef class Uint16Serializer(Serializer):
-    """Serializer for UINT16 type - unsigned 16-bit integer."""
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_uint16(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_uint16()
-
-
-@cython.final
-cdef class Uint32Serializer(Serializer):
-    """Serializer for UINT32 type - fixed-size unsigned 32-bit integer."""
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_uint32(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_uint32()
-
-
-@cython.final
-cdef class VarUint32Serializer(Serializer):
-    """Serializer for VAR_UINT32 type - variable-length encoded unsigned 32-bit integer."""
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_var_uint32(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_var_uint32()
-
-
-@cython.final
-cdef class Uint64Serializer(Serializer):
-    """Serializer for UINT64 type - fixed-size unsigned 64-bit integer."""
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_uint64(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_uint64()
-
-
-@cython.final
-cdef class VarUint64Serializer(Serializer):
-    """Serializer for VAR_UINT64 type - variable-length encoded unsigned 64-bit integer."""
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_var_uint64(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_var_uint64()
-
-
-@cython.final
-cdef class TaggedUint64Serializer(Serializer):
-    """Serializer for TAGGED_UINT64 type - tagged encoding for unsigned 64-bit integer."""
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_tagged_uint64(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_tagged_uint64()
-
-
-@cython.final
-cdef class Float32Serializer(Serializer):
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_float(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_float()
-
-
-@cython.final
-cdef class Float64Serializer(Serializer):
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_double(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_double()
-
-
-@cython.final
-
-cdef class BFloat16Serializer(XlangCompatibleSerializer):
-    cpdef inline write(self, Buffer buffer, value):
-        from pyfory.bfloat16 import BFloat16
-        if isinstance(value, BFloat16):
-            buffer.write_bfloat16(value.to_bits())
-        else:
-            buffer.write_bfloat16(BFloat16(value).to_bits())
-
-    cpdef inline read(self, Buffer buffer):
-        from pyfory.bfloat16 import BFloat16
-        return BFloat16.from_bits(buffer.read_bfloat16())
-
-@cython.final
-
-cdef class StringSerializer(Serializer):
-
-    def __init__(self, fory, type_, track_ref=False):
-        super().__init__(fory, type_)
-        self.need_to_write_ref = track_ref
-
-    cpdef inline write(self, Buffer buffer, value):
-        buffer.write_string(value)
-
-    cpdef inline read(self, Buffer buffer):
-        return buffer.read_string()
-
-
-cdef _base_date = datetime.date(1970, 1, 1)
-cdef int _base_date_ordinal = _base_date.toordinal()  # Precompute for faster date deserialization
-
-
-@cython.final
-cdef class DateSerializer(Serializer):
-    cpdef inline write(self, Buffer buffer, value):
-        if type(value) is not datetime.date:
-            raise TypeError(
-                "{} should be {} instead of {}".format(
-                    value, datetime.date, type(value)
-                )
-            )
-        days = (value - _base_date).days
-        buffer.write_int32(days)
-
-    cpdef inline read(self, Buffer buffer):
-        days = buffer.read_int32()
-        return datetime.date.fromordinal(_base_date_ordinal + days)
-
-
-@cython.final
-cdef class TimestampSerializer(Serializer):
-    cdef bint win_platform
-
-    def __init__(self, fory, type_: Union[type, TypeVar]):
-        super().__init__(fory, type_)
-        self.win_platform = platform.system() == "Windows"
-
-    cdef inline _get_timestamp(self, value):
-        seconds_offset = 0
-        if self.win_platform and value.tzinfo is None:
-            is_dst = time.daylight and time.localtime().tm_isdst > 0
-            seconds_offset = time.altzone if is_dst else time.timezone
-            value = value.replace(tzinfo=datetime.timezone.utc)
-        cdef long long micros = <long long>((value.timestamp() + seconds_offset) * 1000000)
-        cdef long long seconds
-        cdef long long micros_rem
-        if micros >= 0:
-            seconds = micros // 1000000
-            micros_rem = micros % 1000000
-        else:
-            seconds = -((-micros) // 1000000)
-            micros_rem = micros - seconds * 1000000
-        if micros_rem < 0:
-            seconds -= 1
-            micros_rem += 1000000
-        return seconds, <unsigned int>(micros_rem * 1000)
-
-    cpdef inline write(self, Buffer buffer, value):
-        if type(value) is not datetime.datetime:
-            raise TypeError(
-                "{} should be {} instead of {}".format(value, datetime, type(value))
-            )
-        cdef long long seconds
-        cdef unsigned int nanos
-        seconds, nanos = self._get_timestamp(value)
-        buffer.write_int64(seconds)
-        buffer.write_uint32(nanos)
-
-    cpdef inline read(self, Buffer buffer):
-        cdef long long seconds = buffer.read_int64()
-        cdef unsigned int nanos = buffer.read_uint32()
-        ts = seconds + (<double>nanos) / 1000000000.0
-        # TODO support timezone
-        return datetime.datetime.fromtimestamp(ts)
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+@cython.final
+cdef class BooleanSerializer(Serializer):
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_bool(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_bool()
+
+
+@cython.final
+cdef class ByteSerializer(Serializer):
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_int8(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_int8()
+
+
+@cython.final
+cdef class Int16Serializer(Serializer):
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_int16(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_int16()
+
+
+@cython.final
+cdef class Int32Serializer(Serializer):
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_varint32(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_varint32()
+
+
+@cython.final
+cdef class Int64Serializer(Serializer):
+    cpdef inline xwrite(self, Buffer buffer, value):
+        buffer.write_varint64(value)
+
+    cpdef inline xread(self, Buffer buffer):
+        return buffer.read_varint64()
+
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_varint64(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_varint64()
+
+
+@cython.final
+cdef class FixedInt32Serializer(Serializer):
+    """Serializer for fixed-width 32-bit signed integer (INT32 type_id=4)."""
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_int32(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_int32()
+
+
+@cython.final
+cdef class FixedInt64Serializer(Serializer):
+    """Serializer for fixed-width 64-bit signed integer (INT64 type_id=6)."""
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_int64(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_int64()
+
+
+@cython.final
+cdef class Varint32Serializer(Serializer):
+    """Serializer for VARINT32 type - variable-length encoded signed 32-bit integer."""
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_varint32(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_varint32()
+
+
+@cython.final
+cdef class Varint64Serializer(Serializer):
+    """Serializer for VARINT64 type - variable-length encoded signed 64-bit integer."""
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_varint64(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_varint64()
+
+
+@cython.final
+cdef class TaggedInt64Serializer(Serializer):
+    """Serializer for TAGGED_INT64 type - tagged encoding for signed 64-bit integer."""
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_tagged_int64(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_tagged_int64()
+
+
+@cython.final
+cdef class Uint8Serializer(Serializer):
+    """Serializer for UINT8 type - unsigned 8-bit integer."""
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_uint8(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_uint8()
+
+
+@cython.final
+cdef class Uint16Serializer(Serializer):
+    """Serializer for UINT16 type - unsigned 16-bit integer."""
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_uint16(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_uint16()
+
+
+@cython.final
+cdef class Uint32Serializer(Serializer):
+    """Serializer for UINT32 type - fixed-size unsigned 32-bit integer."""
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_uint32(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_uint32()
+
+
+@cython.final
+cdef class VarUint32Serializer(Serializer):
+    """Serializer for VAR_UINT32 type - variable-length encoded unsigned 32-bit integer."""
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_var_uint32(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_var_uint32()
+
+
+@cython.final
+cdef class Uint64Serializer(Serializer):
+    """Serializer for UINT64 type - fixed-size unsigned 64-bit integer."""
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_uint64(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_uint64()
+
+
+@cython.final
+cdef class VarUint64Serializer(Serializer):
+    """Serializer for VAR_UINT64 type - variable-length encoded unsigned 64-bit integer."""
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_var_uint64(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_var_uint64()
+
+
+@cython.final
+cdef class TaggedUint64Serializer(Serializer):
+    """Serializer for TAGGED_UINT64 type - tagged encoding for unsigned 64-bit integer."""
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_tagged_uint64(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_tagged_uint64()
+
+
+@cython.final
+cdef class Float32Serializer(Serializer):
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_float(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_float()
+
+
+@cython.final
+cdef class Float64Serializer(Serializer):
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_double(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_double()
+
+
+@cython.final
+cdef class BFloat16Serializer(Serializer):
+    cpdef inline write(self, Buffer buffer, value):
+        from pyfory.bfloat16 import bfloat16
+        if isinstance(value, bfloat16):
+            buffer.write_bfloat16(value.to_bits())
+        else:
+            buffer.write_bfloat16(bfloat16(value).to_bits())
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_bfloat16()
+
+
+@cython.final
+cdef class StringSerializer(Serializer):
+    def __init__(self, fory, type_, track_ref=False):
+        super().__init__(fory, type_)
+        self.need_to_write_ref = track_ref
+
+    cpdef inline write(self, Buffer buffer, value):
+        buffer.write_string(value)
+
+    cpdef inline read(self, Buffer buffer):
+        return buffer.read_string()
+
+
+cdef _base_date = datetime.date(1970, 1, 1)
+cdef int _base_date_ordinal = _base_date.toordinal()  # Precompute for faster date deserialization
+
+
+@cython.final
+cdef class DateSerializer(Serializer):
+    cpdef inline write(self, Buffer buffer, value):
+        if type(value) is not datetime.date:
+            raise TypeError(
+                "{} should be {} instead of {}".format(
+                    value, datetime.date, type(value)
+                )
+            )
+        days = (value - _base_date).days
+        buffer.write_int32(days)
+
+    cpdef inline read(self, Buffer buffer):
+        days = buffer.read_int32()
+        return datetime.date.fromordinal(_base_date_ordinal + days)
+
+
+@cython.final
+cdef class TimestampSerializer(Serializer):
+    cdef bint win_platform
+
+    def __init__(self, fory, type_: Union[type, TypeVar]):
+        super().__init__(fory, type_)
+        self.win_platform = platform.system() == "Windows"
+
+    cdef inline _get_timestamp(self, value):
+        seconds_offset = 0
+        if self.win_platform and value.tzinfo is None:
+            is_dst = time.daylight and time.localtime().tm_isdst > 0
+            seconds_offset = time.altzone if is_dst else time.timezone
+            value = value.replace(tzinfo=datetime.timezone.utc)
+        cdef long long micros = <long long>((value.timestamp() + seconds_offset) * 1000000)
+        cdef long long seconds
+        cdef long long micros_rem
+        if micros >= 0:
+            seconds = micros // 1000000
+            micros_rem = micros % 1000000
+        else:
+            seconds = -((-micros) // 1000000)
+            micros_rem = micros - seconds * 1000000
+        if micros_rem < 0:
+            seconds -= 1
+            micros_rem += 1000000
+        return seconds, <unsigned int>(micros_rem * 1000)
+
+    cpdef inline write(self, Buffer buffer, value):
+        if type(value) is not datetime.datetime:
+            raise TypeError(
+                "{} should be {} instead of {}".format(value, datetime, type(value))
+            )
+        cdef long long seconds
+        cdef unsigned int nanos
+        seconds, nanos = self._get_timestamp(value)
+        buffer.write_int64(seconds)
+        buffer.write_uint32(nanos)
+
+    cpdef inline read(self, Buffer buffer):
+        cdef long long seconds = buffer.read_int64()
+        cdef unsigned int nanos = buffer.read_uint32()
+        ts = seconds + (<double>nanos) / 1000000000.0
+        # TODO support timezone
+        return datetime.datetime.fromtimestamp(ts)
diff --git a/python/pyfory/registry.py b/python/pyfory/registry.py
index 480b473155..46dd57f683 100644
--- a/python/pyfory/registry.py
+++ b/python/pyfory/registry.py
@@ -35,7 +35,6 @@
     Serializer,
     Numpy1DArraySerializer,
     NDArraySerializer,
-    PythonNDArraySerializer,
     PyArraySerializer,
     DynamicPyArraySerializer,
     NoneSerializer,
@@ -250,7 +249,7 @@ def _initialize_py(self):
         register(tuple, serializer=TupleSerializer)
         register(slice, serializer=SliceSerializer)
         if np is not None:
-            register(np.ndarray, serializer=PythonNDArraySerializer)
+            register(np.ndarray, serializer=NDArraySerializer)
         register(array.array, serializer=DynamicPyArraySerializer)
         register(types.MappingProxyType, serializer=MappingProxySerializer)
         register(pickle.PickleBuffer, serializer=PickleBufferSerializer)
@@ -322,17 +321,13 @@ def _initialize_common(self):
             serializer=Float64Serializer,
         )
         register(float, type_id=TypeId.FLOAT64, serializer=Float64Serializer)
-        # BFloat16 is optional if the extension module is unavailable.
-        try:
-            from pyfory.bfloat16 import BFloat16
+        from pyfory.bfloat16 import bfloat16
 
-            register(
-                BFloat16,
-                type_id=TypeId.BFLOAT16,
-                serializer=BFloat16Serializer,
-            )
-        except ImportError:
-            pass
+        register(
+            bfloat16,
+            type_id=TypeId.BFLOAT16,
+            serializer=BFloat16Serializer,
+        )
         register(str, type_id=TypeId.STRING, serializer=StringSerializer)
         # TODO(chaokunyang) DURATION DECIMAL
         register(datetime.datetime, type_id=TypeId.TIMESTAMP, serializer=TimestampSerializer)
@@ -344,18 +339,14 @@ def _initialize_common(self):
                 type_id=typeid,
                 serializer=PyArraySerializer(self.fory, ftype, typeid),
             )
-        # BFloat16Array is optional if the extension module is unavailable.
-        try:
-            from pyfory.bfloat16_array import BFloat16Array
-            from pyfory.serializer import BFloat16ArraySerializer
+        from pyfory.bfloat16_array import BFloat16Array
+        from pyfory.serializer import BFloat16ArraySerializer
 
-            register(
-                BFloat16Array,
-                type_id=TypeId.BFLOAT16_ARRAY,
-                serializer=BFloat16ArraySerializer(self.fory, BFloat16Array, TypeId.BFLOAT16_ARRAY),
-            )
-        except ImportError:
-            pass
+        register(
+            BFloat16Array,
+            type_id=TypeId.BFLOAT16_ARRAY,
+            serializer=BFloat16ArraySerializer(self.fory, BFloat16Array, TypeId.BFLOAT16_ARRAY),
+        )
         if np:
             # overwrite pyarray  with same type id.
             # if pyarray are needed, one must annotate that value with XXXArrayType
@@ -475,7 +466,8 @@ def _register_type(
             raise TypeError(f"type name {typename} and id {type_id} should not be set at the same time")
         if cls in self._types_info:
             raise TypeError(f"{cls} registered already")
-        return self._register_xtype(
+        register_type = self._register_xtype if self.fory.xlang else self._register_pytype
+        return register_type(
             cls,
             type_id=type_id,
             user_type_id=user_type_id,
@@ -543,6 +535,30 @@ def _register_xtype(
             internal=internal,
         )
 
+    def _register_pytype(
+        self,
+        cls: Union[type, TypeVar],
+        *,
+        type_id: int = None,
+        user_type_id: int = NO_USER_TYPE_ID,
+        namespace: str = None,
+        typename: str = None,
+        serializer: Serializer = None,
+        internal: bool = False,
+    ):
+        # Set default type_id when None, similar to _register_xtype
+        if type_id is None and typename is not None:
+            type_id = self._next_type_id()
+        return self.__register_type(
+            cls,
+            type_id=type_id,
+            user_type_id=user_type_id,
+            namespace=namespace,
+            typename=typename,
+            serializer=serializer,
+            internal=internal,
+        )
+
     def __register_type(
         self,
         cls: Union[type, TypeVar],
@@ -589,7 +605,7 @@ def __register_type(
                 if user_type_id not in self._user_type_id_to_type_info or not internal:
                     self._user_type_id_to_type_info[user_type_id] = typeinfo
                 self._used_user_type_ids.add(user_type_id)
-            elif not TypeId.is_namespaced_type(type_id):
+            elif not self.fory.xlang or not TypeId.is_namespaced_type(type_id):
                 if type_id not in self._type_id_to_type_info or not internal:
                     self._type_id_to_type_info[type_id] = typeinfo
         self._types_info[cls] = typeinfo
@@ -612,6 +628,9 @@ def register_serializer(self, cls: Union[type, TypeVar], serializer):
         if cls not in self._types_info:
             raise TypeUnregisteredError(f"{cls} not registered")
         typeinfo = self._types_info[cls]
+        if not self.fory.xlang:
+            typeinfo.serializer = serializer
+            return
         prev_type_id = typeinfo.type_id
         prev_user_type_id = typeinfo.user_type_id
         if needs_user_type_id(prev_type_id) and prev_user_type_id not in {None, NO_USER_TYPE_ID}:
@@ -682,7 +701,7 @@ def _set_type_info(self, typeinfo):
             # Set a stub serializer FIRST to break recursion for self-referencing types.
             # get_type_info() only calls _set_type_info when serializer is None,
             # so setting stub first prevents re-entry for circular type references.
-            typeinfo.serializer = DataClassStubSerializer(self.fory, typeinfo.cls)
+            typeinfo.serializer = DataClassStubSerializer(self.fory, typeinfo.cls, xlang=self.fory.xlang)
 
             if self.meta_share:
                 type_def = encode_typedef(self, typeinfo.cls)
@@ -690,9 +709,9 @@ def _set_type_info(self, typeinfo):
                     typeinfo.serializer = type_def.create_serializer(self)
                     typeinfo.type_def = type_def
                 else:
-                    typeinfo.serializer = DataClassSerializer(self.fory, typeinfo.cls)
+                    typeinfo.serializer = DataClassSerializer(self.fory, typeinfo.cls, xlang=self.fory.xlang)
             else:
-                typeinfo.serializer = DataClassSerializer(self.fory, typeinfo.cls)
+                typeinfo.serializer = DataClassSerializer(self.fory, typeinfo.cls, xlang=self.fory.xlang)
         else:
             typeinfo.serializer = self._create_serializer(typeinfo.cls)
 
@@ -729,7 +748,7 @@ def _create_serializer(self, cls):
                 # lazy create serializer to handle nested struct fields.
                 from pyfory.struct import DataClassStubSerializer
 
-                serializer = DataClassStubSerializer(self.fory, cls)
+                serializer = DataClassStubSerializer(self.fory, cls, xlang=self.fory.xlang)
             elif issubclass(cls, enum.Enum):
                 serializer = EnumSerializer(self.fory, cls)
             elif ("builtin_function_or_method" in str(cls) or "cython_function_or_method" in str(cls)) and "<locals>" not in str(cls):
diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx
index f400f97765..61b89bc9a9 100644
--- a/python/pyfory/serialization.pyx
+++ b/python/pyfory/serialization.pyx
@@ -626,7 +626,7 @@ cdef class TypeResolver:
         else:
             if type_id >= self._c_registered_id_to_type_info.size():
                 self._c_registered_id_to_type_info.resize(type_id * 2, NULL)
-            if type_id > 0 and not is_namespaced_type(<TypeId>type_id):
+            if type_id > 0 and (not self.fory.xlang or not is_namespaced_type(<TypeId>type_id)):
                 self._c_registered_id_to_type_info[type_id] = <PyObject *> typeinfo
         self._c_types_info[<uintptr_t> <PyObject *> typeinfo.cls] = <PyObject *> typeinfo
         # Resize if load factor >= 0.4 (using integer arithmetic: size/capacity >= 4/10)
@@ -1083,8 +1083,9 @@ cdef class Fory:
                 it controls which types can be deserialized, overriding the default policy.
                 **Strongly recommended** when strict=False to maintain security controls.
 
-            field_nullable: Treat all dataclass fields as nullable regardless of
-                Optional annotation.
+            field_nullable: Treat all dataclass fields as nullable in Python-native mode
+                (xlang=False), regardless of Optional annotation. Ignored in cross-language
+                mode.
 
         Example:
             >>> # Python-native mode with reference tracking
@@ -1102,7 +1103,7 @@ cdef class Fory:
         self.compatible = compatible
         self.track_ref = ref
         self.ref_resolver = MapRefResolver(ref)
-        self.field_nullable = field_nullable
+        self.field_nullable = field_nullable if not self.xlang else False
         self.metastring_resolver = MetaStringResolver()
         self.type_resolver = TypeResolver(self, meta_share=compatible, meta_compressor=meta_compressor)
         self.serialization_context = SerializationContext(fory=self, scoped_meta_share_enabled=compatible)
@@ -1298,7 +1299,7 @@ cdef class Fory:
 
     cpdef inline _serialize(
             self, obj, Buffer buffer, buffer_callback=None, unsupported_callback=None):
-        assert self.depth == 0, "Nested serialization should use write_ref/write_no_ref."
+        assert self.depth == 0, "Nested serialization should use write_ref/write_no_ref/xwrite_ref/xwrite_no_ref."
         self.depth += 1
         self.buffer_callback = buffer_callback
         self._unsupported_callback = unsupported_callback
@@ -1314,13 +1315,21 @@ cdef class Fory:
         else:
             clear_bit(buffer, mask_index, 0)
 
-        # Unified protocol always writes xlang-compatible payload framing.
-        set_bit(buffer, mask_index, 1)
+        if self.xlang:
+            # set reader as x_lang.
+            set_bit(buffer, mask_index, 1)
+        else:
+            # set reader as native.
+            clear_bit(buffer, mask_index, 1)
         if self.buffer_callback is not None:
             set_bit(buffer, mask_index, 2)
         else:
             clear_bit(buffer, mask_index, 2)
-        self.write_ref(buffer, obj)
+        cdef int32_t start_offset
+        if not self.xlang:
+            self.write_ref(buffer, obj)
+        else:
+            self.xwrite_ref(buffer, obj)
 
         if buffer is not self.buffer:
             return buffer
@@ -1328,29 +1337,32 @@ cdef class Fory:
             return buffer.to_bytes(0, buffer.get_writer_index())
 
     cpdef inline write_ref(
-            self, Buffer buffer, obj, TypeInfo typeinfo=None, Serializer serializer=None):
-        if serializer is None and typeinfo is not None:
-            serializer = typeinfo.serializer
-        if serializer is None or serializer.need_to_write_ref:
-            if self.ref_resolver.write_ref_or_null(buffer, obj):
-                return
-            self.write_no_ref(buffer, obj, serializer=serializer, typeinfo=typeinfo)
-        else:
-            if obj is None:
-                buffer.write_int8(NULL_FLAG)
-            else:
-                buffer.write_int8(NOT_NULL_VALUE_FLAG)
-                self.write_no_ref(buffer, obj, serializer=serializer, typeinfo=typeinfo)
-
-    cpdef inline write_no_ref(
-            self,
-            Buffer buffer,
-            obj,
-            Serializer serializer=None,
-            TypeInfo typeinfo=None):
-        if serializer is not None:
-            serializer.write(buffer, obj)
+            self, Buffer buffer, obj, TypeInfo typeinfo=None):
+        cls = type(obj)
+        if cls is str:
+            buffer.write_int16(NOT_NULL_STRING_FLAG)
+            buffer.write_string(obj)
+            return
+        elif cls is int:
+            buffer.write_int16(NOT_NULL_INT64_FLAG)
+            buffer.write_varint64(obj)
+            return
+        elif cls is bool:
+            buffer.write_int16(NOT_NULL_BOOL_FLAG)
+            buffer.write_bool(obj)
+            return
+        elif cls is float:
+            buffer.write_int16(NOT_NULL_FLOAT64_FLAG)
+            buffer.write_double(obj)
+            return
+        if self.ref_resolver.write_ref_or_null(buffer, obj):
             return
+        if typeinfo is None:
+            typeinfo = self.type_resolver.get_type_info(cls)
+        self.type_resolver.write_type_info(buffer, typeinfo)
+        typeinfo.serializer.write(buffer, obj)
+
+    cpdef inline write_no_ref(self, Buffer buffer, obj):
         cls = type(obj)
         if cls is str:
             buffer.write_var_uint32(STRING_TYPE_ID)
@@ -1368,11 +1380,34 @@ cdef class Fory:
             buffer.write_var_uint32(FLOAT64_TYPE_ID)
             buffer.write_double(obj)
             return
-        if typeinfo is None:
-            typeinfo = self.type_resolver.get_type_info(cls)
+        cdef TypeInfo typeinfo = self.type_resolver.get_type_info(cls)
         self.type_resolver.write_type_info(buffer, typeinfo)
         typeinfo.serializer.write(buffer, obj)
 
+    cpdef inline xwrite_ref(
+            self, Buffer buffer, obj, Serializer serializer=None):
+        if serializer is None or serializer.need_to_write_ref:
+            if not self.ref_resolver.write_ref_or_null(buffer, obj):
+                self.xwrite_no_ref(
+                    buffer, obj, serializer=serializer
+                )
+        else:
+            if obj is None:
+                buffer.write_int8(NULL_FLAG)
+            else:
+                buffer.write_int8(NOT_NULL_VALUE_FLAG)
+                self.xwrite_no_ref(
+                    buffer, obj, serializer=serializer
+                )
+
+    cpdef inline xwrite_no_ref(
+            self, Buffer buffer, obj, Serializer serializer=None):
+        if serializer is None:
+            typeinfo = self.type_resolver.get_type_info(type(obj))
+            self.type_resolver.write_type_info(buffer, typeinfo)
+            serializer = typeinfo.serializer
+        serializer.xwrite(buffer, obj)
+
     def deserialize(
             self,
             buffer: Union[Buffer, bytes],
@@ -1410,7 +1445,7 @@ cdef class Fory:
 
     cpdef inline _deserialize(
             self, Buffer buffer, buffers=None, unsupported_objects=None):
-        assert self.depth == 0, "Nested deserialization should use read_ref/read_no_ref."
+        assert self.depth == 0, "Nested deserialization should use read_ref/read_no_ref/xread_ref/xread_no_ref."
         self.depth += 1
         if unsupported_objects is not None:
             self._unsupported_objects = iter(unsupported_objects)
@@ -1418,6 +1453,7 @@ cdef class Fory:
         buffer.set_reader_index(reader_index + 1)
         if get_bit(buffer, reader_index, 0):
             return None
+        cdef c_bool is_target_x_lang = get_bit(buffer, reader_index, 1)
         self.is_peer_out_of_band_enabled = get_bit(buffer, reader_index, 2)
         if self.is_peer_out_of_band_enabled:
             assert buffers is not None, (
@@ -1431,50 +1467,90 @@ cdef class Fory:
                 "produced with buffer_callback null."
             )
 
-        return self.read_ref(buffer)
+        if not is_target_x_lang:
+            obj = self.read_ref(buffer)
+        else:
+            obj = self.xread_ref(buffer)
+
+        return obj
 
-    cpdef inline read_ref(self, Buffer buffer, Serializer serializer=None):
-        cdef int8_t head_flag
+    cpdef inline read_ref(self, Buffer buffer):
+        cdef MapRefResolver ref_resolver = self.ref_resolver
+        cdef int32_t ref_id = ref_resolver.try_preserve_ref_id(buffer)
+        if ref_id < NOT_NULL_VALUE_FLAG:
+            return ref_resolver.get_read_object()
+        # indicates that the object is first read.
+        cdef TypeInfo typeinfo = self.type_resolver.read_type_info(buffer)
+        cls = typeinfo.cls
+        if cls is str:
+            return buffer.read_string()
+        elif cls is int:
+            return buffer.read_varint64()
+        elif cls is bool:
+            return buffer.read_bool()
+        elif cls is float:
+            return buffer.read_double()
+        self.inc_depth()
+        o = typeinfo.serializer.read(buffer)
+        self.depth -= 1
+        ref_resolver.set_read_object(ref_id, o)
+        return o
+
+    cpdef inline read_no_ref(self, Buffer buffer):
+        """Deserialize not-null and non-reference object from buffer."""
+        cdef TypeInfo typeinfo = self.type_resolver.read_type_info(buffer)
+        cls = typeinfo.cls
+        if cls is str:
+            return buffer.read_string()
+        elif cls is int:
+            return buffer.read_varint64()
+        elif cls is bool:
+            return buffer.read_bool()
+        elif cls is float:
+            return buffer.read_double()
+        self.inc_depth()
+        o = typeinfo.serializer.read(buffer)
+        self.depth -= 1
+        return o
+
+    cpdef inline xread_ref(self, Buffer buffer, Serializer serializer=None):
         cdef MapRefResolver ref_resolver
         cdef int32_t ref_id
         if serializer is None or serializer.need_to_write_ref:
             ref_resolver = self.ref_resolver
             ref_id = ref_resolver.try_preserve_ref_id(buffer)
+            # indicates that the object is first read.
             if ref_id >= NOT_NULL_VALUE_FLAG:
-                o = self._read_no_ref_internal(buffer, serializer)
+                # Don't push -1 here - try_preserve_ref_id already pushed ref_id
+                o = self._xread_no_ref_internal(buffer, serializer)
                 ref_resolver.set_read_object(ref_id, o)
                 return o
-            return ref_resolver.get_read_object()
-        head_flag = buffer.read_int8()
+            else:
+                return ref_resolver.get_read_object()
+        cdef int8_t head_flag = buffer.read_int8()
         if head_flag == NULL_FLAG:
             return None
-        return self.read_no_ref(buffer, serializer=serializer)
+        return self.xread_no_ref(
+            buffer, serializer=serializer
+        )
 
-    cpdef inline read_no_ref(self, Buffer buffer, Serializer serializer=None):
-        """Deserialize not-null and non-reference object from buffer."""
+    cpdef inline xread_no_ref(
+            self, Buffer buffer, Serializer serializer=None):
+        if serializer is None:
+            serializer = self.type_resolver.read_type_info(buffer).serializer
+        # Push -1 to read_ref_ids so reference() can pop it and skip reference tracking
+        # This handles the case where xread_no_ref is called directly without xread_ref
         if self.ref_resolver.track_ref:
-            # Push -1 so reference() can pop and skip tracking when read_no_ref is called directly.
             self.ref_resolver.read_ref_ids.push_back(-1)
-        return self._read_no_ref_internal(buffer, serializer)
+        return self._xread_no_ref_internal(buffer, serializer)
 
-    cdef inline _read_no_ref_internal(
+    cdef inline _xread_no_ref_internal(
             self, Buffer buffer, Serializer serializer):
-        cdef TypeInfo typeinfo
-        cdef cls
+        """Internal method to read without pushing to read_ref_ids."""
         if serializer is None:
-            typeinfo = self.type_resolver.read_type_info(buffer)
-            cls = typeinfo.cls
-            if cls is str:
-                return buffer.read_string()
-            elif cls is int:
-                return buffer.read_varint64()
-            elif cls is bool:
-                return buffer.read_bool()
-            elif cls is float:
-                return buffer.read_double()
-            serializer = typeinfo.serializer
+            serializer = self.type_resolver.read_type_info(buffer).serializer
         self.inc_depth()
-        o = serializer.read(buffer)
+        o = serializer.xread(buffer)
         self.depth -= 1
         return o
 
@@ -1719,16 +1795,15 @@ cpdef inline write_nullable_bfloat16(Buffer buffer, value):
         buffer.write_int8(NULL_FLAG)
     else:
         buffer.write_int8(NOT_NULL_VALUE_FLAG)
-        from pyfory.bfloat16 import BFloat16
-        if isinstance(value, BFloat16):
+        from pyfory.bfloat16 import bfloat16
+        if isinstance(value, bfloat16):
             buffer.write_bfloat16(value.to_bits())
         else:
-            buffer.write_bfloat16(BFloat16(value).to_bits())
+            buffer.write_bfloat16(bfloat16(value).to_bits())
 
 cpdef inline read_nullable_bfloat16(Buffer buffer):
     if buffer.read_int8() == NOT_NULL_VALUE_FLAG:
-        from pyfory.bfloat16 import BFloat16
-        return BFloat16.from_bits(buffer.read_bfloat16())
+        return buffer.read_bfloat16()
     else:
         return None
 
@@ -1738,7 +1813,13 @@ cdef class Serializer:
     Base class for type-specific serializers.
 
     Serializer defines the interface for serializing and deserializing objects of a
-    specific type.
+    specific type. Each serializer implements two modes:
+
+    - Python-native mode (write/read): Optimized for Python-to-Python serialization,
+      supporting all Python-specific features like __reduce__, local functions, etc.
+
+    - Cross-language mode (xwrite/xread): Serializes to a cross-language format
+      compatible with other Fory implementations (Java, Go, Rust, C++, etc).
 
     Custom serializers can be registered for user-defined types using
     Fory.register_serializer() to override default serialization behavior.
@@ -1750,7 +1831,7 @@ cdef class Serializer:
 
     Note:
         This is a base class for implementing custom serializers. Subclasses must
-        implement write() and read() methods.
+        implement write(), read(), xwrite(), and xread() methods.
     """
     cdef readonly Fory fory
     cdef readonly object type_
@@ -1767,43 +1848,35 @@ cdef class Serializer:
     cpdef read(self, Buffer buffer):
         raise NotImplementedError(f"read method not implemented in {type(self)}")
 
-    @classmethod
-    def support_subclass(cls) -> bool:
-        return False
-
-
-cdef class XlangCompatibleSerializer(Serializer):
-    def __init__(self, fory, type_):
-        super().__init__(fory, type_)
-
     cpdef xwrite(self, Buffer buffer, value):
         self.write(buffer, value)
 
     cpdef xread(self, Buffer buffer):
         return self.read(buffer)
 
+    @classmethod
+    def support_subclass(cls) -> bool:
+        return False
 
 @cython.final
 cdef class EnumSerializer(Serializer):
-    cdef tuple _members
-    cdef dict _ordinal_by_member
-
-    def __init__(self, fory, type_):
-        super().__init__(fory, type_)
-        self.need_to_write_ref = False
-        self._members = tuple(type_)
-        self._ordinal_by_member = {member: idx for idx, member in enumerate(self._members)}
-
     @classmethod
     def support_subclass(cls) -> bool:
         return True
 
     cpdef inline write(self, Buffer buffer, value):
-        buffer.write_var_uint32(self._ordinal_by_member[value])
+        buffer.write_string(value.name)
 
     cpdef inline read(self, Buffer buffer):
+        name = buffer.read_string()
+        return getattr(self.type_, name)
+
+    cpdef inline xwrite(self, Buffer buffer, value):
+        buffer.write_var_uint32(value.value)
+
+    cpdef inline xread(self, Buffer buffer):
         ordinal = buffer.read_var_uint32()
-        return self._members[ordinal]
+        return self.type_(ordinal)
 
 
 @cython.final
@@ -1857,5 +1930,12 @@ cdef class SliceSerializer(Serializer):
             step = self.fory.read_no_ref(buffer)
         return slice(start, stop, step)
 
+    cpdef xwrite(self, Buffer buffer, value):
+        raise NotImplementedError
+
+    cpdef xread(self, Buffer buffer):
+        raise NotImplementedError
+
+
 include "primitive.pxi"
 include "collection.pxi"
diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py
index 991b71a0b0..902d2dfd05 100644
--- a/python/pyfory/serializer.py
+++ b/python/pyfory/serializer.py
@@ -45,7 +45,6 @@
 if ENABLE_FORY_CYTHON_SERIALIZATION:
     from pyfory.serialization import (  # noqa: F401, F811
         Serializer,
-        XlangCompatibleSerializer,
         BooleanSerializer,
         ByteSerializer,
         Int16Serializer,
@@ -82,7 +81,6 @@
 else:
     from pyfory._serializer import (  # noqa: F401 # pylint: disable=unused-import
         Serializer,
-        XlangCompatibleSerializer,
         BooleanSerializer,
         ByteSerializer,
         Int16Serializer,
@@ -151,6 +149,12 @@ def __init__(self, fory):
         super().__init__(fory, None)
         self.need_to_write_ref = False
 
+    def xwrite(self, buffer, value):
+        raise NotImplementedError
+
+    def xread(self, buffer):
+        raise NotImplementedError
+
     def write(self, buffer, value):
         pass
 
@@ -218,6 +222,12 @@ def read(self, buffer):
         name = self.fory.read_ref(buffer)
         return self.type_(start, stop, step, dtype=dtype, name=name)
 
+    def xwrite(self, buffer, value):
+        raise NotImplementedError
+
+    def xread(self, buffer):
+        raise NotImplementedError
+
 
 # Use numpy array or python array module.
 typecode_dict = (
@@ -314,7 +324,7 @@ def __init__(self, fory, ftype, type_id: str):
         self.typecode = typeid_code[type_id]
         self.itemsize, ftype, self.type_id = typecode_dict[self.typecode]
 
-    def write(self, buffer, value):
+    def xwrite(self, buffer, value):
         assert value.itemsize == self.itemsize
         view = memoryview(value)
         assert view.format == self.typecode
@@ -330,7 +340,7 @@ def write(self, buffer, value):
             swapped.byteswap()
             buffer.write_buffer(swapped)
 
-    def read(self, buffer):
+    def xread(self, buffer):
         data = buffer.read_bytes_and_size()
         arr = array.array(self.typecode, [])
         arr.frombytes(data)
@@ -339,14 +349,37 @@ def read(self, buffer):
             arr.byteswap()
         return arr
 
+    def write(self, buffer, value: array.array):
+        nbytes = len(value) * value.itemsize
+        buffer.write_string(value.typecode)
+        buffer.write_var_uint32(nbytes)
+        if is_little_endian or value.itemsize == 1:
+            buffer.write_buffer(value)
+        else:
+            # Swap bytes on big-endian machines for multi-byte types
+            swapped = array.array(value.typecode, value)
+            swapped.byteswap()
+            buffer.write_buffer(swapped)
+
+    def read(self, buffer):
+        typecode = buffer.read_string()
+        data = buffer.read_bytes_and_size()
+        arr = array.array(typecode[0], [])  # Take first character
+        arr.frombytes(data)
+        if not is_little_endian and arr.itemsize > 1:
+            # Swap bytes on big-endian machines for multi-byte types
+            arr.byteswap()
+        return arr
+
 
 class DynamicPyArraySerializer(Serializer):
     """Serializer for dynamic Python arrays that handles any typecode."""
 
     def __init__(self, fory, cls):
         super().__init__(fory, cls)
+        self._serializer = ReduceSerializer(fory, cls)
 
-    def write(self, buffer, value):
+    def xwrite(self, buffer, value):
         itemsize, ftype, type_id = typecode_dict[value.typecode]
         view = memoryview(value)
         nbytes = len(value) * itemsize
@@ -368,7 +401,7 @@ def write(self, buffer, value):
             swapped.byteswap()
             buffer.write_buffer(swapped)
 
-    def read(self, buffer):
+    def xread(self, buffer):
         type_id = buffer.read_uint8()
         typecode = typeid_code[type_id]
         itemsize = typecode_dict[typecode][0]
@@ -379,14 +412,20 @@ def read(self, buffer):
             arr.byteswap()
         return arr
 
+    def write(self, buffer, value):
+        self._serializer.write(buffer, value)
+
+    def read(self, buffer):
+        return self._serializer.read(buffer)
 
-class BFloat16ArraySerializer(XlangCompatibleSerializer):
+
+class BFloat16ArraySerializer(Serializer):
     def __init__(self, fory, ftype, type_id: int):
         super().__init__(fory, ftype)
         self.type_id = type_id
         self.itemsize = 2
 
-    def xwrite(self, buffer, value):
+    def write(self, buffer, value):
         from pyfory.bfloat16_array import BFloat16Array
 
         if isinstance(value, BFloat16Array):
@@ -405,7 +444,7 @@ def xwrite(self, buffer, value):
                 swapped.byteswap()
                 buffer.write_buffer(swapped)
 
-    def xread(self, buffer):
+    def read(self, buffer):
         from pyfory.bfloat16_array import BFloat16Array
 
         data = buffer.read_bytes_and_size()
@@ -417,13 +456,6 @@ def xread(self, buffer):
         bf16_arr._data = arr
         return bf16_arr
 
-    def write(self, buffer, value):
-        self.xwrite(buffer, value)
-
-    def read(self, buffer):
-        return self.xread(buffer)
-
-
 if np:
     _np_dtypes_dict = (
         {
@@ -456,7 +488,6 @@ def read(self, buffer):
     )
 else:
     _np_dtypes_dict = {}
-_np_typeid_to_dtype = {type_id: dtype for dtype, (_, _, _, type_id) in _np_dtypes_dict.items()}
 
 
 class Numpy1DArraySerializer(Serializer):
@@ -466,8 +497,9 @@ def __init__(self, fory, ftype, dtype):
         super().__init__(fory, ftype)
         self.dtype = dtype
         self.itemsize, self.typecode, _, self.type_id = _np_dtypes_dict[self.dtype]
+        self._serializer = ReduceSerializer(fory, np.ndarray)
 
-    def write(self, buffer, value):
+    def xwrite(self, buffer, value):
         assert value.itemsize == self.itemsize
         view = memoryview(value)
         try:
@@ -489,7 +521,7 @@ def write(self, buffer, value):
             # Swap bytes on big-endian machines for multi-byte types
             buffer.write_bytes(value.astype(value.dtype.newbyteorder("<")).tobytes())
 
-    def read(self, buffer):
+    def xread(self, buffer):
         data = buffer.read_bytes_and_size()
         arr = np.frombuffer(data, dtype=self.dtype.newbyteorder("<"))
         if self.itemsize > 1:
@@ -501,53 +533,32 @@ def read(self, buffer):
                 arr = arr.astype(self.dtype)
         return arr
 
+    def write(self, buffer, value):
+        self._serializer.write(buffer, value)
+
+    def read(self, buffer):
+        return self._serializer.read(buffer)
+
 
 class NDArraySerializer(Serializer):
-    def write(self, buffer, value):
-        # Write concrete 1D primitive ndarray using type id + bytes payload.
-        dtype_info = _np_dtypes_dict.get(value.dtype)
-        if dtype_info is None or value.ndim != 1:
-            raise NotImplementedError(f"Unsupported ndarray: dtype={value.dtype}, ndim={value.ndim}")
-        itemsize, _typecode, _ftype, type_id = dtype_info
+    def xwrite(self, buffer, value):
+        itemsize, typecode, ftype, type_id = _np_dtypes_dict[value.dtype]
         view = memoryview(value)
         nbytes = len(value) * itemsize
         buffer.write_uint8(type_id)
         buffer.write_var_uint32(nbytes)
         if value.dtype == np.dtype("bool") or not view.c_contiguous:
-            if not is_little_endian and itemsize > 1:
-                buffer.write_bytes(value.astype(value.dtype.newbyteorder("<")).tobytes())
-            else:
-                buffer.write_bytes(value.tobytes())
-        elif is_little_endian or itemsize == 1:
-            buffer.write_buffer(value)
+            buffer.write_bytes(value.tobytes())
         else:
-            buffer.write_bytes(value.astype(value.dtype.newbyteorder("<")).tobytes())
-
-    def read(self, buffer):
-        type_id = buffer.read_uint8()
-        dtype = _np_typeid_to_dtype.get(type_id)
-        if dtype is None:
-            raise NotImplementedError(f"Unsupported ndarray type id: {type_id}")
-        data = buffer.read_bytes_and_size()
-        arr = np.frombuffer(data, dtype=dtype.newbyteorder("<"))
-        if dtype.itemsize > 1:
-            if is_little_endian:
-                arr = arr.view(dtype)
-            else:
-                arr = arr.astype(dtype)
-        return arr
+            buffer.write_buffer(value)
 
+    def xread(self, buffer):
+        raise NotImplementedError("Multi-dimensional array not supported currently")
 
-class PythonNDArraySerializer(NDArraySerializer):
     def write(self, buffer, value):
-        dtype_info = _np_dtypes_dict.get(value.dtype)
-        if dtype_info is not None and value.ndim == 1:
-            super().write(buffer, value)
-            return
-
         fory = self.fory
         dtype = value.dtype
-        buffer.write_string(dtype.str)
+        fory.write_ref(buffer, dtype)
         buffer.write_var_uint32(len(value.shape))
         for dim in value.shape:
             buffer.write_var_uint32(dim)
@@ -559,22 +570,8 @@ def write(self, buffer, value):
             fory.write_buffer_object(buffer, NDArrayBufferObject(value))
 
     def read(self, buffer):
-        reader_index = buffer.get_reader_index()
-        type_id = buffer.read_uint8()
-        dtype = _np_typeid_to_dtype.get(type_id)
-        if dtype is not None:
-            data = buffer.read_bytes_and_size()
-            arr = np.frombuffer(data, dtype=dtype.newbyteorder("<"))
-            if dtype.itemsize > 1:
-                if is_little_endian:
-                    arr = arr.view(dtype)
-                else:
-                    arr = arr.astype(dtype)
-            return arr
-
-        buffer.set_reader_index(reader_index)
         fory = self.fory
-        dtype = np.dtype(buffer.read_string())
+        dtype = fory.read_ref(buffer)
         ndim = buffer.read_var_uint32()
         shape = tuple(buffer.read_var_uint32() for _ in range(ndim))
         if dtype.kind == "O":
@@ -1249,6 +1246,12 @@ def _deserialize_function(self, buffer):
             func = result
         return func
 
+    def xwrite(self, buffer, value):
+        raise NotImplementedError()
+
+    def xread(self, buffer):
+        raise NotImplementedError()
+
     def write(self, buffer, value):
         """Serialize a function for Python-only mode."""
         self._serialize_function(buffer, value)
@@ -1313,6 +1316,12 @@ def read(self, buffer):
             method = result
         return method
 
+    def xwrite(self, buffer, value):
+        return self.write(buffer, value)
+
+    def xread(self, buffer):
+        return self.read(buffer)
+
 
 class ObjectSerializer(Serializer):
     """Serializer for regular Python objects.
@@ -1355,6 +1364,14 @@ def read(self, buffer):
             setattr(obj, field_name, field_value)
         return obj
 
+    def xwrite(self, buffer, value):
+        # for cross-language or minimal framing, reuse the same logic
+        return self.write(buffer, value)
+
+    def xread(self, buffer):
+        # symmetric to xwrite
+        return self.read(buffer)
+
 
 @dataclasses.dataclass
 class NonExistEnum:
@@ -1372,9 +1389,16 @@ def support_subclass(cls) -> bool:
         return True
 
     def write(self, buffer, value):
-        buffer.write_var_uint32(value.value)
+        buffer.write_string(value.name)
 
     def read(self, buffer):
+        name = buffer.read_string()
+        return NonExistEnum(name=name)
+
+    def xwrite(self, buffer, value):
+        buffer.write_var_uint32(value.value)
+
+    def xread(self, buffer):
         value = buffer.read_var_uint32()
         return NonExistEnum(value=value)
 
@@ -1386,6 +1410,12 @@ def write(self, buffer, value):
     def read(self, buffer):
         return self.fory.handle_unsupported_read(buffer)
 
+    def xwrite(self, buffer, value):
+        raise NotImplementedError(f"{self.type_} is not supported for xwrite")
+
+    def xread(self, buffer):
+        raise NotImplementedError(f"{self.type_} is not supported for xread")
+
 
 __all__ = [
     # Base serializers (imported)
diff --git a/python/pyfory/tests/test_bfloat16.py b/python/pyfory/tests/test_bfloat16.py
index b788690aa3..5f7c543bbf 100644
--- a/python/pyfory/tests/test_bfloat16.py
+++ b/python/pyfory/tests/test_bfloat16.py
@@ -19,7 +19,7 @@
 import pytest
 
 from pyfory import Fory
-from pyfory.bfloat16 import BFloat16
+from pyfory.bfloat16 import bfloat16
 from pyfory.bfloat16_array import BFloat16Array
 from pyfory.types import TypeId
 
@@ -30,47 +30,47 @@ def ser_de(fory, value):
 
 
 def test_bfloat16_basic():
-    bf16 = BFloat16(3.14)
-    assert isinstance(bf16, BFloat16)
+    bf16 = bfloat16(3.14)
+    assert isinstance(bf16, bfloat16)
     assert bf16.to_float32() == pytest.approx(3.14, abs=0.01)
     bits = bf16.to_bits()
-    assert BFloat16.from_bits(bits).to_bits() == bits
+    assert bfloat16.from_bits(bits).to_bits() == bits
 
 
 def test_bfloat16_special_values():
-    assert BFloat16(float("nan")).is_nan()
-    assert BFloat16(float("inf")).is_inf()
-    assert BFloat16(float("-inf")).is_inf()
-    assert BFloat16(0.0).is_zero()
-    assert BFloat16(1.0).is_finite()
-    assert not BFloat16(1.0).is_nan()
-    assert not BFloat16(1.0).is_inf()
+    assert bfloat16(float("nan")).is_nan()
+    assert bfloat16(float("inf")).is_inf()
+    assert bfloat16(float("-inf")).is_inf()
+    assert bfloat16(0.0).is_zero()
+    assert bfloat16(1.0).is_finite()
+    assert not bfloat16(1.0).is_nan()
+    assert not bfloat16(1.0).is_inf()
 
 
 def test_bfloat16_conversion():
-    assert BFloat16(0.0).to_float32() == 0.0
-    assert BFloat16(1.0).to_float32() == 1.0
-    assert BFloat16(-1.0).to_float32() == -1.0
-    assert BFloat16(3.14).to_float32() == pytest.approx(3.14, abs=0.01)
-    assert math.isnan(BFloat16(float("nan")).to_float32())
-    assert math.isinf(BFloat16(float("inf")).to_float32())
-    assert math.isinf(BFloat16(float("-inf")).to_float32())
+    assert bfloat16(0.0).to_float32() == 0.0
+    assert bfloat16(1.0).to_float32() == 1.0
+    assert bfloat16(-1.0).to_float32() == -1.0
+    assert bfloat16(3.14).to_float32() == pytest.approx(3.14, abs=0.01)
+    assert math.isnan(bfloat16(float("nan")).to_float32())
+    assert math.isinf(bfloat16(float("inf")).to_float32())
+    assert math.isinf(bfloat16(float("-inf")).to_float32())
 
 
 def test_bfloat16_serialization():
     fory = Fory(xlang=True)
-    assert ser_de(fory, BFloat16(0.0)).to_bits() == BFloat16(0.0).to_bits()
-    assert ser_de(fory, BFloat16(1.0)).to_bits() == BFloat16(1.0).to_bits()
-    assert ser_de(fory, BFloat16(3.14)).to_bits() == BFloat16(3.14).to_bits()
-    assert ser_de(fory, BFloat16(float("inf"))).is_inf()
-    assert ser_de(fory, BFloat16(float("nan"))).is_nan()
+    assert ser_de(fory, bfloat16(0.0)).to_bits() == bfloat16(0.0).to_bits()
+    assert ser_de(fory, bfloat16(1.0)).to_bits() == bfloat16(1.0).to_bits()
+    assert ser_de(fory, bfloat16(3.14)).to_bits() == bfloat16(3.14).to_bits()
+    assert ser_de(fory, bfloat16(float("inf"))).is_inf()
+    assert ser_de(fory, bfloat16(float("nan"))).is_nan()
 
 
 def test_bfloat16_array_basic():
     arr = BFloat16Array([1.0, 2.0, 3.14])
     assert len(arr) == 3
     assert arr[0].to_float32() == pytest.approx(1.0)
-    arr[0] = BFloat16(5.0)
+    arr[0] = bfloat16(5.0)
     assert arr[0].to_float32() == pytest.approx(5.0)
 
 
@@ -88,12 +88,12 @@ def test_bfloat16_in_dataclass():
 
     @dataclass
     class TestStruct:
-        value: BFloat16
+        value: bfloat16
         arr: BFloat16Array
 
     fory = Fory(xlang=True)
     fory.register_type(TestStruct)
-    obj = TestStruct(value=BFloat16(3.14), arr=BFloat16Array([1.0, 2.0]))
+    obj = TestStruct(value=bfloat16(3.14), arr=BFloat16Array([1.0, 2.0]))
     result = ser_de(fory, obj)
     assert result.value.to_float32() == pytest.approx(3.14, abs=0.01)
     assert len(result.arr) == 2
@@ -101,7 +101,7 @@ class TestStruct:
 
 def test_bfloat16_in_list():
     fory = Fory(xlang=True)
-    values = [BFloat16(1.0), BFloat16(2.0)]
+    values = [bfloat16(1.0), bfloat16(2.0)]
     result = ser_de(fory, values)
     assert len(result) == 2
     assert result[0].to_float32() == pytest.approx(1.0)
@@ -109,14 +109,14 @@ def test_bfloat16_in_list():
 
 def test_bfloat16_in_map():
     fory = Fory(xlang=True)
-    data = {"a": BFloat16(1.0), "b": BFloat16(2.0)}
+    data = {"a": bfloat16(1.0), "b": bfloat16(2.0)}
     result = ser_de(fory, data)
     assert result["a"].to_float32() == pytest.approx(1.0)
 
 
 def test_bfloat16_type_registration():
     fory = Fory(xlang=True)
-    type_info = fory.type_resolver.get_type_info(BFloat16)
+    type_info = fory.type_resolver.get_type_info(bfloat16)
     assert type_info.type_id == TypeId.BFLOAT16
 
 

From c9e2e1205ca91d6b98c5fcff9e0ab9d81ad499db Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 20:12:36 +0500
Subject: [PATCH 14/23] fix(python): resolve bfloat16 cython build and
 serializer API drift

---
 BUILD                           |  3 +++
 python/pyfory/__init__.py       |  2 +-
 python/pyfory/bfloat16.pxd      |  5 +++++
 python/pyfory/bfloat16.pyx      |  4 ++--
 python/pyfory/buffer.pyx        |  5 +----
 python/pyfory/serialization.pyx | 39 +++++++++++++++++++--------------
 6 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/BUILD b/BUILD
index 9d8433f75f..a532538830 100644
--- a/BUILD
+++ b/BUILD
@@ -23,6 +23,7 @@ pyx_library(
     name = "buffer",
     srcs = glob([
         "python/pyfory/includes/*.pxd",
+        "python/pyfory/bfloat16.pxd",
         "python/pyfory/buffer.pxd",
         "python/pyfory/buffer.pyx",
         "python/pyfory/__init__.py",
@@ -54,6 +55,7 @@ pyx_library(
     name = "serialization",
     srcs = glob([
         "python/pyfory/includes/*.pxd",
+        "python/pyfory/bfloat16.pxd",
         "python/pyfory/buffer.pxd",
         "python/pyfory/serialization.pyx",
         "python/pyfory/*.pxi",
@@ -73,6 +75,7 @@ pyx_library(
 pyx_library(
     name = "bfloat16",
     srcs = glob([
+        "python/pyfory/bfloat16.pxd",
         "python/pyfory/bfloat16.pyx",
         "python/pyfory/__init__.py",
     ]),
diff --git a/python/pyfory/__init__.py b/python/pyfory/__init__.py
index bdac49f515..a2badc3d86 100644
--- a/python/pyfory/__init__.py
+++ b/python/pyfory/__init__.py
@@ -89,7 +89,7 @@
     tagged_uint64,
     float32,
     float64,
-    bfloat16,
+    bfloat16 as bfloat16_type,
     bfloat16_array,
     int8_array,
     uint8_array,
diff --git a/python/pyfory/bfloat16.pxd b/python/pyfory/bfloat16.pxd
index 8e6fb20aa8..b1e2d0ab51 100644
--- a/python/pyfory/bfloat16.pxd
+++ b/python/pyfory/bfloat16.pxd
@@ -20,3 +20,8 @@ from libc.stdint cimport uint16_t
 cdef class bfloat16:
     cdef uint16_t _bits
 
+    @staticmethod
+    cpdef bfloat16 from_bits(uint16_t bits)
+
+    cpdef uint16_t to_bits(self)
+
diff --git a/python/pyfory/bfloat16.pyx b/python/pyfory/bfloat16.pyx
index f111e59481..d019ba8b58 100644
--- a/python/pyfory/bfloat16.pyx
+++ b/python/pyfory/bfloat16.pyx
@@ -54,12 +54,12 @@ cdef class bfloat16:
             self._bits = float32_to_bfloat16_bits(<float>float(value))
     
     @staticmethod
-    def from_bits(uint16_t bits):
+    cpdef bfloat16 from_bits(uint16_t bits):
         cdef bfloat16 bf16 = bfloat16.__new__(bfloat16)
         bf16._bits = bits
         return bf16
     
-    def to_bits(self):
+    cpdef uint16_t to_bits(self):
         return self._bits
     
     def to_float32(self):
diff --git a/python/pyfory/buffer.pyx b/python/pyfory/buffer.pyx
index eee1dc8307..91a146eab3 100644
--- a/python/pyfory/buffer.pyx
+++ b/python/pyfory/buffer.pyx
@@ -250,11 +250,8 @@ cdef class Buffer:
 
     cpdef inline bfloat16 read_bfloat16(self):
         cdef uint16_t value = self.c_buffer.read_uint16(self._error)
-        cdef bfloat16 result
         self._raise_if_error()
-        result = bfloat16.__new__(bfloat16)
-        result._bits = value
-        return result
+        return bfloat16.from_bits(value)
 
     cpdef put_buffer(self, uint32_t offset, v, int32_t src_index, int32_t length):
         if length == 0:  # access an emtpy buffer may raise out-of-bound exception.
diff --git a/python/pyfory/serialization.pyx b/python/pyfory/serialization.pyx
index 61b89bc9a9..bf8e49f774 100644
--- a/python/pyfory/serialization.pyx
+++ b/python/pyfory/serialization.pyx
@@ -1362,7 +1362,7 @@ cdef class Fory:
         self.type_resolver.write_type_info(buffer, typeinfo)
         typeinfo.serializer.write(buffer, obj)
 
-    cpdef inline write_no_ref(self, Buffer buffer, obj):
+    cpdef inline write_no_ref(self, Buffer buffer, obj, Serializer serializer=None, TypeInfo typeinfo=None):
         cls = type(obj)
         if cls is str:
             buffer.write_var_uint32(STRING_TYPE_ID)
@@ -1380,9 +1380,12 @@ cdef class Fory:
             buffer.write_var_uint32(FLOAT64_TYPE_ID)
             buffer.write_double(obj)
             return
-        cdef TypeInfo typeinfo = self.type_resolver.get_type_info(cls)
-        self.type_resolver.write_type_info(buffer, typeinfo)
-        typeinfo.serializer.write(buffer, obj)
+        if serializer is None:
+            if typeinfo is None:
+                typeinfo = self.type_resolver.get_type_info(cls)
+            self.type_resolver.write_type_info(buffer, typeinfo)
+            serializer = typeinfo.serializer
+        serializer.write(buffer, obj)
 
     cpdef inline xwrite_ref(
             self, Buffer buffer, obj, Serializer serializer=None):
@@ -1496,20 +1499,24 @@ cdef class Fory:
         ref_resolver.set_read_object(ref_id, o)
         return o
 
-    cpdef inline read_no_ref(self, Buffer buffer):
+    cpdef inline read_no_ref(self, Buffer buffer, Serializer serializer=None):
         """Deserialize not-null and non-reference object from buffer."""
-        cdef TypeInfo typeinfo = self.type_resolver.read_type_info(buffer)
-        cls = typeinfo.cls
-        if cls is str:
-            return buffer.read_string()
-        elif cls is int:
-            return buffer.read_varint64()
-        elif cls is bool:
-            return buffer.read_bool()
-        elif cls is float:
-            return buffer.read_double()
+        cdef TypeInfo typeinfo
+        cdef object cls
+        if serializer is None:
+            typeinfo = self.type_resolver.read_type_info(buffer)
+            cls = typeinfo.cls
+            if cls is str:
+                return buffer.read_string()
+            elif cls is int:
+                return buffer.read_varint64()
+            elif cls is bool:
+                return buffer.read_bool()
+            elif cls is float:
+                return buffer.read_double()
+            serializer = typeinfo.serializer
         self.inc_depth()
-        o = typeinfo.serializer.read(buffer)
+        o = serializer.read(buffer)
         self.depth -= 1
         return o
 

From 56f45d3003bc0f0800bbb0a586c92053d3492732 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 20:18:52 +0500
Subject: [PATCH 15/23] style(python): align bfloat16 files with ci formatter

---
 python/pyfory/__init__.py   | 1 +
 python/pyfory/serializer.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/python/pyfory/__init__.py b/python/pyfory/__init__.py
index a2badc3d86..c5ef84715b 100644
--- a/python/pyfory/__init__.py
+++ b/python/pyfory/__init__.py
@@ -124,6 +124,7 @@
 # BFloat16 support
 from pyfory.bfloat16 import bfloat16  # noqa: F401
 from pyfory.bfloat16_array import BFloat16Array  # noqa: F401
+
 # Keep compatibility with existing API naming.
 BFloat16 = bfloat16
 
diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py
index 902d2dfd05..badc91dea9 100644
--- a/python/pyfory/serializer.py
+++ b/python/pyfory/serializer.py
@@ -456,6 +456,7 @@ def read(self, buffer):
         bf16_arr._data = arr
         return bf16_arr
 
+
 if np:
     _np_dtypes_dict = (
         {

From efb010017b5c4bda71defa17ff351636d08aa498 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 20:30:37 +0500
Subject: [PATCH 16/23] fix(python): include bfloat16 pxd in format cython
 target

---
 BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/BUILD b/BUILD
index a532538830..743d1867f6 100644
--- a/BUILD
+++ b/BUILD
@@ -90,6 +90,7 @@ pyx_library(
         [
             "python/pyfory/__init__.py",
             "python/pyfory/includes/*.pxd",
+            "python/pyfory/bfloat16.pxd",
             "python/pyfory/buffer.pxd",
             "python/pyfory/*.pxi",
             "python/pyfory/format/_format.pyx",

From f1a3f42436601259061b6755459eb0b8b8eee269 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 20:56:12 +0500
Subject: [PATCH 17/23] fix(python): resolve bfloat16 cython redeclaration and
 static method issues

---
 python/pyfory/bfloat16.pxd |  5 +----
 python/pyfory/bfloat16.pyx | 16 +++++++++-------
 python/pyfory/buffer.pyx   |  4 ++--
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/python/pyfory/bfloat16.pxd b/python/pyfory/bfloat16.pxd
index b1e2d0ab51..54f7318970 100644
--- a/python/pyfory/bfloat16.pxd
+++ b/python/pyfory/bfloat16.pxd
@@ -20,8 +20,5 @@ from libc.stdint cimport uint16_t
 cdef class bfloat16:
     cdef uint16_t _bits
 
-    @staticmethod
-    cpdef bfloat16 from_bits(uint16_t bits)
-
-    cpdef uint16_t to_bits(self)
 
+cdef bfloat16 bfloat16_from_bits(uint16_t bits)
\ No newline at end of file
diff --git a/python/pyfory/bfloat16.pyx b/python/pyfory/bfloat16.pyx
index d019ba8b58..bf7bc6a3ff 100644
--- a/python/pyfory/bfloat16.pyx
+++ b/python/pyfory/bfloat16.pyx
@@ -44,9 +44,13 @@ cdef inline float bfloat16_bits_to_float32(uint16_t bits) nogil:
     return result
 
 
+cdef bfloat16 bfloat16_from_bits(uint16_t bits):
+    cdef bfloat16 value = bfloat16.__new__(bfloat16)
+    value._bits = bits
+    return value
+
+
 cdef class bfloat16:
-    cdef uint16_t _bits
-    
     def __init__(self, value):
         if isinstance(value, bfloat16):
             self._bits = (<bfloat16>value)._bits
@@ -54,12 +58,10 @@ cdef class bfloat16:
             self._bits = float32_to_bfloat16_bits(<float>float(value))
     
     @staticmethod
-    cpdef bfloat16 from_bits(uint16_t bits):
-        cdef bfloat16 bf16 = bfloat16.__new__(bfloat16)
-        bf16._bits = bits
-        return bf16
+    def from_bits(uint16_t bits):
+        return bfloat16_from_bits(bits)
     
-    cpdef uint16_t to_bits(self):
+    def to_bits(self):
         return self._bits
     
     def to_float32(self):
diff --git a/python/pyfory/buffer.pyx b/python/pyfory/buffer.pyx
index 91a146eab3..460e9a04d5 100644
--- a/python/pyfory/buffer.pyx
+++ b/python/pyfory/buffer.pyx
@@ -30,7 +30,7 @@ from cython.operator cimport dereference as deref
 from libcpp.string cimport string as c_string
 from libc.stdint cimport *
 from libcpp cimport bool as c_bool
-from pyfory.bfloat16 cimport bfloat16
+from pyfory.bfloat16 cimport bfloat16, bfloat16_from_bits
 from pyfory.includes.libutil cimport(
     CBuffer, allocate_buffer, get_bit as c_get_bit, set_bit as c_set_bit, clear_bit as c_clear_bit,
     set_bit_to as c_set_bit_to, CError, CErrorCode, CResultVoidError, utf16_has_surrogate_pairs
@@ -251,7 +251,7 @@ cdef class Buffer:
     cpdef inline bfloat16 read_bfloat16(self):
         cdef uint16_t value = self.c_buffer.read_uint16(self._error)
         self._raise_if_error()
-        return bfloat16.from_bits(value)
+        return bfloat16_from_bits(value)
 
     cpdef put_buffer(self, uint32_t offset, v, int32_t src_index, int32_t length):
         if length == 0:  # access an emtpy buffer may raise out-of-bound exception.

From 9d74ae24a56427c17e27ecbbeb4eb8074eca3ead Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 21:09:29 +0500
Subject: [PATCH 18/23] fix(python): remove obsolete xlang arg for dataclass
 serializers

---
 python/pyfory/registry.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/pyfory/registry.py b/python/pyfory/registry.py
index 46dd57f683..c2dea631e4 100644
--- a/python/pyfory/registry.py
+++ b/python/pyfory/registry.py
@@ -701,7 +701,7 @@ def _set_type_info(self, typeinfo):
             # Set a stub serializer FIRST to break recursion for self-referencing types.
             # get_type_info() only calls _set_type_info when serializer is None,
             # so setting stub first prevents re-entry for circular type references.
-            typeinfo.serializer = DataClassStubSerializer(self.fory, typeinfo.cls, xlang=self.fory.xlang)
+            typeinfo.serializer = DataClassStubSerializer(self.fory, typeinfo.cls)
 
             if self.meta_share:
                 type_def = encode_typedef(self, typeinfo.cls)
@@ -709,9 +709,9 @@ def _set_type_info(self, typeinfo):
                     typeinfo.serializer = type_def.create_serializer(self)
                     typeinfo.type_def = type_def
                 else:
-                    typeinfo.serializer = DataClassSerializer(self.fory, typeinfo.cls, xlang=self.fory.xlang)
+                    typeinfo.serializer = DataClassSerializer(self.fory, typeinfo.cls)
             else:
-                typeinfo.serializer = DataClassSerializer(self.fory, typeinfo.cls, xlang=self.fory.xlang)
+                typeinfo.serializer = DataClassSerializer(self.fory, typeinfo.cls)
         else:
             typeinfo.serializer = self._create_serializer(typeinfo.cls)
 
@@ -748,7 +748,7 @@ def _create_serializer(self, cls):
                 # lazy create serializer to handle nested struct fields.
                 from pyfory.struct import DataClassStubSerializer
 
-                serializer = DataClassStubSerializer(self.fory, cls, xlang=self.fory.xlang)
+                serializer = DataClassStubSerializer(self.fory, cls)
             elif issubclass(cls, enum.Enum):
                 serializer = EnumSerializer(self.fory, cls)
             elif ("builtin_function_or_method" in str(cls) or "cython_function_or_method" in str(cls)) and "<locals>" not in str(cls):

From ec59341f6aaf21d5cec58cfa3894a33234afec3e Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 21:24:27 +0500
Subject: [PATCH 19/23] fix(python): accept legacy xlang kwarg in dataclass
 serializers

---
 python/pyfory/struct.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/pyfory/struct.py b/python/pyfory/struct.py
index b844b37b90..3fe5a027ad 100644
--- a/python/pyfory/struct.py
+++ b/python/pyfory/struct.py
@@ -286,6 +286,7 @@ def __init__(
         serializers: List[Serializer] = None,
         nullable_fields: Dict[str, bool] = None,
         dynamic_fields: Dict[str, bool] = None,
+        xlang=None,
     ):
         super().__init__(fory, clz)
 
@@ -1017,7 +1018,7 @@ def read(self, buffer):
 
 
 class DataClassStubSerializer(DataClassSerializer):
-    def __init__(self, fory, clz: type):
+    def __init__(self, fory, clz: type, xlang=None):
         Serializer.__init__(self, fory, clz)
 
     def write(self, buffer, value):

From 848a1cdcd45bc43d335298551912165314f08fd2 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 21:35:18 +0500
Subject: [PATCH 20/23] fix(python): repair unsigned xlang dataclass refs and
 retry bazel fetch

---
 python/pyfory/struct.py | 22 ++++++++++++++++++----
 python/setup.py         | 17 ++++++++++++++++-
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/python/pyfory/struct.py b/python/pyfory/struct.py
index 3fe5a027ad..7eb533da05 100644
--- a/python/pyfory/struct.py
+++ b/python/pyfory/struct.py
@@ -768,7 +768,11 @@ def _gen_generated_write_method(self):
                     # dynamic=True: don't pass serializer, write actual type info
                     # dynamic=False: pass serializer, use declared type
                     serializer_arg = "None" if is_dynamic else serializer_var
-                    stmts.append(f"{fory}.write_ref({buffer}, {field_value}, serializer={serializer_arg})")
+                    if self.fory.xlang:
+                        stmts.append(f"{fory}.xwrite_ref({buffer}, {field_value}, serializer={serializer_arg})")
+                    else:
+                        # Python-native write_ref doesn't take serializer kwarg.
+                        stmts.append(f"{fory}.write_ref({buffer}, {field_value})")
             else:
                 stmt = self._get_write_stmt_for_codegen(serializer, buffer, field_value)
                 if stmt is None:
@@ -874,7 +878,11 @@ def _gen_generated_read_method(self):
                     # dynamic=True: don't pass serializer, read type info from buffer
                     # dynamic=False: pass serializer, use declared type
                     serializer_arg = "None" if is_dynamic else serializer_var
-                    stmts.append(f"{field_value} = {fory}.read_ref({buffer}, serializer={serializer_arg})")
+                    if self.fory.xlang:
+                        stmts.append(f"{field_value} = {fory}.xread_ref({buffer}, serializer={serializer_arg})")
+                    else:
+                        # Python-native read_ref doesn't take serializer kwarg.
+                        stmts.append(f"{field_value} = {fory}.read_ref({buffer})")
             else:
                 stmt = self._get_read_stmt_for_codegen(serializer, buffer, field_value)
                 if stmt is None:
@@ -951,7 +959,10 @@ def write(self, buffer: Buffer, value):
                 else:
                     # dynamic=True: don't pass serializer, write actual type info
                     # dynamic=False: pass serializer, use declared type
-                    self.fory.write_ref(buffer, field_value, serializer=None if is_dynamic else serializer)
+                    if self.fory.xlang:
+                        self.fory.xwrite_ref(buffer, field_value, serializer=None if is_dynamic else serializer)
+                    else:
+                        self.fory.write_ref(buffer, field_value)
             else:
                 if is_dynamic:
                     self.fory.write_no_ref(buffer, field_value)
@@ -993,7 +1004,10 @@ def read(self, buffer):
                     buffer.set_reader_index(buffer.get_reader_index() - 1)
                     # dynamic=True: don't pass serializer, read type info from buffer
                     # dynamic=False: pass serializer, use declared type
-                    field_value = self.fory.read_ref(buffer, serializer=None if is_dynamic else serializer)
+                    if self.fory.xlang:
+                        field_value = self.fory.xread_ref(buffer, serializer=None if is_dynamic else serializer)
+                    else:
+                        field_value = self.fory.read_ref(buffer)
             else:
                 if is_dynamic:
                     field_value = self.fory.read_no_ref(buffer)
diff --git a/python/setup.py b/python/setup.py
index 70b7dfce8b..29860b8ba9 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -18,6 +18,7 @@
 import os
 import platform
 import subprocess
+import time
 from os.path import abspath, join as pjoin
 
 from setuptools import setup
@@ -82,7 +83,21 @@ def __init__(self, attrs=None):
             bazel_args += ["//:cp_fory_so"]
             # Ensure Windows path compatibility
             cwd_path = os.path.normpath(project_dir)
-            subprocess.check_call(bazel_args, cwd=cwd_path)
+            max_attempts = 3
+            for attempt in range(1, max_attempts + 1):
+                try:
+                    subprocess.check_call(bazel_args, cwd=cwd_path)
+                    break
+                except subprocess.CalledProcessError:
+                    if attempt == max_attempts:
+                        raise
+                    # Retry transient dependency fetch failures (e.g. 502 from external archives).
+                    backoff_seconds = 5 * attempt
+                    print(
+                        f"Bazel build failed (attempt {attempt}/{max_attempts}), "
+                        f"retrying in {backoff_seconds}s..."
+                    )
+                    time.sleep(backoff_seconds)
 
     def has_ext_modules(self):
         return True

From 66dbf19a65f5703b073dc95420d254b27a4785cb Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 21:38:10 +0500
Subject: [PATCH 21/23] style(python): format setup retry log line

---
 python/setup.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/python/setup.py b/python/setup.py
index 29860b8ba9..991fffe760 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -93,10 +93,7 @@ def __init__(self, attrs=None):
                         raise
                     # Retry transient dependency fetch failures (e.g. 502 from external archives).
                     backoff_seconds = 5 * attempt
-                    print(
-                        f"Bazel build failed (attempt {attempt}/{max_attempts}), "
-                        f"retrying in {backoff_seconds}s..."
-                    )
+                    print(f"Bazel build failed (attempt {attempt}/{max_attempts}), retrying in {backoff_seconds}s...")
                     time.sleep(backoff_seconds)
 
     def has_ext_modules(self):

From 3ac6ba84b80223585f61df1a4a5c06756922b922 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 21:49:53 +0500
Subject: [PATCH 22/23] fix(python): remove keyword arguments from cpdef
 function calls in collection.pxi

Cython cpdef functions do not support keyword arguments when called
from C code. Changed all read_no_ref(buffer, serializer=...) calls
to use positional arguments read_no_ref(buffer, serializer) instead.
---
 python/pyfory/collection.pxi | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/python/pyfory/collection.pxi b/python/pyfory/collection.pxi
index 394a07dd3f..58ff9cb0eb 100644
--- a/python/pyfory/collection.pxi
+++ b/python/pyfory/collection.pxi
@@ -239,7 +239,7 @@ cdef class CollectionSerializer(Serializer):
     cpdef _read_same_type_no_ref(self, Buffer buffer, int64_t len_, object collection_, TypeInfo typeinfo):
         self.fory.inc_depth()
         for i in range(len_):
-            obj = self.fory.read_no_ref(buffer, serializer=typeinfo.serializer)
+            obj = self.fory.read_no_ref(buffer, typeinfo.serializer)
             self._add_element(collection_, i, obj)
         self.fory.dec_depth()
 
@@ -262,7 +262,7 @@ cdef class CollectionSerializer(Serializer):
                 self._add_element(
                     collection_,
                     i,
-                    self.fory.read_no_ref(buffer, serializer=typeinfo.serializer),
+                    self.fory.read_no_ref(buffer, typeinfo.serializer),
                 )
         self.fory.dec_depth()
 
@@ -354,7 +354,7 @@ cdef class ListSerializer(CollectionSerializer):
                     if is_py:
                         elem = typeinfo.serializer.read(buffer)
                     else:
-                        elem = self.fory.read_no_ref(buffer, serializer=typeinfo.serializer)
+                        elem = self.fory.read_no_ref(buffer, typeinfo.serializer)
                     Py_INCREF(elem)
                     PyList_SET_ITEM(list_, i, elem)
             else:
@@ -368,7 +368,7 @@ cdef class ListSerializer(CollectionSerializer):
                         if is_py:
                             elem = typeinfo.serializer.read(buffer)
                         else:
-                            elem = self.fory.read_no_ref(buffer, serializer=typeinfo.serializer)
+                            elem = self.fory.read_no_ref(buffer, typeinfo.serializer)
                     Py_INCREF(elem)
                     PyList_SET_ITEM(list_, i, elem)
             self.fory.dec_depth()
@@ -469,7 +469,7 @@ cdef class TupleSerializer(CollectionSerializer):
                     if is_py:
                         elem = typeinfo.serializer.read(buffer)
                     else:
-                        elem = self.fory.read_no_ref(buffer, serializer=typeinfo.serializer)
+                        elem = self.fory.read_no_ref(buffer, typeinfo.serializer)
                     Py_INCREF(elem)
                     PyTuple_SET_ITEM(tuple_, i, elem)
             else:
@@ -483,7 +483,7 @@ cdef class TupleSerializer(CollectionSerializer):
                         if is_py:
                             elem = typeinfo.serializer.read(buffer)
                         else:
-                            elem = self.fory.read_no_ref(buffer, serializer=typeinfo.serializer)
+                            elem = self.fory.read_no_ref(buffer, typeinfo.serializer)
                     Py_INCREF(elem)
                     PyTuple_SET_ITEM(tuple_, i, elem)
             self.fory.dec_depth()
@@ -592,7 +592,7 @@ cdef class SetSerializer(CollectionSerializer):
                         if is_py:
                             instance.add(typeinfo.serializer.read(buffer))
                         else:
-                            instance.add(self.fory.read_no_ref(buffer, serializer=typeinfo.serializer))
+                            instance.add(self.fory.read_no_ref(buffer, typeinfo.serializer))
             else:
                 # When ref tracking is disabled but has nulls, read null flag first
                 for i in range(len_):
@@ -614,7 +614,7 @@ cdef class SetSerializer(CollectionSerializer):
                             if is_py:
                                 instance.add(typeinfo.serializer.read(buffer))
                             else:
-                                instance.add(self.fory.read_no_ref(buffer, serializer=typeinfo.serializer))
+                                instance.add(self.fory.read_no_ref(buffer, typeinfo.serializer))
             self.fory.dec_depth()
         return instance
 
@@ -898,7 +898,7 @@ cdef class MapSerializer(Serializer):
                                 if is_py:
                                     key = key_serializer.read(buffer)
                                 else:
-                                    key = fory.read_no_ref(buffer, serializer=key_serializer)
+                                    key = fory.read_no_ref(buffer, key_serializer)
                         else:
                             if is_py:
                                 key = fory.read_ref(buffer)
@@ -923,7 +923,7 @@ cdef class MapSerializer(Serializer):
                                 if is_py:
                                     value = (<object> value_serializer).read(buffer)
                                 else:
-                                    value = fory.read_no_ref(buffer, serializer=value_serializer)
+                                    value = fory.read_no_ref(buffer, value_serializer)
                         else:
                             if is_py:
                                 value = fory.read_ref(buffer)
@@ -975,7 +975,7 @@ cdef class MapSerializer(Serializer):
                         if is_py:
                             key = (<object> key_serializer).read(buffer)
                         else:
-                            key = fory.read_no_ref(buffer, serializer=key_serializer)
+                            key = fory.read_no_ref(buffer, key_serializer)
                 if track_value_ref:
                     ref_id = ref_resolver.try_preserve_ref_id(buffer)
                     if ref_id < NOT_NULL_VALUE_FLAG:
@@ -1003,7 +1003,7 @@ cdef class MapSerializer(Serializer):
                         if is_py:
                             value = (<object> value_serializer).read(buffer)
                         else:
-                            value = fory.read_no_ref(buffer, serializer=value_serializer)
+                            value = fory.read_no_ref(buffer, value_serializer)
                 map_[key] = value
                 size -= 1
             if size != 0:

From 308bde932d87921cffa5d78cbb0d6dac4839a132 Mon Sep 17 00:00:00 2001
From: asadjan4611 <asadjan4611@gmail.com>
Date: Sat, 21 Feb 2026 22:00:30 +0500
Subject: [PATCH 23/23] fix(python): remove keyword arguments from cpdef
 function calls in struct.py and collection.py

Cython cpdef functions do not support keyword arguments when called
from C code. Changed all xwrite_ref, xread_ref, write_no_ref, and
read_no_ref calls to use positional arguments instead of keyword
arguments (serializer=...).
---
 python/pyfory/collection.py | 10 +++++-----
 python/pyfory/struct.py     | 16 ++++++++--------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/python/pyfory/collection.py b/python/pyfory/collection.py
index c7d6b9c376..df5cac3e07 100644
--- a/python/pyfory/collection.py
+++ b/python/pyfory/collection.py
@@ -197,7 +197,7 @@ def _read_same_type_no_ref(self, buffer, len_, collection_, typeinfo):
         for _ in range(len_):
             self._add_element(
                 collection_,
-                self.fory.read_no_ref(buffer, serializer=typeinfo.serializer),
+                self.fory.read_no_ref(buffer, typeinfo.serializer),
             )
         self.fory.dec_depth()
 
@@ -209,7 +209,7 @@ def _read_same_type_has_null(self, buffer, len_, collection_, typeinfo):
             else:
                 self._add_element(
                     collection_,
-                    self.fory.read_no_ref(buffer, serializer=typeinfo.serializer),
+                    self.fory.read_no_ref(buffer, typeinfo.serializer),
                 )
         self.fory.dec_depth()
 
@@ -241,7 +241,7 @@ def _read_different_types(self, buffer, len_, collection_, collect_flag):
                 if typeinfo is None:
                     elem = None
                 else:
-                    elem = self.fory.read_no_ref(buffer, serializer=typeinfo.serializer)
+                    elem = self.fory.read_no_ref(buffer, typeinfo.serializer)
                 self._add_element(collection_, elem)
         else:
             # When ref tracking is disabled but has nulls, read null flag first
@@ -254,7 +254,7 @@ def _read_different_types(self, buffer, len_, collection_, collect_flag):
                     if typeinfo is None:
                         elem = None
                     else:
-                        elem = self.fory.read_no_ref(buffer, serializer=typeinfo.serializer)
+                        elem = self.fory.read_no_ref(buffer, typeinfo.serializer)
                 self._add_element(collection_, elem)
         self.fory.dec_depth()
 
@@ -575,7 +575,7 @@ def _read_obj(self, serializer, buffer):
         return serializer.read(buffer)
 
     def _read_obj_no_ref(self, serializer, buffer):
-        return self.fory.read_no_ref(buffer, serializer=serializer)
+        return self.fory.read_no_ref(buffer, serializer)
 
 
 SubMapSerializer = MapSerializer
diff --git a/python/pyfory/struct.py b/python/pyfory/struct.py
index 7eb533da05..324bd08e6d 100644
--- a/python/pyfory/struct.py
+++ b/python/pyfory/struct.py
@@ -769,7 +769,7 @@ def _gen_generated_write_method(self):
                     # dynamic=False: pass serializer, use declared type
                     serializer_arg = "None" if is_dynamic else serializer_var
                     if self.fory.xlang:
-                        stmts.append(f"{fory}.xwrite_ref({buffer}, {field_value}, serializer={serializer_arg})")
+                        stmts.append(f"{fory}.xwrite_ref({buffer}, {field_value}, {serializer_arg})")
                     else:
                         # Python-native write_ref doesn't take serializer kwarg.
                         stmts.append(f"{fory}.write_ref({buffer}, {field_value})")
@@ -781,7 +781,7 @@ def _gen_generated_write_method(self):
                     if is_dynamic:
                         stmt = f"{fory}.write_no_ref({buffer}, {field_value})"
                     else:
-                        stmt = f"{fory}.write_no_ref({buffer}, {field_value}, serializer={serializer_var})"
+                        stmt = f"{fory}.write_no_ref({buffer}, {field_value}, {serializer_var})"
                 # In compatible mode, handle None for non-nullable fields (schema evolution)
                 # Write zero/default value when field is None due to missing from remote schema
                 if self.fory.compatible:
@@ -879,7 +879,7 @@ def _gen_generated_read_method(self):
                     # dynamic=False: pass serializer, use declared type
                     serializer_arg = "None" if is_dynamic else serializer_var
                     if self.fory.xlang:
-                        stmts.append(f"{field_value} = {fory}.xread_ref({buffer}, serializer={serializer_arg})")
+                        stmts.append(f"{field_value} = {fory}.xread_ref({buffer}, {serializer_arg})")
                     else:
                         # Python-native read_ref doesn't take serializer kwarg.
                         stmts.append(f"{field_value} = {fory}.read_ref({buffer})")
@@ -891,7 +891,7 @@ def _gen_generated_read_method(self):
                     if is_dynamic:
                         stmt = f"{field_value} = {fory}.read_no_ref({buffer})"
                     else:
-                        stmt = f"{field_value} = {fory}.read_no_ref({buffer}, serializer={serializer_var})"
+                        stmt = f"{field_value} = {fory}.read_no_ref({buffer}, {serializer_var})"
                 stmts.append(stmt)
 
             if field_name not in current_class_field_names:
@@ -960,14 +960,14 @@ def write(self, buffer: Buffer, value):
                     # dynamic=True: don't pass serializer, write actual type info
                     # dynamic=False: pass serializer, use declared type
                     if self.fory.xlang:
-                        self.fory.xwrite_ref(buffer, field_value, serializer=None if is_dynamic else serializer)
+                        self.fory.xwrite_ref(buffer, field_value, None if is_dynamic else serializer)
                     else:
                         self.fory.write_ref(buffer, field_value)
             else:
                 if is_dynamic:
                     self.fory.write_no_ref(buffer, field_value)
                 else:
-                    self.fory.write_no_ref(buffer, field_value, serializer=serializer)
+                    self.fory.write_no_ref(buffer, field_value, serializer)
 
     def read(self, buffer):
         """Read dataclass instance from buffer.
@@ -1005,14 +1005,14 @@ def read(self, buffer):
                     # dynamic=True: don't pass serializer, read type info from buffer
                     # dynamic=False: pass serializer, use declared type
                     if self.fory.xlang:
-                        field_value = self.fory.xread_ref(buffer, serializer=None if is_dynamic else serializer)
+                        field_value = self.fory.xread_ref(buffer, None if is_dynamic else serializer)
                     else:
                         field_value = self.fory.read_ref(buffer)
             else:
                 if is_dynamic:
                     field_value = self.fory.read_no_ref(buffer)
                 else:
-                    field_value = self.fory.read_no_ref(buffer, serializer=serializer)
+                    field_value = self.fory.read_no_ref(buffer, serializer)
             if field_name in current_class_field_names:
                 setattr(obj, field_name, field_value)
                 read_field_names.add(field_name)