diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 236da808c038..bcaf0963af6f 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -33,6 +33,7 @@ RUnion, int_rprimitive, is_bool_or_bit_rprimitive, + is_bytearray_rprimitive, is_bytes_rprimitive, is_dict_rprimitive, is_fixed_width_rtype, @@ -664,6 +665,16 @@ def emit_cast( self.emit_lines(f" {dest} = {src};", "else {") self.emit_cast_error_handler(error, src, dest, typ, raise_exception) self.emit_line("}") + elif is_bytearray_rprimitive(typ): + if declare_dest: + self.emit_line(f"PyObject *{dest};") + check = "(PyByteArray_Check({}))" + if likely: + check = f"(likely{check})" + self.emit_arg_check(src, dest, typ, check.format(src), optional) + self.emit_lines(f" {dest} = {src};", "else {") + self.emit_cast_error_handler(error, src, dest, typ, raise_exception) + self.emit_line("}") elif is_tuple_rprimitive(typ): if declare_dest: self.emit_line(f"{self.ctype(typ)} {dest};") diff --git a/mypyc/doc/bytearray_operations.rst b/mypyc/doc/bytearray_operations.rst new file mode 100644 index 000000000000..3ee242b5648b --- /dev/null +++ b/mypyc/doc/bytearray_operations.rst @@ -0,0 +1,13 @@ +.. _bytearray-ops: + +Native bytearray operations +============================ + +These ``bytearray`` operations have fast, optimized implementations. Other +bytearray operations use generic implementations that are often slower. + +Construction +------------ + +* ``bytearray()`` +* ``bytearray(x)`` diff --git a/mypyc/doc/index.rst b/mypyc/doc/index.rst index 094e0f8cd9b8..004ead0f4791 100644 --- a/mypyc/doc/index.rst +++ b/mypyc/doc/index.rst @@ -37,6 +37,7 @@ generate fast code. 
 float_operations str_operations bytes_operations + bytearray_operations list_operations dict_operations set_operations diff --git a/mypyc/doc/using_type_annotations.rst b/mypyc/doc/using_type_annotations.rst index dc0b04a974fd..4ecc77ab6669 100644 --- a/mypyc/doc/using_type_annotations.rst +++ b/mypyc/doc/using_type_annotations.rst @@ -37,11 +37,14 @@ implementations: * ``float`` (:ref:`native operations <float-ops>`) * ``bool`` (:ref:`native operations <bool-ops>`) * ``str`` (:ref:`native operations <str-ops>`) +* ``bytes`` (:ref:`native operations <bytes-ops>`) +* ``bytearray`` (:ref:`native operations <bytearray-ops>`) * ``list[T]`` (:ref:`native operations <list-ops>`) * ``dict[K, V]`` (:ref:`native operations <dict-ops>`) * ``set[T]`` (:ref:`native operations <set-ops>`) * ``tuple[T, ...]`` (variable-length tuple; :ref:`native operations <tuple-ops>`) * ``None`` +* ``range`` The link after each type lists all supported native, optimized operations for the type. You can use all operations supported by diff --git a/mypyc/ir/deps.py b/mypyc/ir/deps.py index 9aee5d1b33e4..249b456e2c85 100644 --- a/mypyc/ir/deps.py +++ b/mypyc/ir/deps.py @@ -51,3 +51,4 @@ def get_header(self) -> str: BYTES_EXTRA_OPS: Final = SourceDep("bytes_extra_ops.c") BYTES_WRITER_EXTRA_OPS: Final = SourceDep("byteswriter_extra_ops.c") +BYTEARRAY_EXTRA_OPS: Final = SourceDep("bytearray_extra_ops.c") diff --git a/mypyc/ir/rtypes.py b/mypyc/ir/rtypes.py index 1335f818bebc..20845a10177f 100644 --- a/mypyc/ir/rtypes.py +++ b/mypyc/ir/rtypes.py @@ -504,6 +504,11 @@ def __hash__(self) -> int: # Python bytes object. bytes_rprimitive: Final = RPrimitive("builtins.bytes", is_unboxed=False, is_refcounted=True) +# Python bytearray object. +bytearray_rprimitive: Final = RPrimitive( + "builtins.bytearray", is_unboxed=False, is_refcounted=True +) + # Tuple of an arbitrary length (corresponds to Tuple[t, ...], with # explicit '...'). 
tuple_rprimitive: Final = RPrimitive("builtins.tuple", is_unboxed=False, is_refcounted=True) @@ -632,6 +637,10 @@ def is_bytes_rprimitive(rtype: RType) -> TypeGuard[RPrimitive]: return isinstance(rtype, RPrimitive) and rtype.name == "builtins.bytes" +def is_bytearray_rprimitive(rtype: RType) -> TypeGuard[RPrimitive]: + return isinstance(rtype, RPrimitive) and rtype.name == "builtins.bytearray" + + def is_tuple_rprimitive(rtype: RType) -> TypeGuard[RPrimitive]: return isinstance(rtype, RPrimitive) and rtype.name == "builtins.tuple" @@ -646,6 +655,7 @@ def is_sequence_rprimitive(rtype: RType) -> TypeGuard[RPrimitive]: or is_tuple_rprimitive(rtype) or is_str_rprimitive(rtype) or is_bytes_rprimitive(rtype) + or is_bytearray_rprimitive(rtype) ) diff --git a/mypyc/irbuild/mapper.py b/mypyc/irbuild/mapper.py index c986499b6f65..550dc6e42c9e 100644 --- a/mypyc/irbuild/mapper.py +++ b/mypyc/irbuild/mapper.py @@ -31,6 +31,7 @@ RType, RUnion, bool_rprimitive, + bytearray_rprimitive, bytes_rprimitive, dict_rprimitive, float_rprimitive, @@ -88,6 +89,8 @@ def type_to_rtype(self, typ: Type | None) -> RType: return str_rprimitive elif typ.type.fullname == "builtins.bytes": return bytes_rprimitive + elif typ.type.fullname == "builtins.bytearray": + return bytearray_rprimitive elif typ.type.fullname == "builtins.list": return list_rprimitive # Dict subclasses are at least somewhat common and we diff --git a/mypyc/irbuild/specialize.py b/mypyc/irbuild/specialize.py index c0ed5f9f69da..785a0a7ac61f 100644 --- a/mypyc/irbuild/specialize.py +++ b/mypyc/irbuild/specialize.py @@ -94,7 +94,8 @@ join_formatted_strings, tokenizer_format_call, ) -from mypyc.primitives.bytes_ops import isinstance_bytearray, isinstance_bytes +from mypyc.primitives.bytearray_ops import isinstance_bytearray +from mypyc.primitives.bytes_ops import isinstance_bytes from mypyc.primitives.dict_ops import ( dict_items_op, dict_keys_op, diff --git a/mypyc/lib-rt/bytearray_extra_ops.c 
b/mypyc/lib-rt/bytearray_extra_ops.c new file mode 100644 index 000000000000..a956f8c76f1f --- /dev/null +++ b/mypyc/lib-rt/bytearray_extra_ops.c @@ -0,0 +1,5 @@ +#include "bytearray_extra_ops.h" + +PyObject *CPyByteArray_New(void) { + return PyByteArray_FromStringAndSize(NULL, 0); +} diff --git a/mypyc/lib-rt/bytearray_extra_ops.h b/mypyc/lib-rt/bytearray_extra_ops.h new file mode 100644 index 000000000000..41f17be3ab96 --- /dev/null +++ b/mypyc/lib-rt/bytearray_extra_ops.h @@ -0,0 +1,10 @@ +#ifndef MYPYC_BYTEARRAY_EXTRA_OPS_H +#define MYPYC_BYTEARRAY_EXTRA_OPS_H + +#include <Python.h> +#include "CPy.h" + +// Construct empty bytearray +PyObject *CPyByteArray_New(void); + +#endif diff --git a/mypyc/primitives/bytearray_ops.py b/mypyc/primitives/bytearray_ops.py new file mode 100644 index 000000000000..2128a6d48d5c --- /dev/null +++ b/mypyc/primitives/bytearray_ops.py @@ -0,0 +1,44 @@ +"""Primitive bytearray ops. + +NOTE: Most of these should be added to bytearray_extra_ops.c, which requires the + BYTEARRAY_EXTRA_OPS primitive dependency, since these are used relatively rarely and we + don't want to compile them unless needed. +""" + +from __future__ import annotations + +from mypyc.ir.deps import BYTEARRAY_EXTRA_OPS +from mypyc.ir.ops import ERR_MAGIC, ERR_NEVER +from mypyc.ir.rtypes import bit_rprimitive, bytearray_rprimitive, object_rprimitive +from mypyc.primitives.registry import custom_primitive_op, function_op, load_address_op + +# Get the 'bytearray' type object. 
+load_address_op(name="builtins.bytearray", type=object_rprimitive, src="PyByteArray_Type") + +# bytearray(obj) +function_op( + name="builtins.bytearray", + arg_types=[object_rprimitive], + return_type=bytearray_rprimitive, + c_function_name="PyByteArray_FromObject", + error_kind=ERR_MAGIC, +) + +# bytearray() -- construct empty bytearray +function_op( + name="builtins.bytearray", + arg_types=[], + return_type=bytearray_rprimitive, + c_function_name="CPyByteArray_New", + error_kind=ERR_MAGIC, + dependencies=[BYTEARRAY_EXTRA_OPS], +) + +# isinstance(obj, bytearray) +isinstance_bytearray = custom_primitive_op( + name="builtins.isinstance", + arg_types=[object_rprimitive], + return_type=bit_rprimitive, + c_function_name="PyByteArray_Check", + error_kind=ERR_NEVER, +) diff --git a/mypyc/primitives/bytes_ops.py b/mypyc/primitives/bytes_ops.py index 2268acd80aa5..53e7832c3998 100644 --- a/mypyc/primitives/bytes_ops.py +++ b/mypyc/primitives/bytes_ops.py @@ -47,24 +47,6 @@ error_kind=ERR_NEVER, ) -# bytearray(obj) -function_op( - name="builtins.bytearray", - arg_types=[object_rprimitive], - return_type=object_rprimitive, - c_function_name="PyByteArray_FromObject", - error_kind=ERR_MAGIC, -) - -# translate isinstance(obj, bytearray) -isinstance_bytearray = function_op( - name="builtins.isinstance", - arg_types=[object_rprimitive], - return_type=bit_rprimitive, - c_function_name="PyByteArray_Check", - error_kind=ERR_NEVER, -) - # bytes ==/!= (return -1/0/1) bytes_compare = custom_op( arg_types=[bytes_rprimitive, bytes_rprimitive], diff --git a/mypyc/primitives/librt_strings_ops.py b/mypyc/primitives/librt_strings_ops.py index ac1aa7da1fbe..ffe37b5c099a 100644 --- a/mypyc/primitives/librt_strings_ops.py +++ b/mypyc/primitives/librt_strings_ops.py @@ -2,6 +2,7 @@ from mypyc.ir.ops import ERR_MAGIC, ERR_NEVER from mypyc.ir.rtypes import ( bool_rprimitive, + bytearray_rprimitive, bytes_rprimitive, bytes_writer_rprimitive, int64_rprimitive, @@ -42,6 +43,16 @@ 
dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], ) +method_op( + name="write", + arg_types=[bytes_writer_rprimitive, bytearray_rprimitive], + return_type=none_rprimitive, + c_function_name="CPyBytesWriter_Write", + error_kind=ERR_MAGIC, + experimental=True, + dependencies=[LIBRT_STRINGS, BYTES_WRITER_EXTRA_OPS], +) + method_op( name="append", arg_types=[bytes_writer_rprimitive, uint8_rprimitive], diff --git a/mypyc/primitives/registry.py b/mypyc/primitives/registry.py index a59599f693c4..0e3032719ca1 100644 --- a/mypyc/primitives/registry.py +++ b/mypyc/primitives/registry.py @@ -388,6 +388,7 @@ def load_address_op(name: str, type: RType, src: str) -> LoadAddressDescription: # Import various modules that set up global state. +import mypyc.primitives.bytearray_ops import mypyc.primitives.bytes_ops import mypyc.primitives.dict_ops import mypyc.primitives.float_ops diff --git a/mypyc/test-data/irbuild-bytes.test b/mypyc/test-data/irbuild-bytes.test index 613ba4e46eee..6ec09bedad48 100644 --- a/mypyc/test-data/irbuild-bytes.test +++ b/mypyc/test-data/irbuild-bytes.test @@ -48,20 +48,17 @@ def f(s: str, num: int) -> None: def f(s, num): s :: str num :: int - r0 :: object - r1 :: str - r2, r3, a, r4, b, r5, r6, c :: object + r0, a, r1, b :: bytearray + r2 :: object + r3, c :: bytearray L0: - r0 = builtins :: module - r1 = 'bytearray' - r2 = CPyObject_GetAttr(r0, r1) - r3 = PyObject_Vectorcall(r2, 0, 0, 0) - a = r3 - r4 = PyByteArray_FromObject(s) - b = r4 - r5 = box(int, num) - r6 = PyByteArray_FromObject(r5) - c = r6 + r0 = CPyByteArray_New() + a = r0 + r1 = PyByteArray_FromObject(s) + b = r1 + r2 = box(int, num) + r3 = PyByteArray_FromObject(r2) + c = r3 return 1 [case testBytesEquality] diff --git a/mypyc/test-data/run-bytes.test b/mypyc/test-data/run-bytes.test index f5eb6abfe234..89ccfd66d288 100644 --- a/mypyc/test-data/run-bytes.test +++ b/mypyc/test-data/run-bytes.test @@ -260,6 +260,10 @@ from typing import Any from testutil import assertRaises def 
test_basics() -> None: + brr0 = bytearray() + assert brr0 == bytearray(b'') + assert brr0 == b'' + assert len(brr0) == 0 brr1 = bytearray(3) assert brr1 == bytearray(b'\x00\x00\x00') assert brr1 == b'\x00\x00\x00' @@ -276,16 +280,30 @@ def test_basics() -> None: assert len(brr1) == 3 assert len(brr2) == 4 -def f(b: bytes) -> str: +def test_bytearray_type_object() -> None: + b = bytearray() + assert type(b) is bytearray + +def fb(b: bytes) -> str: return "xy" def test_bytearray_passed_into_bytes() -> None: brr1: Any = bytearray() with assertRaises(TypeError, "bytes object expected; got bytearray"): - f(brr1) + fb(brr1) with assertRaises(TypeError, "bytes object expected; got bytearray"): b: bytes = brr1 +def fba(brr: bytearray) -> str: + return "xy" + +def test_bytes_passed_into_bytearray() -> None: + b: Any = b"foo" + with assertRaises(TypeError, "bytearray object expected; got bytes"): + fba(b) + with assertRaises(TypeError, "bytearray object expected; got bytes"): + brr: bytearray = b + [case testBytearraySlicing] def test_bytearray_slicing() -> None: b = bytearray(b'abcdefg')