diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 435d09259ce99..5242739f8f846 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -139,6 +139,7 @@ jobs:
     timeout-minutes: 60
     env:
       TOX_SKIP_MISSING_INTERPRETERS: False
+      VIRTUALENV_SYSTEM_SITE_PACKAGES: ${{ matrix.test_mypyc && 1 || 0 }}  # 1 for mypyc jobs, 0 otherwise
       # Rich (pip) -- Disable color for windows + pytest
       FORCE_COLOR: ${{ !(startsWith(matrix.os, 'windows-') && startsWith(matrix.toxenv, 'py')) && 1 || 0 }}
       # Tox
@@ -209,8 +210,10 @@ jobs:
 
     - name: Compiled with mypyc
       if: ${{ matrix.test_mypyc }}
+      # Use local version of librt during self-compilation in tests.
       run: |
         pip install -r test-requirements.txt
+        pip install -U mypyc/lib-rt
         CC=clang MYPYC_OPT_LEVEL=0 MYPY_USE_MYPYC=1 pip install -e .
 
     - name: Setup tox environment
diff --git a/mypy/cache.py b/mypy/cache.py
index 0adc7affb8cb4..c4b74230c4c28 100644
--- a/mypy/cache.py
+++ b/mypy/cache.py
@@ -239,6 +239,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
 # Always use this type alias to refer to type tags.
 Tag = u8
 
+# Note: all tags should be kept in sync with lib-rt/internal/librt_internal.c.
 # Primitives.
 LITERAL_FALSE: Final[Tag] = 0
 LITERAL_TRUE: Final[Tag] = 1
@@ -264,6 +265,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
 # Four integers representing source file (line, column) range.
 LOCATION: Final[Tag] = 152
 
+RESERVED: Final[Tag] = 254  # mirrored by "#define RESERVED 254" in librt_internal.c
 END_TAG: Final[Tag] = 255
 
 
diff --git a/mypy/nodes.py b/mypy/nodes.py
index 37ea4d3b0d561..dda01d64493fc 100644
--- a/mypy/nodes.py
+++ b/mypy/nodes.py
@@ -4930,7 +4930,20 @@ def read(cls, data: ReadBuffer) -> SymbolTableNode:
         sym.plugin_generated = read_bool(data)
         cross_ref = read_str_opt(data)
         if cross_ref is None:
-            sym.node = read_symbol(data)
+            tag = read_tag(data)  # opening tag is consumed here and handed to read_symbol()
+            if tag == TYPE_INFO:  # TypeInfo is read directly from the shared buffer
+                sym.node = TypeInfo.read(data)
+            else:
+                # This logic is temporary, to make sure we don't introduce
+                # regressions until we have proper lazy deserialization.
+                # It has negligible performance impact.
+                try:
+                    from librt.internal import extract_symbol
+                except ImportError:  # librt.internal or extract_symbol is unavailable
+                    sym.node = read_symbol(data, tag)
+                else:
+                    node_bytes = extract_symbol(data)  # serialized bytes of this one symbol
+                    sym.node = read_symbol(ReadBuffer(node_bytes), tag)  # parse from a separate buffer
         else:
             sym.cross_ref = cross_ref
         assert read_tag(data) == END_TAG
@@ -5333,8 +5346,7 @@ def local_definitions(
 TSTRING_EXPR: Final[Tag] = 229
 
 
-def read_symbol(data: ReadBuffer) -> SymbolNode:
-    tag = read_tag(data)
+def read_symbol(data: ReadBuffer, tag: Tag) -> SymbolNode:  # tag: already read by the caller
     # The branches here are ordered manually by type "popularity".
     if tag == VAR:
         return Var.read(data)
@@ -5342,8 +5354,6 @@ def read_symbol(data: ReadBuffer) -> SymbolNode:
         return FuncDef.read(data)
     if tag == DECORATOR:
         return Decorator.read(data)
-    if tag == TYPE_INFO:
-        return TypeInfo.read(data)
     if tag == OVERLOADED_FUNC_DEF:
         return OverloadedFuncDef.read(data)
     if tag == TYPE_VAR_EXPR:
diff --git a/mypy/typeshed/stubs/librt/librt/internal.pyi b/mypy/typeshed/stubs/librt/librt/internal.pyi
index 72ed5a7ee76f4..8e597c4edbd70 100644
--- a/mypy/typeshed/stubs/librt/librt/internal.pyi
+++ b/mypy/typeshed/stubs/librt/librt/internal.pyi
@@ -19,3 +19,4 @@ def read_int(data: ReadBuffer, /) -> int: ...
 def write_tag(data: WriteBuffer, value: u8, /) -> None: ...
 def read_tag(data: ReadBuffer, /) -> u8: ...
 def cache_version() -> u8: ...
+def extract_symbol(data: ReadBuffer, /) -> bytes: ...  # implemented in librt_internal.c
diff --git a/mypyc/lib-rt/internal/librt_internal.c b/mypyc/lib-rt/internal/librt_internal.c
index ca8bc8bbea99a..3a301221e89f3 100644
--- a/mypyc/lib-rt/internal/librt_internal.c
+++ b/mypyc/lib-rt/internal/librt_internal.c
@@ -920,6 +920,273 @@ write_tag(PyObject *self, PyObject *const *args, size_t nargs) {
     return Py_None;
 }
 
+// All tags must be kept in sync with cache.py, nodes.py, and types.py.
+// Primitive types.
+#define LITERAL_FALSE 0
+#define LITERAL_TRUE 1
+#define LITERAL_NONE 2
+#define LITERAL_INT 3
+#define LITERAL_STR 4
+#define LITERAL_BYTES 5
+#define LITERAL_FLOAT 6
+#define LITERAL_COMPLEX 7
+
+// Supported builtin collections.
+#define LIST_GEN 20
+#define LIST_INT 21
+#define LIST_STR 22
+#define LIST_BYTES 23
+#define TUPLE_GEN 24
+#define DICT_STR_GEN 30
+
+// This is the smallest custom class tag.
+#define MYPY_FILE 50
+
+// Instance class has special formats.
+#define INSTANCE 80
+#define INSTANCE_SIMPLE 81
+#define INSTANCE_GENERIC 82
+#define INSTANCE_STR 83
+#define INSTANCE_FUNCTION 84
+#define INSTANCE_INT 85
+#define INSTANCE_BOOL 86
+#define INSTANCE_OBJECT 87
+
+#define RESERVED 254
+#define END_TAG 255
+
+// Forward declaration.
+static char _skip_object(PyObject *data, uint8_t tag);
+
+static inline char
+_skip(PyObject *data, Py_ssize_t size) {  // advance the read pointer by size bytes
+    // We are careful about error conditions, so all
+    // _skip_xxx() functions can return an error value.
+    _CHECK_READ(data, size, CPY_NONE_ERROR)
+    ((ReadBufferObject *)data)->ptr += size;
+    return CPY_NONE;
+}
+
+static inline char
+_skip_short_int(PyObject *data, uint8_t first) {  // first: the already-read leading byte
+    if ((first & TWO_BYTES_INT_BIT) == 0)
+        return CPY_NONE;  // one-byte form: nothing more to skip
+    if ((first & FOUR_BYTES_INT_BIT) == 0)
+        return _skip(data, 1);  // two-byte form: one byte left
+    return _skip(data, 3);  // four-byte form: three bytes left
+}
+
+static inline char
+_skip_int(PyObject *data) {  // skip one serialized int (short or long encoding)
+    _CHECK_READ(data, 1, CPY_NONE_ERROR)
+
+    uint8_t first;
+    _READ(&first, data, uint8_t);
+    if (likely(first != LONG_INT_TRAILER)) {  // common case: short encoding
+        return _skip_short_int(data, first);
+    }
+
+    _CHECK_READ(data, 1, CPY_NONE_ERROR)
+    _READ(&first, data, uint8_t);
+    Py_ssize_t size_and_sign = _read_short_int(data, first);
+    if (size_and_sign == CPY_INT_TAG)
+        return CPY_NONE_ERROR;
+    if ((Py_ssize_t)size_and_sign < 0) {
+        PyErr_SetString(PyExc_ValueError, "invalid int data");
+        return CPY_NONE_ERROR;
+    }
+    Py_ssize_t size = size_and_sign >> 2;  // presumably drops the tag and sign bits -- confirm against read_int
+    return _skip(data, size);  // skip the payload bytes of the long int
+}
+
+// This is essentially a wrapper around _read_short_int() that makes
+// sure the result is valid.
+static inline Py_ssize_t
+_read_size(PyObject *data) {  // returns the size, or -1 with an exception set
+    _CHECK_READ(data, 1, -1)
+    uint8_t first;
+    _READ(&first, data, uint8_t);
+    // We actually allow serializing lists/dicts with over 4 billion items,
+    // but we don't really need to, fail with ValueError just in case.
+    if (unlikely(first == LONG_INT_TRAILER)) {
+        PyErr_SetString(PyExc_ValueError, "unsupported size");
+        return -1;
+    }
+    CPyTagged tagged_size = _read_short_int(data, first);
+    if (tagged_size == CPY_INT_TAG)
+        return -1;
+    if ((Py_ssize_t)tagged_size < 0) {
+        PyErr_SetString(PyExc_ValueError, "invalid size");
+        return -1;
+    }
+    Py_ssize_t size = tagged_size >> 1;  // presumably untags the short int -- confirm against CPyTagged layout
+    return size;
+}
+
+static inline char
+_skip_str_bytes(PyObject *data) {  // size prefix, then that many bytes are skipped
+    Py_ssize_t size = _read_size(data);
+    if (size < 0)
+        return CPY_NONE_ERROR;
+    return _skip(data, size);
+}
+
+// List/dict logic should be kept in sync with mypy/cache.py
+static inline char
+_skip_list_gen(PyObject *data) {  // size prefix, then size tagged objects
+    Py_ssize_t size = _read_size(data);
+    if (size < 0)
+        return CPY_NONE_ERROR;
+    Py_ssize_t i;  // same type as size, so the counter cannot overflow before reaching it
+    for (i = 0; i < size; i++) {
+        uint8_t tag = read_tag_internal(data);
+        if (unlikely(tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
+            return CPY_NONE_ERROR;
+        }
+        if (unlikely(_skip_object(data, tag) == CPY_NONE_ERROR))
+            return CPY_NONE_ERROR;
+    }
+    return CPY_NONE;
+}
+
+static inline char
+_skip_list_int(PyObject *data) {  // size prefix, then size untagged ints
+    Py_ssize_t size = _read_size(data);
+    if (size < 0)
+        return CPY_NONE_ERROR;
+    Py_ssize_t i;  // same type as size
+    for (i = 0; i < size; i++) {
+        if (unlikely(_skip_int(data) == CPY_NONE_ERROR))
+            return CPY_NONE_ERROR;
+    }
+    return CPY_NONE;
+}
+
+static inline char
+_skip_list_str_bytes(PyObject *data) {  // size prefix, then size untagged str/bytes items
+    Py_ssize_t size = _read_size(data);
+    if (size < 0)
+        return CPY_NONE_ERROR;
+    Py_ssize_t i;  // same type as size
+    for (i = 0; i < size; i++) {
+        if (unlikely(_skip_str_bytes(data) == CPY_NONE_ERROR))
+            return CPY_NONE_ERROR;
+    }
+    return CPY_NONE;
+}
+
+static inline char
+_skip_dict_str_gen(PyObject *data) {  // size prefix, then size (key, value) pairs
+    Py_ssize_t size = _read_size(data);
+    if (size < 0)
+        return CPY_NONE_ERROR;
+    Py_ssize_t i;  // same type as size
+    for (i = 0; i < size; i++) {
+        // Bare key followed by tagged value.
+        if (unlikely(_skip_str_bytes(data) == CPY_NONE_ERROR))
+            return CPY_NONE_ERROR;
+        uint8_t tag = read_tag_internal(data);
+        if (unlikely(tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
+            return CPY_NONE_ERROR;
+        }
+        if (unlikely(_skip_object(data, tag) == CPY_NONE_ERROR))
+            return CPY_NONE_ERROR;
+    }
+    return CPY_NONE;
+}
+
+// Similar to mypy/cache.py, the convention is that the caller reads
+// the opening tag for custom classes.
+static inline char
+_skip_class(PyObject *data) {  // skip tagged fields until END_TAG is read
+    while (1) {
+        uint8_t tag = read_tag_internal(data);
+        if (unlikely(tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
+            return CPY_NONE_ERROR;
+        }
+        if (tag == END_TAG) {  // closing tag of this class: done
+            return CPY_NONE;
+        }
+        if (unlikely(_skip_object(data, tag) == CPY_NONE_ERROR)) {
+            return CPY_NONE_ERROR;
+        }
+    }
+}
+
+// Instance has special compact layout (as an important optimization).
+static inline char
+_skip_instance(PyObject *data) {  // the INSTANCE tag itself was read by the caller
+    uint8_t second_tag = read_tag_internal(data);
+    if (unlikely(second_tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
+        return CPY_NONE_ERROR;
+    }
+    if (second_tag >= INSTANCE_STR && second_tag <= INSTANCE_OBJECT) {  // tags 83..87: nothing follows
+        return CPY_NONE;
+    }
+    if (second_tag == INSTANCE_SIMPLE) {
+        return _skip_str_bytes(data);
+    }
+    if (second_tag == INSTANCE_GENERIC) {
+        return _skip_class(data);
+    }
+    PyErr_Format(PyExc_ValueError, "Unexpected instance tag: %d", second_tag);
+    return CPY_NONE_ERROR;
+}
+
+// This is the main dispatch point. Branches are ordered manually
+// based roughly on frequency in self-check.
+static char
+_skip_object(PyObject *data, uint8_t tag) {  // tag: already read by the caller
+    if (tag == LITERAL_STR || tag == LITERAL_BYTES)
+        return _skip_str_bytes(data);
+    if (tag == LITERAL_NONE || tag == LITERAL_FALSE || tag == LITERAL_TRUE)
+        return CPY_NONE;  // these tags carry no payload
+    if (tag == LIST_GEN || tag == TUPLE_GEN)
+        return _skip_list_gen(data);
+    if (tag == LITERAL_INT)
+        return _skip_int(data);
+    if (tag == INSTANCE)
+        return _skip_instance(data);
+    if (tag > MYPY_FILE && tag < RESERVED)  // NOTE(review): excludes MYPY_FILE itself ("smallest custom class tag") -- confirm intended
+        return _skip_class(data);
+    if (tag == LIST_INT)
+        return _skip_list_int(data);
+    if (tag == LIST_STR || tag == LIST_BYTES)
+        return _skip_list_str_bytes(data);
+    if (tag == DICT_STR_GEN)
+        return _skip_dict_str_gen(data);
+    if (tag == LITERAL_FLOAT)
+        return _skip(data, 8);  // fixed 8-byte payload
+    if (tag == LITERAL_COMPLEX)
+        return _skip(data, 16);  // fixed 16-byte payload
+    PyErr_Format(PyExc_ValueError, "Unsupported tag: %d", tag);
+    return CPY_NONE_ERROR;
+}
+
+static PyObject*
+extract_symbol_internal(PyObject *data) {  // returns a new bytes object, or NULL on error
+    char *ptr = ((ReadBufferObject *)data)->ptr;  // remember where the symbol starts
+    if (unlikely(_skip_class(data) == CPY_NONE_ERROR))
+        return NULL;
+    Py_ssize_t size = ((ReadBufferObject *)data)->ptr - ptr;  // span consumed by _skip_class()
+    PyObject *res = PyBytes_FromStringAndSize(ptr, size);  // copies the span into a new bytes object
+    if (unlikely(res == NULL))
+        return NULL;
+    return res;
+}
+
+static PyObject*
+extract_symbol(PyObject *self, PyObject *const *args, size_t nargs) {  // METH_FASTCALL wrapper
+    if (unlikely(nargs != 1)) {
+        PyErr_Format(PyExc_TypeError,
+                     "extract_symbol() takes exactly 1 argument (%zu given)", nargs);
+        return NULL;
+    }
+    PyObject *data = args[0];
+    _CHECK_READ_BUFFER(data, NULL)
+    return extract_symbol_internal(data);
+}
+
 static uint8_t
 cache_version_internal(void) {
     return 0;
@@ -954,6 +1221,7 @@ static PyMethodDef librt_internal_module_methods[] = {
     {"write_tag", (PyCFunction)write_tag, METH_FASTCALL, PyDoc_STR("write a short int")},
     {"read_tag", (PyCFunction)read_tag, METH_FASTCALL, PyDoc_STR("read a short int")},
     {"cache_version", (PyCFunction)cache_version, METH_NOARGS, PyDoc_STR("cache format version")},
+    {"extract_symbol", (PyCFunction)extract_symbol, METH_FASTCALL, PyDoc_STR("extract bytes for a mypy symbol")},
     {NULL, NULL, 0, NULL}
 };
 
@@ -1005,6 +1273,7 @@ librt_internal_module_exec(PyObject *m)
         (void *)ReadBuffer_type_internal,
         (void *)WriteBuffer_type_internal,
         (void *)NativeInternal_API_Version,
+        (void *)extract_symbol_internal  // read back as NativeInternal_API[20] in librt_internal.h
     };
     PyObject *c_api_object = PyCapsule_New((void *)NativeInternal_API, "librt.internal._C_API", NULL);
     if (PyModule_Add(m, "_C_API", c_api_object) < 0) {
diff --git a/mypyc/lib-rt/internal/librt_internal.h b/mypyc/lib-rt/internal/librt_internal.h
index 1b325b20d95bb..e6d85901cabd9 100644
--- a/mypyc/lib-rt/internal/librt_internal.h
+++ b/mypyc/lib-rt/internal/librt_internal.h
@@ -11,11 +11,11 @@
 // API version -- more recent versions must maintain backward compatibility, i.e.
 // we can add new features but not remove or change existing features (unless
 // ABI version is changed, but see the comment above).
- #define LIBRT_INTERNAL_API_VERSION 0
+ #define LIBRT_INTERNAL_API_VERSION 1  // bumped together with LIBRT_INTERNAL_API_LEN (new capsule entry)
 
 // Number of functions in the capsule API. If you add a new function, also increase
 // LIBRT_INTERNAL_API_VERSION.
-#define LIBRT_INTERNAL_API_LEN 20
+#define LIBRT_INTERNAL_API_LEN 21  // slots 0..20; slot 20 is extract_symbol_internal
 
 #ifdef LIBRT_INTERNAL_MODULE
 
@@ -41,6 +41,7 @@ static uint8_t cache_version_internal(void);
 static PyTypeObject *ReadBuffer_type_internal(void);
 static PyTypeObject *WriteBuffer_type_internal(void);
 static int NativeInternal_API_Version(void);
+static PyObject *extract_symbol_internal(PyObject *data);  // defined in librt_internal.c
 
 #else
 
@@ -66,6 +67,7 @@ static void *NativeInternal_API[LIBRT_INTERNAL_API_LEN];
 #define ReadBuffer_type_internal (*(PyTypeObject* (*)(void)) NativeInternal_API[17])
 #define WriteBuffer_type_internal (*(PyTypeObject* (*)(void)) NativeInternal_API[18])
 #define NativeInternal_API_Version (*(int (*)(void)) NativeInternal_API[19])
+#define extract_symbol_internal (*(PyObject* (*)(PyObject *source)) NativeInternal_API[20])  // prototype above names the parameter "data"
 
 static int
 import_librt_internal(void)
diff --git a/mypyc/primitives/misc_ops.py b/mypyc/primitives/misc_ops.py
index ae2cfdf43e9c5..6be74baff3d0b 100644
--- a/mypyc/primitives/misc_ops.py
+++ b/mypyc/primitives/misc_ops.py
@@ -503,6 +503,14 @@
     error_kind=ERR_NEVER,
 )
 
+function_op(
+    name="librt.internal.extract_symbol",
+    arg_types=[object_rprimitive],  # the buffer is passed as a plain object
+    return_type=bytes_rprimitive,
+    c_function_name="extract_symbol_internal",  # capsule entry declared in librt_internal.h
+    error_kind=ERR_MAGIC,  # presumably: a NULL return signals a raised exception -- confirm
+)
+
 function_op(
     name="librt.base64.b64encode",
     arg_types=[bytes_rprimitive],