From 99cdf1deb61852dda9f8df639b0f13c1ce121cfa Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 23 Jul 2025 14:56:02 +0300 Subject: [PATCH 1/6] gh-136437: Make several functions in `os.path` pos-only (#136949) --- Lib/genericpath.py | 14 ++-- Lib/ntpath.py | 24 +++---- Lib/posixpath.py | 20 +++--- Modules/clinic/posixmodule.c.h | 125 ++++----------------------------- Modules/posixmodule.c | 15 ++-- 5 files changed, 51 insertions(+), 147 deletions(-) diff --git a/Lib/genericpath.py b/Lib/genericpath.py index 9363f564aab7a6..4a223654994194 100644 --- a/Lib/genericpath.py +++ b/Lib/genericpath.py @@ -81,28 +81,28 @@ def isdevdrive(path): return False -def getsize(filename): +def getsize(filename, /): """Return the size of a file, reported by os.stat().""" return os.stat(filename).st_size -def getmtime(filename): +def getmtime(filename, /): """Return the last modification time of a file, reported by os.stat().""" return os.stat(filename).st_mtime -def getatime(filename): +def getatime(filename, /): """Return the last access time of a file, reported by os.stat().""" return os.stat(filename).st_atime -def getctime(filename): +def getctime(filename, /): """Return the metadata change time of a file, reported by os.stat().""" return os.stat(filename).st_ctime # Return the longest prefix of all list elements. -def commonprefix(m): +def commonprefix(m, /): "Given a list of pathnames, returns the longest common leading component" if not m: return '' # Some people pass in a list of pathname parts to operate in an OS-agnostic @@ -120,14 +120,14 @@ def commonprefix(m): # Are two stat buffers (obtained from stat, fstat or lstat) # describing the same file? -def samestat(s1, s2): +def samestat(s1, s2, /): """Test whether two stat buffers reference the same file""" return (s1.st_ino == s2.st_ino and s1.st_dev == s2.st_dev) # Are two filenames really pointing to the same file? 
-def samefile(f1, f2): +def samefile(f1, f2, /): """Test whether two pathnames reference the same actual file or directory This is determined by the device number and i-node number and diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 9cdc16480f9afe..fad15430a373fb 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -47,7 +47,7 @@ def _get_bothseps(path): LOCALE_NAME_INVARIANT as _LOCALE_NAME_INVARIANT, LCMAP_LOWERCASE as _LCMAP_LOWERCASE) - def normcase(s): + def normcase(s, /): """Normalize case of pathname. Makes all characters lowercase and all slashes into backslashes. @@ -66,7 +66,7 @@ def normcase(s): _LCMAP_LOWERCASE, s.replace('/', '\\')) except ImportError: - def normcase(s): + def normcase(s, /): """Normalize case of pathname. Makes all characters lowercase and all slashes into backslashes. @@ -77,7 +77,7 @@ def normcase(s): return s.replace('/', '\\').lower() -def isabs(s): +def isabs(s, /): """Test whether a path is absolute""" s = os.fspath(s) if isinstance(s, bytes): @@ -96,7 +96,7 @@ def isabs(s): # Join two (or more) paths. -def join(path, *paths): +def join(path, /, *paths): path = os.fspath(path) if isinstance(path, bytes): sep = b'\\' @@ -143,7 +143,7 @@ def join(path, *paths): # Split a path in a drive specification (a drive letter followed by a # colon) and the path specification. # It is always true that drivespec + pathspec == p -def splitdrive(p): +def splitdrive(p, /): """Split a pathname into drive/UNC sharepoint and relative path specifiers. Returns a 2-tuple (drive_or_unc, path); either part may be empty. @@ -169,7 +169,7 @@ def splitdrive(p): try: from nt import _path_splitroot_ex as splitroot except ImportError: - def splitroot(p): + def splitroot(p, /): """Split a pathname into drive, root and tail. The tail contains anything after the root.""" @@ -219,7 +219,7 @@ def splitroot(p): # join(head, tail) == p holds. # The resulting head won't end in '/' unless it is the root. -def split(p): +def split(p, /): """Split a pathname. 
Return tuple (head, tail) where tail is everything after the final slash. @@ -240,7 +240,7 @@ def split(p): # pathname component; the root is everything before that. # It is always true that root + ext == p. -def splitext(p): +def splitext(p, /): p = os.fspath(p) if isinstance(p, bytes): return genericpath._splitext(p, b'\\', b'/', b'.') @@ -251,14 +251,14 @@ def splitext(p): # Return the tail (basename) part of a path. -def basename(p): +def basename(p, /): """Returns the final component of a pathname""" return split(p)[1] # Return the head (dirname) part of a path. -def dirname(p): +def dirname(p, /): """Returns the directory component of a pathname""" return split(p)[0] @@ -601,7 +601,7 @@ def abspath(path): from nt import _findfirstfile, _getfinalpathname, readlink as _nt_readlink except ImportError: # realpath is a no-op on systems without _getfinalpathname support. - def realpath(path, *, strict=False): + def realpath(path, /, *, strict=False): return abspath(path) else: def _readlink_deep(path, ignored_error=OSError): @@ -702,7 +702,7 @@ def _getfinalpathname_nonstrict(path, ignored_error=OSError): tail = join(name, tail) if tail else name return tail - def realpath(path, *, strict=False): + def realpath(path, /, *, strict=False): path = normpath(path) if isinstance(path, bytes): prefix = b'\\\\?\\' diff --git a/Lib/posixpath.py b/Lib/posixpath.py index d38f3bd5872bcd..5b5cde239e6275 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -50,7 +50,7 @@ def _get_sep(path): # normalizations (such as optimizing '../' away) are not allowed # (another function should be defined to do that). -def normcase(s): +def normcase(s, /): """Normalize case of pathname. Has no effect under Posix""" return os.fspath(s) @@ -58,7 +58,7 @@ def normcase(s): # Return whether a path is absolute. # Trivial in Posix, harder on the Mac or MS-DOS. 
-def isabs(s): +def isabs(s, /): """Test whether a path is absolute""" s = os.fspath(s) sep = _get_sep(s) @@ -69,7 +69,7 @@ def isabs(s): # Ignore the previous parts if a part is absolute. # Insert a '/' unless the first part is empty or already ends in '/'. -def join(a, *p): +def join(a, /, *p): """Join two or more pathname components, inserting '/' as needed. If any component is an absolute path, all previous path components will be discarded. An empty last part will result in a path that @@ -97,7 +97,7 @@ def join(a, *p): # '/' in the path, head will be empty. # Trailing '/'es are stripped from head unless it is the root. -def split(p): +def split(p, /): """Split a pathname. Returns tuple "(head, tail)" where "tail" is everything after the final slash. Either part may be empty.""" p = os.fspath(p) @@ -114,7 +114,7 @@ def split(p): # pathname component; the root is everything before that. # It is always true that root + ext == p. -def splitext(p): +def splitext(p, /): p = os.fspath(p) if isinstance(p, bytes): sep = b'/' @@ -128,7 +128,7 @@ def splitext(p): # Split a pathname into a drive specification and the rest of the # path. Useful on DOS/Windows/NT; on Unix, the drive is always empty. -def splitdrive(p): +def splitdrive(p, /): """Split a pathname into drive and path. On Posix, drive is always empty.""" p = os.fspath(p) @@ -138,7 +138,7 @@ def splitdrive(p): try: from posix import _path_splitroot_ex as splitroot except ImportError: - def splitroot(p): + def splitroot(p, /): """Split a pathname into drive, root and tail. The tail contains anything after the root.""" @@ -163,7 +163,7 @@ def splitroot(p): # Return the tail (basename) part of a path, same as split(path)[1]. -def basename(p): +def basename(p, /): """Returns the final component of a pathname""" p = os.fspath(p) sep = _get_sep(p) @@ -173,7 +173,7 @@ def basename(p): # Return the head (dirname) part of a path, same as split(path)[0]. 
-def dirname(p): +def dirname(p, /): """Returns the directory component of a pathname""" p = os.fspath(p) sep = _get_sep(p) @@ -388,7 +388,7 @@ def abspath(path): # Return a canonical path (i.e. the absolute location of a file on the # filesystem). -def realpath(filename, *, strict=False): +def realpath(filename, /, *, strict=False): """Return the canonical path of the specified filename, eliminating any symbolic links encountered in the path.""" filename = os.fspath(filename) diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 0a281cbe6c57a2..8af9e1db781c8f 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2044,57 +2044,24 @@ os__getvolumepathname(PyObject *module, PyObject *const *args, Py_ssize_t nargs, #if defined(MS_WINDOWS) PyDoc_STRVAR(os__path_splitroot__doc__, -"_path_splitroot($module, /, path)\n" +"_path_splitroot($module, path, /)\n" "--\n" "\n" "Removes everything after the root on Win32."); #define OS__PATH_SPLITROOT_METHODDEF \ - {"_path_splitroot", _PyCFunction_CAST(os__path_splitroot), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot__doc__}, + {"_path_splitroot", (PyCFunction)os__path_splitroot, METH_O, os__path_splitroot__doc__}, static PyObject * os__path_splitroot_impl(PyObject *module, path_t *path); static PyObject * -os__path_splitroot(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +os__path_splitroot(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; - #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - - #define NUM_KEYWORDS 1 - static struct { - PyGC_Head _this_is_not_used; - PyObject_VAR_HEAD - Py_hash_t ob_hash; - PyObject *ob_item[NUM_KEYWORDS]; - } _kwtuple = { - .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_hash = -1, - .ob_item = { &_Py_ID(path), }, - }; - #undef NUM_KEYWORDS - #define KWTUPLE (&_kwtuple.ob_base.ob_base) - - #else // !Py_BUILD_CORE - # define KWTUPLE NULL - #endif // 
!Py_BUILD_CORE - - static const char * const _keywords[] = {"path", NULL}; - static _PyArg_Parser _parser = { - .keywords = _keywords, - .fname = "_path_splitroot", - .kwtuple = KWTUPLE, - }; - #undef KWTUPLE - PyObject *argsbuf[1]; path_t path = PATH_T_INITIALIZE_P("_path_splitroot", "path", 0, 0, 0, 0); - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, - /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); - if (!args) { - goto exit; - } - if (!path_converter(args[0], &path)) { + if (!path_converter(arg, &path)) { goto exit; } return_value = os__path_splitroot_impl(module, &path); @@ -2255,58 +2222,25 @@ os__path_lexists(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb #if defined(MS_WINDOWS) PyDoc_STRVAR(os__path_isdir__doc__, -"_path_isdir($module, /, s)\n" +"_path_isdir($module, path, /)\n" "--\n" "\n" "Return true if the pathname refers to an existing directory."); #define OS__PATH_ISDIR_METHODDEF \ - {"_path_isdir", _PyCFunction_CAST(os__path_isdir), METH_FASTCALL|METH_KEYWORDS, os__path_isdir__doc__}, + {"_path_isdir", (PyCFunction)os__path_isdir, METH_O, os__path_isdir__doc__}, static int os__path_isdir_impl(PyObject *module, path_t *path); static PyObject * -os__path_isdir(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +os__path_isdir(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; - #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - - #define NUM_KEYWORDS 1 - static struct { - PyGC_Head _this_is_not_used; - PyObject_VAR_HEAD - Py_hash_t ob_hash; - PyObject *ob_item[NUM_KEYWORDS]; - } _kwtuple = { - .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_hash = -1, - .ob_item = { _Py_LATIN1_CHR('s'), }, - }; - #undef NUM_KEYWORDS - #define KWTUPLE (&_kwtuple.ob_base.ob_base) - - #else // !Py_BUILD_CORE - # define KWTUPLE NULL - #endif // !Py_BUILD_CORE - - static const char * const _keywords[] = {"s", NULL}; - static _PyArg_Parser _parser 
= { - .keywords = _keywords, - .fname = "_path_isdir", - .kwtuple = KWTUPLE, - }; - #undef KWTUPLE - PyObject *argsbuf[1]; path_t path = PATH_T_INITIALIZE_P("_path_isdir", "path", 0, 0, 1, 1); int _return_value; - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, - /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); - if (!args) { - goto exit; - } - if (!path_converter(args[0], &path)) { + if (!path_converter(arg, &path)) { goto exit; } _return_value = os__path_isdir_impl(module, &path); @@ -2541,7 +2475,7 @@ os__path_isjunction(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P #endif /* defined(MS_WINDOWS) */ PyDoc_STRVAR(os__path_splitroot_ex__doc__, -"_path_splitroot_ex($module, /, p)\n" +"_path_splitroot_ex($module, path, /)\n" "--\n" "\n" "Split a pathname into drive, root and tail.\n" @@ -2549,51 +2483,18 @@ PyDoc_STRVAR(os__path_splitroot_ex__doc__, "The tail contains anything after the root."); #define OS__PATH_SPLITROOT_EX_METHODDEF \ - {"_path_splitroot_ex", _PyCFunction_CAST(os__path_splitroot_ex), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot_ex__doc__}, + {"_path_splitroot_ex", (PyCFunction)os__path_splitroot_ex, METH_O, os__path_splitroot_ex__doc__}, static PyObject * os__path_splitroot_ex_impl(PyObject *module, path_t *path); static PyObject * -os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +os__path_splitroot_ex(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; - #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - - #define NUM_KEYWORDS 1 - static struct { - PyGC_Head _this_is_not_used; - PyObject_VAR_HEAD - Py_hash_t ob_hash; - PyObject *ob_item[NUM_KEYWORDS]; - } _kwtuple = { - .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_hash = -1, - .ob_item = { _Py_LATIN1_CHR('p'), }, - }; - #undef NUM_KEYWORDS - #define KWTUPLE (&_kwtuple.ob_base.ob_base) - - #else // !Py_BUILD_CORE - # define KWTUPLE NULL - #endif 
// !Py_BUILD_CORE - - static const char * const _keywords[] = {"p", NULL}; - static _PyArg_Parser _parser = { - .keywords = _keywords, - .fname = "_path_splitroot_ex", - .kwtuple = KWTUPLE, - }; - #undef KWTUPLE - PyObject *argsbuf[1]; path_t path = PATH_T_INITIALIZE("_path_splitroot_ex", "path", 0, 1, 1, 0, 0); - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, - /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); - if (!args) { - goto exit; - } - if (!path_converter(args[0], &path)) { + if (!path_converter(arg, &path)) { goto exit; } return_value = os__path_splitroot_ex_impl(module, &path); @@ -13518,4 +13419,4 @@ os__emscripten_log(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py #ifndef OS__EMSCRIPTEN_LOG_METHODDEF #define OS__EMSCRIPTEN_LOG_METHODDEF #endif /* !defined(OS__EMSCRIPTEN_LOG_METHODDEF) */ -/*[clinic end generated code: output=608e9bc5f631f688 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b1e2615384347102 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 77622fbc4e8065..b1a80788bd8115 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5226,14 +5226,15 @@ os__getvolumepathname_impl(PyObject *module, path_t *path) /*[clinic input] os._path_splitroot - path: path_t + path: path_t, + / Removes everything after the root on Win32. [clinic start generated code]*/ static PyObject * os__path_splitroot_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=ab7f1a88b654581c input=dc93b1d3984cffb6]*/ +/*[clinic end generated code: output=ab7f1a88b654581c input=42831e41f8458f6d]*/ { wchar_t *buffer; wchar_t *end; @@ -5535,7 +5536,8 @@ os__path_lexists_impl(PyObject *module, path_t *path) /*[clinic input] os._path_isdir -> bool - s as path: path_t(allow_fd=True, suppress_value_error=True) + path: path_t(allow_fd=True, suppress_value_error=True), + / Return true if the pathname refers to an existing directory. 
@@ -5543,7 +5545,7 @@ Return true if the pathname refers to an existing directory. static int os__path_isdir_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=d5786196f9e2fa7a input=132a3b5301aecf79]*/ +/*[clinic end generated code: output=d5786196f9e2fa7a input=0d3fd790564d244b]*/ { return _testFileType(path, PY_IFDIR); } @@ -5612,7 +5614,8 @@ os__path_isjunction_impl(PyObject *module, path_t *path) /*[clinic input] os._path_splitroot_ex - p as path: path_t(make_wide=True, nonstrict=True) + path: path_t(make_wide=True, nonstrict=True), + / Split a pathname into drive, root and tail. @@ -5621,7 +5624,7 @@ The tail contains anything after the root. static PyObject * os__path_splitroot_ex_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=4b0072b6cdf4b611 input=4556b615c7cc13f2]*/ +/*[clinic end generated code: output=4b0072b6cdf4b611 input=4ac47b394d68bd21]*/ { Py_ssize_t drvsize, rootsize; PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; From fac4964fdb2ae12969b485de496dd6d064fdbe99 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 23 Jul 2025 14:01:38 +0200 Subject: [PATCH 2/6] gh-136516: Mention installation artifacts as de-facto resources (GH-136419) Files like NUL on windows are, from `importlib.resources` point of view, an artifact caused by installing to a filesystem directory. Mention these. --- Doc/library/importlib.resources.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Doc/library/importlib.resources.rst b/Doc/library/importlib.resources.rst index e002198899c8b8..7a11f4fe069004 100644 --- a/Doc/library/importlib.resources.rst +++ b/Doc/library/importlib.resources.rst @@ -16,11 +16,12 @@ within *packages*. "Resources" are file-like resources associated with a module or package in Python. The resources may be contained directly in a package, within a subdirectory contained in that package, or adjacent to modules outside a -package. 
Resources may be text or binary. As a result, Python module sources -(.py) of a package and compilation artifacts (pycache) are technically -de-facto resources of that package. In practice, however, resources are -primarily those non-Python artifacts exposed specifically by the package -author. +package. Resources may be text or binary. As a result, a package's Python +module sources (.py), compilation artifacts (pycache), and installation +artifacts (like :func:`reserved filenames <os.path.isreserved>` +in directories) are technically de-facto resources of that package. +In practice, however, resources are primarily those non-Python artifacts +exposed specifically by the package author. Resources can be opened or read in either binary or text mode. From 80a7017d2649ad5d7d1f83758eeeef50e5eba6b1 Mon Sep 17 00:00:00 2001 From: Cornelius Roemer Date: Wed, 23 Jul 2025 14:04:59 +0200 Subject: [PATCH 3/6] Fix typos in Doc/extending/extending.rst and Doc/library/shelve.rst (GH-136890) --- Doc/extending/extending.rst | 2 +- Doc/library/shelve.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/extending/extending.rst b/Doc/extending/extending.rst index fd63495674651b..a89a69043c0f9f 100644 --- a/Doc/extending/extending.rst +++ b/Doc/extending/extending.rst @@ -214,7 +214,7 @@ and initialize it by calling :c:func:`PyErr_NewException` in the module's SpamError = PyErr_NewException("spam.error", NULL, NULL); -Since :c:data:`!SpamError` is a global variable, it will be overwitten every time +Since :c:data:`!SpamError` is a global variable, it will be overwritten every time the module is reinitialized, when the :c:data:`Py_mod_exec` function is called. 
For now, let's avoid the issue: we will block repeated initialization by raising an diff --git a/Doc/library/shelve.rst b/Doc/library/shelve.rst index 23808619524056..b88fe4157bdc29 100644 --- a/Doc/library/shelve.rst +++ b/Doc/library/shelve.rst @@ -144,7 +144,7 @@ Restrictions which can cause hard crashes when trying to read from the database. * :meth:`Shelf.reorganize` may not be available for all database packages and - may temporarely increase resource usage (especially disk space) when called. + may temporarily increase resource usage (especially disk space) when called. Additionally, it will never run automatically and instead needs to be called explicitly. From 38b936cc9912fc6847265917f94af53f0bf228e9 Mon Sep 17 00:00:00 2001 From: Guido Imperiale Date: Wed, 23 Jul 2025 15:36:06 +0100 Subject: [PATCH 4/6] gh-137043: mention `PyList_GET_ITEM` as unsafe borrowed API in free-threading docs (#137042) --- Doc/howto/free-threading-extensions.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Doc/howto/free-threading-extensions.rst b/Doc/howto/free-threading-extensions.rst index 02b45879ccfaca..577e283bb9cb4c 100644 --- a/Doc/howto/free-threading-extensions.rst +++ b/Doc/howto/free-threading-extensions.rst @@ -161,6 +161,8 @@ that return :term:`strong references `. 
+===================================+===================================+ | :c:func:`PyList_GetItem` | :c:func:`PyList_GetItemRef` | +-----------------------------------+-----------------------------------+ +| :c:func:`PyList_GET_ITEM` | :c:func:`PyList_GetItemRef` | ++-----------------------------------+-----------------------------------+ | :c:func:`PyDict_GetItem` | :c:func:`PyDict_GetItemRef` | +-----------------------------------+-----------------------------------+ | :c:func:`PyDict_GetItemWithError` | :c:func:`PyDict_GetItemRef` | From 6a285f94c63f6e40ae1495d7efc61694d988e9ed Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Wed, 23 Jul 2025 08:13:19 -0700 Subject: [PATCH 5/6] Revert "gh-135228: When @dataclass(slots=True) replaces a dataclass, make the original class collectible (#136893)" (#137014) This reverts commit 46cbdf967ada11b0286060488b61635fd6a2bb23. --- Lib/dataclasses.py | 15 -------- Lib/test/test_dataclasses/__init__.py | 35 ------------------- ...-07-20-16-56-55.gh-issue-135228.n_XIao.rst | 4 --- 3 files changed, 54 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2025-07-20-16-56-55.gh-issue-135228.n_XIao.rst diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 22b78bb2fbe6ed..83ea623dce6281 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1338,13 +1338,6 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields): or _update_func_cell_for__class__(member.fdel, cls, newcls)): break - # gh-135228: Make sure the original class can be garbage collected. - # Bypass mapping proxy to allow __dict__ to be removed - old_cls_dict = cls.__dict__ | _deproxier - old_cls_dict.pop('__dict__', None) - if "__weakref__" in cls.__dict__: - del cls.__weakref__ - return newcls @@ -1739,11 +1732,3 @@ def _replace(self, /, **changes): # changes that aren't fields, this will correctly raise a # TypeError. 
return self.__class__(**changes) - - -# Hack to the get the underlying dict out of a mappingproxy -# Use it with: cls.__dict__ | _deproxier -class _Deproxier: - def __ror__(self, other): - return other -_deproxier = _Deproxier() diff --git a/Lib/test/test_dataclasses/__init__.py b/Lib/test/test_dataclasses/__init__.py index 6bf5e5b3e5554b..e98a8f284cec9f 100644 --- a/Lib/test/test_dataclasses/__init__.py +++ b/Lib/test/test_dataclasses/__init__.py @@ -3804,41 +3804,6 @@ class WithCorrectSuper(CorrectSuper): # that we create internally. self.assertEqual(CorrectSuper.args, ["default", "default"]) - def test_original_class_is_gced(self): - # gh-135228: Make sure when we replace the class with slots=True, the original class - # gets garbage collected. - def make_simple(): - @dataclass(slots=True) - class SlotsTest: - pass - - return SlotsTest - - def make_with_annotations(): - @dataclass(slots=True) - class SlotsTest: - x: int - - return SlotsTest - - def make_with_annotations_and_method(): - @dataclass(slots=True) - class SlotsTest: - x: int - - def method(self) -> int: - return self.x - - return SlotsTest - - for make in (make_simple, make_with_annotations, make_with_annotations_and_method): - with self.subTest(make=make): - C = make() - support.gc_collect() - candidates = [cls for cls in object.__subclasses__() if cls.__name__ == 'SlotsTest' - and cls.__firstlineno__ == make.__code__.co_firstlineno + 1] - self.assertEqual(candidates, [C]) - class TestDescriptors(unittest.TestCase): def test_set_name(self): diff --git a/Misc/NEWS.d/next/Library/2025-07-20-16-56-55.gh-issue-135228.n_XIao.rst b/Misc/NEWS.d/next/Library/2025-07-20-16-56-55.gh-issue-135228.n_XIao.rst deleted file mode 100644 index ee8962c6f46e75..00000000000000 --- a/Misc/NEWS.d/next/Library/2025-07-20-16-56-55.gh-issue-135228.n_XIao.rst +++ /dev/null @@ -1,4 +0,0 @@ -When :mod:`dataclasses` replaces a class with a slotted dataclass, the -original class is now garbage collected again. 
Earlier changes in Python -3.14 caused this class to remain in existence together with the replacement -class synthesized by :mod:`dataclasses`. From 777159fa318f39c36ad60039cdf35a8dbb319637 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 23 Jul 2025 17:57:54 +0200 Subject: [PATCH 6/6] gh-135676: Lexical analysis: Reword String literals and related sections (GH-135942) Co-authored-by: Blaise Pabon Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/reference/expressions.rst | 63 ++- Doc/reference/grammar.rst | 5 +- Doc/reference/introduction.rst | 16 +- Doc/reference/lexical_analysis.rst | 599 +++++++++++++++++++---------- 4 files changed, 460 insertions(+), 223 deletions(-) diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 24544a055c3ed2..9aca25e3214a16 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -133,13 +133,18 @@ Literals Python supports string and bytes literals and various numeric literals: -.. productionlist:: python-grammar - literal: `stringliteral` | `bytesliteral` | `NUMBER` +.. grammar-snippet:: + :group: python-grammar + + literal: `strings` | `NUMBER` Evaluation of a literal yields an object of the given type (string, bytes, integer, floating-point number, complex number) with the given value. The value may be approximated in the case of floating-point and imaginary (complex) -literals. See section :ref:`literals` for details. +literals. +See section :ref:`literals` for details. +See section :ref:`string-concatenation` for details on ``strings``. + .. index:: triple: immutable; data; type @@ -152,6 +157,58 @@ occurrence) may obtain the same object or a different object with the same value. +.. 
_string-concatenation: + +String literal concatenation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Multiple adjacent string or bytes literals (delimited by whitespace), possibly +using different quoting conventions, are allowed, and their meaning is the same +as their concatenation:: + + >>> "hello" 'world' + "helloworld" + +Formally: + +.. grammar-snippet:: + :group: python-grammar + + strings: ( `STRING` | fstring)+ | tstring+ + +This feature is defined at the syntactical level, so it only works with literals. +To concatenate string expressions at run time, the '+' operator may be used:: + + >>> greeting = "Hello" + >>> space = " " + >>> name = "Blaise" + >>> print(greeting + space + name) # not: print(greeting space name) + Hello Blaise + +Literal concatenation can freely mix raw strings, triple-quoted strings, +and formatted string literals. +For example:: + + >>> "Hello" r', ' f"{name}!" + "Hello, Blaise!" + +This feature can be used to reduce the number of backslashes +needed, to split long strings conveniently across long lines, or even to add +comments to parts of strings. For example:: + + re.compile("[A-Za-z_]" # letter or underscore + "[A-Za-z0-9_]*" # letter, digit or underscore + ) + +However, bytes literals may only be combined with other byte literals; +not with string literals of any kind. +Also, template string literals may only be combined with other template +string literals:: + + >>> t"Hello" t"{name}!" + Template(strings=('Hello', '!'), interpolations=(...)) + + .. _parenthesized: Parenthesized forms diff --git a/Doc/reference/grammar.rst b/Doc/reference/grammar.rst index 55c148801d8559..1037feb691f6bc 100644 --- a/Doc/reference/grammar.rst +++ b/Doc/reference/grammar.rst @@ -10,11 +10,8 @@ error recovery. 
The notation used here is the same as in the preceding docs, and is described in the :ref:`notation ` section, -except for a few extra complications: +except for an extra complication: -* ``&e``: a positive lookahead (that is, ``e`` is required to match but - not consumed) -* ``!e``: a negative lookahead (that is, ``e`` is required *not* to match) * ``~`` ("cut"): commit to the current alternative and fail the rule even if this fails to parse diff --git a/Doc/reference/introduction.rst b/Doc/reference/introduction.rst index 444acac374a690..c62240b18cfe55 100644 --- a/Doc/reference/introduction.rst +++ b/Doc/reference/introduction.rst @@ -145,15 +145,23 @@ The definition to the right of the colon uses the following syntax elements: * ``e?``: A question mark has exactly the same meaning as square brackets: the preceding item is optional. * ``(e)``: Parentheses are used for grouping. + +The following notation is only used in +:ref:`lexical definitions `. + * ``"a"..."z"``: Two literal characters separated by three dots mean a choice of any single character in the given (inclusive) range of ASCII characters. - This notation is only used in - :ref:`lexical definitions `. * ``<...>``: A phrase between angular brackets gives an informal description of the matched symbol (for example, ````), or an abbreviation that is defined in nearby text (for example, ````). - This notation is only used in - :ref:`lexical definitions `. + +.. _lexical-lookaheads: + +Some definitions also use *lookaheads*, which indicate that an element +must (or must not) match at a given position, but without consuming any input: + +* ``&e``: a positive lookahead (that is, ``e`` is required to match) +* ``!e``: a negative lookahead (that is, ``e`` is required *not* to match) The unary operators (``*``, ``+``, ``?``) bind as tightly as possible; the vertical bar (``|``) binds most loosely. 
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index a7f8e5392b7e71..cf241829b71120 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -39,7 +39,8 @@ The end of a logical line is represented by the token :data:`~token.NEWLINE`. Statements cannot cross logical line boundaries except where :data:`!NEWLINE` is allowed by the syntax (e.g., between statements in compound statements). A logical line is constructed from one or more *physical lines* by following -the explicit or implicit *line joining* rules. +the :ref:`explicit <explicit-joining>` or :ref:`implicit <implicit-joining>` +*line joining* rules. .. _physical-lines: @@ -47,17 +48,30 @@ the explicit or implicit *line joining* rules. Physical lines -------------- -A physical line is a sequence of characters terminated by an end-of-line -sequence. In source files and strings, any of the standard platform line -termination sequences can be used - the Unix form using ASCII LF (linefeed), -the Windows form using the ASCII sequence CR LF (return followed by linefeed), -or the old Macintosh form using the ASCII CR (return) character. All of these -forms can be used equally, regardless of platform. The end of input also serves -as an implicit terminator for the final physical line. +A physical line is a sequence of characters terminated by one of the following +end-of-line sequences: -When embedding Python, source code strings should be passed to Python APIs using -the standard C conventions for newline characters (the ``\n`` character, -representing ASCII LF, is the line terminator). +* the Unix form using ASCII LF (linefeed), +* the Windows form using the ASCII sequence CR LF (return followed by linefeed), +* the '`Classic Mac OS`__' form using the ASCII CR (return) character. + + __ https://en.wikipedia.org/wiki/Classic_Mac_OS + +Regardless of platform, each of these sequences is replaced by a single +ASCII LF (linefeed) character. 
+(This is done even inside :ref:`string literals <strings>`.) +Each line can use any of the sequences; they do not need to be consistent +within a file. + +The end of input also serves as an implicit terminator for the final +physical line. + +Formally: + +.. grammar-snippet:: + :group: python-grammar + + newline: <ASCII LF> | <ASCII CR> <ASCII LF> | <ASCII CR> .. _comments: @@ -106,6 +120,16 @@ If an encoding is declared, the encoding name must be recognized by Python encoding is used for all lexical analysis, including string literals, comments and identifiers. +All lexical analysis, including string literals, comments +and identifiers, works on Unicode text decoded using the source encoding. +Any Unicode code point, except the NUL control character, can appear in +Python source. + +.. grammar-snippet:: + :group: python-grammar + + source_character: <any Unicode code point, except NUL> + .. _explicit-joining: @@ -474,80 +498,110 @@ Literals Literals are notations for constant values of some built-in types. +In terms of lexical analysis, Python has :ref:`string, bytes <strings>` +and :ref:`numeric <numbers>` literals. + +Other "literals" are lexically denoted using :ref:`keywords <keywords>` +(``None``, ``True``, ``False``) and the special +:ref:`ellipsis token <lexical-ellipsis>` (``...``). + .. index:: string literal, bytes literal, ASCII single: ' (single quote); string literal single: " (double quote); string literal - single: u'; string literal - single: u"; string literal .. _strings: String and Bytes literals -------------------------- +========================= -String literals are described by the following lexical definitions: +String literals are text enclosed in single quotes (``'``) or double +quotes (``"``). For example: -.. 
productionlist:: python-grammar - stringliteral: [`stringprefix`](`shortstring` | `longstring`) - stringprefix: "r" | "u" | "R" | "U" | "f" | "F" | "t" | "T" - : | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF" - : | "tr" | "Tr" | "tR" | "TR" | "rt" | "rT" | "Rt" | "RT" - shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"' - longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""' - shortstringitem: `shortstringchar` | `stringescapeseq` - longstringitem: `longstringchar` | `stringescapeseq` - shortstringchar: - longstringchar: - stringescapeseq: "\" +.. code-block:: python -.. productionlist:: python-grammar - bytesliteral: `bytesprefix`(`shortbytes` | `longbytes`) - bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB" - shortbytes: "'" `shortbytesitem`* "'" | '"' `shortbytesitem`* '"' - longbytes: "'''" `longbytesitem`* "'''" | '"""' `longbytesitem`* '"""' - shortbytesitem: `shortbyteschar` | `bytesescapeseq` - longbytesitem: `longbyteschar` | `bytesescapeseq` - shortbyteschar: - longbyteschar: - bytesescapeseq: "\" - -One syntactic restriction not indicated by these productions is that whitespace -is not allowed between the :token:`~python-grammar:stringprefix` or -:token:`~python-grammar:bytesprefix` and the rest of the literal. The source -character set is defined by the encoding declaration; it is UTF-8 if no encoding -declaration is given in the source file; see section :ref:`encodings`. - -.. index:: triple-quoted string, Unicode Consortium, raw string + "spam" + 'eggs' + +The quote used to start the literal also terminates it, so a string literal +can only contain the other quote (except with escape sequences, see below). +For example: + +.. code-block:: python + + 'Say "Hello", please.' + "Don't do that!" + +Except for this limitation, the choice of quote character (``'`` or ``"``) +does not affect how the literal is parsed. 
+ +Inside a string literal, the backslash (``\``) character introduces an +:dfn:`escape sequence`, which has special meaning depending on the character +after the backslash. +For example, ``\"`` denotes the double quote character, and does *not* end +the string: + +.. code-block:: pycon + + >>> print("Say \"Hello\" to everyone!") + Say "Hello" to everyone! + +See :ref:`escape sequences <escape-sequences>` below for a full list of such +sequences, and more details. + + +.. index:: triple-quoted string single: """; string literal single: '''; string literal -In plain English: Both types of literals can be enclosed in matching single quotes -(``'``) or double quotes (``"``). They can also be enclosed in matching groups -of three single or double quotes (these are generally referred to as -*triple-quoted strings*). The backslash (``\``) character is used to give special -meaning to otherwise ordinary characters like ``n``, which means 'newline' when -escaped (``\n``). It can also be used to escape characters that otherwise have a -special meaning, such as newline, backslash itself, or the quote character. -See :ref:`escape sequences <escape-sequences>` below for examples. +Triple-quoted strings +--------------------- -.. index:: - single: b'; bytes literal - single: b"; bytes literal +Strings can also be enclosed in matching groups of three single or double +quotes. +These are generally referred to as :dfn:`triple-quoted strings`:: + + """This is a triple-quoted string.""" + +In triple-quoted literals, unescaped quotes are allowed (and are +retained), except that three unescaped quotes in a row terminate the literal, +if they are of the same kind (``'`` or ``"``) used at the start:: + + """This string has "quotes" inside.""" + +Unescaped newlines are also allowed and retained:: + + '''This triple-quoted string + continues on the next line.''' -Bytes literals are always prefixed with ``'b'`` or ``'B'``; they produce an -instance of the :class:`bytes` type instead of the :class:`str` type. 
They -may only contain ASCII characters; bytes with a numeric value of 128 or greater -must be expressed with escapes. .. index:: - single: r'; raw string literal - single: r"; raw string literal + single: u'; string literal + single: u"; string literal -Both string and bytes literals may optionally be prefixed with a letter ``'r'`` -or ``'R'``; such constructs are called :dfn:`raw string literals` -and :dfn:`raw bytes literals` respectively and treat backslashes as -literal characters. As a result, in raw string literals, ``'\U'`` and ``'\u'`` -escapes are not treated specially. +String prefixes +--------------- + +String literals can have an optional :dfn:`prefix` that influences how the +content of the literal is parsed, for example: + +.. code-block:: python + + b"data" + f'{result=}' + +The allowed prefixes are: + +* ``b``: :ref:`Bytes literal <bytes-literal>` +* ``r``: :ref:`Raw string <raw-strings>` +* ``f``: :ref:`Formatted string literal <f-strings>` ("f-string") +* ``t``: :ref:`Template string literal <t-strings>` ("t-string") +* ``u``: No effect (allowed for backwards compatibility) + +See the linked sections for details on each type. + +Prefixes are case-insensitive (for example, ``B`` works the same as ``b``). +The ``r`` prefix can be combined with ``f``, ``t`` or ``b``, so ``fr``, +``rf``, ``tr``, ``rt``, ``br`` and ``rb`` are also valid prefixes. .. versionadded:: 3.3 The ``'rb'`` prefix of raw bytes literals has been added as a synonym @@ -557,18 +611,35 @@ escapes are not treated specially. to simplify the maintenance of dual Python 2.x and 3.x codebases. See :pep:`414` for more information. -.. index:: - single: f'; formatted string literal - single: f"; formatted string literal -A string literal with ``f`` or ``F`` in its prefix is a -:dfn:`formatted string literal`; see :ref:`f-strings`. The ``f`` may be -combined with ``r``, but not with ``b`` or ``u``, therefore raw -formatted strings are possible, but formatted bytes literals are not. 
+Formal grammar +-------------- + +String literals, except :ref:`"f-strings" ` and +:ref:`"t-strings" `, are described by the +following lexical definitions. + +These definitions use :ref:`negative lookaheads ` (``!``) +to indicate that an ending quote ends the literal. + +.. grammar-snippet:: + :group: python-grammar -In triple-quoted literals, unescaped newlines and quotes are allowed (and are -retained), except that three unescaped quotes in a row terminate the literal. (A -"quote" is the character used to open the literal, i.e. either ``'`` or ``"``.) + STRING: [`stringprefix`] (`stringcontent`) + stringprefix: <("r" | "u" | "b" | "br" | "rb"), case-insensitive> + stringcontent: + | "'" ( !"'" `stringitem`)* "'" + | '"' ( !'"' `stringitem`)* '"' + | "'''" ( !"'''" `longstringitem`)* "'''" + | '"""' ( !'"""' `longstringitem`)* '"""' + stringitem: `stringchar` | `stringescapeseq` + stringchar: + longstringitem: `stringitem` | newline + stringescapeseq: "\" + +Note that as in all lexical definitions, whitespace is significant. +In particular, the prefix (if any) must be immediately followed by the starting +quote. .. index:: physical line, escape sequence, Standard C, C single: \ (backslash); escape sequence @@ -587,120 +658,237 @@ retained), except that three unescaped quotes in a row terminate the literal. ( .. _escape-sequences: - Escape sequences -^^^^^^^^^^^^^^^^ +---------------- Unless an ``'r'`` or ``'R'`` prefix is present, escape sequences in string and bytes literals are interpreted according to rules similar to those used by Standard C. 
The recognized escape sequences are: -+-------------------------+---------------------------------+-------+ -| Escape Sequence | Meaning | Notes | -+=========================+=================================+=======+ -| ``\``\ | Backslash and newline ignored | \(1) | -+-------------------------+---------------------------------+-------+ -| ``\\`` | Backslash (``\``) | | -+-------------------------+---------------------------------+-------+ -| ``\'`` | Single quote (``'``) | | -+-------------------------+---------------------------------+-------+ -| ``\"`` | Double quote (``"``) | | -+-------------------------+---------------------------------+-------+ -| ``\a`` | ASCII Bell (BEL) | | -+-------------------------+---------------------------------+-------+ -| ``\b`` | ASCII Backspace (BS) | | -+-------------------------+---------------------------------+-------+ -| ``\f`` | ASCII Formfeed (FF) | | -+-------------------------+---------------------------------+-------+ -| ``\n`` | ASCII Linefeed (LF) | | -+-------------------------+---------------------------------+-------+ -| ``\r`` | ASCII Carriage Return (CR) | | -+-------------------------+---------------------------------+-------+ -| ``\t`` | ASCII Horizontal Tab (TAB) | | -+-------------------------+---------------------------------+-------+ -| ``\v`` | ASCII Vertical Tab (VT) | | -+-------------------------+---------------------------------+-------+ -| :samp:`\\\\{ooo}` | Character with octal value | (2,4) | -| | *ooo* | | -+-------------------------+---------------------------------+-------+ -| :samp:`\\x{hh}` | Character with hex value *hh* | (3,4) | -+-------------------------+---------------------------------+-------+ - -Escape sequences only recognized in string literals are: - -+-------------------------+---------------------------------+-------+ -| Escape Sequence | Meaning | Notes | -+=========================+=================================+=======+ -| :samp:`\\N\\{{name}\\}` | Character named *name* 
in the | \(5) | -| | Unicode database | | -+-------------------------+---------------------------------+-------+ -| :samp:`\\u{xxxx}` | Character with 16-bit hex value | \(6) | -| | *xxxx* | | -+-------------------------+---------------------------------+-------+ -| :samp:`\\U{xxxxxxxx}` | Character with 32-bit hex value | \(7) | -| | *xxxxxxxx* | | -+-------------------------+---------------------------------+-------+ - -Notes: - -(1) - A backslash can be added at the end of a line to ignore the newline:: - - >>> 'This string will not include \ - ... backslashes or newline characters.' - 'This string will not include backslashes or newline characters.' - - The same result can be achieved using :ref:`triple-quoted strings `, - or parentheses and :ref:`string literal concatenation `. - - -(2) - As in Standard C, up to three octal digits are accepted. - - .. versionchanged:: 3.11 - Octal escapes with value larger than ``0o377`` produce a - :exc:`DeprecationWarning`. - - .. versionchanged:: 3.12 - Octal escapes with value larger than ``0o377`` produce a - :exc:`SyntaxWarning`. In a future Python version they will be eventually - a :exc:`SyntaxError`. - -(3) - Unlike in Standard C, exactly two hex digits are required. - -(4) - In a bytes literal, hexadecimal and octal escapes denote the byte with the - given value. In a string literal, these escapes denote a Unicode character - with the given value. - -(5) - .. versionchanged:: 3.3 - Support for name aliases [#]_ has been added. - -(6) - Exactly four hex digits are required. - -(7) - Any Unicode character can be encoded this way. Exactly eight hex digits - are required. +.. 
list-table:: + :widths: auto + :header-rows: 1 + + * * Escape Sequence + * Meaning + * * ``\``\ + * :ref:`string-escape-ignore` + * * ``\\`` + * :ref:`Backslash ` + * * ``\'`` + * :ref:`Single quote ` + * * ``\"`` + * :ref:`Double quote ` + * * ``\a`` + * ASCII Bell (BEL) + * * ``\b`` + * ASCII Backspace (BS) + * * ``\f`` + * ASCII Formfeed (FF) + * * ``\n`` + * ASCII Linefeed (LF) + * * ``\r`` + * ASCII Carriage Return (CR) + * * ``\t`` + * ASCII Horizontal Tab (TAB) + * * ``\v`` + * ASCII Vertical Tab (VT) + * * :samp:`\\\\{ooo}` + * :ref:`string-escape-oct` + * * :samp:`\\x{hh}` + * :ref:`string-escape-hex` + * * :samp:`\\N\\{{name}\\}` + * :ref:`string-escape-named` + * * :samp:`\\u{xxxx}` + * :ref:`Hexadecimal Unicode character ` + * * :samp:`\\U{xxxxxxxx}` + * :ref:`Hexadecimal Unicode character ` + +.. _string-escape-ignore: + +Ignored end of line +^^^^^^^^^^^^^^^^^^^ + +A backslash can be added at the end of a line to ignore the newline:: + + >>> 'This string will not include \ + ... backslashes or newline characters.' + 'This string will not include backslashes or newline characters.' + +The same result can be achieved using :ref:`triple-quoted strings `, +or parentheses and :ref:`string literal concatenation `. + +.. _string-escape-escaped-char: + +Escaped characters +^^^^^^^^^^^^^^^^^^ + +To include a backslash in a non-:ref:`raw ` Python string +literal, it must be doubled. The ``\\`` escape sequence denotes a single +backslash character:: + + >>> print('C:\\Program Files') + C:\Program Files + +Similarly, the ``\'`` and ``\"`` sequences denote the single and double +quote character, respectively:: + + >>> print('\' and \"') + ' and " + +.. _string-escape-oct: + +Octal character +^^^^^^^^^^^^^^^ + +The sequence :samp:`\\\\{ooo}` denotes a *character* with the octal (base 8) +value *ooo*:: + + >>> '\120' + 'P' + +Up to three octal digits (0 through 7) are accepted. + +In a bytes literal, *character* means a *byte* with the given value. 
+In a string literal, it means a Unicode character with the given value. + +.. versionchanged:: 3.11 + Octal escapes with value larger than ``0o377`` (255) produce a + :exc:`DeprecationWarning`. + +.. versionchanged:: 3.12 + Octal escapes with value larger than ``0o377`` (255) produce a + :exc:`SyntaxWarning`. + In a future Python version they will raise a :exc:`SyntaxError`. + +.. _string-escape-hex: + +Hexadecimal character +^^^^^^^^^^^^^^^^^^^^^ + +The sequence :samp:`\\x{hh}` denotes a *character* with the hex (base 16) +value *hh*:: + + >>> '\x50' + 'P' + +Unlike in Standard C, exactly two hex digits are required. + +In a bytes literal, *character* means a *byte* with the given value. +In a string literal, it means a Unicode character with the given value. + +.. _string-escape-named: + +Named Unicode character +^^^^^^^^^^^^^^^^^^^^^^^ + +The sequence :samp:`\\N\\{{name}\\}` denotes a Unicode character +with the given *name*:: + + >>> '\N{LATIN CAPITAL LETTER P}' + 'P' + >>> '\N{SNAKE}' + '🐍' + +This sequence cannot appear in :ref:`bytes literals <bytes-literal>`. + +.. versionchanged:: 3.3 + Support for `name aliases <https://www.unicode.org/Public/16.0.0/ucd/NameAliases.txt>`__ + has been added. + +.. _string-escape-long-hex: + +Hexadecimal Unicode characters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The sequences :samp:`\\u{xxxx}` and :samp:`\\U{xxxxxxxx}` denote the +Unicode character with the given hex (base 16) value. +Exactly four digits are required for ``\u``; exactly eight digits are +required for ``\U``. +The latter can encode any Unicode character. + +.. code-block:: pycon + + >>> '\u1234' + 'ሴ' + >>> '\U0001f40d' + '🐍' + +These sequences cannot appear in :ref:`bytes literals <bytes-literal>`. .. index:: unrecognized escape sequence -Unlike Standard C, all unrecognized escape sequences are left in the string -unchanged, i.e., *the backslash is left in the result*. 
(This behavior is -useful when debugging: if an escape sequence is mistyped, the resulting output -is more easily recognized as broken.) 
It is also important to note that the -escape sequences only recognized in string literals fall into the category of -unrecognized escapes for bytes literals. +Unrecognized escape sequences +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Unlike in Standard C, all unrecognized escape sequences are left in the string +unchanged, that is, *the backslash is left in the result*:: + + >>> print('\q') + \q + >>> list('\q') + ['\\', 'q'] + +Note that for bytes literals, the escape sequences only recognized in string +literals (``\N...``, ``\u...``, ``\U...``) fall into the category of +unrecognized escapes. .. versionchanged:: 3.6 Unrecognized escape sequences produce a :exc:`DeprecationWarning`. .. versionchanged:: 3.12 - Unrecognized escape sequences produce a :exc:`SyntaxWarning`. In a future - Python version they will be eventually a :exc:`SyntaxError`. + Unrecognized escape sequences produce a :exc:`SyntaxWarning`. + In a future Python version they will raise a :exc:`SyntaxError`. + + +.. index:: + single: b'; bytes literal + single: b"; bytes literal + + +.. _bytes-literal: + +Bytes literals +-------------- + +:dfn:`Bytes literals` are always prefixed with ``'b'`` or ``'B'``; they produce an +instance of the :class:`bytes` type instead of the :class:`str` type. +They may only contain ASCII characters; bytes with a numeric value of 128 +or greater must be expressed with escape sequences (typically +:ref:`string-escape-hex` or :ref:`string-escape-oct`): + +.. code-block:: pycon + + >>> b'\x89PNG\r\n\x1a\n' + b'\x89PNG\r\n\x1a\n' + >>> list(b'\x89PNG\r\n\x1a\n') + [137, 80, 78, 71, 13, 10, 26, 10] + +Similarly, a zero byte must be expressed using an escape sequence (typically +``\0`` or ``\x00``). + + +.. index:: + single: r'; raw string literal + single: r"; raw string literal + +.. 
_raw-strings: + +Raw string literals +------------------- + +Both string and bytes literals may optionally be prefixed with a letter ``'r'`` +or ``'R'``; such constructs are called :dfn:`raw string literals` +and :dfn:`raw bytes literals` respectively and treat backslashes as +literal characters. +As a result, in raw string literals, :ref:`escape sequences <escape-sequences>` +are not treated specially: + +.. code-block:: pycon + + >>> r'\d{4}-\d{2}-\d{2}' + '\\d{4}-\\d{2}-\\d{2}' Even in a raw literal, quotes can be escaped with a backslash, but the backslash remains in the result; for example, ``r"\""`` is a valid string @@ -712,29 +900,6 @@ that a single backslash followed by a newline is interpreted as those two characters as part of the literal, *not* as a line continuation. -.. _string-concatenation: - -String literal concatenation ----------------------------- - -Multiple adjacent string or bytes literals (delimited by whitespace), possibly -using different quoting conventions, are allowed, and their meaning is the same -as their concatenation. Thus, ``"hello" 'world'`` is equivalent to -``"helloworld"``. This feature can be used to reduce the number of backslashes -needed, to split long strings conveniently across long lines, or even to add -comments to parts of strings, for example:: - - re.compile("[A-Za-z_]" # letter or underscore - "[A-Za-z0-9_]*" # letter, digit or underscore - ) - -Note that this feature is defined at the syntactical level, but implemented at -compile time. The '+' operator must be used to concatenate string expressions -at run time. Also note that literal concatenation can use different quoting -styles for each component (even mixing raw strings and triple quoted strings), -and formatted string literals may be concatenated with plain string literals. - - .. index:: single: formatted string literal single: interpolated string literal @@ -742,6 +907,8 @@ and formatted string literals may be concatenated with plain string literals. 
single: string; interpolated literal single: f-string single: fstring + single: f'; formatted string literal + single: f"; formatted string literal single: {} (curly brackets); in formatted string literal single: ! (exclamation); in formatted string literal single: : (colon); in formatted string literal @@ -958,7 +1125,7 @@ the following differences: .. _numbers: Numeric literals ----------------- +================ .. index:: number, numeric literal, integer literal floating-point literal, hexadecimal literal @@ -991,7 +1158,7 @@ actually an expression composed of the unary operator '``-``' and the literal .. _integers: Integer literals -^^^^^^^^^^^^^^^^ +---------------- Integer literals denote whole numbers. For example:: @@ -1064,7 +1231,7 @@ Formally, integer literals are described by the following lexical definitions: .. _floating: Floating-point literals -^^^^^^^^^^^^^^^^^^^^^^^ +----------------------- Floating-point (float) literals, such as ``3.14`` or ``1.5``, denote :ref:`approximations of real numbers `. @@ -1126,7 +1293,7 @@ lexical definitions: .. _imaginary: Imaginary literals -^^^^^^^^^^^^^^^^^^ +------------------ Python has :ref:`complex number ` objects, but no complex literals. @@ -1214,14 +1381,26 @@ The following tokens serve as delimiters in the grammar: ( ) [ ] { } , : ! . ; @ = + +The period can also occur in floating-point and imaginary literals. + +.. _lexical-ellipsis: + +A sequence of three periods has a special meaning as an +:py:data:`Ellipsis` literal: + +.. code-block:: none + + ... + +The following *augmented assignment operators* serve +lexically as delimiters, but also perform an operation: + +.. code-block:: none + -> += -= *= /= //= %= @= &= |= ^= >>= <<= **= -The period can also occur in floating-point and imaginary literals. A sequence -of three periods has a special meaning as an ellipsis literal. The second half -of the list, the augmented assignment operators, serve lexically as delimiters, -but also perform an operation. 
- The following printing ASCII characters have special meaning as part of other tokens or are otherwise significant to the lexical analyzer: @@ -1236,7 +1415,3 @@ occurrence outside string literals and comments is an unconditional error: $ ? ` - -.. rubric:: Footnotes - -.. [#] https://www.unicode.org/Public/16.0.0/ucd/NameAliases.txt