Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Include/internal/pycore_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ extern Py_ssize_t _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t has
extern Py_ssize_t _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr);
extern Py_ssize_t _Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject *key, Py_hash_t hash, _PyStackRef *value_addr);

extern int _PyDict_GetMethodStackRef(PyDictObject *dict, PyObject *name, _PyStackRef *method);

extern Py_ssize_t _PyDict_LookupIndex(PyDictObject *, PyObject *);
extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key);

Expand Down
7 changes: 7 additions & 0 deletions Include/internal/pycore_stackref.h
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,13 @@ _Py_TryXGetStackRef(PyObject **src, _PyStackRef *out)

#endif

#define PyStackRef_XSETREF(dst, src) \
do { \
_PyStackRef _tmp_dst_ref = (dst); \
(dst) = (src); \
PyStackRef_XCLOSE(_tmp_dst_ref); \
} while(0)

// Like Py_VISIT but for _PyStackRef fields
#define _Py_VISIT_STACKREF(ref) \
do { \
Expand Down
3 changes: 3 additions & 0 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1647,6 +1647,9 @@ def _block(self, count):
"""Round up a byte count by BLOCKSIZE and return it,
e.g. _block(834) => 1024.
"""
# Only non-negative offsets are allowed
if count < 0:
raise InvalidHeaderError("invalid offset")
blocks, remainder = divmod(count, BLOCKSIZE)
if remainder:
blocks += 1
Expand Down
156 changes: 156 additions & 0 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def sha256sum(data):
zstname = os.path.join(TEMPDIR, "testtar.tar.zst")
tmpname = os.path.join(TEMPDIR, "tmp.tar")
dotlessname = os.path.join(TEMPDIR, "testtar")
SPACE = b" "

sha256_regtype = (
"e09e4bc8b3c9d9177e77256353b36c159f5f040531bbd4b024a8f9b9196c71ce"
Expand Down Expand Up @@ -4602,6 +4603,161 @@ def extractall(self, ar):
ar.extractall(self.testdir, filter='fully_trusted')


class OffsetValidationTests(unittest.TestCase):
tarname = tmpname
invalid_posix_header = (
# name: 100 bytes
tarfile.NUL * tarfile.LENGTH_NAME
# mode, space, null terminator: 8 bytes
+ b"000755" + SPACE + tarfile.NUL
# uid, space, null terminator: 8 bytes
+ b"000001" + SPACE + tarfile.NUL
# gid, space, null terminator: 8 bytes
+ b"000001" + SPACE + tarfile.NUL
# size, space: 12 bytes
+ b"\xff" * 11 + SPACE
# mtime, space: 12 bytes
+ tarfile.NUL * 11 + SPACE
# chksum: 8 bytes
+ b"0011407" + tarfile.NUL
# type: 1 byte
+ tarfile.REGTYPE
# linkname: 100 bytes
+ tarfile.NUL * tarfile.LENGTH_LINK
# magic: 6 bytes, version: 2 bytes
+ tarfile.POSIX_MAGIC
# uname: 32 bytes
+ tarfile.NUL * 32
# gname: 32 bytes
+ tarfile.NUL * 32
# devmajor, space, null terminator: 8 bytes
+ tarfile.NUL * 6 + SPACE + tarfile.NUL
# devminor, space, null terminator: 8 bytes
+ tarfile.NUL * 6 + SPACE + tarfile.NUL
# prefix: 155 bytes
+ tarfile.NUL * tarfile.LENGTH_PREFIX
# padding: 12 bytes
+ tarfile.NUL * 12
)
invalid_gnu_header = (
# name: 100 bytes
tarfile.NUL * tarfile.LENGTH_NAME
# mode, null terminator: 8 bytes
+ b"0000755" + tarfile.NUL
# uid, null terminator: 8 bytes
+ b"0000001" + tarfile.NUL
# gid, space, null terminator: 8 bytes
+ b"0000001" + tarfile.NUL
# size, space: 12 bytes
+ b"\xff" * 11 + SPACE
# mtime, space: 12 bytes
+ tarfile.NUL * 11 + SPACE
# chksum: 8 bytes
+ b"0011327" + tarfile.NUL
# type: 1 byte
+ tarfile.REGTYPE
# linkname: 100 bytes
+ tarfile.NUL * tarfile.LENGTH_LINK
# magic: 8 bytes
+ tarfile.GNU_MAGIC
# uname: 32 bytes
+ tarfile.NUL * 32
# gname: 32 bytes
+ tarfile.NUL * 32
# devmajor, null terminator: 8 bytes
+ tarfile.NUL * 8
# devminor, null terminator: 8 bytes
+ tarfile.NUL * 8
# padding: 167 bytes
+ tarfile.NUL * 167
)
invalid_v7_header = (
# name: 100 bytes
tarfile.NUL * tarfile.LENGTH_NAME
# mode, space, null terminator: 8 bytes
+ b"000755" + SPACE + tarfile.NUL
# uid, space, null terminator: 8 bytes
+ b"000001" + SPACE + tarfile.NUL
# gid, space, null terminator: 8 bytes
+ b"000001" + SPACE + tarfile.NUL
# size, space: 12 bytes
+ b"\xff" * 11 + SPACE
# mtime, space: 12 bytes
+ tarfile.NUL * 11 + SPACE
# chksum: 8 bytes
+ b"0010070" + tarfile.NUL
# type: 1 byte
+ tarfile.REGTYPE
# linkname: 100 bytes
+ tarfile.NUL * tarfile.LENGTH_LINK
# padding: 255 bytes
+ tarfile.NUL * 255
)
valid_gnu_header = tarfile.TarInfo("filename").tobuf(tarfile.GNU_FORMAT)
data_block = b"\xff" * tarfile.BLOCKSIZE

def _write_buffer(self, buffer):
with open(self.tarname, "wb") as f:
f.write(buffer)

def _get_members(self, ignore_zeros=None):
with open(self.tarname, "rb") as f:
with tarfile.open(
mode="r", fileobj=f, ignore_zeros=ignore_zeros
) as tar:
return tar.getmembers()

def _assert_raises_read_error_exception(self):
with self.assertRaisesRegex(
tarfile.ReadError, "file could not be opened successfully"
):
self._get_members()

def test_invalid_offset_header_validations(self):
for tar_format, invalid_header in (
("posix", self.invalid_posix_header),
("gnu", self.invalid_gnu_header),
("v7", self.invalid_v7_header),
):
with self.subTest(format=tar_format):
self._write_buffer(invalid_header)
self._assert_raises_read_error_exception()

def test_early_stop_at_invalid_offset_header(self):
buffer = self.valid_gnu_header + self.invalid_gnu_header + self.valid_gnu_header
self._write_buffer(buffer)
members = self._get_members()
self.assertEqual(len(members), 1)
self.assertEqual(members[0].name, "filename")
self.assertEqual(members[0].offset, 0)

def test_ignore_invalid_archive(self):
# 3 invalid headers with their respective data
buffer = (self.invalid_gnu_header + self.data_block) * 3
self._write_buffer(buffer)
members = self._get_members(ignore_zeros=True)
self.assertEqual(len(members), 0)

def test_ignore_invalid_offset_headers(self):
for first_block, second_block, expected_offset in (
(
(self.valid_gnu_header),
(self.invalid_gnu_header + self.data_block),
0,
),
(
(self.invalid_gnu_header + self.data_block),
(self.valid_gnu_header),
1024,
),
):
self._write_buffer(first_block + second_block)
members = self._get_members(ignore_zeros=True)
self.assertEqual(len(members), 1)
self.assertEqual(members[0].name, "filename")
self.assertEqual(members[0].offset, expected_offset)


def setUpModule():
os_helper.unlink(TEMPDIR)
os.makedirs(TEMPDIR)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Disable computed stack limit checks on non-glibc linux platforms to fix
crashes on deep recursion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:mod:`tarfile` now validates archives to ensure member offsets are
non-negative. (Contributed by Alexander Enrique Urieles Nieto in
:gh:`130577`.)
9 changes: 8 additions & 1 deletion Modules/socketmodule.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
*/
#ifdef AF_BTH
# include <ws2bth.h>
# ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wpragma-pack"
# endif
# include <pshpack1.h>

/*
Expand Down Expand Up @@ -51,7 +55,10 @@ struct SOCKADDR_BTH_REDEF {

};
# include <poppack.h>
#endif
# ifdef __clang__
# pragma clang diagnostic pop
# endif
#endif /* AF_BTH */

/* Windows 'supports' CMSG_LEN, but does not follow the POSIX standard
* interface at all, so there is no point including the code that
Expand Down
93 changes: 74 additions & 19 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1581,32 +1581,47 @@ _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyOb
return ix;
}

static Py_ssize_t
lookup_threadsafe_unicode(PyDictKeysObject *dk, PyObject *key, Py_hash_t hash, _PyStackRef *value_addr)
{
assert(dk->dk_kind == DICT_KEYS_UNICODE);
assert(PyUnicode_CheckExact(key));

Py_ssize_t ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash);
if (ix == DKIX_EMPTY) {
*value_addr = PyStackRef_NULL;
return ix;
}
else if (ix >= 0) {
PyObject **addr_of_value = &DK_UNICODE_ENTRIES(dk)[ix].me_value;
PyObject *value = _Py_atomic_load_ptr(addr_of_value);
if (value == NULL) {
*value_addr = PyStackRef_NULL;
return DKIX_EMPTY;
}
if (_PyObject_HasDeferredRefcount(value)) {
*value_addr = (_PyStackRef){ .bits = (uintptr_t)value | Py_TAG_DEFERRED };
return ix;
}
if (_Py_TryIncrefCompare(addr_of_value, value)) {
*value_addr = PyStackRef_FromPyObjectSteal(value);
return ix;
}
return DKIX_KEY_CHANGED;
}
assert(ix == DKIX_KEY_CHANGED);
return ix;
}

Py_ssize_t
_Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject *key, Py_hash_t hash, _PyStackRef *value_addr)
{
PyDictKeysObject *dk = _Py_atomic_load_ptr(&mp->ma_keys);
if (dk->dk_kind == DICT_KEYS_UNICODE && PyUnicode_CheckExact(key)) {
Py_ssize_t ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash);
if (ix == DKIX_EMPTY) {
*value_addr = PyStackRef_NULL;
Py_ssize_t ix = lookup_threadsafe_unicode(dk, key, hash, value_addr);
if (ix != DKIX_KEY_CHANGED) {
return ix;
}
else if (ix >= 0) {
PyObject **addr_of_value = &DK_UNICODE_ENTRIES(dk)[ix].me_value;
PyObject *value = _Py_atomic_load_ptr(addr_of_value);
if (value == NULL) {
*value_addr = PyStackRef_NULL;
return DKIX_EMPTY;
}
if (_PyObject_HasDeferredRefcount(value)) {
*value_addr = (_PyStackRef){ .bits = (uintptr_t)value | Py_TAG_DEFERRED };
return ix;
}
if (_Py_TryIncrefCompare(addr_of_value, value)) {
*value_addr = PyStackRef_FromPyObjectSteal(value);
return ix;
}
}
}

PyObject *obj;
Expand Down Expand Up @@ -1646,6 +1661,46 @@ _Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject *key, Py_hash_t h

#endif

// Looks up the unicode key `key` in the dictionary. Note that `*method` may
// already contain a valid value! See _PyObject_GetMethodStackRef().
int
_PyDict_GetMethodStackRef(PyDictObject *mp, PyObject *key, _PyStackRef *method)
{
assert(PyUnicode_CheckExact(key));
Py_hash_t hash = hash_unicode_key(key);

#ifdef Py_GIL_DISABLED
PyDictKeysObject *dk = _Py_atomic_load_ptr_acquire(&mp->ma_keys);
if (dk->dk_kind == DICT_KEYS_UNICODE) {
_PyStackRef ref;
Py_ssize_t ix = lookup_threadsafe_unicode(dk, key, hash, &ref);
if (ix >= 0) {
assert(!PyStackRef_IsNull(ref));
PyStackRef_XSETREF(*method, ref);
return 1;
}
else if (ix == DKIX_EMPTY) {
return 0;
}
assert(ix == DKIX_KEY_CHANGED);
}
#endif

PyObject *obj;
Py_INCREF(mp);
Py_ssize_t ix = _Py_dict_lookup_threadsafe(mp, key, hash, &obj);
Py_DECREF(mp);
if (ix == DKIX_ERROR) {
PyStackRef_CLEAR(*method);
return -1;
}
else if (ix >= 0 && obj != NULL) {
PyStackRef_XSETREF(*method, PyStackRef_FromPyObjectSteal(obj));
return 1;
}
return 0; // not found
}

int
_PyDict_HasOnlyStringKeys(PyObject *dict)
{
Expand Down
19 changes: 7 additions & 12 deletions Objects/object.c
Original file line number Diff line number Diff line change
Expand Up @@ -1753,20 +1753,15 @@ _PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj,
}
}
if (dict != NULL) {
// TODO: use _Py_dict_lookup_threadsafe_stackref
Py_INCREF(dict);
PyObject *value;
if (PyDict_GetItemRef(dict, name, &value) != 0) {
// found or error
Py_DECREF(dict);
PyStackRef_CLEAR(*method);
if (value != NULL) {
*method = PyStackRef_FromPyObjectSteal(value);
}
assert(PyUnicode_CheckExact(name));
int found = _PyDict_GetMethodStackRef((PyDictObject *)dict, name, method);
if (found < 0) {
assert(PyStackRef_IsNull(*method));
return -1;
}
else if (found) {
return 0;
}
// not found
Py_DECREF(dict);
}

if (meth_found) {
Expand Down
6 changes: 5 additions & 1 deletion Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,11 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate)
_tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES;
#else
uintptr_t here_addr = _Py_get_machine_stack_pointer();
# if defined(HAVE_PTHREAD_GETATTR_NP) && !defined(_AIX) && !defined(__NetBSD__)
/// XXX musl supports HAVE_PTHRED_GETATTR_NP, but the resulting stack size
/// (on alpine at least) is much smaller than expected and imposes undue limits
/// compared to the old stack size estimation. (We assume musl is not glibc.)
# if defined(HAVE_PTHREAD_GETATTR_NP) && !defined(_AIX) && \
!defined(__NetBSD__) && (defined(__GLIBC__) || !defined(__linux__))
size_t stack_size, guard_size;
void *stack_addr;
pthread_attr_t attr;
Expand Down
Loading