Skip to content

Commit 507db05

Browse files
unicode parsing fix for encoding
1 parent b03f43c commit 507db05

File tree

3 files changed

+23
-8
lines changed

3 files changed

+23
-8
lines changed

Include/object.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,7 @@ whose size is determined when the object is allocated.
105105
*/
106106
typedef struct _object {
107107
PyObject_HEAD
108+
Py_ssize_t ob_bstate;
108109
} PyObject;
109110

110111
typedef struct {

Lib/test/test_unicode.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -743,13 +743,13 @@ def __str__(self):
743743
self.assertRaises(
744744
TypeError,
745745
unicode,
746-
u'decoding unicode is not supported',
746+
u"'decode()' is not supported on Unicode in 3.x: convert the string to bytes.",
747747
'utf-8',
748748
'strict'
749749
)
750750

751751
self.assertEqual(
752-
unicode('strings are decoded to unicode', 'utf-8', 'strict'),
752+
unicode("'decode()' is not supported on Unicode in 3.x: convert the string to bytes.", 'utf-8', 'strict'),
753753
u'strings are decoded to unicode'
754754
)
755755

Objects/unicodeobject.c

Lines changed: 20 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1209,12 +1209,20 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
12091209
return PyObject_Unicode(obj);
12101210
}
12111211
#else
1212+
#endif
1213+
12121214
if (PyUnicode_Check(obj)) {
1213-
PyErr_SetString(PyExc_TypeError,
1214-
"decoding Unicode is not supported");
1215-
return NULL;
1215+
if (encoding) {
1216+
obj->ob_bstate = BSTATE_BYTE;
1217+
if ((obj->ob_bstate == BSTATE_BYTE) &&
1218+
PyErr_WarnPy3k(
1219+
"'decode()' is not supported on Unicode in 3.x: convert the string to bytes.", 1) < 0) {
1220+
return NULL;
1221+
}
1222+
return NULL;
1223+
}
1224+
return PyObject_Unicode(obj);
12161225
}
1217-
#endif
12181226

12191227
/* Coerce object */
12201228
if (PyString_Check(obj)) {
@@ -1304,8 +1312,14 @@ PyObject *PyUnicode_AsDecodedObject(PyObject *unicode,
13041312
goto onError;
13051313
}
13061314

1307-
if (PyErr_WarnPy3k("decoding Unicode is not supported in 3.x", 1) < 0)
1308-
goto onError;
1315+
if (PyString_CheckExact(unicode)) {
1316+
unicode->ob_bstate = BSTATE_BYTE;
1317+
}
1318+
1319+
if ((unicode->ob_bstate == BSTATE_BYTE) &&
1320+
PyErr_WarnPy3k("'decode()' is not supported on Unicode in 3.x: convert the string to bytes.", 1) < 0) {
1321+
return NULL;
1322+
}
13091323

13101324
if (encoding == NULL)
13111325
encoding = PyUnicode_GetDefaultEncoding();

0 commit comments

Comments
 (0)