Skip to content
This repository was archived by the owner on Feb 13, 2025. It is now read-only.

Commit 1c3fdd9

Browse files
Issue python#28563: Fixed possible DoS and arbitrary code execution when handling
plural form selections in the gettext module. The expression parser now supports exactly the syntax supported by GNU gettext.
2 parents 20a587b + 07bcf05 commit 1c3fdd9

File tree

3 files changed

+216
-45
lines changed

3 files changed

+216
-45
lines changed

Lib/gettext.py

Lines changed: 128 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -59,55 +59,139 @@
5959

6060
_default_localedir = os.path.join(sys.base_prefix, 'share', 'locale')
6161

62+
# Expression parsing for plural form selection.
63+
#
64+
# The gettext library supports a small subset of C syntax. The only
65+
# incompatible difference is that integer literals starting with zero are
66+
# decimal.
67+
#
68+
# https://www.gnu.org/software/gettext/manual/gettext.html#Plural-forms
69+
# http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-runtime/intl/plural.y
70+
71+
_token_pattern = re.compile(r"""
72+
(?P<WHITESPACES>[ \t]+) | # spaces and horizontal tabs
73+
(?P<NUMBER>[0-9]+\b) | # decimal integer
74+
(?P<NAME>n\b) | # only n is allowed
75+
(?P<PARENTHESIS>[()]) |
76+
(?P<OPERATOR>[-*/%+?:]|[><!]=?|==|&&|\|\|) | # !, *, /, %, +, -, <, >,
77+
# <=, >=, ==, !=, &&, ||,
78+
# ? :
79+
# unary and bitwise ops
80+
# not allowed
81+
(?P<INVALID>\w+|.) # invalid token
82+
""", re.VERBOSE|re.DOTALL)
83+
84+
def _tokenize(plural):
85+
for mo in re.finditer(_token_pattern, plural):
86+
kind = mo.lastgroup
87+
if kind == 'WHITESPACES':
88+
continue
89+
value = mo.group(kind)
90+
if kind == 'INVALID':
91+
raise ValueError('invalid token in plural form: %s' % value)
92+
yield value
93+
yield ''
94+
95+
def _error(value):
96+
if value:
97+
return ValueError('unexpected token in plural form: %s' % value)
98+
else:
99+
return ValueError('unexpected end of plural form')
100+
101+
_binary_ops = (
102+
('||',),
103+
('&&',),
104+
('==', '!='),
105+
('<', '>', '<=', '>='),
106+
('+', '-'),
107+
('*', '/', '%'),
108+
)
109+
_binary_ops = {op: i for i, ops in enumerate(_binary_ops, 1) for op in ops}
110+
_c2py_ops = {'||': 'or', '&&': 'and', '/': '//'}
111+
112+
def _parse(tokens, priority=-1):
113+
result = ''
114+
nexttok = next(tokens)
115+
while nexttok == '!':
116+
result += 'not '
117+
nexttok = next(tokens)
118+
119+
if nexttok == '(':
120+
sub, nexttok = _parse(tokens)
121+
result = '%s(%s)' % (result, sub)
122+
if nexttok != ')':
123+
raise ValueError('unbalanced parenthesis in plural form')
124+
elif nexttok == 'n':
125+
result = '%s%s' % (result, nexttok)
126+
else:
127+
try:
128+
value = int(nexttok, 10)
129+
except ValueError:
130+
raise _error(nexttok) from None
131+
result = '%s%d' % (result, value)
132+
nexttok = next(tokens)
133+
134+
j = 100
135+
while nexttok in _binary_ops:
136+
i = _binary_ops[nexttok]
137+
if i < priority:
138+
break
139+
# Break chained comparisons
140+
if i in (3, 4) and j in (3, 4): # '==', '!=', '<', '>', '<=', '>='
141+
result = '(%s)' % result
142+
# Replace some C operators by their Python equivalents
143+
op = _c2py_ops.get(nexttok, nexttok)
144+
right, nexttok = _parse(tokens, i + 1)
145+
result = '%s %s %s' % (result, op, right)
146+
j = i
147+
if j == priority == 4: # '<', '>', '<=', '>='
148+
result = '(%s)' % result
149+
150+
if nexttok == '?' and priority <= 0:
151+
if_true, nexttok = _parse(tokens, 0)
152+
if nexttok != ':':
153+
raise _error(nexttok)
154+
if_false, nexttok = _parse(tokens)
155+
result = '%s if %s else %s' % (if_true, result, if_false)
156+
if priority == 0:
157+
result = '(%s)' % result
158+
159+
return result, nexttok
62160

63161
def c2py(plural):
    """Gets a C expression as used in PO files for plural forms and returns a
    Python function that implements an equivalent expression.

    The returned function takes the number *n* and raises ValueError when
    *n* is not an int.  ValueError is raised here when *plural* is longer
    than 1000 characters, is not a valid plural-form expression, or is
    nested too deeply.
    """
    if len(plural) > 1000:
        raise ValueError('plural form expression is too long')
    try:
        expression, leftover = _parse(_tokenize(plural))
        if leftover:
            raise _error(leftover)

        # Refuse deeply nested parentheses before handing the text to the
        # compiler.
        nesting = 0
        for char in expression:
            if char == ')':
                nesting -= 1
            elif char == '(':
                nesting += 1
                if nesting > 20:
                    # Python compiler limit is about 90.
                    # The most complex example has 2.
                    raise ValueError('plural form expression is too complex')

        source = '''if True:
            def func(n):
                if not isinstance(n, int):
                    raise ValueError('Plural value must be an integer.')
                return int(%s)
            ''' % expression
        namespace = {}
        exec(source, namespace)
        return namespace['func']
    except RuntimeError:
        # Recursion error can be raised in _parse() or exec().
        raise ValueError('plural form expression is too complex')
111195

112196

113197
def _expand_lang(loc):

Lib/test/test_gettext.py

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,9 @@ def test_plural_forms2(self):
236236
x = t.ngettext('There is %s file', 'There are %s files', 2)
237237
eq(x, 'Hay %s ficheros')
238238

239-
def test_hu(self):
239+
# Examples from http://www.gnu.org/software/gettext/manual/gettext.html
240+
241+
def test_ja(self):
240242
eq = self.assertEqual
241243
f = gettext.c2py('0')
242244
s = ''.join([ str(f(x)) for x in range(200) ])
@@ -254,6 +256,12 @@ def test_fr(self):
254256
s = ''.join([ str(f(x)) for x in range(200) ])
255257
eq(s, "00111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111")
256258

259+
def test_lv(self):
260+
eq = self.assertEqual
261+
f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2')
262+
s = ''.join([ str(f(x)) for x in range(200) ])
263+
eq(s, "20111111111111111111101111111110111111111011111111101111111110111111111011111111101111111110111111111011111111111111111110111111111011111111101111111110111111111011111111101111111110111111111011111111")
264+
257265
def test_gd(self):
258266
eq = self.assertEqual
259267
f = gettext.c2py('n==1 ? 0 : n==2 ? 1 : 2')
@@ -267,6 +275,12 @@ def test_gd2(self):
267275
s = ''.join([ str(f(x)) for x in range(200) ])
268276
eq(s, "20122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222")
269277

278+
def test_ro(self):
279+
eq = self.assertEqual
280+
f = gettext.c2py('n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2')
281+
s = ''.join([ str(f(x)) for x in range(200) ])
282+
eq(s, "10111111111111111111222222222222222222222222222222222222222222222222222222222222222222222222222222222111111111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222")
283+
270284
def test_lt(self):
271285
eq = self.assertEqual
272286
f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2')
@@ -279,6 +293,12 @@ def test_ru(self):
279293
s = ''.join([ str(f(x)) for x in range(200) ])
280294
eq(s, "20111222222222222222201112222220111222222011122222201112222220111222222011122222201112222220111222222011122222222222222220111222222011122222201112222220111222222011122222201112222220111222222011122222")
281295

296+
def test_cs(self):
297+
eq = self.assertEqual
298+
f = gettext.c2py('(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2')
299+
s = ''.join([ str(f(x)) for x in range(200) ])
300+
eq(s, "20111222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222")
301+
282302
def test_pl(self):
283303
eq = self.assertEqual
284304
f = gettext.c2py('n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2')
@@ -291,10 +311,73 @@ def test_sl(self):
291311
s = ''.join([ str(f(x)) for x in range(200) ])
292312
eq(s, "30122333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333012233333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333")
293313

314+
def test_ar(self):
315+
eq = self.assertEqual
316+
f = gettext.c2py('n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5')
317+
s = ''.join([ str(f(x)) for x in range(200) ])
318+
eq(s, "01233333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444445553333333344444444444444444444444444444444444444444444444444444444444444444444444444444444444444444")
319+
294320
def test_security(self):
295321
raises = self.assertRaises
296322
# Test for a dangerous expression
297323
raises(ValueError, gettext.c2py, "os.chmod('/etc/passwd',0777)")
324+
# issue28563
325+
raises(ValueError, gettext.c2py, '"(eval(foo) && ""')
326+
raises(ValueError, gettext.c2py, 'f"{os.system(\'sh\')}"')
327+
# Maximum recursion depth exceeded during compilation
328+
raises(ValueError, gettext.c2py, 'n+'*10000 + 'n')
329+
self.assertEqual(gettext.c2py('n+'*100 + 'n')(1), 101)
330+
# MemoryError during compilation
331+
raises(ValueError, gettext.c2py, '('*100 + 'n' + ')'*100)
332+
# Maximum recursion depth exceeded in C to Python translator
333+
raises(ValueError, gettext.c2py, '('*10000 + 'n' + ')'*10000)
334+
self.assertEqual(gettext.c2py('('*20 + 'n' + ')'*20)(1), 1)
335+
336+
def test_chained_comparison(self):
337+
# C doesn't chain comparison as Python so 2 == 2 == 2 gets different results
338+
f = gettext.c2py('n == n == n')
339+
self.assertEqual(''.join(str(f(x)) for x in range(3)), '010')
340+
f = gettext.c2py('1 < n == n')
341+
self.assertEqual(''.join(str(f(x)) for x in range(3)), '100')
342+
f = gettext.c2py('n == n < 2')
343+
self.assertEqual(''.join(str(f(x)) for x in range(3)), '010')
344+
f = gettext.c2py('0 < n < 2')
345+
self.assertEqual(''.join(str(f(x)) for x in range(3)), '111')
346+
347+
def test_decimal_number(self):
348+
self.assertEqual(gettext.c2py('0123')(1), 123)
349+
350+
def test_invalid_syntax(self):
351+
invalid_expressions = [
352+
'x>1', '(n>1', 'n>1)', '42**42**42', '0xa', '1.0', '1e2',
353+
'n>0x1', '+n', '-n', 'n()', 'n(1)', '1+', 'nn', 'n n',
354+
]
355+
for expr in invalid_expressions:
356+
with self.assertRaises(ValueError):
357+
gettext.c2py(expr)
358+
359+
def test_nested_condition_operator(self):
360+
self.assertEqual(gettext.c2py('n?1?2:3:4')(0), 4)
361+
self.assertEqual(gettext.c2py('n?1?2:3:4')(1), 2)
362+
self.assertEqual(gettext.c2py('n?1:3?4:5')(0), 4)
363+
self.assertEqual(gettext.c2py('n?1:3?4:5')(1), 1)
364+
365+
def test_division(self):
366+
f = gettext.c2py('2/n*3')
367+
self.assertEqual(f(1), 6)
368+
self.assertEqual(f(2), 3)
369+
self.assertEqual(f(3), 0)
370+
self.assertEqual(f(-1), -6)
371+
self.assertRaises(ZeroDivisionError, f, 0)
372+
373+
def test_plural_number(self):
    # The generated function accepts an int and rejects anything else.
    plural = gettext.c2py('1')
    self.assertEqual(plural(1), 1)
    for not_an_int in (1.0, '1', [], object()):
        self.assertRaises(ValueError, plural, not_an_int)
380+
298381

299382
class GNUTranslationParsingTest(GettextBaseTest):
300383
def test_plural_form_error_issue17898(self):

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ Core and Builtins
1616
Library
1717
-------
1818

19+
- Issue #28563: Fixed possible DoS and arbitrary code execution when handling
20+
plural form selections in the gettext module. The expression parser now
21+
supports exactly the syntax supported by GNU gettext.
22+
1923
- In the curses module, raise an error if window.getstr() or window.instr() is
2024
passed a negative value.
2125

0 commit comments

Comments
 (0)