Skip to content

Commit b2e20b2

Browse files
authored
Fix aws-chunked Content-Length handling for non-seekable S3 uploads (boto#3652)
1 parent c251a91 commit b2e20b2

File tree

4 files changed

+84
-5
lines changed

4 files changed

+84
-5
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"type": "bugfix",
3+
"category": "s3",
4+
"description": "Fix aws-chunked requests with non-seekable streams sending both ``Content-Length`` and ``Transfer-Encoding: chunked``, which violated HTTP/1.1 (RFC 7230) and caused ``SignatureDoesNotMatch`` errors."
5+
}

botocore/httpchecksum.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -472,16 +472,22 @@ def _apply_request_trailer_checksum(request):
472472
headers["X-Amz-Trailer"] = location_name
473473

474474
content_length = determine_content_length(body)
475+
if content_length is None and "Content-Length" in headers:
476+
# determine_content_length() cannot resolve the length of non-seekable
477+
# bodies, but the caller may have set Content-Length explicitly. Reuse
478+
# that value for X-Amz-Decoded-Content-Length before the header is
479+
# removed for chunked transfer encoding.
480+
content_length = int(headers["Content-Length"])
475481
if content_length is not None:
476482
# Send the decoded content length if we can determine it. Some
477483
# services such as S3 may require the decoded content length
478484
headers["X-Amz-Decoded-Content-Length"] = str(content_length)
479485

480-
if "Content-Length" in headers:
481-
del headers["Content-Length"]
482-
logger.debug(
483-
"Removing the Content-Length header since 'chunked' is specified for Transfer-Encoding."
484-
)
486+
if "Content-Length" in headers:
487+
del headers["Content-Length"]
488+
logger.debug(
489+
"Removing the Content-Length header since 'chunked' is specified for Transfer-Encoding."
490+
)
485491

486492
if isinstance(body, (bytes, bytearray)):
487493
body = io.BytesIO(body)

tests/functional/test_s3.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
# ANY KIND, either express or implied. See the License for the specific
1212
# language governing permissions and limitations under the License.
1313
import datetime
14+
import io
1415
import re
1516

1617
import pytest
@@ -43,6 +44,22 @@
4344
DATE = datetime.datetime(2021, 8, 27, 0, 0, 0, tzinfo=datetime.timezone.utc)
4445

4546

47+
class NonSeekableStream(io.RawIOBase):
48+
def __init__(self, data):
49+
self._data = data
50+
self._pos = 0
51+
52+
def read(self, size=-1):
53+
if size == -1:
54+
size = len(self._data) - self._pos
55+
chunk = self._data[self._pos : self._pos + size]
56+
self._pos += len(chunk)
57+
return chunk
58+
59+
def readable(self):
60+
return True
61+
62+
4663
class TestS3BucketValidation(unittest.TestCase):
4764
def test_invalid_bucket_name_raises_error(self):
4865
session = botocore.session.get_session()
@@ -1550,6 +1567,27 @@ def test_trailing_checksum_set_with_content_length_removes_header(self):
15501567
self.assertIn(b"x-amz-checksum-crc32:eCQEmA==", body)
15511568
self.assertNotIn("Content-Length", sent_headers)
15521569

1570+
def test_trailing_checksum_non_seekable_body_uses_content_length(self):
1571+
with self.http_stubber:
1572+
self.client.put_object(
1573+
Bucket="foo",
1574+
Key="bar",
1575+
Body=NonSeekableStream(b"hello world"),
1576+
ContentLength=11,
1577+
)
1578+
sent_headers = self.get_sent_headers()
1579+
self.assertEqual(sent_headers["Content-Encoding"], b"aws-chunked")
1580+
self.assertEqual(sent_headers["Transfer-Encoding"], b"chunked")
1581+
self.assertEqual(
1582+
sent_headers["X-Amz-Trailer"], b"x-amz-checksum-crc32"
1583+
)
1584+
self.assertEqual(sent_headers["X-Amz-Decoded-Content-Length"], b"11")
1585+
self.assertEqual(
1586+
sent_headers["x-amz-content-sha256"],
1587+
b"STREAMING-UNSIGNED-PAYLOAD-TRAILER",
1588+
)
1589+
self.assertNotIn("Content-Length", sent_headers)
1590+
15531591
def test_trailing_checksum_set_empty_body(self):
15541592
with self.http_stubber:
15551593
self.client.put_object(Bucket="foo", Key="bar", Body="")

tests/unit/test_httpchecksum.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
1111
# ANY KIND, either express or implied. See the License for the specific
1212
# language governing permissions and limitations under the License.
13+
import io
1314
import unittest
1415
from io import BytesIO
1516

@@ -377,6 +378,35 @@ def test_apply_request_checksum_flex_trailer_readable(self):
377378
self.assertNotIn("x-amz-checksum-crc32", request["headers"])
378379
self.assertIsInstance(request["body"], AwsChunkedWrapper)
379380

381+
def test_apply_request_checksum_flex_trailer_non_seekable_with_length(
382+
self,
383+
):
384+
class NonSeekableStream(io.RawIOBase):
385+
def __init__(self, body):
386+
self._body = BytesIO(body)
387+
388+
def read(self, size=-1):
389+
return self._body.read(size)
390+
391+
def readable(self):
392+
return True
393+
394+
request = self._build_request(NonSeekableStream(b"hello world"))
395+
request["headers"]["Content-Length"] = "11"
396+
request["context"]["checksum"] = {
397+
"request_algorithm": {
398+
"in": "trailer",
399+
"algorithm": "crc32",
400+
"name": "x-amz-checksum-crc32",
401+
}
402+
}
403+
apply_request_checksum(request)
404+
self.assertNotIn("Content-Length", request["headers"])
405+
self.assertEqual(
406+
request["headers"]["X-Amz-Decoded-Content-Length"], "11"
407+
)
408+
self.assertIsInstance(request["body"], AwsChunkedWrapper)
409+
380410
def test_apply_request_checksum_flex_header_trailer_explicit_digest(self):
381411
request = self._build_request(b"")
382412
request["context"]["checksum"] = {

0 commit comments

Comments
 (0)