Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 147 additions & 54 deletions src/workerd/api/blob.c++
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <workerd/api/streams/readable-source.h>
#include <workerd/api/streams/readable.h>
#include <workerd/io/features.h>
#include <workerd/io/observer.h>
#include <workerd/util/mimetype.h>
#include <workerd/util/stream-utils.h>
Expand All @@ -14,19 +15,27 @@ namespace workerd::api {

namespace {
// Concatenate an array of segments (parameter to Blob constructor).
jsg::BufferSource concat(jsg::Lock& js, jsg::Optional<Blob::Bits> maybeBits) {
// TODO(perf): Make it so that a Blob can keep references to the input data rather than copy it.
// Note that we can't keep references to ArrayBuffers since they are mutable, but we can
// reference other Blobs in the input.

kj::Maybe<jsg::JsBufferSource> concat(jsg::Lock& js, jsg::Optional<Blob::Bits> maybeBits) {
auto bits = kj::mv(maybeBits).orDefault(nullptr);
if (bits.size() == 0) {
return kj::none;
}

auto rejectResizable = FeatureFlags::get(js).getNoResizableArrayBufferInBlob();
auto maxBlobSize = Worker::Isolate::from(js).getLimitEnforcer().getBlobSizeLimit();
static constexpr int kMaxInt KJ_UNUSED = kj::maxValue;
KJ_DASSERT(maxBlobSize <= kMaxInt, "Blob size limit exceeds int range");
size_t size = 0;
kj::SmallArray<size_t, 8> cachedPartSizes(bits.size());
int index = 0;
for (auto& part: bits) {
size_t partSize = 0;
KJ_SWITCH_ONEOF(part) {
KJ_CASE_ONEOF(bytes, kj::Array<const byte>) {
KJ_CASE_ONEOF(bytes, jsg::JsBufferSource) {
if (rejectResizable) {
JSG_REQUIRE(
!bytes.isResizable(), TypeError, "Cannot create a Blob with a resizable ArrayBuffer");
}
partSize = bytes.size();
}
KJ_CASE_ONEOF(text, kj::String) {
Expand All @@ -36,6 +45,7 @@ jsg::BufferSource concat(jsg::Lock& js, jsg::Optional<Blob::Bits> maybeBits) {
partSize = blob->getData().size();
}
}
cachedPartSizes[index++] = partSize;

// We can skip the remaining checks if the part is empty.
if (partSize == 0) continue;
Expand All @@ -57,32 +67,42 @@ jsg::BufferSource concat(jsg::Lock& js, jsg::Optional<Blob::Bits> maybeBits) {
size += partSize;
}

auto backing = jsg::BackingStore::alloc<v8::ArrayBuffer>(js, size);
auto result = jsg::BufferSource(js, kj::mv(backing));
if (size == 0) {
return kj::none;
}

if (size == 0) return kj::mv(result);
auto u8 = jsg::JsUint8Array::create(js, size);

auto view = result.asArrayPtr();
auto view = u8.asArrayPtr();

index = 0;
for (auto& part: bits) {
KJ_SWITCH_ONEOF(part) {
KJ_CASE_ONEOF(bytes, kj::Array<const byte>) {
if (bytes.size() == 0) continue;
KJ_ASSERT(view.size() >= bytes.size());
view.first(bytes.size()).copyFrom(bytes);
view = view.slice(bytes.size());
KJ_CASE_ONEOF(bytes, jsg::JsBufferSource) {
size_t cachedSize = cachedPartSizes[index++];
// If the ArrayBuffer was resized larger, we'll ignore the additional bytes.
// If the ArrayBuffer was resized smaller, we'll copy up the current size
// and the remaining bytes for this chunk will be left as zeros.
size_t toCopy = kj::min(bytes.size(), cachedSize);
if (toCopy > 0) {
KJ_ASSERT(view.size() >= toCopy);
view.first(toCopy).copyFrom(bytes.asArrayPtr().first(toCopy));
}
view = view.slice(cachedSize);
}
KJ_CASE_ONEOF(text, kj::String) {
auto byteLength = text.asBytes().size();
if (byteLength == 0) continue;
KJ_ASSERT(view.size() >= byteLength);
KJ_ASSERT(byteLength == cachedPartSizes[index++]);
view.first(byteLength).copyFrom(text.asBytes());
view = view.slice(byteLength);
}
KJ_CASE_ONEOF(blob, jsg::Ref<Blob>) {
auto data = blob->getData();
if (data.size() == 0) continue;
KJ_ASSERT(view.size() >= data.size());
KJ_ASSERT(data.size() == cachedPartSizes[index++]);
view.first(data.size()).copyFrom(data);
view = view.slice(data.size());
}
Expand All @@ -91,7 +111,7 @@ jsg::BufferSource concat(jsg::Lock& js, jsg::Optional<Blob::Bits> maybeBits) {

KJ_ASSERT(view == nullptr);

return kj::mv(result);
return jsg::JsBufferSource(u8);
}

kj::String normalizeType(kj::String type) {
Expand All @@ -117,15 +137,22 @@ kj::String normalizeType(kj::String type) {

} // namespace

Blob::Blob(kj::String type): ownData(Empty{}), data(nullptr), type(kj::mv(type)) {}

Blob::Blob(kj::Array<byte> data, kj::String type)
: ownData(kj::mv(data)),
data(ownData.get<kj::Array<kj::byte>>()),
type(kj::mv(type)) {}

Blob::Blob(jsg::Lock& js, jsg::BufferSource data, kj::String type)
: ownData(kj::mv(data)),
data(ownData.get<jsg::BufferSource>().asArrayPtr()),
type(kj::mv(type)) {}
Blob::Blob(jsg::Lock& js, jsg::JsBufferSource data, kj::String type)
: ownData(data.addRef(js)),
data(data.asArrayPtr()),
type(kj::mv(type)) {
if (FeatureFlags::get(js).getNoResizableArrayBufferInBlob()) {
JSG_REQUIRE(
!data.isResizable(), TypeError, "Cannot create a Blob with a resizable ArrayBuffer");
}
}

Blob::Blob(jsg::Ref<Blob> parent, kj::ArrayPtr<const byte> data, kj::String type)
: ownData(kj::mv(parent)),
Expand All @@ -141,69 +168,110 @@ jsg::Ref<Blob> Blob::constructor(
}
}

return js.alloc<Blob>(js, concat(js, kj::mv(bits)), kj::mv(type));
KJ_IF_SOME(b, bits) {
// Optimize for the case where the input is a single Blob, where we can just
// return a new view on the existing data without copying.
if (b.size() == 1) {
KJ_IF_SOME(parent, b[0].template tryGet<jsg::Ref<Blob>>()) {
if (parent->getSize() == 0) {
return js.alloc<Blob>(kj::mv(type));
}
auto ptr = parent->data;
KJ_IF_SOME(root, parent->ownData.template tryGet<jsg::Ref<Blob>>()) {
parent = root.addRef();
}
return js.alloc<Blob>(kj::mv(parent), ptr, kj::mv(type));
}
}
}

KJ_IF_SOME(data, concat(js, kj::mv(bits))) {
return js.alloc<Blob>(js, data, kj::mv(type));
}
return js.alloc<Blob>(kj::mv(type));
}

kj::ArrayPtr<const byte> Blob::getData() const {
FeatureObserver::maybeRecordUse(FeatureObserver::Feature::BLOB_GET_DATA);
return data;
}

jsg::Ref<Blob> Blob::slice(jsg::Lock& js,
jsg::Optional<int> maybeStart,
jsg::Optional<int> maybeEnd,
jsg::Optional<kj::String> type) {

auto normalizedType = normalizeType(kj::mv(type).orDefault(nullptr));
if (data.size() == 0) {
// Blob is empty, there's nothing to slice.
return js.alloc<Blob>(kj::mv(normalizedType));
}

int start = maybeStart.orDefault(0);
int end = maybeEnd.orDefault(data.size());

if (start < 0) {
// Negative value interpreted as offset from end.
start += data.size();
}
// Clamp start to range.
if (start < 0) {
start = 0;
} else if (start > data.size()) {
start = data.size();
}

if (end < 0) {
// Negative value interpreted as offset from end.
end += data.size();
}
// Clamp end to range.
if (end < start) {
end = start;
} else if (end > data.size()) {
end = data.size();

// Clamp start and end to range.
start = kj::max(0, kj::min(start, static_cast<int>(data.size())));
end = kj::max(start, kj::min(end, static_cast<int>(data.size())));

// We run with KJ_IREQUIRE checks enabled in production, which will catch
// out of bounds start/end ... but since we're clamping them above, this
// should never actually be a problem.
auto slicedData = data.slice(start, end);

// If the slice is empty, we can just return a new empty Blob without worrying about
// referencing the original data at all. Super minor optimization that avoids an
// unnecessary refcount.
if (slicedData.size() == 0) {
return js.alloc<Blob>(kj::mv(normalizedType));
}

return js.alloc<Blob>(
JSG_THIS, data.slice(start, end), normalizeType(kj::mv(type).orDefault(nullptr)));
KJ_SWITCH_ONEOF(ownData) {
KJ_CASE_ONEOF(_, Empty) {
// Handled at the beginning of the function with the zero-length check.
KJ_FAIL_ASSERT("Empty blob should have been handled at the beginning of the function");
}
KJ_CASE_ONEOF(parent, jsg::Ref<Blob>) {
// If this blob is itself a slice (backed by a Ref<Blob>), reference the
// root data-owning blob directly. This prevents unbounded chain depth —
// every slice always points to the root, so depth is always ≤ 1.
return js.alloc<Blob>(parent.addRef(), slicedData, kj::mv(normalizedType));
}
KJ_CASE_ONEOF(_, jsg::JsRef<jsg::JsBufferSource>) {
return js.alloc<Blob>(JSG_THIS, slicedData, kj::mv(normalizedType));
}
KJ_CASE_ONEOF(_, kj::Array<kj::byte>) {
return js.alloc<Blob>(JSG_THIS, slicedData, kj::mv(normalizedType));
}
}
KJ_UNREACHABLE;
}

jsg::Promise<jsg::BufferSource> Blob::arrayBuffer(jsg::Lock& js) {
jsg::Promise<jsg::JsRef<jsg::JsArrayBuffer>> Blob::arrayBuffer(jsg::Lock& js) {
FeatureObserver::maybeRecordUse(FeatureObserver::Feature::BLOB_AS_ARRAY_BUFFER);
// We use BufferSource here instead of kj::Array<kj::byte> to ensure that the
// resulting backing store is associated with the isolate, which is necessary
// for when we start making use of v8 sandboxing.
auto backing = jsg::BackingStore::alloc<v8::ArrayBuffer>(js, data.size());
backing.asArrayPtr().copyFrom(data);
return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing)));
auto ret = jsg::JsArrayBuffer::create(js, data);
return js.resolvedPromise(ret.addRef(js));
}

jsg::Promise<jsg::BufferSource> Blob::bytes(jsg::Lock& js) {
// We use BufferSource here instead of kj::Array<kj::byte> to ensure that the
// resulting backing store is associated with the isolate, which is necessary
// for when we start making use of v8 sandboxing.
auto backing = jsg::BackingStore::alloc<v8::Uint8Array>(js, data.size());
backing.asArrayPtr().copyFrom(data);
return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing)));
jsg::Promise<jsg::JsRef<jsg::JsUint8Array>> Blob::bytes(jsg::Lock& js) {
FeatureObserver::maybeRecordUse(FeatureObserver::Feature::BLOB_AS_ARRAY_BUFFER);
auto ret = jsg::JsUint8Array::create(js, data);
return js.resolvedPromise(ret.addRef(js));
}

jsg::Promise<kj::String> Blob::text(jsg::Lock& js) {
jsg::Promise<jsg::JsRef<jsg::JsString>> Blob::text(jsg::Lock& js) {
FeatureObserver::maybeRecordUse(FeatureObserver::Feature::BLOB_AS_TEXT);
return js.resolvedPromise(kj::str(data.asChars()));
// Using js.str here instead of returning kj::String avoids an additional
// intermediate allocation and copy of the string data.
return js.resolvedPromise(js.str(data.asChars()).addRef(js));
}

jsg::Ref<ReadableStream> Blob::stream(jsg::Lock& js) {
Expand All @@ -214,13 +282,18 @@ jsg::Ref<ReadableStream> Blob::stream(jsg::Lock& js) {

// =======================================================================================

File::File(kj::String name, kj::String type, double lastModified)
: Blob(kj::mv(type)),
name(kj::mv(name)),
lastModified(lastModified) {}

File::File(kj::Array<byte> data, kj::String name, kj::String type, double lastModified)
: Blob(kj::mv(data), kj::mv(type)),
name(kj::mv(name)),
lastModified(lastModified) {}

File::File(
jsg::Lock& js, jsg::BufferSource data, kj::String name, kj::String type, double lastModified)
jsg::Lock& js, jsg::JsBufferSource data, kj::String name, kj::String type, double lastModified)
: Blob(js, kj::mv(data), kj::mv(type)),
name(kj::mv(name)),
lastModified(lastModified) {}
Expand Down Expand Up @@ -252,7 +325,27 @@ jsg::Ref<File> File::constructor(
lastModified = dateNow();
}

return js.alloc<File>(js, concat(js, kj::mv(bits)), kj::mv(name), kj::mv(type), lastModified);
KJ_IF_SOME(b, bits) {
// Optimize for the case where the input is a single Blob, where we can just
// return a new view on the existing data without copying.
if (b.size() == 1) {
KJ_IF_SOME(parent, b[0].template tryGet<jsg::Ref<Blob>>()) {
if (parent->getSize() == 0) {
return js.alloc<File>(kj::mv(name), kj::mv(type), lastModified);
}
auto ptr = parent->data;
KJ_IF_SOME(root, parent->ownData.template tryGet<jsg::Ref<Blob>>()) {
parent = root.addRef();
}
return js.alloc<File>(kj::mv(parent), ptr, kj::mv(name), kj::mv(type), lastModified);
}
}
}

KJ_IF_SOME(data, concat(js, kj::mv(bits))) {
return js.alloc<File>(js, data, kj::mv(name), kj::mv(type), lastModified);
}
return js.alloc<File>(kj::mv(name), kj::mv(type), lastModified);
}

} // namespace workerd::api
Loading
Loading