cloudflare · jasnell · Apr 2, 2026 · Mar 31, 2026 · Mar 31, 2026
@@ -6,6 +6,7 @@
 
 #include <workerd/api/streams/readable-source.h>
 #include <workerd/api/streams/readable.h>
+#include <workerd/io/features.h>
 #include <workerd/io/observer.h>
 #include <workerd/util/mimetype.h>
 #include <workerd/util/stream-utils.h>
@@ -14,19 +15,27 @@ namespace workerd::api {
 
 namespace {
 // Concatenate an array of segments (parameter to Blob constructor).
-jsg::BufferSource concat(jsg::Lock& js, jsg::Optional<Blob::Bits> maybeBits) {
-  // TODO(perf): Make it so that a Blob can keep references to the input data rather than copy it.
-  //   Note that we can't keep references to ArrayBuffers since they are mutable, but we can
-  //   reference other Blobs in the input.
-
+kj::Maybe<jsg::JsBufferSource> concat(jsg::Lock& js, jsg::Optional<Blob::Bits> maybeBits) {
   auto bits = kj::mv(maybeBits).orDefault(nullptr);
+  if (bits.size() == 0) {
+    return kj::none;
+  }
 
+  auto rejectResizable = FeatureFlags::get(js).getNoResizableArrayBufferInBlob();
   auto maxBlobSize = Worker::Isolate::from(js).getLimitEnforcer().getBlobSizeLimit();
+  static constexpr int kMaxInt KJ_UNUSED = kj::maxValue;
+  KJ_DASSERT(maxBlobSize <= kMaxInt, "Blob size limit exceeds int range");
   size_t size = 0;
+  kj::SmallArray<size_t, 8> cachedPartSizes(bits.size());
+  int index = 0;
   for (auto& part: bits) {
     size_t partSize = 0;
     KJ_SWITCH_ONEOF(part) {
-      KJ_CASE_ONEOF(bytes, kj::Array<const byte>) {
+      KJ_CASE_ONEOF(bytes, jsg::JsBufferSource) {
+        if (rejectResizable) {
+          JSG_REQUIRE(
+              !bytes.isResizable(), TypeError, "Cannot create a Blob with a resizable ArrayBuffer");
+        }
         partSize = bytes.size();
       }
       KJ_CASE_ONEOF(text, kj::String) {
@@ -36,6 +45,7 @@ jsg::BufferSource concat(jsg::Lock& js, jsg::Optional<Blob::Bits> maybeBits) {
         partSize = blob->getData().size();
       }
     }
+    cachedPartSizes[index++] = partSize;
 
     // We can skip the remaining checks if the part is empty.
     if (partSize == 0) continue;
@@ -57,32 +67,42 @@ jsg::BufferSource concat(jsg::Lock& js, jsg::Optional<Blob::Bits> maybeBits) {
     size += partSize;
   }
 
-  auto backing = jsg::BackingStore::alloc<v8::ArrayBuffer>(js, size);
-  auto result = jsg::BufferSource(js, kj::mv(backing));
+  if (size == 0) {
+    return kj::none;
+  }
 
-  if (size == 0) return kj::mv(result);
+  auto u8 = jsg::JsUint8Array::create(js, size);
 
-  auto view = result.asArrayPtr();
+  auto view = u8.asArrayPtr();
 
+  index = 0;
   for (auto& part: bits) {
     KJ_SWITCH_ONEOF(part) {
-      KJ_CASE_ONEOF(bytes, kj::Array<const byte>) {
-        if (bytes.size() == 0) continue;
-        KJ_ASSERT(view.size() >= bytes.size());
-        view.first(bytes.size()).copyFrom(bytes);
-        view = view.slice(bytes.size());
+      KJ_CASE_ONEOF(bytes, jsg::JsBufferSource) {
+        size_t cachedSize = cachedPartSizes[index++];
+        // If the ArrayBuffer was resized larger, we'll ignore the additional bytes.
+        // If the ArrayBuffer was resized smaller, we'll copy up the current size
+        // and the remaining bytes for this chunk will be left as zeros.
+        size_t toCopy = kj::min(bytes.size(), cachedSize);
+        if (toCopy > 0) {
+          KJ_ASSERT(view.size() >= toCopy);
+          view.first(toCopy).copyFrom(bytes.asArrayPtr().first(toCopy));
+        }
+        view = view.slice(cachedSize);
       }
       KJ_CASE_ONEOF(text, kj::String) {
         auto byteLength = text.asBytes().size();
         if (byteLength == 0) continue;
         KJ_ASSERT(view.size() >= byteLength);
+        KJ_ASSERT(byteLength == cachedPartSizes[index++]);
         view.first(byteLength).copyFrom(text.asBytes());
         view = view.slice(byteLength);
       }
       KJ_CASE_ONEOF(blob, jsg::Ref<Blob>) {
         auto data = blob->getData();
         if (data.size() == 0) continue;
         KJ_ASSERT(view.size() >= data.size());
+        KJ_ASSERT(data.size() == cachedPartSizes[index++]);
         view.first(data.size()).copyFrom(data);
         view = view.slice(data.size());
       }
@@ -91,7 +111,7 @@ jsg::BufferSource concat(jsg::Lock& js, jsg::Optional<Blob::Bits> maybeBits) {
 
   KJ_ASSERT(view == nullptr);
 
-  return kj::mv(result);
+  return jsg::JsBufferSource(u8);
 }
 
 kj::String normalizeType(kj::String type) {
@@ -117,15 +137,22 @@ kj::String normalizeType(kj::String type) {
 
 }  // namespace
 
+Blob::Blob(kj::String type): ownData(Empty{}), data(nullptr), type(kj::mv(type)) {}
+
 Blob::Blob(kj::Array<byte> data, kj::String type)
     : ownData(kj::mv(data)),
       data(ownData.get<kj::Array<kj::byte>>()),
       type(kj::mv(type)) {}
 
-Blob::Blob(jsg::Lock& js, jsg::BufferSource data, kj::String type)
-    : ownData(kj::mv(data)),
-      data(ownData.get<jsg::BufferSource>().asArrayPtr()),
-      type(kj::mv(type)) {}
+Blob::Blob(jsg::Lock& js, jsg::JsBufferSource data, kj::String type)
+    : ownData(data.addRef(js)),
+      data(data.asArrayPtr()),
+      type(kj::mv(type)) {
+  if (FeatureFlags::get(js).getNoResizableArrayBufferInBlob()) {
+    JSG_REQUIRE(
+        !data.isResizable(), TypeError, "Cannot create a Blob with a resizable ArrayBuffer");
+  }
+}
 
 Blob::Blob(jsg::Ref<Blob> parent, kj::ArrayPtr<const byte> data, kj::String type)
     : ownData(kj::mv(parent)),
@@ -141,69 +168,110 @@ jsg::Ref<Blob> Blob::constructor(
     }
   }
 
-  return js.alloc<Blob>(js, concat(js, kj::mv(bits)), kj::mv(type));
+  KJ_IF_SOME(b, bits) {
+    // Optimize for the case where the input is a single Blob, where we can just
+    // return a new view on the existing data without copying.
+    if (b.size() == 1) {
+      KJ_IF_SOME(parent, b[0].template tryGet<jsg::Ref<Blob>>()) {
+        if (parent->getSize() == 0) {
+          return js.alloc<Blob>(kj::mv(type));
+        }
+        auto ptr = parent->data;
+        KJ_IF_SOME(root, parent->ownData.template tryGet<jsg::Ref<Blob>>()) {
+          parent = root.addRef();
+        }
+        return js.alloc<Blob>(kj::mv(parent), ptr, kj::mv(type));
+      }
+    }
+  }
+
+  KJ_IF_SOME(data, concat(js, kj::mv(bits))) {
+    return js.alloc<Blob>(js, data, kj::mv(type));
+  }
+  return js.alloc<Blob>(kj::mv(type));
 }
 
 kj::ArrayPtr<const byte> Blob::getData() const {
-  FeatureObserver::maybeRecordUse(FeatureObserver::Feature::BLOB_GET_DATA);
   return data;
 }
 
 jsg::Ref<Blob> Blob::slice(jsg::Lock& js,
     jsg::Optional<int> maybeStart,
     jsg::Optional<int> maybeEnd,
     jsg::Optional<kj::String> type) {
+
+  auto normalizedType = normalizeType(kj::mv(type).orDefault(nullptr));
+  if (data.size() == 0) {
+    // Blob is empty, there's nothing to slice.
+    return js.alloc<Blob>(kj::mv(normalizedType));
+  }
+
   int start = maybeStart.orDefault(0);
   int end = maybeEnd.orDefault(data.size());
 
   if (start < 0) {
     // Negative value interpreted as offset from end.
     start += data.size();
   }
-  // Clamp start to range.
-  if (start < 0) {
-    start = 0;
-  } else if (start > data.size()) {
-    start = data.size();
-  }
-
   if (end < 0) {
     // Negative value interpreted as offset from end.
     end += data.size();
   }
-  // Clamp end to range.
-  if (end < start) {
-    end = start;
-  } else if (end > data.size()) {
-    end = data.size();
+
+  // Clamp start and end to range.
+  start = kj::max(0, kj::min(start, static_cast<int>(data.size())));
+  end = kj::max(start, kj::min(end, static_cast<int>(data.size())));
+
+  // We run with KJ_IREQUIRE checks enabled in production, which will catch
+  // out of bounds start/end ... but since we're clamping them above, this
+  // should never actually be a problem.
+  auto slicedData = data.slice(start, end);
+
+  // If the slice is empty, we can just return a new empty Blob without worrying about
+  // referencing the original data at all. Super minor optimization that avoids an
+  // unnecessary refcount.
+  if (slicedData.size() == 0) {
+    return js.alloc<Blob>(kj::mv(normalizedType));
   }
 
-  return js.alloc<Blob>(
-      JSG_THIS, data.slice(start, end), normalizeType(kj::mv(type).orDefault(nullptr)));
+  KJ_SWITCH_ONEOF(ownData) {
+    KJ_CASE_ONEOF(_, Empty) {
+      // Handled at the beginning of the function with the zero-length check.
+      KJ_FAIL_ASSERT("Empty blob should have been handled at the beginning of the function");
+    }
+    KJ_CASE_ONEOF(parent, jsg::Ref<Blob>) {
+      // If this blob is itself a slice (backed by a Ref<Blob>), reference the
+      // root data-owning blob directly. This prevents unbounded chain depth —
+      // every slice always points to the root, so depth is always ≤ 1.
+      return js.alloc<Blob>(parent.addRef(), slicedData, kj::mv(normalizedType));
+    }
+    KJ_CASE_ONEOF(_, jsg::JsRef<jsg::JsBufferSource>) {
+      return js.alloc<Blob>(JSG_THIS, slicedData, kj::mv(normalizedType));
+    }
+    KJ_CASE_ONEOF(_, kj::Array<kj::byte>) {
+      return js.alloc<Blob>(JSG_THIS, slicedData, kj::mv(normalizedType));
+    }
+  }
+  KJ_UNREACHABLE;
 }
 
-jsg::Promise<jsg::BufferSource> Blob::arrayBuffer(jsg::Lock& js) {
+jsg::Promise<jsg::JsRef<jsg::JsArrayBuffer>> Blob::arrayBuffer(jsg::Lock& js) {
   FeatureObserver::maybeRecordUse(FeatureObserver::Feature::BLOB_AS_ARRAY_BUFFER);
-  // We use BufferSource here instead of kj::Array<kj::byte> to ensure that the
-  // resulting backing store is associated with the isolate, which is necessary
-  // for when we start making use of v8 sandboxing.
-  auto backing = jsg::BackingStore::alloc<v8::ArrayBuffer>(js, data.size());
-  backing.asArrayPtr().copyFrom(data);
-  return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing)));
+  auto ret = jsg::JsArrayBuffer::create(js, data);
+  return js.resolvedPromise(ret.addRef(js));
 }
 
-jsg::Promise<jsg::BufferSource> Blob::bytes(jsg::Lock& js) {
-  // We use BufferSource here instead of kj::Array<kj::byte> to ensure that the
-  // resulting backing store is associated with the isolate, which is necessary
-  // for when we start making use of v8 sandboxing.
-  auto backing = jsg::BackingStore::alloc<v8::Uint8Array>(js, data.size());
-  backing.asArrayPtr().copyFrom(data);
-  return js.resolvedPromise(jsg::BufferSource(js, kj::mv(backing)));
+jsg::Promise<jsg::JsRef<jsg::JsUint8Array>> Blob::bytes(jsg::Lock& js) {
+  FeatureObserver::maybeRecordUse(FeatureObserver::Feature::BLOB_AS_ARRAY_BUFFER);
+  auto ret = jsg::JsUint8Array::create(js, data);
+  return js.resolvedPromise(ret.addRef(js));
 }
 
-jsg::Promise<kj::String> Blob::text(jsg::Lock& js) {
+jsg::Promise<jsg::JsRef<jsg::JsString>> Blob::text(jsg::Lock& js) {
   FeatureObserver::maybeRecordUse(FeatureObserver::Feature::BLOB_AS_TEXT);
-  return js.resolvedPromise(kj::str(data.asChars()));
+  // Using js.str here instead of returning kj::String avoids an additional
+  // intermediate allocation and copy of the string data.
+  return js.resolvedPromise(js.str(data.asChars()).addRef(js));
 }
 
 jsg::Ref<ReadableStream> Blob::stream(jsg::Lock& js) {
@@ -214,13 +282,18 @@ jsg::Ref<ReadableStream> Blob::stream(jsg::Lock& js) {
 
 // =======================================================================================
 
+File::File(kj::String name, kj::String type, double lastModified)
+    : Blob(kj::mv(type)),
+      name(kj::mv(name)),
+      lastModified(lastModified) {}
+
 File::File(kj::Array<byte> data, kj::String name, kj::String type, double lastModified)
     : Blob(kj::mv(data), kj::mv(type)),
       name(kj::mv(name)),
       lastModified(lastModified) {}
 
 File::File(
-    jsg::Lock& js, jsg::BufferSource data, kj::String name, kj::String type, double lastModified)
+    jsg::Lock& js, jsg::JsBufferSource data, kj::String name, kj::String type, double lastModified)
     : Blob(js, kj::mv(data), kj::mv(type)),
       name(kj::mv(name)),
       lastModified(lastModified) {}
@@ -252,7 +325,27 @@ jsg::Ref<File> File::constructor(
     lastModified = dateNow();
   }
 
-  return js.alloc<File>(js, concat(js, kj::mv(bits)), kj::mv(name), kj::mv(type), lastModified);
+  KJ_IF_SOME(b, bits) {
+    // Optimize for the case where the input is a single Blob, where we can just
+    // return a new view on the existing data without copying.
+    if (b.size() == 1) {
+      KJ_IF_SOME(parent, b[0].template tryGet<jsg::Ref<Blob>>()) {
+        if (parent->getSize() == 0) {
+          return js.alloc<File>(kj::mv(name), kj::mv(type), lastModified);
+        }
+        auto ptr = parent->data;
+        KJ_IF_SOME(root, parent->ownData.template tryGet<jsg::Ref<Blob>>()) {
+          parent = root.addRef();
+        }
+        return js.alloc<File>(kj::mv(parent), ptr, kj::mv(name), kj::mv(type), lastModified);
+      }
+    }
+  }
+
+  KJ_IF_SOME(data, concat(js, kj::mv(bits))) {
+    return js.alloc<File>(js, data, kj::mv(name), kj::mv(type), lastModified);
+  }
+  return js.alloc<File>(kj::mv(name), kj::mv(type), lastModified);
 }
 
 }  // namespace workerd::api