diff --git a/.changeset/resilient-resume-hook.md b/.changeset/resilient-resume-hook.md
new file mode 100644
index 0000000000..8fc24b48de
--- /dev/null
+++ b/.changeset/resilient-resume-hook.md
@@ -0,0 +1,7 @@
+---
+"@workflow/core": minor
+"workflow": minor
+"@workflow/world": minor
+---
+
+Make `resumeHook()` resilient to transient `hook_received` event write failures (429/5xx) by carrying the payload on the queue message for the runtime to materialize. Returned `Hook` gets a new `resilientResume: true` flag when this fallback path is taken.
diff --git a/packages/core/e2e/e2e.test.ts b/packages/core/e2e/e2e.test.ts
index 1ae7a2239d..774c147df5 100644
--- a/packages/core/e2e/e2e.test.ts
+++ b/packages/core/e2e/e2e.test.ts
@@ -24,6 +24,7 @@ import {
   healthCheck,
   start as rawStart,
   resumeHook,
+  setWorld,
 } from '../src/runtime';
 import {
   cliCancel,
@@ -2297,6 +2298,77 @@ describe('e2e', () => {
     }
   );
 
+  // ============================================================
+  // Resilient resume: hook payload delivered even when hook_received fails
+  // ============================================================
+  test(
+    'resilient resume: hookWorkflow receives payload when hook_received returns 500',
+    { timeout: 60_000 },
+    async () => {
+      const token = `resilient-resume-${Math.random().toString(36).slice(2)}`;
+      const customData = Math.random().toString(36).slice(2);
+
+      // Start the hook-awaiting workflow normally
+      const run = await start(await e2e('hookWorkflow'), [token, customData]);
+
+      // Wait for the hook to be registered
+      await sleep(5_000);
+
+      // Build a stubbed world whose events.create throws a 500 on the
+      // hook_received write, but passes all other events through. The queue
+      // dispatch should still succeed, and the workflow runtime should
+      // materialize the missing hook_received event from `hookInput` on the
+      // queue message (resilient resume).
+      const realWorld = await getWorld();
+      const stubbedWorld: World = {
+        ...realWorld,
+        events: {
+          ...realWorld.events,
+          create: (async (...args: Parameters<World['events']['create']>) => {
+            const [, event] = args;
+            if (event.eventType === 'hook_received') {
+              throw new WorkflowWorldError('Simulated storage outage', {
+                status: 500,
+              });
+            }
+            return realWorld.events.create(...args);
+          }) as World['events']['create'],
+        },
+      };
+
+      const hook = await getHookByToken(token);
+      expect(hook.runId).toBe(run.runId);
+
+      // Swap in the stubbed world for the duration of the resumeHook() call.
+      // `resumeHook` uses `getWorld()` internally (no `world` option), so we
+      // use `setWorld()` to replace the cached instance and restore the real
+      // one afterwards.
+      setWorld(stubbedWorld);
+      let resumedHook;
+      try {
+        resumedHook = await resumeHook(hook, {
+          message: 'via-resilient-resume',
+          customData: (hook.metadata as any)?.customData,
+          done: true,
+        });
+      } finally {
+        setWorld(realWorld);
+      }
+
+      // The direct hook_received write failed with 500, so resumeHook should
+      // have taken the resilient path and flagged the returned hook.
+      expect(resumedHook.resilientResume).toBe(true);
+
+      // Despite hook_received failing, the workflow should still receive
+      // the payload via the runtime's queue-payload fallback.
+      const returnValue = await run.returnValue;
+      expect(returnValue).toHaveLength(1);
+      expect(returnValue[0].message).toBe('via-resilient-resume');
+      expect(returnValue[0].customData).toBe(customData);
+      expect(returnValue[0].done).toBe(true);
+    }
+  );
+
   test(
     'getterStepWorkflow - getter functions with "use step" directive',
     { timeout: 60_000 },
diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts
index 3af03f4fed..773fa221c6 100644
--- a/packages/core/src/runtime.ts
+++ b/packages/core/src/runtime.ts
@@ -1,5 +1,6 @@
 import {
   EntityConflictError,
+  HookNotFoundError,
   RUN_ERROR_CODES,
   RunExpiredError,
   WorkflowRuntimeError,
@@ -125,6 +126,7 @@ export function workflowEntrypoint(
           traceCarrier: traceContext,
           requestedAt,
           runInput,
+          hookInput,
         } = WorkflowInvokePayloadSchema.parse(message_);
         const { requestId } = metadata;
         // Extract the workflow name from the topic name
@@ -450,6 +452,85 @@ export function workflowEntrypoint(
                     }
                   }
 
+                  // --- Resilient resume: materialize missing hook_received ---
+                  // When `resumeHook()` fires its hook_received event write and
+                  // queue dispatch in parallel, the event write may fail with
+                  // a transient 429/5xx while the queue dispatch succeeds.
+                  // In that case `hookInput` is present on the queue payload,
+                  // carrying the dehydrated payload + a client-minted
+                  // idempotency key (`resumeId`). If no existing hook_received
+                  // event already carries that `resumeId`, we materialize one
+                  // here so the workflow replay sees the payload. Mirrors
+                  // `start()`'s resilient path for run_created → run_started.
+                  if (hookInput) {
+                    const alreadyMaterialized = events.some(
+                      (e) =>
+                        e.eventType === 'hook_received' &&
+                        e.correlationId === hookInput.hookId &&
+                        (e.eventData as { resumeId?: string } | undefined)
+                          ?.resumeId === hookInput.resumeId
+                    );
+                    if (!alreadyMaterialized) {
+                      try {
+                        const result = await world.events.create(
+                          runId,
+                          {
+                            eventType: 'hook_received',
+                            specVersion:
+                              workflowRun.specVersion ?? SPEC_VERSION_CURRENT,
+                            correlationId: hookInput.hookId,
+                            eventData: {
+                              payload: hookInput.payload as any,
+                              resumeId: hookInput.resumeId,
+                            },
+                          },
+                          { requestId }
+                        );
+                        if (result.event) {
+                          events.push(result.event);
+                        }
+                        runtimeLogger.warn(
+                          'Materialized hook_received event from queue payload (resilient resume)',
+                          {
+                            workflowRunId: runId,
+                            hookId: hookInput.hookId,
+                            resumeId: hookInput.resumeId,
+                          }
+                        );
+                        span?.setAttributes({
+                          ...Attribute.HookResilientResumeMaterialized(true),
+                        });
+                      } catch (err) {
+                        if (EntityConflictError.is(err)) {
+                          // Another queue delivery already materialized this
+                          // hook_received event — safe to ignore.
+                          runtimeLogger.info(
+                            'Hook resilient-resume materialization skipped (already exists)',
+                            {
+                              workflowRunId: runId,
+                              hookId: hookInput.hookId,
+                              resumeId: hookInput.resumeId,
+                            }
+                          );
+                        } else if (HookNotFoundError.is(err)) {
+                          // The hook was disposed between resumeHook() and
+                          // this queue delivery. Drop the resume — there is
+                          // no active awaiter to deliver it to.
+                          runtimeLogger.warn(
+                            'Hook was disposed before resilient resume could materialize — dropping payload',
+                            {
+                              workflowRunId: runId,
+                              hookId: hookInput.hookId,
+                              resumeId: hookInput.resumeId,
+                            }
+                          );
+                        } else {
+                          throw err;
+                        }
+                      }
+                    }
+                  }
+
                   // Resolve the encryption key for this run's deployment
                   const rawKey =
                     await world.getEncryptionKeyForRun?.(workflowRun);
diff --git a/packages/core/src/runtime/helpers.ts b/packages/core/src/runtime/helpers.ts
index e890071a67..65c326e8a8 100644
--- a/packages/core/src/runtime/helpers.ts
+++ b/packages/core/src/runtime/helpers.ts
@@ -1,3 +1,4 @@
+import { ThrottleError, WorkflowWorldError } from '@workflow/errors';
 import type {
   Event,
   HealthCheckPayload,
@@ -17,6 +18,22 @@ import { getSpanKind, trace } from '../telemetry.js';
 import { version as workflowCoreVersion } from '../version.js';
 import { getWorld } from './world.js';
 
+/**
+ * Checks if an error from events.create() is retryable via the queue-payload
+ * fallback path. Used by `start()` (resilient start — run_created → run_started
+ * fallback) and `resumeHook()` (resilient resume — hook_received fallback
+ * materialized by the workflow runtime from `hookInput` on the queue message).
+ *
+ * - ThrottleError (429): rate limited, will likely succeed later
+ * - WorkflowWorldError with status >= 500: transient server error
+ */
+export function isRetryableEventError(err: unknown): boolean {
+  if (ThrottleError.is(err)) return true;
+  if (WorkflowWorldError.is(err) && err.status && err.status >= 500)
+    return true;
+  return false;
+}
+
 /** Default timeout for health checks in milliseconds */
 const DEFAULT_HEALTH_CHECK_TIMEOUT = 30_000;
 
diff --git a/packages/core/src/runtime/resume-hook.test.ts b/packages/core/src/runtime/resume-hook.test.ts
new file mode 100644
index 0000000000..03e4a2cb5f
--- /dev/null
+++ b/packages/core/src/runtime/resume-hook.test.ts
@@ -0,0 +1,279 @@
+import { ThrottleError, WorkflowWorldError } from '@workflow/errors';
+import {
+  SPEC_VERSION_CURRENT,
+  SPEC_VERSION_LEGACY,
+  SPEC_VERSION_SUPPORTS_CBOR_QUEUE_TRANSPORT,
+} from '@workflow/world';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { resumeHook } from './resume-hook.js';
+import { getWorld } from './world.js';
+
+// Mock @vercel/functions
+vi.mock('@vercel/functions', () => ({
+  waitUntil: vi.fn(),
+}));
+
+// Mock the world module
+vi.mock('./world.js', () => ({
+  getWorld: vi.fn(),
+  getWorldHandlers: vi.fn(() => ({
+    createQueueHandler: vi.fn(() => vi.fn()),
+  })),
+}));
+
+// Mock telemetry
+vi.mock('../telemetry.js', () => ({
+  serializeTraceCarrier: vi.fn().mockResolvedValue({}),
+  getSpanContextForTraceCarrier: vi.fn().mockResolvedValue(undefined),
+  trace: vi.fn((_name, fn) => fn(undefined)),
+}));
+
+// Mock serialization
+vi.mock('../serialization.js', async () => {
+  const actual = await vi.importActual<typeof import('../serialization.js')>(
+    '../serialization.js'
+  );
+  return {
+    ...actual,
+    dehydrateStepReturnValue: vi
+      .fn()
+      .mockImplementation(async () => new Uint8Array([1, 2, 3])),
+    hydrateStepArguments: vi.fn(async (v: unknown) => v),
+  };
+});
+
+// Mock capabilities — always allow encryption format so we don't strip keys
+vi.mock('../capabilities.js', () => ({
+  getRunCapabilities: vi.fn(() => ({
+    supportedFormats: new Set(['encr', 'json', 'devj', 'devb', 'bin', 'utf8']),
+  })),
+}));
+
+interface MockWorldOptions {
+  runSpecVersion?: number;
+  eventsCreate?: ReturnType<typeof vi.fn>;
+  queue?: ReturnType<typeof vi.fn>;
+}
+
+function makeMockWorld(opts: MockWorldOptions = {}) {
+  const {
+    runSpecVersion = SPEC_VERSION_SUPPORTS_CBOR_QUEUE_TRANSPORT,
+    eventsCreate = vi.fn().mockResolvedValue({}),
+    queue = vi.fn().mockResolvedValue({ messageId: null }),
+  } = opts;
+
+  const hook = {
+    runId: 'wrun_test',
+    hookId: 'hook_test',
+    token: 'tok_test',
+    ownerId: 'owner_test',
+    projectId: 'proj_test',
+    environment: 'production',
+    createdAt: new Date(),
+    specVersion: runSpecVersion,
+  };
+
+  const workflowRun = {
+    runId: 'wrun_test',
+    workflowName: 'test-workflow',
+    deploymentId: 'deploy_123',
+    status: 'running',
+    specVersion: runSpecVersion,
+    executionContext: {},
+    createdAt: new Date(),
+    updatedAt: new Date(),
+  };
+
+  const world = {
+    specVersion: runSpecVersion,
+    events: { create: eventsCreate },
+    queue,
+    hooks: {
+      getByToken: vi.fn().mockResolvedValue(hook),
+    },
+    runs: {
+      get: vi.fn().mockResolvedValue(workflowRun),
+    },
+    getEncryptionKeyForRun: vi.fn().mockResolvedValue(undefined),
+  };
+
+  return { world, hook, workflowRun, eventsCreate, queue };
+}
+
+describe('resumeHook', () => {
+  afterEach(() => {
+    vi.clearAllMocks();
+  });
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  describe('happy path', () => {
+    it('writes hook_received with resumeId; queue payload has NO hookInput (event write succeeded)', async () => {
+      const { world, queue, eventsCreate } = makeMockWorld();
+      vi.mocked(getWorld).mockReturnValue(world as any);
+
+      const result = await resumeHook('tok_test', { hello: 'world' });
+
+      expect(result.resilientResume).toBeFalsy();
+      expect(eventsCreate).toHaveBeenCalledTimes(1);
+      expect(queue).toHaveBeenCalledTimes(1);
+
+      const [, eventReq] = eventsCreate.mock.calls[0];
+      expect(eventReq.eventType).toBe('hook_received');
+      expect(eventReq.correlationId).toBe('hook_test');
+      // resumeId is always attached to the direct write on CBOR-capable
+      // deployments, so the runtime can dedup if it ever sees a matching
+      // hookInput via a retried queue message. In the happy path the queue
+      // does NOT carry hookInput, so dedup shouldn't be needed — but the
+      // id is cheap and future-proof.
+      expect(eventReq.eventData.resumeId).toMatch(/^[0-9A-HJKMNP-TV-Z]+$/);
+      expect(eventReq.eventData.payload).toBeInstanceOf(Uint8Array);
+
+      // Happy path: event write succeeded, so no hookInput on queue payload.
+      // This avoids a race where the queue handler could materialize a
+      // duplicate hook_received before the direct write commits.
+      const [, queuePayload] = queue.mock.calls[0];
+      expect(queuePayload.hookInput).toBeUndefined();
+    });
+  });
+
+  describe('resilient resume (events.create failure)', () => {
+    it('sets resilientResume=true when events.create throws ThrottleError (429) and queue succeeds', async () => {
+      const eventsCreate = vi
+        .fn()
+        .mockRejectedValue(new ThrottleError('rate limited'));
+      const { world } = makeMockWorld({ eventsCreate });
+      vi.mocked(getWorld).mockReturnValue(world as any);
+
+      const result = await resumeHook('tok_test', { data: 1 });
+
+      expect(result.resilientResume).toBe(true);
+      expect(result.runId).toBe('wrun_test');
+    });
+
+    it('sets resilientResume=true when events.create throws 500 and queue succeeds', async () => {
+      const eventsCreate = vi.fn().mockRejectedValue(
+        new WorkflowWorldError('Internal Server Error', {
+          status: 500,
+        })
+      );
+      const { world, queue } = makeMockWorld({ eventsCreate });
+      vi.mocked(getWorld).mockReturnValue(world as any);
+
+      const result = await resumeHook('tok_test', { data: 1 });
+
+      expect(result.resilientResume).toBe(true);
+      // Queue must have been called with hookInput so the runtime can
+      // materialize hook_received on the other side.
+      const [, queuePayload] = queue.mock.calls[0];
+      expect(queuePayload.hookInput).toBeDefined();
+      expect(queuePayload.hookInput.hookId).toBe('hook_test');
+    });
+
+    it('throws when events.create throws a non-retryable error (e.g. 400)', async () => {
+      const eventsCreate = vi.fn().mockRejectedValue(
+        new WorkflowWorldError('Bad Request', {
+          status: 400,
+        })
+      );
+      const { world } = makeMockWorld({ eventsCreate });
+      vi.mocked(getWorld).mockReturnValue(world as any);
+
+      await expect(resumeHook('tok_test', { data: 1 })).rejects.toThrow(
+        'Bad Request'
+      );
+    });
+
+    it('throws when queue fails even if events.create succeeds', async () => {
+      const queue = vi.fn().mockRejectedValue(new Error('Queue unavailable'));
+      const { world } = makeMockWorld({ queue });
+      vi.mocked(getWorld).mockReturnValue(world as any);
+
+      await expect(resumeHook('tok_test', { data: 1 })).rejects.toThrow(
+        'Queue unavailable'
+      );
+    });
+
+    it('throws queue error when both events.create and queue fail', async () => {
+      const eventsCreate = vi
+        .fn()
+        .mockRejectedValue(new ThrottleError('rate limited'));
+      const queue = vi.fn().mockRejectedValue(new Error('Queue unavailable'));
+      const { world } = makeMockWorld({ eventsCreate, queue });
+      vi.mocked(getWorld).mockReturnValue(world as any);
+
+      await expect(resumeHook('tok_test', { data: 1 })).rejects.toThrow(
+        'Queue unavailable'
+      );
+    });
+
+    it('does not take resilient path on legacy spec versions (no CBOR queue transport)', async () => {
+      const eventsCreate = vi
+        .fn()
+        .mockRejectedValue(new ThrottleError('rate limited'));
+      const { world, queue } = makeMockWorld({
+        eventsCreate,
+        runSpecVersion: SPEC_VERSION_LEGACY,
+      });
+      vi.mocked(getWorld).mockReturnValue(world as any);
+
+      // On legacy spec versions the runtime cannot materialize hook_received
+      // from queue payload, so we must fail-fast instead of pretending
+      // resilient delivery will work.
+      await expect(resumeHook('tok_test', { data: 1 })).rejects.toThrow(
+        'rate limited'
+      );
+
+      // hookInput should NOT be attached to the queue payload on legacy
+      if (queue.mock.calls.length > 0) {
+        const [, queuePayload] = queue.mock.calls[0];
+        expect(queuePayload.hookInput).toBeUndefined();
+      }
+    });
+  });
+
+  describe('sequential dispatch (events.create first, then queue)', () => {
+    it('awaits events.create before dispatching to queue (happy path)', async () => {
+      // This ordering is important: it avoids a race where the queue handler
+      // processes the message and materializes a duplicate hook_received
+      // before the direct write commits.
+      let eventsCreateResolve: (v: unknown) => void = () => {};
+      const eventsCreatePromise = new Promise((resolve) => {
+        eventsCreateResolve = resolve;
+      });
+      const eventsCreate = vi
+        .fn()
+        .mockImplementation(() => eventsCreatePromise);
+      const queue = vi.fn().mockResolvedValue({ messageId: null });
+
+      const { world } = makeMockWorld({ eventsCreate, queue });
+      vi.mocked(getWorld).mockReturnValue(world as any);
+
+      const resumePromise = resumeHook('tok_test', { data: 1 });
+
+      // Give microtasks a chance to run. events.create should have been
+      // called, but queue should NOT have been — we're waiting for the
+      // event write to commit before dispatching.
+      await new Promise((r) => setTimeout(r, 10));
+      expect(eventsCreate).toHaveBeenCalledTimes(1);
+      expect(queue).not.toHaveBeenCalled();
+
+      // Now resolve events.create and verify queue is dispatched.
+      eventsCreateResolve({});
+      await resumePromise;
+      expect(queue).toHaveBeenCalledTimes(1);
+    });
+  });
+});
+
+describe('isRetryableEventError', () => {
+  // Indirectly tested via resumeHook above. The helper is also unit-covered
+  // via start.test.ts's resilient start suite; no duplicate tests needed.
+  it('is exercised via resumeHook resilient resume tests', () => {
+    expect(SPEC_VERSION_CURRENT).toBeGreaterThanOrEqual(
+      SPEC_VERSION_SUPPORTS_CBOR_QUEUE_TRANSPORT
+    );
+  });
+});
diff --git a/packages/core/src/runtime/resume-hook.ts b/packages/core/src/runtime/resume-hook.ts
index eed7e62450..9614078ded 100644
--- a/packages/core/src/runtime/resume-hook.ts
+++ b/packages/core/src/runtime/resume-hook.ts
@@ -9,11 +9,14 @@ import {
   isLegacySpecVersion,
   SPEC_VERSION_CURRENT,
   SPEC_VERSION_LEGACY,
+  SPEC_VERSION_SUPPORTS_CBOR_QUEUE_TRANSPORT,
   type WorkflowInvokePayload,
   type WorkflowRun,
 } from '@workflow/world';
+import { monotonicFactory } from 'ulid';
 import { getRunCapabilities } from '../capabilities.js';
 import { type CryptoKey, importKey } from '../encryption.js';
+import { runtimeLogger } from '../logger.js';
 import {
   dehydrateStepReturnValue,
   hydrateStepArguments,
@@ -23,9 +26,12 @@ import { WEBHOOK_RESPONSE_WRITABLE } from '../symbols.js';
 import * as Attribute from '../telemetry/semantic-conventions.js';
 import { getSpanContextForTraceCarrier, trace } from '../telemetry.js';
 import { waitedUntil } from '../util.js';
-import { getWorkflowQueueName } from './helpers.js';
+import { getWorkflowQueueName, isRetryableEventError } from './helpers.js';
 import { getWorld } from './world.js';
 
+/** ULID generator for client-side resumeId generation */
+const ulid = monotonicFactory();
+
 /**
  * Internal helper that returns the hook, the associated workflow run,
  * and the resolved encryption key.
@@ -62,16 +68,52 @@ export async function getHookByToken(token: string): Promise<Hook> {
   return hook;
 }
 
+/**
+ * A hook returned by {@link resumeHook}. Extends the base {@link Hook} entity
+ * with a transient flag indicating whether the resume took the resilient
+ * fallback path.
+ */
+export type ResumedHook = Hook & {
+  /**
+   * When `true`, the direct `hook_received` event write failed with a
+   * transient error (429/5xx) but the queue dispatch succeeded. The resume
+   * will still land via the workflow runtime's queue-payload fallback path
+   * (the runtime materializes the missing `hook_received` event from
+   * `hookInput` on the queue message). Callers can treat this as "accepted,
+   * will deliver eventually" — the same way `start()` returns a `Run` with
+   * `resilientStart` set when `run_created` failed.
+   *
+   * When `false` or absent, both the direct event write and the queue
+   * dispatch succeeded normally.
+   */
+  resilientResume?: boolean;
+};
+
 /**
  * Resumes a workflow run by sending a payload to a hook identified by its token.
  *
  * This function is called externally (e.g., from an API route or server action)
  * to send data to a hook and resume the associated workflow run.
  *
+ * ## Resilient resume
+ *
+ * `resumeHook()` fires the `hook_received` event creation and the workflow
+ * queue dispatch in parallel. If the event creation fails with a retryable
+ * error (429/5xx) but the queue dispatch succeeds, the workflow runtime will
+ * materialize the missing `hook_received` event from the payload carried on
+ * the queue message — the returned hook has `resilientResume: true` to
+ * signal this fallback path was taken. This mirrors the resilient-start
+ * behavior of {@link start}.
+ *
+ * Both write paths carry the same client-minted `resumeId` as an idempotency
+ * key; the runtime uses it to avoid double-delivering the payload.
+ *
  * @param tokenOrHook - The unique token identifying the hook, or the hook object itself
  * @param payload - The data payload to send to the hook
- * @returns Promise resolving to the hook
- * @throws Error if the hook is not found or if there's an error during the process
+ * @returns Promise resolving to the hook, with `resilientResume: true` when
+ *   the resilient fallback path was taken.
+ * @throws Error if the hook is not found, if the queue dispatch fails, or if
+ *   there's a non-retryable error during event creation.
  *
  * @example
  *
@@ -95,7 +137,7 @@ export async function resumeHook<T = any>(
   tokenOrHook: string | Hook,
   payload: T,
   encryptionKeyOverride?: CryptoKey
-): Promise<Hook> {
+): Promise<ResumedHook> {
   return await waitedUntil(() => {
     return trace('hook.resume', async (span) => {
       const world = await getWorld();
@@ -156,20 +198,6 @@ export async function resumeHook<T = any>(
           })
         );
 
-        // Create a hook_received event with the payload
-        await world.events.create(
-          hook.runId,
-          {
-            eventType: 'hook_received',
-            specVersion: SPEC_VERSION_CURRENT,
-            correlationId: hook.hookId,
-            eventData: {
-              payload: dehydratedPayload,
-            },
-          },
-          { v1Compat }
-        );
-
         span?.setAttributes({
           ...Attribute.WorkflowName(workflowRun.workflowName),
         });
@@ -183,8 +211,63 @@ export async function resumeHook<T = any>(
           }
         }
 
-        // Re-trigger the workflow against the deployment ID associated
-        // with the workflow run that the hook belongs to
+        // Mint a client-side idempotency key. When the resilient path fires
+        // (events.create fails but queue succeeds), both the direct write
+        // and the runtime's queue-payload fallback use this key so the
+        // runtime can dedup any hook_received event that already carries it.
+        const resumeId = ulid();
+
+        // Only carry `hookInput` on the queue payload for runs whose
+        // deployment supports the CBOR queue transport. Older deployments
+        // use JSON-only transport which cannot carry binary payloads
+        // (Uint8Array). For such deployments, fall back to today's behavior
+        // where the runtime cannot materialize hook_received from the queue.
+        const runSpecVersion = workflowRun.specVersion ?? SPEC_VERSION_LEGACY;
+        const canCarryHookInput =
+          runSpecVersion >= SPEC_VERSION_SUPPORTS_CBOR_QUEUE_TRANSPORT;
+
+        // First, attempt the direct hook_received event write. This is
+        // sequential (not parallel with queue dispatch) to avoid a race
+        // where the queue handler processes the message before the event
+        // write has committed, which would otherwise cause the runtime
+        // fallback to materialize a duplicate hook_received event.
+        //
+        // - If the write succeeds, we queue WITHOUT `hookInput` — the
+        //   runtime has nothing to materialize and will just replay the run.
+        // - If the write fails with a retryable error (429/5xx) on a
+        //   CBOR-capable deployment, we queue WITH `hookInput` so the
+        //   runtime can materialize the missing event (resilient resume).
+        // - If the write fails with any other error, we propagate.
+        let eventWriteFailed = false;
+        let eventWriteError: unknown;
+        try {
+          await world.events.create(
+            hook.runId,
+            {
+              eventType: 'hook_received',
+              specVersion: SPEC_VERSION_CURRENT,
+              correlationId: hook.hookId,
+              eventData: {
+                payload: dehydratedPayload,
+                // Include the idempotency key so the runtime's fallback
+                // path can dedup on re-delivery of the queue message.
+                ...(canCarryHookInput ? { resumeId } : {}),
+              },
+            },
+            { v1Compat }
+          );
+        } catch (err) {
+          if (!canCarryHookInput || !isRetryableEventError(err)) {
+            // Non-retryable, or legacy spec version (no fallback available).
+            throw err;
+          }
+          eventWriteFailed = true;
+          eventWriteError = err;
+        }
+
+        // Re-trigger the workflow. Attach `hookInput` only when the direct
+        // event write failed — otherwise the runtime's fallback path has
+        // nothing to materialize and we avoid the dedup race.
         await world.queue(
           getWorkflowQueueName(workflowRun.workflowName),
           {
@@ -192,14 +275,46 @@ export async function resumeHook<T = any>(
             // attach the trace carrier from the workflow run
             traceCarrier:
               workflowRun.executionContext?.traceCarrier ?? undefined,
+            ...(eventWriteFailed && canCarryHookInput
+              ? {
+                  hookInput: {
+                    hookId: hook.hookId,
+                    resumeId,
+                    payload: dehydratedPayload,
+                  },
+                }
+              : {}),
           } satisfies WorkflowInvokePayload,
           {
             deploymentId: workflowRun.deploymentId,
-            specVersion: workflowRun.specVersion ?? SPEC_VERSION_LEGACY,
+            specVersion: runSpecVersion,
           }
         );
 
-        return hook;
+        if (eventWriteFailed) {
+          runtimeLogger.warn(
+            'Hook event creation failed, but the workflow was re-triggered via the queue. ' +
+              'The hook_received event will be materialized by the runtime via the resilient resume path.',
+            {
+              workflowRunId: hook.runId,
+              hookId: hook.hookId,
+              resumeId,
+              error:
+                eventWriteError instanceof Error
+                  ? eventWriteError.message
+                  : String(eventWriteError),
+            }
+          );
+        }
+
+        span?.setAttributes({
+          ...Attribute.HookResilientResume(eventWriteFailed),
+        });
+
+        if (eventWriteFailed) {
+          return { ...hook, resilientResume: true } satisfies ResumedHook;
+        }
+        return hook satisfies ResumedHook;
       } catch (err) {
         span?.setAttributes({
           ...Attribute.HookToken(
diff --git a/packages/core/src/runtime/start.ts b/packages/core/src/runtime/start.ts
index 73dcdc5f79..d54a2c985e 100644
--- a/packages/core/src/runtime/start.ts
+++ b/packages/core/src/runtime/start.ts
@@ -1,10 +1,5 @@
 import { waitUntil } from '@vercel/functions';
-import {
-  EntityConflictError,
-  ThrottleError,
-  WorkflowRuntimeError,
-  WorkflowWorldError,
-} from '@workflow/errors';
+import { EntityConflictError, WorkflowRuntimeError } from '@workflow/errors';
 import type { WorkflowInvokePayload, World } from '@workflow/world';
 import {
   isLegacySpecVersion,
@@ -20,7 +15,7 @@ import * as Attribute from '../telemetry/semantic-conventions.js';
 import { serializeTraceCarrier, trace } from '../telemetry.js';
 import { waitedUntil } from '../util.js';
 import { version as workflowCoreVersion } from '../version.js';
-import { getWorkflowQueueName } from './helpers.js';
+import { getWorkflowQueueName, isRetryableEventError } from './helpers.js';
 import { Run } from './run.js';
 import { getWorld } from './world.js';
 
@@ -269,7 +264,7 @@ export async function start<TArgs extends unknown[], TResult>(
           // the run creation call gets a cold start or other slowdown, and the queue
           // + run_started call completes faster. We expect this to be <=1% of cases.
           // In this case, we can safely return.
-        } else if (isRetryableStartError(err)) {
+        } else if (isRetryableEventError(err)) {
           // 429 (ThrottleError) and 5xx (WorkflowWorldError with status >= 500)
           // are retryable — the run was accepted via the queue and creation
           // will be re-tried by the runtime when it calls run_started.
@@ -321,16 +316,3 @@ export async function start<TArgs extends unknown[], TResult>(
     });
   });
 }
-
-/**
- * Checks if an error from events.create (run_created) is retryable,
- * meaning the queue can re-try creation later via the run_started path.
- * - ThrottleError (429): rate limited, will succeed later
- * - WorkflowWorldError with status >= 500: server error, will succeed later
- */
-function isRetryableStartError(err: unknown): boolean {
-  if (ThrottleError.is(err)) return true;
-  if (WorkflowWorldError.is(err) && err.status && err.status >= 500)
-    return true;
-  return false;
-}
diff --git a/packages/core/src/telemetry/semantic-conventions.ts b/packages/core/src/telemetry/semantic-conventions.ts
index fcc5d0694f..8a58329160 100644
--- a/packages/core/src/telemetry/semantic-conventions.ts
+++ b/packages/core/src/telemetry/semantic-conventions.ts
@@ -226,6 +226,25 @@ export const HookId = SemanticConvention<string>('workflow.hook.id');
 /** Whether a hook was found by its token */
 export const HookFound = SemanticConvention<boolean>('workflow.hook.found');
 
+/**
+ * Set to `true` on the `hook.resume` span when `resumeHook()`'s direct
+ * `hook_received` event write failed with a retryable error (429/5xx) but
+ * the queue dispatch succeeded — the resume will still land via the
+ * runtime's queue-payload fallback path.
+ */
+export const HookResilientResume = SemanticConvention<boolean>(
+  'workflow.hook.resilient_resume'
+);
+
+/**
+ * Set to `true` on the workflow execution span when the runtime materialized
+ * a missing `hook_received` event from the queue-payload fallback carried on
+ * the queue message (resilient resume).
+ */
+export const HookResilientResumeMaterialized = SemanticConvention<boolean>(
+  'workflow.hook.resilient_resume_materialized'
+);
+
 // Webhook attributes
 
 /** Number of webhook handlers triggered */
diff --git a/packages/world/src/events.ts b/packages/world/src/events.ts
index 6784a0a819..1c44a0ed16 100644
--- a/packages/world/src/events.ts
+++ b/packages/world/src/events.ts
@@ -165,6 +165,14 @@ const HookReceivedEventSchema = BaseEventSchema.extend({
   correlationId: z.string(),
   eventData: z.object({
     payload: SerializedDataSchema,
+    /**
+     * Optional idempotency key used by the resilient resumeHook() path.
+     * When present, the workflow runtime uses this to dedup a hook_received
+     * event that may have been written both directly (by resumeHook) and by
+     * the runtime's queue-payload fallback. Matches `HookInput.resumeId` on
+     * the WorkflowInvokePayload.
+     */
+    resumeId: z.string().optional(),
   }),
 });
 
diff --git a/packages/world/src/queue.ts b/packages/world/src/queue.ts
index 409d00dd0c..3a31eb08a3 100644
--- a/packages/world/src/queue.ts
+++ b/packages/world/src/queue.ts
@@ -36,6 +36,29 @@ export const RunInputSchema = z.object({
 });
 export type RunInput = z.infer<typeof RunInputSchema>;
 
+/**
+ * Hook resume data carried through the queue for resilient resumeHook().
+ * Only present on the queue delivery triggered by resumeHook() — re-enqueues
+ * omit this. When the runtime processes the message and detects that the
+ * corresponding hook_received event is missing (e.g., because events.create()
+ * failed with a transient 429/5xx while queue() succeeded), it materializes
+ * the hook_received event from this payload.
+ *
+ * `resumeId` is a client-minted ULID used as an idempotency key: both the
+ * direct hook_received write (from resumeHook) and the runtime fallback write
+ * include it in `eventData.resumeId`, so the runtime can dedup by checking
+ * whether any existing hook_received event already carries the same resumeId.
+ */
+export const HookInputSchema = z.object({
+  /** correlationId of the target hook (hookId) */
+  hookId: z.string(),
+  /** Client-minted ULID; idempotency key shared across both write paths */
+  resumeId: z.string(),
+  /** Dehydrated payload to deliver to the hook */
+  payload: z.unknown(),
+});
+export type HookInput = z.infer<typeof HookInputSchema>;
+
 export const WorkflowInvokePayloadSchema = z.object({
   runId: z.string(),
   traceCarrier: TraceCarrierSchema.optional(),
@@ -44,6 +67,8 @@ export const WorkflowInvokePayloadSchema = z.object({
   serverErrorRetryCount: z.number().int().optional(),
   /** Run creation data, only present on the first queue delivery from start() */
   runInput: RunInputSchema.optional(),
+  /** Hook resume data, only present on the queue delivery from resumeHook() */
+  hookInput: HookInputSchema.optional(),
 });
 
 export const StepInvokePayloadSchema = z.object({