Retry 5XX errors and rate limiting during polling

erbridge · erbridge · commit 4cf3f47b51ae · 2023-02-20T22:12:56.000Z
We default to an exponential backoff, but allow callers to override
that as needed.
diff --git a/lib/ReplicateClient.js b/lib/ReplicateClient.js
@@ -25,8 +25,11 @@ export default class ReplicateClient {
   async predict(
     version,
     input,
-    { onUpdate } = {},
-    { defaultPollingInterval = 500 } = {}
+    { onUpdate, onTemporaryError } = {},
+    {
+      defaultPollingInterval = 500,
+      backoffFn = (errorCount) => Math.pow(2, errorCount) * 100,
+    } = {}
   ) {
     if (!version) {
       throw new ReplicateError("version is required");
@@ -41,14 +44,36 @@ export default class ReplicateClient {
     onUpdate && onUpdate(prediction);
 
     let pollingInterval = defaultPollingInterval;
+    let errorCount = 0;
 
     while (!prediction.hasTerminalStatus()) {
       await sleep(pollingInterval);
       pollingInterval = defaultPollingInterval; // Reset to default each time.
 
-      prediction = await this.getPrediction(prediction.id);
+      try {
+        prediction = await this.getPrediction(prediction.id);
+
+        onUpdate && onUpdate(prediction);
+
+        errorCount = 0; // Reset because we've had a non-error response.
+      } catch (err) {
+        // Parenthesized on purpose: `!err instanceof X` parses as `(!err) instanceof X`, which is always false.
+        if (!(err instanceof ReplicateResponseError)) {
+          throw err; // Not an API response error, so never retry it.
+        }
 
-      onUpdate && onUpdate(prediction);
+        if (
+          !err.status ||
+          (Math.floor(err.status / 100) !== 5 && err.status !== 429)
+        ) {
+          throw err; // Only 5XX and 429 (rate limiting) are treated as temporary.
+        }
+
+        errorCount += 1;
+        onTemporaryError && onTemporaryError(err);
+
+        // Overrides the delay for the next sleep only; the loop resets it to the default afterwards.
+        pollingInterval = backoffFn(errorCount);
+      }
     }
 
     return prediction;
diff --git a/lib/ReplicateClient.test.js b/lib/ReplicateClient.test.js
@@ -150,6 +150,62 @@ describe("predict()", () => {
       status: PredictionStatus.SUCCEEDED,
     });
   });
+
+  it("retries polling on error", async () => { // Polling fails with a 500, then a 429, then succeeds.
+    const requestMockReturnValues = {
+      "POST /v1/predictions": [ // One queued handler for the POST action.
+        () => ({
+          id: "test-id",
+          status: PredictionStatus.STARTING,
+        }),
+      ],
+      "GET /v1/predictions/test-id": [ // Handlers are consumed in order, one per GET request.
+        () => {
+          throw new ReplicateResponseError(
+            "test error",
+            new Response("{}", {
+              status: 500,
+              statusText: "Internal Server Error",
+            })
+          );
+        },
+        () => {
+          throw new ReplicateResponseError(
+            "test error",
+            new Response("{}", {
+              status: 429,
+              statusText: "Too Many Requests",
+            })
+          );
+        },
+        () => ({
+          id: "test-id",
+          status: PredictionStatus.SUCCEEDED,
+        }),
+      ],
+    };
+
+    jest
+      .spyOn(client, "request")
+      .mockImplementation((action) =>
+        requestMockReturnValues[action].shift()() // Dequeue and invoke the next handler queued for this action.
+      );
+    const backoffFn = jest.fn(() => 0); // Always returns a zero backoff so retries are not delayed.
+
+    await client.predict(
+      "test-version",
+      { text: "test text" },
+      {},
+      { defaultPollingInterval: 0, backoffFn }
+    );
+
+    expect(client.request).toHaveBeenCalledTimes(4); // The 1 POST and 3 GET handlers queued above.
+    expect(client.request).toHaveLastReturnedWith({
+      id: "test-id",
+      status: PredictionStatus.SUCCEEDED,
+    });
+    expect(backoffFn).toHaveBeenCalledTimes(2); // Once per failed poll (the 500 and the 429).
+  });
 });
 
 describe("createPrediction()", () => {