From 8cefd06916b0ee94253544134a2904ed520db35b Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 2 Dec 2025 09:00:41 -0500 Subject: [PATCH 1/8] PYTHON-5388 - Add two automatic retries --- .evergreen/execute-tests.sh | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/.evergreen/execute-tests.sh b/.evergreen/execute-tests.sh index 950bad0..7b16d31 100644 --- a/.evergreen/execute-tests.sh +++ b/.evergreen/execute-tests.sh @@ -13,4 +13,23 @@ set +a cd ${REPO_NAME} -bash ${ROOT_DIR}/${DIR}/run.sh +MAX_ATTEMPTS=3 +ATTEMPT=1 + +while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do + bash ${ROOT_DIR}/${DIR}/run.sh + + EXIT_CODE=$? + + if [ $EXIT_CODE -eq 0 ]; then + break + else + echo "Tests failed with exit code $EXIT_CODE on attempt $ATTEMPT of $MAX_ATTEMPTS..." + ((ATTEMPT++)) + fi +done + +if [ $EXIT_CODE -ne 0 ]; then + echo "Tests failed after $MAX_ATTEMPTS attempts." +fi +exit($EXIT_CODE) From c858b0d6bbd641619c9c07de0cb864ee5d27b049 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 2 Dec 2025 09:58:31 -0500 Subject: [PATCH 2/8] Fix EXIT_CODE --- .evergreen/execute-tests.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.evergreen/execute-tests.sh b/.evergreen/execute-tests.sh index 7b16d31..c0b3b28 100644 --- a/.evergreen/execute-tests.sh +++ b/.evergreen/execute-tests.sh @@ -15,6 +15,7 @@ cd ${REPO_NAME} MAX_ATTEMPTS=3 ATTEMPT=1 +EXIT_CODE=0 while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do bash ${ROOT_DIR}/${DIR}/run.sh From efc457abdbd8ca8d25cb7f49e7b2f6bb3d08c948 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 2 Dec 2025 10:20:47 -0500 Subject: [PATCH 3/8] Fix EXIT_CODE --- .evergreen/execute-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/execute-tests.sh b/.evergreen/execute-tests.sh index c0b3b28..0e8801d 100644 --- a/.evergreen/execute-tests.sh +++ b/.evergreen/execute-tests.sh @@ -33,4 +33,4 @@ done if [ $EXIT_CODE -ne 0 ]; then echo "Tests failed after $MAX_ATTEMPTS attempts." fi -exit($EXIT_CODE) +exit $EXIT_CODE From 867f5f2a0d0b9d9ab11c48d9b902bfc6e2f3f20e Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 2 Dec 2025 10:28:16 -0500 Subject: [PATCH 4/8] debugging --- .evergreen/execute-tests.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.evergreen/execute-tests.sh b/.evergreen/execute-tests.sh index 0e8801d..9e93923 100644 --- a/.evergreen/execute-tests.sh +++ b/.evergreen/execute-tests.sh @@ -16,12 +16,16 @@ cd ${REPO_NAME} MAX_ATTEMPTS=3 ATTEMPT=1 EXIT_CODE=0 +set +e while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do + echo "Running test attempt $ATTEMPT" bash ${ROOT_DIR}/${DIR}/run.sh EXIT_CODE=$? + echo "Test attempt $ATTEMPT had exit code $EXIT_CODE" + if [ $EXIT_CODE -eq 0 ]; then break else From ded8625dd8f9e76abb6ebeb36fadac7eeb4203a5 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 2 Dec 2025 11:01:26 -0500 Subject: [PATCH 5/8] Cleanup --- .evergreen/execute-tests.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.evergreen/execute-tests.sh b/.evergreen/execute-tests.sh index 9e93923..61dc2e7 100644 --- a/.evergreen/execute-tests.sh +++ b/.evergreen/execute-tests.sh @@ -19,17 +19,14 @@ EXIT_CODE=0 set +e while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do - echo "Running test attempt $ATTEMPT" bash ${ROOT_DIR}/${DIR}/run.sh EXIT_CODE=$? - echo "Test attempt $ATTEMPT had exit code $EXIT_CODE" - if [ $EXIT_CODE -eq 0 ]; then break else - echo "Tests failed with exit code $EXIT_CODE on attempt $ATTEMPT of $MAX_ATTEMPTS..." + echo "Tests failed on attempt $ATTEMPT of $MAX_ATTEMPTS..." ((ATTEMPT++)) fi done From 2865c88876590f7cafd714e04b3395e7d0217580 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 2 Dec 2025 11:04:40 -0500 Subject: [PATCH 6/8] Linting --- .evergreen/execute-tests.sh | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.evergreen/execute-tests.sh b/.evergreen/execute-tests.sh index 61dc2e7..789370a 100644 --- a/.evergreen/execute-tests.sh +++ b/.evergreen/execute-tests.sh @@ -18,20 +18,20 @@ ATTEMPT=1 EXIT_CODE=0 set +e -while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do +while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do bash ${ROOT_DIR}/${DIR}/run.sh - + EXIT_CODE=$? - if [ $EXIT_CODE -eq 0 ]; then - break - else - echo "Tests failed on attempt $ATTEMPT of $MAX_ATTEMPTS..." - ((ATTEMPT++)) - fi -done + if [ $EXIT_CODE -eq 0 ]; then + break + else + echo "Tests failed on attempt $ATTEMPT of $MAX_ATTEMPTS..." + ((ATTEMPT++)) + fi +done -if [ $EXIT_CODE -ne 0 ]; then +if [ $EXIT_CODE -ne 0 ]; then echo "Tests failed after $MAX_ATTEMPTS attempts." -fi +fi exit $EXIT_CODE From 91ca8ff3b8049773376da98ee74c34c0d3f72370 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 2 Dec 2025 14:21:52 -0500 Subject: [PATCH 7/8] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2aa49ce..195c878 100644 --- a/README.md +++ b/README.md @@ -187,7 +187,7 @@ evergreen patch -p ai-ml-pipeline-testing --param REPO_ORG=caseyclements --param ### Handling Failing Tests -Tests are run periodically (nightly) and any failures will propagate into both the `dbx-ai-ml-testing-pipline-notifications` and `dbx-ai-ml-testing-pipeline-notifications-{language}` channel. Repo owners of this `ai-ml-testing-pipeline` library are required to join the `dbx-ai-ml-testing-pipeline-notifications`. Pipeline specific implementers must **at least** join `dbx-ai-ml-testing-pipline-notifications-{language}` (e.g. whomever implemented `langchain-js` must at least be a member of `dbx-ai-ml-testing-pipeline-notifications-js`). +Tests are run periodically (nightly). All failing test suites are automatically retried up to three times. Any failures will propagate into both the `dbx-ai-ml-testing-pipline-notifications` and `dbx-ai-ml-testing-pipeline-notifications-{language}` channel. Repo owners of this `ai-ml-testing-pipeline` library are required to join the `dbx-ai-ml-testing-pipeline-notifications`. Pipeline specific implementers must **at least** join `dbx-ai-ml-testing-pipline-notifications-{language}` (e.g. whomever implemented `langchain-js` must at least be a member of `dbx-ai-ml-testing-pipeline-notifications-js`). If tests are found to be failing, and cannot be addressed quickly, the responsible team MUST create a JIRA ticket within their team's project (e.g. a python failure should generate an `INTPYTHON` ticket), and disable the relevant tests in the `config.yml` file, with a comment about the JIRA ticket that will address it. From 0282908112c182787393079048b74b19b0f17187 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 2 Dec 2025 15:35:47 -0500 Subject: [PATCH 8/8] Update wording --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 195c878..28cbc52 100644 --- a/README.md +++ b/README.md @@ -187,7 +187,7 @@ evergreen patch -p ai-ml-pipeline-testing --param REPO_ORG=caseyclements --param ### Handling Failing Tests -Tests are run periodically (nightly). All failing test suites are automatically retried up to three times. Any failures will propagate into both the `dbx-ai-ml-testing-pipline-notifications` and `dbx-ai-ml-testing-pipeline-notifications-{language}` channel. Repo owners of this `ai-ml-testing-pipeline` library are required to join the `dbx-ai-ml-testing-pipeline-notifications`. Pipeline specific implementers must **at least** join `dbx-ai-ml-testing-pipline-notifications-{language}` (e.g. whomever implemented `langchain-js` must at least be a member of `dbx-ai-ml-testing-pipeline-notifications-js`). +Tests are run periodically (nightly). All failing test suites are automatically retried up to two times. Any failures will propagate into both the `dbx-ai-ml-testing-pipline-notifications` and `dbx-ai-ml-testing-pipeline-notifications-{language}` channel. Repo owners of this `ai-ml-testing-pipeline` library are required to join the `dbx-ai-ml-testing-pipeline-notifications`. Pipeline specific implementers must **at least** join `dbx-ai-ml-testing-pipline-notifications-{language}` (e.g. whomever implemented `langchain-js` must at least be a member of `dbx-ai-ml-testing-pipeline-notifications-js`). If tests are found to be failing, and cannot be addressed quickly, the responsible team MUST create a JIRA ticket within their team's project (e.g. a python failure should generate an `INTPYTHON` ticket), and disable the relevant tests in the `config.yml` file, with a comment about the JIRA ticket that will address it.