chore(CI): Debug artifact download #4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Weekly Model Evaluation
#
# Runs the E2E evaluation suite against one or more models and then updates
# docs/model-evaluation.md from the collected results.
name: Weekly Model Evaluation

on:
  schedule:
    - cron: '0 6 * * 1'  # Every Monday at 6:00 UTC
  pull_request:  # TODO: Remove before merging — only for debugging artifact paths
    paths:
      - '.github/workflows/model-evaluation.yml'
  workflow_dispatch:
    inputs:
      models:
        description: 'Comma-separated list of model IDs to evaluate'
        required: false
        default: 'gpt-5-mini'

# Least-privilege default for GITHUB_TOKEN. Jobs that need to write
# (e.g. to push a branch or open a PR) declare their own `permissions`.
permissions:
  contents: read

# All runs share one group, so a newly triggered run cancels any run of this
# workflow that is still in progress.
concurrency:
  group: model-evaluation
  cancel-in-progress: true
jobs:
  # Builds the model matrix (one entry per requested model ID) and a date
  # stamp, and exposes both as job outputs.
  prepare:
    name: Prepare Model Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      date: ${{ steps.set-date.outputs.date }}
    steps:
      - name: Set date
        id: set-date
        # -u pins the stamp to UTC, matching the UTC cron schedule.
        run: echo "date=$(date -u +%Y-%m-%d)" >> "$GITHUB_OUTPUT"
      - name: Set matrix
        id: set-matrix
        env:
          # Pass the user-supplied input through the environment instead of
          # interpolating it into the script, so it cannot inject shell code.
          # `inputs` is empty for non-dispatch triggers, hence the fallback.
          MODELS: ${{ inputs.models || 'gpt-5-mini' }}
        run: |
          # The default shell is `bash -e` without pipefail; enable it so a
          # failing tr/jq aborts the step instead of emitting an empty matrix.
          set -euo pipefail
          # Strip all whitespace ("a, b" -> "a,b"), split on commas, and drop
          # empty entries (e.g. from a trailing comma). Result: {"model":[...]}
          MATRIX=$(printf '%s' "$MODELS" | tr -d '[:space:]' \
            | jq -Rsc 'split(",") | map(select(length > 0)) | {model: .}')
          # An empty matrix vector would make the evaluate job fail with an
          # opaque error; fail here with a clear message instead.
          if [ "$(jq '.model | length' <<<"$MATRIX")" -eq 0 ]; then
            echo "::error::No model IDs supplied"
            exit 1
          fi
          echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
| evaluate: | |
| name: Evaluate ${{ matrix.model }} | |
| needs: prepare | |
| runs-on: ubuntu-latest | |
| strategy: | |
| matrix: ${{ fromJson(needs.prepare.outputs.matrix) }} | |
| fail-fast: false | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Set up Go | |
| uses: actions/setup-go@v5 | |
| with: | |
| go-version-file: go.mod | |
| - name: Setup proto files | |
| run: make proto-setup | |
| - name: Generate proto descriptors | |
| run: make proto-generate | |
| - name: Download WireMock | |
| run: make mock-download | |
| - name: Run E2E tests | |
| env: | |
| OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} | |
| MODEL_KEY: ${{ secrets.OPENAI_API_KEY }} | |
| MODEL_NAME: "openai:${{ matrix.model }}" | |
| run: make e2e-test | |
| - name: Upload results | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: eval-results-${{ matrix.model }} | |
| path: e2e-tests/mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json | |
| if-no-files-found: error | |
| update-docs: | |
| name: Update Documentation & Create PR | |
| needs: [prepare, evaluate] | |
| runs-on: ubuntu-latest | |
| permissions: | |
| contents: write | |
| pull-requests: write | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Download all results | |
| uses: actions/download-artifact@v8 | |
| with: | |
| # Download all artifacts instead of using pattern matching. | |
| # When pattern matches a single artifact, download-artifact extracts | |
| # it directly into the path without creating a subdirectory, breaking | |
| # the expected path structure. Downloading all artifacts always creates | |
| # per-artifact subdirectories. | |
| path: eval-results | |
| - name: Debug - list artifact structure | |
| run: | | |
| echo "=== eval-results directory structure ===" | |
| ls -R eval-results || echo "eval-results directory not found" | |
| - name: Update model evaluation docs | |
| run: | | |
| MODELS="${{ inputs.models || 'gpt-5-mini' }}" | |
| for MODEL in $(echo "$MODELS" | tr ',' ' '); do | |
| RESULTS_FILE="eval-results/eval-results-${MODEL}/mcpchecker-stackrox-mcp-e2e-out.json" | |
| if [ -f "$RESULTS_FILE" ]; then | |
| echo "Updating docs for model: ${MODEL}" | |
| ./scripts/update-model-evaluation.sh \ | |
| --model-id "${MODEL}" \ | |
| --results "${RESULTS_FILE}" | |
| else | |
| echo "::warning::No results found for model ${MODEL}" | |
| fi | |
| done | |
| - name: Clean up eval results | |
| run: rm -rf eval-results | |
| - name: Check for changes | |
| id: check-changes | |
| run: | | |
| if git diff --quiet docs/model-evaluation.md; then | |
| echo "changed=false" >> "$GITHUB_OUTPUT" | |
| else | |
| echo "changed=true" >> "$GITHUB_OUTPUT" | |
| fi | |
| # TODO: Re-enable after debugging artifact paths | |
| # - name: Create Pull Request | |
| # if: steps.check-changes.outputs.changed == 'true' | |
| # uses: peter-evans/create-pull-request@v7 | |
| # with: | |
| # branch: chore/update-model-evaluation-${{ needs.prepare.outputs.date }} | |
| # commit-message: "Update model evaluations ${{ needs.prepare.outputs.date }}" | |
| # title: "chore(evals): Update model evaluations ${{ needs.prepare.outputs.date }}" | |
| # body: | | |
| # Automated weekly model evaluation update. | |
| # | |
| # **Models evaluated:** ${{ inputs.models || 'gpt-5-mini' }} | |
| # **Date:** ${{ needs.prepare.outputs.date }} | |
| # | |
| # This PR was automatically generated by the [Model Evaluation workflow](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}). | |
| # base: main |