diff --git a/.github/workflows/contribution-check.lock.yml b/.github/workflows/contribution-check.lock.yml index fab6ad088e9..9ddba1ee2b8 100644 --- a/.github/workflows/contribution-check.lock.yml +++ b/.github/workflows/contribution-check.lock.yml @@ -350,15 +350,63 @@ jobs: DIFC_PROXY_IMAGE: 'ghcr.io/github/gh-aw-mcpg:v0.2.19' run: | bash "${RUNNER_TEMP}/gh-aw/actions/start_difc_proxy.sh" - - name: Set GH_REPO for proxied steps - run: | - echo "GH_REPO=${GITHUB_REPOSITORY}" >> "$GITHUB_ENV" - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt name: Fetch and filter PRs - run: "# Fetch open PRs from the target repository opened in the last 24 hours\nSINCE=$(date -d '24 hours ago' '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null \\\n || date -v-24H '+%Y-%m-%dT%H:%M:%SZ')\n\necho \"Fetching open PRs from $TARGET_REPOSITORY created since $SINCE...\"\nALL_PRS=$(gh pr list \\\n --repo \"$TARGET_REPOSITORY\" \\\n --state open \\\n --limit 100 \\\n --json number,createdAt \\\n --jq \"[.[] | select(.createdAt >= \\\"$SINCE\\\")]\" \\\n 2>/dev/null || echo \"[]\")\n\nTOTAL=$(echo \"$ALL_PRS\" | jq 'length')\necho \"Found $TOTAL open PRs created in the last 24 hours\"\n\n# Cap the number of PRs to evaluate at 5\nMAX_EVALUATE=5\nEVALUATED=$(echo \"$ALL_PRS\" | jq --argjson max \"$MAX_EVALUATE\" '[.[0:$max][] | .number]')\nEVALUATED_COUNT=$(echo \"$EVALUATED\" | jq 'length')\nSKIPPED_COUNT=$((TOTAL - EVALUATED_COUNT))\n\n# Write results to workspace root\njq -n \\\n --argjson pr_numbers \"$EVALUATED\" \\\n --argjson skipped_count \"$SKIPPED_COUNT\" \\\n --argjson evaluated_count \"$EVALUATED_COUNT\" \\\n '{pr_numbers: $pr_numbers, skipped_count: $skipped_count, evaluated_count: $evaluated_count}' \\\n > 
\"$GITHUB_WORKSPACE/pr-filter-results.json\"\n\necho \"✓ Wrote pr-filter-results.json: $EVALUATED_COUNT to evaluate, $SKIPPED_COUNT skipped\"\ncat \"$GITHUB_WORKSPACE/pr-filter-results.json\"\n\n# Pre-fetch CONTRIBUTING.md once so all subagent calls can reuse it\nCONTRIBUTING_FETCHED=false\nfor CONTRIBUTING_PATH in \"CONTRIBUTING.md\" \".github/CONTRIBUTING.md\" \"docs/CONTRIBUTING.md\"; do\n if gh api \"repos/$TARGET_REPOSITORY/contents/$CONTRIBUTING_PATH\" \\\n --jq '.content' 2>/dev/null | base64 -d > \"$GITHUB_WORKSPACE/contributing-guidelines.md\" 2>/dev/null; then\n echo \"✓ Pre-fetched contributing guidelines from $CONTRIBUTING_PATH\"\n CONTRIBUTING_FETCHED=true\n break\n fi\ndone\nif [ \"$CONTRIBUTING_FETCHED\" = \"false\" ]; then\n echo \"# No CONTRIBUTING.md found\" > \"$GITHUB_WORKSPACE/contributing-guidelines.md\"\n echo \"ℹ No CONTRIBUTING.md found in $TARGET_REPOSITORY (checked root, .github/, docs/)\"\nfi" + run: |- + # Fetch open PRs from the target repository opened in the last 24 hours + SINCE=$(date -d '24 hours ago' '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null \ + || date -v-24H '+%Y-%m-%dT%H:%M:%SZ') + + echo "Fetching open PRs from $TARGET_REPOSITORY created since $SINCE..." 
+ ALL_PRS=$(gh pr list \ + --repo "$TARGET_REPOSITORY" \ + --state open \ + --limit 100 \ + --json number,createdAt \ + --jq "[.[] | select(.createdAt >= \"$SINCE\")]" \ + 2>/dev/null || echo "[]") + + TOTAL=$(echo "$ALL_PRS" | jq 'length') + echo "Found $TOTAL open PRs created in the last 24 hours" + + # Cap the number of PRs to evaluate at 5 + MAX_EVALUATE=5 + EVALUATED=$(echo "$ALL_PRS" | jq --argjson max "$MAX_EVALUATE" '[.[0:$max][] | .number]') + EVALUATED_COUNT=$(echo "$EVALUATED" | jq 'length') + SKIPPED_COUNT=$((TOTAL - EVALUATED_COUNT)) + # Write results to workspace root + jq -n \ + --argjson pr_numbers "$EVALUATED" \ + --argjson skipped_count "$SKIPPED_COUNT" \ + --argjson evaluated_count "$EVALUATED_COUNT" \ + '{pr_numbers: $pr_numbers, skipped_count: $skipped_count, evaluated_count: $evaluated_count}' \ + > "$GITHUB_WORKSPACE/pr-filter-results.json" + + echo "✓ Wrote pr-filter-results.json: $EVALUATED_COUNT to evaluate, $SKIPPED_COUNT skipped" + cat "$GITHUB_WORKSPACE/pr-filter-results.json" + + # Pre-fetch CONTRIBUTING.md once so all subagent calls can reuse it + CONTRIBUTING_FETCHED=false + for CONTRIBUTING_PATH in "CONTRIBUTING.md" ".github/CONTRIBUTING.md" "docs/CONTRIBUTING.md"; do + if gh api "repos/$TARGET_REPOSITORY/contents/$CONTRIBUTING_PATH" \ + --jq '.content' 2>/dev/null | base64 -d > "$GITHUB_WORKSPACE/contributing-guidelines.md" 2>/dev/null; then + echo "✓ Pre-fetched contributing guidelines from $CONTRIBUTING_PATH" + CONTRIBUTING_FETCHED=true + break + fi + done + if [ "$CONTRIBUTING_FETCHED" = "false" ]; then + echo "# No CONTRIBUTING.md found" > "$GITHUB_WORKSPACE/contributing-guidelines.md" + echo "ℹ No CONTRIBUTING.md found in $TARGET_REPOSITORY (checked root, .github/, docs/)" + fi - name: Configure Git credentials env: REPO_NAME: ${{ github.repository }} diff --git a/.github/workflows/daily-issues-report.lock.yml b/.github/workflows/daily-issues-report.lock.yml index c2eff670c52..89f18806035 100644 --- 
a/.github/workflows/daily-issues-report.lock.yml +++ b/.github/workflows/daily-issues-report.lock.yml @@ -1,5 +1,5 @@ # gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"2b1837fd30fd6b8bd53bb35dc16aa6fb9f2ae1e8b255a37e984419ca8200a91b","strict":true,"agent_id":"copilot"} -# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GH_AW_OTEL_ENDPOINT","GH_AW_OTEL_HEADERS","GITHUB_TOKEN"],"actions":[{"repo":"actions/cache/restore","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/cache/save","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"373c709c69115d41ff229c7e5df9f8788daa9553","version":"v9"},{"repo":"actions/setup-node","sha":"53b83947a5a98c8d113130e565377fae1a50d02f","version":"v6.3.0"},{"repo":"actions/setup-python","sha":"a309ff8b426b58ec0e2a45f0f869d46889d02405","version":"v6.2.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.20","digest":"sha256:9161f2415a3306a344aca34dd671ee69f122317e0a512e66dc64c94b9c508682","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.25.20@sha256:9161f2415a3306a344aca34dd671ee69f122317e0a512e66dc64c94b9c508682"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20","digest":"sha256:6971639e381e82e45134bcd333181f456df3a52cd6f818a3e3d6de068ff91519","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20@sha256:6971639e381e82e45134bcd333181f456df3a52cd6f818a3e3d6de068ff91519"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.20","digest":"sha256:5411d903f73ee597e6a084971c2adef3eb0bd405910df3ed7bf5e3d6bd58a236","pinned_image":"ghcr.io/github/
gh-aw-firewall/squid:0.25.20@sha256:5411d903f73ee597e6a084971c2adef3eb0bd405910df3ed7bf5e3d6bd58a236"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.2.19","digest":"sha256:44d4d8de7e6c37aaea484eba489940c52df6a0b54078ddcbc9327592d5b3c3dd","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.2.19@sha256:44d4d8de7e6c37aaea484eba489940c52df6a0b54078ddcbc9327592d5b3c3dd"},{"image":"ghcr.io/github/github-mcp-server:v0.32.0","digest":"sha256:2763823c63bcca718ce53850a1d7fcf2f501ec84028394f1b63ce7e9f4f9be28","pinned_image":"ghcr.io/github/github-mcp-server:v0.32.0@sha256:2763823c63bcca718ce53850a1d7fcf2f501ec84028394f1b63ce7e9f4f9be28"},{"image":"node:lts-alpine","digest":"sha256:01743339035a5c3c11a373cd7c83aeab6ed1457b55da6a69e014a95ac4e4700b","pinned_image":"node:lts-alpine@sha256:01743339035a5c3c11a373cd7c83aeab6ed1457b55da6a69e014a95ac4e4700b"}]} +# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GH_AW_OTEL_ENDPOINT","GH_AW_OTEL_HEADERS","GITHUB_TOKEN"],"actions":[{"repo":"actions/cache/restore","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/cache/save","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"373c709c69115d41ff229c7e5df9f8788daa9553","version":"v9"},{"repo":"actions/setup-node","sha":"53b83947a5a98c8d113130e565377fae1a50d02f","version":"v6.3.0"},{"repo":"actions/setup-python","sha":"a309ff8b426b58ec0e2a45f0f869d46889d02405","version":"v6.2.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.20","digest":"sha256:9161f2415a3306a344aca34dd671ee69f122317e0a512e66dc64c
94b9c508682","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.25.20@sha256:9161f2415a3306a344aca34dd671ee69f122317e0a512e66dc64c94b9c508682"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20","digest":"sha256:6971639e381e82e45134bcd333181f456df3a52cd6f818a3e3d6de068ff91519","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20@sha256:6971639e381e82e45134bcd333181f456df3a52cd6f818a3e3d6de068ff91519"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.20","digest":"sha256:5411d903f73ee597e6a084971c2adef3eb0bd405910df3ed7bf5e3d6bd58a236","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.25.20@sha256:5411d903f73ee597e6a084971c2adef3eb0bd405910df3ed7bf5e3d6bd58a236"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.2.19","digest":"sha256:44d4d8de7e6c37aaea484eba489940c52df6a0b54078ddcbc9327592d5b3c3dd","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.2.19@sha256:44d4d8de7e6c37aaea484eba489940c52df6a0b54078ddcbc9327592d5b3c3dd"},{"image":"ghcr.io/github/github-mcp-server:v0.32.0","digest":"sha256:2763823c63bcca718ce53850a1d7fcf2f501ec84028394f1b63ce7e9f4f9be28","pinned_image":"ghcr.io/github/github-mcp-server:v0.32.0@sha256:2763823c63bcca718ce53850a1d7fcf2f501ec84028394f1b63ce7e9f4f9be28"},{"image":"node:lts-alpine","digest":"sha256:01743339035a5c3c11a373cd7c83aeab6ed1457b55da6a69e014a95ac4e4700b","pinned_image":"node:lts-alpine@sha256:01743339035a5c3c11a373cd7c83aeab6ed1457b55da6a69e014a95ac4e4700b"}]} # ___ _ _ # / _ \ | | (_) # | |_| | __ _ ___ _ __ | |_ _ ___ @@ -52,7 +52,7 @@ # - actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 # - actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 # - actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 -# - actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 +# - actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # - actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 # # Container images used: @@ -401,26 
+401,152 @@ jobs: DIFC_PROXY_IMAGE: 'ghcr.io/github/gh-aw-mcpg:v0.2.19' run: | bash "${RUNNER_TEMP}/gh-aw/actions/start_difc_proxy.sh" - - name: Set GH_REPO for proxied steps - run: | - echo "GH_REPO=${GITHUB_REPOSITORY}" >> "$GITHUB_ENV" - - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - - name: Install gh CLI + - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + name: Setup jq utilities directory + run: |- + mkdir -p /tmp/gh-aw + cat > /tmp/gh-aw/jqschema.sh << 'EOF' + #!/usr/bin/env bash + # jqschema.sh + jq -c ' + def walk(f): + . as $in | + if type == "object" then + reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))}) + elif type == "array" then + if length == 0 then [] else [.[0] | walk(f)] end + else + type + end; + walk(.) 
+ ' + EOF + chmod +x /tmp/gh-aw/jqschema.sh + - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + name: Install gh CLI run: | bash "${RUNNER_TEMP}/gh-aw/actions/install_gh_cli.sh" - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt name: Fetch issues - run: "# Create output directories\nmkdir -p /tmp/gh-aw/issues-data\nmkdir -p /tmp/gh-aw/cache-memory\n\n# Get today's date for cache identification\nTODAY=$(date '+%Y-%m-%d')\nCACHE_DIR=\"/tmp/gh-aw/cache-memory\"\n\n# Check if cached data exists from today\nif [ -f \"$CACHE_DIR/issues-${TODAY}.json\" ] && [ -s \"$CACHE_DIR/issues-${TODAY}.json\" ]; then\n echo \"✓ Found cached issues data from ${TODAY}\"\n cp \"$CACHE_DIR/issues-${TODAY}.json\" /tmp/gh-aw/issues-data/issues.json\n \n # Regenerate schema if missing\n if [ ! -f \"$CACHE_DIR/issues-${TODAY}-schema.json\" ]; then\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/issues-data/issues.json > \"$CACHE_DIR/issues-${TODAY}-schema.json\"\n fi\n cp \"$CACHE_DIR/issues-${TODAY}-schema.json\" /tmp/gh-aw/issues-data/issues-schema.json\n \n echo \"Using cached data from ${TODAY}\"\n echo \"Total issues in cache: $(jq 'length' /tmp/gh-aw/issues-data/issues.json)\"\nelse\n echo \"⬇ Downloading fresh issues data...\"\n \n # Fetch all issues (open and closed) using gh CLI\n # Using --limit 1000 to get the last 1000 issues, unfiltered\n echo \"Fetching the last 1000 issues...\"\n if ! 
gh issue list --repo \"$GITHUB_REPOSITORY\" \\\n --state all \\\n --json number,title,author,createdAt,state,url,body,labels,updatedAt,closedAt,milestone,assignees,comments \\\n --limit 1000 \\\n > /tmp/gh-aw/issues-data/issues.json; then\n echo \"::warning::Failed to fetch issues data (issues may be disabled or temporarily unavailable). Using empty dataset. Downstream analysis will report zero issues — check repository Issues settings or retry the workflow if this is unexpected.\"\n echo \"[]\" > /tmp/gh-aw/issues-data/issues.json\n fi\n\n # Generate schema for reference\n /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/issues-data/issues.json > /tmp/gh-aw/issues-data/issues-schema.json\n\n # Store in cache with today's date\n cp /tmp/gh-aw/issues-data/issues.json \"$CACHE_DIR/issues-${TODAY}.json\"\n cp /tmp/gh-aw/issues-data/issues-schema.json \"$CACHE_DIR/issues-${TODAY}-schema.json\"\n\n echo \"✓ Issues data saved to cache: issues-${TODAY}.json\"\n echo \"Total issues found: $(jq 'length' /tmp/gh-aw/issues-data/issues.json)\"\nfi\n\n# Always ensure data is available at expected locations for backward compatibility\necho \"Issues data available at: /tmp/gh-aw/issues-data/issues.json\"\necho \"Schema available at: /tmp/gh-aw/issues-data/issues-schema.json\"" - - name: Setup Python environment - run: "# Create working directory for Python scripts\nmkdir -p /tmp/gh-aw/python\nmkdir -p /tmp/gh-aw/python/data\nmkdir -p /tmp/gh-aw/python/charts\nmkdir -p /tmp/gh-aw/python/artifacts\n\necho \"Python environment setup complete\"\necho \"Working directory: /tmp/gh-aw/python\"\necho \"Data directory: /tmp/gh-aw/python/data\"\necho \"Charts directory: /tmp/gh-aw/python/charts\"\necho \"Artifacts directory: /tmp/gh-aw/python/artifacts\"\n" - - name: Install Python scientific libraries - run: "# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! 
-d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet numpy pandas matplotlib seaborn scipy\n\n# Verify installations\n/tmp/gh-aw/venv/bin/python3 -c \"import numpy; print(f'NumPy {numpy.__version__} installed')\"\n/tmp/gh-aw/venv/bin/python3 -c \"import pandas; print(f'Pandas {pandas.__version__} installed')\"\n/tmp/gh-aw/venv/bin/python3 -c \"import matplotlib; print(f'Matplotlib {matplotlib.__version__} installed')\"\n/tmp/gh-aw/venv/bin/python3 -c \"import seaborn; print(f'Seaborn {seaborn.__version__} installed')\"\n/tmp/gh-aw/venv/bin/python3 -c \"import scipy; print(f'SciPy {scipy.__version__} installed')\"\n\necho \"All scientific libraries installed successfully\"\n" - - if: always() + run: |- + # Create output directories + mkdir -p /tmp/gh-aw/issues-data + mkdir -p /tmp/gh-aw/cache-memory + + # Get today's date for cache identification + TODAY=$(date '+%Y-%m-%d') + CACHE_DIR="/tmp/gh-aw/cache-memory" + + # Check if cached data exists from today + if [ -f "$CACHE_DIR/issues-${TODAY}.json" ] && [ -s "$CACHE_DIR/issues-${TODAY}.json" ]; then + echo "✓ Found cached issues data from ${TODAY}" + cp "$CACHE_DIR/issues-${TODAY}.json" /tmp/gh-aw/issues-data/issues.json + + # Regenerate schema if missing + if [ ! -f "$CACHE_DIR/issues-${TODAY}-schema.json" ]; then + /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/issues-data/issues.json > "$CACHE_DIR/issues-${TODAY}-schema.json" + fi + cp "$CACHE_DIR/issues-${TODAY}-schema.json" /tmp/gh-aw/issues-data/issues-schema.json + + echo "Using cached data from ${TODAY}" + echo "Total issues in cache: $(jq 'length' /tmp/gh-aw/issues-data/issues.json)" + else + echo "⬇ Downloading fresh issues data..." + + # Fetch all issues (open and closed) using gh CLI + # Using --limit 1000 to get the last 1000 issues, unfiltered + echo "Fetching the last 1000 issues..." + if ! 
gh issue list --repo "$GITHUB_REPOSITORY" \ + --state all \ + --json number,title,author,createdAt,state,url,body,labels,updatedAt,closedAt,milestone,assignees,comments \ + --limit 1000 \ + > /tmp/gh-aw/issues-data/issues.json; then + echo "::warning::Failed to fetch issues data (issues may be disabled or temporarily unavailable). Using empty dataset. Downstream analysis will report zero issues — check repository Issues settings or retry the workflow if this is unexpected." + echo "[]" > /tmp/gh-aw/issues-data/issues.json + fi + + # Generate schema for reference + /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/issues-data/issues.json > /tmp/gh-aw/issues-data/issues-schema.json + + # Store in cache with today's date + cp /tmp/gh-aw/issues-data/issues.json "$CACHE_DIR/issues-${TODAY}.json" + cp /tmp/gh-aw/issues-data/issues-schema.json "$CACHE_DIR/issues-${TODAY}-schema.json" + + echo "✓ Issues data saved to cache: issues-${TODAY}.json" + echo "Total issues found: $(jq 'length' /tmp/gh-aw/issues-data/issues.json)" + fi + + # Always ensure data is available at expected locations for backward compatibility + echo "Issues data available at: /tmp/gh-aw/issues-data/issues.json" + echo "Schema available at: /tmp/gh-aw/issues-data/issues-schema.json" + - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + name: Setup Python environment + run: | + # Create working directory for Python scripts + mkdir -p /tmp/gh-aw/python + mkdir -p /tmp/gh-aw/python/data + mkdir -p /tmp/gh-aw/python/charts + mkdir -p /tmp/gh-aw/python/artifacts + + echo "Python environment setup complete" + echo "Working directory: /tmp/gh-aw/python" + echo "Data directory: /tmp/gh-aw/python/data" + echo "Charts directory: /tmp/gh-aw/python/charts" + echo "Artifacts directory: /tmp/gh-aw/python/artifacts" + - env: + GH_HOST: 
localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + name: Install Python scientific libraries + run: | + # Create a virtual environment for proper package isolation (avoids --break-system-packages) + if [ ! -d /tmp/gh-aw/venv ]; then + python3 -m venv /tmp/gh-aw/venv + fi + echo "/tmp/gh-aw/venv/bin" >> "$GITHUB_PATH" + /tmp/gh-aw/venv/bin/pip install --quiet numpy pandas matplotlib seaborn scipy + + # Verify installations + /tmp/gh-aw/venv/bin/python3 -c "import numpy; print(f'NumPy {numpy.__version__} installed')" + /tmp/gh-aw/venv/bin/python3 -c "import pandas; print(f'Pandas {pandas.__version__} installed')" + /tmp/gh-aw/venv/bin/python3 -c "import matplotlib; print(f'Matplotlib {matplotlib.__version__} installed')" + /tmp/gh-aw/venv/bin/python3 -c "import seaborn; print(f'Seaborn {seaborn.__version__} installed')" + /tmp/gh-aw/venv/bin/python3 -c "import scipy; print(f'SciPy {scipy.__version__} installed')" + + echo "All scientific libraries installed successfully" + - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + if: always() name: Upload source files and data - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a with: if-no-files-found: warn name: python-source-and-data @@ -428,9 +554,31 @@ jobs: /tmp/gh-aw/python/*.py /tmp/gh-aw/python/data/* retention-days: 30 - - name: Setup Python NLP environment - run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! 
-d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet nltk scikit-learn textblob wordcloud\n\n# Download required NLTK corpora\n/tmp/gh-aw/venv/bin/python3 -c \"\nimport nltk\nfor corpus in ['punkt_tab', 'stopwords', 'vader_lexicon', 'averaged_perceptron_tagger_eng']:\n nltk.download(corpus, quiet=True)\nprint('NLTK corpora ready')\n\"\n\n/tmp/gh-aw/venv/bin/python3 -c \"import sklearn; print(f'scikit-learn {sklearn.__version__}')\"" + - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + name: Setup Python NLP environment + run: |- + mkdir -p /tmp/gh-aw/python/{data,charts,artifacts} + # Create a virtual environment for proper package isolation (avoids --break-system-packages) + if [ ! -d /tmp/gh-aw/venv ]; then + python3 -m venv /tmp/gh-aw/venv + fi + echo "/tmp/gh-aw/venv/bin" >> "$GITHUB_PATH" + /tmp/gh-aw/venv/bin/pip install --quiet nltk scikit-learn textblob wordcloud + + # Download required NLTK corpora + /tmp/gh-aw/venv/bin/python3 -c " + import nltk + for corpus in ['punkt_tab', 'stopwords', 'vader_lexicon', 'averaged_perceptron_tagger_eng']: + nltk.download(corpus, quiet=True) + print('NLTK corpora ready') + " + /tmp/gh-aw/venv/bin/python3 -c "import sklearn; print(f'scikit-learn {sklearn.__version__}')" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory run: bash "${RUNNER_TEMP}/gh-aw/actions/create_cache_memory_dir.sh" diff --git a/.github/workflows/issue-arborist.lock.yml b/.github/workflows/issue-arborist.lock.yml index f89e0b68908..7893fe9518a 100644 --- a/.github/workflows/issue-arborist.lock.yml +++ b/.github/workflows/issue-arborist.lock.yml @@ -347,17 +347,65 @@ jobs: DIFC_PROXY_IMAGE: 
'ghcr.io/github/gh-aw-mcpg:v0.2.19' run: | bash "${RUNNER_TEMP}/gh-aw/actions/start_difc_proxy.sh" - - name: Set GH_REPO for proxied steps - run: | - echo "GH_REPO=${GITHUB_REPOSITORY}" >> "$GITHUB_ENV" - - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + name: Setup jq utilities directory + run: |- + mkdir -p /tmp/gh-aw + cat > /tmp/gh-aw/jqschema.sh << 'EOF' + #!/usr/bin/env bash + # jqschema.sh + jq -c ' + def walk(f): + . as $in | + if type == "object" then + reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))}) + elif type == "array" then + if length == 0 then [] else [.[0] | walk(f)] end + else + type + end; + walk(.) 
+ ' + EOF + chmod +x /tmp/gh-aw/jqschema.sh + - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt name: Fetch issues - run: "# Create output directory\nmkdir -p /tmp/gh-aw/issues-data\n\necho \"⬇ Downloading the last 100 open issues (excluding sub-issues)...\"\n\n# Fetch the last 100 open issues that don't have a parent issue\n# Using search filter to exclude issues that are already sub-issues\ngh issue list --repo \"$GITHUB_REPOSITORY\" \\\n --search \"-parent-issue:*\" \\\n --state open \\\n --json number,title,author,createdAt,state,url,body,labels,updatedAt,closedAt,milestone,assignees \\\n --limit 100 \\\n > /tmp/gh-aw/issues-data/issues.json\n\n# Generate schema for reference using jqschema\n/tmp/gh-aw/jqschema.sh < /tmp/gh-aw/issues-data/issues.json > /tmp/gh-aw/issues-data/issues-schema.json\n\necho \"✓ Issues data saved to /tmp/gh-aw/issues-data/issues.json\"\necho \"✓ Schema saved to /tmp/gh-aw/issues-data/issues-schema.json\"\necho \"Total issues fetched: $(jq 'length' /tmp/gh-aw/issues-data/issues.json)\"\necho \"\"\necho \"Schema of the issues data:\"\ncat /tmp/gh-aw/issues-data/issues-schema.json | jq .\n" + run: | + # Create output directory + mkdir -p /tmp/gh-aw/issues-data + + echo "⬇ Downloading the last 100 open issues (excluding sub-issues)..." 
+ + # Fetch the last 100 open issues that don't have a parent issue + # Using search filter to exclude issues that are already sub-issues + gh issue list --repo "$GITHUB_REPOSITORY" \ + --search "-parent-issue:*" \ + --state open \ + --json number,title,author,createdAt,state,url,body,labels,updatedAt,closedAt,milestone,assignees \ + --limit 100 \ + > /tmp/gh-aw/issues-data/issues.json + + # Generate schema for reference using jqschema + /tmp/gh-aw/jqschema.sh < /tmp/gh-aw/issues-data/issues.json > /tmp/gh-aw/issues-data/issues-schema.json + echo "✓ Issues data saved to /tmp/gh-aw/issues-data/issues.json" + echo "✓ Schema saved to /tmp/gh-aw/issues-data/issues-schema.json" + echo "Total issues fetched: $(jq 'length' /tmp/gh-aw/issues-data/issues.json)" + echo "" + echo "Schema of the issues data:" + cat /tmp/gh-aw/issues-data/issues-schema.json | jq . - name: Configure Git credentials env: REPO_NAME: ${{ github.repository }} diff --git a/.github/workflows/stale-repo-identifier.lock.yml b/.github/workflows/stale-repo-identifier.lock.yml index b4ab57899df..19f980af3c1 100644 --- a/.github/workflows/stale-repo-identifier.lock.yml +++ b/.github/workflows/stale-repo-identifier.lock.yml @@ -1,5 +1,5 @@ # gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"5e48f7d5dba2241f2c017d613dda4c6455a91439c75a526d4350c90d299f6b54","strict":true,"agent_id":"copilot"} -# gh-aw-manifest: 
{"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/cache/restore","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/cache/save","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"373c709c69115d41ff229c7e5df9f8788daa9553","version":"v9"},{"repo":"actions/setup-python","sha":"a309ff8b426b58ec0e2a45f0f869d46889d02405","version":"v6.2.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7"},{"repo":"github/stale-repos","sha":"5f2e18fc5432823f96c1feb69327f665c2acab59","version":"v9.0.8"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.20","digest":"sha256:9161f2415a3306a344aca34dd671ee69f122317e0a512e66dc64c94b9c508682","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.25.20@sha256:9161f2415a3306a344aca34dd671ee69f122317e0a512e66dc64c94b9c508682"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20","digest":"sha256:6971639e381e82e45134bcd333181f456df3a52cd6f818a3e3d6de068ff91519","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20@sha256:6971639e381e82e45134bcd333181f456df3a52cd6f818a3e3d6de068ff91519"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.20","digest":"sha256:5411d903f73ee597e6a084971c2adef3eb0bd405910df3ed7bf5e3d6bd58a236","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.25.20@sha256:5411d903f73ee597e6a084971c2adef3eb0bd405910df3ed7bf5e3d6bd58a236"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.2.19","digest":"sha256:44d4d8de7e6c37aaea484eba489940c52df6a0b54078ddcbc9327592d5b3c3dd","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.2.19@sha256:44d4d8de7e6c37aaea484eba489940c52df6a0b54078ddcbc932759
2d5b3c3dd"},{"image":"ghcr.io/github/github-mcp-server:v0.32.0","digest":"sha256:2763823c63bcca718ce53850a1d7fcf2f501ec84028394f1b63ce7e9f4f9be28","pinned_image":"ghcr.io/github/github-mcp-server:v0.32.0@sha256:2763823c63bcca718ce53850a1d7fcf2f501ec84028394f1b63ce7e9f4f9be28"},{"image":"node:lts-alpine","digest":"sha256:01743339035a5c3c11a373cd7c83aeab6ed1457b55da6a69e014a95ac4e4700b","pinned_image":"node:lts-alpine@sha256:01743339035a5c3c11a373cd7c83aeab6ed1457b55da6a69e014a95ac4e4700b"}]} +# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/cache/restore","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/cache/save","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"373c709c69115d41ff229c7e5df9f8788daa9553","version":"v9"},{"repo":"actions/setup-python","sha":"a309ff8b426b58ec0e2a45f0f869d46889d02405","version":"v6.2.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7"},{"repo":"github/stale-repos","sha":"5f2e18fc5432823f96c1feb69327f665c2acab59","version":"v9.0.8"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.20","digest":"sha256:9161f2415a3306a344aca34dd671ee69f122317e0a512e66dc64c94b9c508682","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.25.20@sha256:9161f2415a3306a344aca34dd671ee69f122317e0a512e66dc64c94b9c508682"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20","digest":"sha256:6971639e381e82e45134bcd333181f456df3a52cd6f818a3e3d6de068ff91519","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20
@sha256:6971639e381e82e45134bcd333181f456df3a52cd6f818a3e3d6de068ff91519"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.20","digest":"sha256:5411d903f73ee597e6a084971c2adef3eb0bd405910df3ed7bf5e3d6bd58a236","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.25.20@sha256:5411d903f73ee597e6a084971c2adef3eb0bd405910df3ed7bf5e3d6bd58a236"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.2.19","digest":"sha256:44d4d8de7e6c37aaea484eba489940c52df6a0b54078ddcbc9327592d5b3c3dd","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.2.19@sha256:44d4d8de7e6c37aaea484eba489940c52df6a0b54078ddcbc9327592d5b3c3dd"},{"image":"ghcr.io/github/github-mcp-server:v0.32.0","digest":"sha256:2763823c63bcca718ce53850a1d7fcf2f501ec84028394f1b63ce7e9f4f9be28","pinned_image":"ghcr.io/github/github-mcp-server:v0.32.0@sha256:2763823c63bcca718ce53850a1d7fcf2f501ec84028394f1b63ce7e9f4f9be28"},{"image":"node:lts-alpine","digest":"sha256:01743339035a5c3c11a373cd7c83aeab6ed1457b55da6a69e014a95ac4e4700b","pinned_image":"node:lts-alpine@sha256:01743339035a5c3c11a373cd7c83aeab6ed1457b55da6a69e014a95ac4e4700b"}]} # ___ _ _ # / _ \ | | (_) # | |_| | __ _ ___ _ __ | |_ _ ___ @@ -48,9 +48,9 @@ # - actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 # - actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 # - actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 -# - actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 +# - actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # - actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 -# - github/stale-repos@5f2e18fc5432823f96c1feb69327f665c2acab59 # v9.0.8 +# - github/stale-repos@5f2e18fc5432823f96c1feb69327f665c2acab59 # # Container images used: # - ghcr.io/github/gh-aw-firewall/agent:0.25.20@sha256:9161f2415a3306a344aca34dd671ee69f122317e0a512e66dc64c94b9c508682 @@ -386,16 +386,57 @@ jobs: DIFC_PROXY_IMAGE: 'ghcr.io/github/gh-aw-mcpg:v0.2.19' run: | bash 
"${RUNNER_TEMP}/gh-aw/actions/start_difc_proxy.sh" - - name: Set GH_REPO for proxied steps + - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + name: Setup Python environment run: | - echo "GH_REPO=${GITHUB_REPOSITORY}" >> "$GITHUB_ENV" - - name: Setup Python environment - run: "# Create working directory for Python scripts\nmkdir -p /tmp/gh-aw/python\nmkdir -p /tmp/gh-aw/python/data\nmkdir -p /tmp/gh-aw/python/charts\nmkdir -p /tmp/gh-aw/python/artifacts\n\necho \"Python environment setup complete\"\necho \"Working directory: /tmp/gh-aw/python\"\necho \"Data directory: /tmp/gh-aw/python/data\"\necho \"Charts directory: /tmp/gh-aw/python/charts\"\necho \"Artifacts directory: /tmp/gh-aw/python/artifacts\"\n" - - name: Install Python scientific libraries - run: "# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! 
-d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet numpy pandas matplotlib seaborn scipy\n\n# Verify installations\n/tmp/gh-aw/venv/bin/python3 -c \"import numpy; print(f'NumPy {numpy.__version__} installed')\"\n/tmp/gh-aw/venv/bin/python3 -c \"import pandas; print(f'Pandas {pandas.__version__} installed')\"\n/tmp/gh-aw/venv/bin/python3 -c \"import matplotlib; print(f'Matplotlib {matplotlib.__version__} installed')\"\n/tmp/gh-aw/venv/bin/python3 -c \"import seaborn; print(f'Seaborn {seaborn.__version__} installed')\"\n/tmp/gh-aw/venv/bin/python3 -c \"import scipy; print(f'SciPy {scipy.__version__} installed')\"\n\necho \"All scientific libraries installed successfully\"\n" - - if: always() + # Create working directory for Python scripts + mkdir -p /tmp/gh-aw/python + mkdir -p /tmp/gh-aw/python/data + mkdir -p /tmp/gh-aw/python/charts + mkdir -p /tmp/gh-aw/python/artifacts + + echo "Python environment setup complete" + echo "Working directory: /tmp/gh-aw/python" + echo "Data directory: /tmp/gh-aw/python/data" + echo "Charts directory: /tmp/gh-aw/python/charts" + echo "Artifacts directory: /tmp/gh-aw/python/artifacts" + - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + name: Install Python scientific libraries + run: | + # Create a virtual environment for proper package isolation (avoids --break-system-packages) + if [ ! 
-d /tmp/gh-aw/venv ]; then + python3 -m venv /tmp/gh-aw/venv + fi + echo "/tmp/gh-aw/venv/bin" >> "$GITHUB_PATH" + /tmp/gh-aw/venv/bin/pip install --quiet numpy pandas matplotlib seaborn scipy + + # Verify installations + /tmp/gh-aw/venv/bin/python3 -c "import numpy; print(f'NumPy {numpy.__version__} installed')" + /tmp/gh-aw/venv/bin/python3 -c "import pandas; print(f'Pandas {pandas.__version__} installed')" + /tmp/gh-aw/venv/bin/python3 -c "import matplotlib; print(f'Matplotlib {matplotlib.__version__} installed')" + /tmp/gh-aw/venv/bin/python3 -c "import seaborn; print(f'Seaborn {seaborn.__version__} installed')" + /tmp/gh-aw/venv/bin/python3 -c "import scipy; print(f'SciPy {scipy.__version__} installed')" + + echo "All scientific libraries installed successfully" + - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + if: always() name: Upload source files and data - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a with: if-no-files-found: warn name: python-source-and-data @@ -403,13 +444,56 @@ jobs: /tmp/gh-aw/python/*.py /tmp/gh-aw/python/data/* retention-days: 30 - - name: Setup jq utilities directory - run: "mkdir -p /tmp/gh-aw\ncat > /tmp/gh-aw/jqschema.sh << 'EOF'\n#!/usr/bin/env bash\n# jqschema.sh\njq -c '\ndef walk(f):\n . as $in |\n if type == \"object\" then\n reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))})\n elif type == \"array\" then\n if length == 0 then [] else [.[0] | walk(f)] end\n else\n type\n end;\nwalk(.)\n'\nEOF\nchmod +x /tmp/gh-aw/jqschema.sh" - - name: Setup Python environment - run: "mkdir -p /tmp/gh-aw/python/{data,charts,artifacts}\n# Create a virtual environment for proper package isolation (avoids --break-system-packages)\nif [ ! 
-d /tmp/gh-aw/venv ]; then\n python3 -m venv /tmp/gh-aw/venv\nfi\necho \"/tmp/gh-aw/venv/bin\" >> \"$GITHUB_PATH\"\n/tmp/gh-aw/venv/bin/pip install --quiet numpy pandas matplotlib seaborn scipy\n" - - if: always() + - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + name: Setup jq utilities directory + run: |- + mkdir -p /tmp/gh-aw + cat > /tmp/gh-aw/jqschema.sh << 'EOF' + #!/usr/bin/env bash + # jqschema.sh + jq -c ' + def walk(f): + . as $in | + if type == "object" then + reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))}) + elif type == "array" then + if length == 0 then [] else [.[0] | walk(f)] end + else + type + end; + walk(.) + ' + EOF + chmod +x /tmp/gh-aw/jqschema.sh + - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + name: Setup Python environment + run: | + mkdir -p /tmp/gh-aw/python/{data,charts,artifacts} + # Create a virtual environment for proper package isolation (avoids --break-system-packages) + if [ ! 
-d /tmp/gh-aw/venv ]; then + python3 -m venv /tmp/gh-aw/venv + fi + echo "/tmp/gh-aw/venv/bin" >> "$GITHUB_PATH" + /tmp/gh-aw/venv/bin/pip install --quiet numpy pandas matplotlib seaborn scipy + - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt + if: always() name: Upload source files and data - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a with: if-no-files-found: warn name: trending-source-and-data @@ -420,17 +504,30 @@ jobs: - env: ADDITIONAL_METRICS: release,pr EXEMPT_TOPICS: keep,template + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql INACTIVE_DAYS: "365" + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt ORGANIZATION: ${{ env.ORGANIZATION }} id: stale-repos name: Run stale-repos tool - uses: github/stale-repos@5f2e18fc5432823f96c1feb69327f665c2acab59 # v9.0.8 + uses: github/stale-repos@5f2e18fc5432823f96c1feb69327f665c2acab59 - env: + GH_HOST: localhost:18443 + GH_REPO: ${{ github.repository }} + GITHUB_API_URL: https://localhost:18443/api/v3 + GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql INACTIVE_REPOS: ${{ steps.stale-repos.outputs.inactiveRepos }} + NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt name: Save stale repos output - run: "mkdir -p /tmp/stale-repos-data\necho \"$INACTIVE_REPOS\" > /tmp/stale-repos-data/inactive-repos.json\necho \"Stale repositories data saved\"\necho \"Total stale repositories: $(jq 'length' /tmp/stale-repos-data/inactive-repos.json)\"" - + run: |- + mkdir -p /tmp/stale-repos-data + echo "$INACTIVE_REPOS" > /tmp/stale-repos-data/inactive-repos.json + 
echo "Stale repositories data saved" + echo "Total stale repositories: $(jq 'length' /tmp/stale-repos-data/inactive-repos.json)" # Cache memory file share configuration from frontmatter processed below - name: Create cache-memory directory run: bash "${RUNNER_TEMP}/gh-aw/actions/create_cache_memory_dir.sh" diff --git a/actions/setup/sh/start_difc_proxy.sh b/actions/setup/sh/start_difc_proxy.sh index 1a430128512..7185cc971f1 100644 --- a/actions/setup/sh/start_difc_proxy.sh +++ b/actions/setup/sh/start_difc_proxy.sh @@ -3,13 +3,17 @@ # This script starts the awmg proxy container that routes gh CLI calls # through DIFC integrity filtering before the agent runs. # +# This script does NOT modify $GITHUB_ENV. The compiler injects step-level +# env: blocks (GH_HOST, GITHUB_API_URL, GITHUB_GRAPHQL_URL, NODE_EXTRA_CA_CERTS, +# GH_REPO) directly on each custom step so that the proxy address is scoped to +# proxied steps only and does not overwrite GHE host values. +# # Environment: # DIFC_PROXY_POLICY - JSON guard policy string # DIFC_PROXY_IMAGE - Container image to use (e.g., ghcr.io/github/gh-aw-mcpg:v0.2.2) # GH_TOKEN - GitHub token passed to the proxy container # GITHUB_SERVER_URL - GitHub server URL for upstream routing (e.g. https://github.com or https://TENANT.ghe.com) # GITHUB_REPOSITORY - Repository name (owner/repo) for git remote -# GITHUB_ENV - Path to GitHub Actions environment file set -e @@ -66,17 +70,7 @@ for i in $(seq 1 30); do fi if curl -sf "https://localhost:18443/api/v3/health" -o /dev/null 2>/dev/null; then echo "DIFC proxy ready on port 18443" - # Route gh CLI calls through the proxy. - echo "GH_HOST=localhost:18443" >> "$GITHUB_ENV" git remote add proxy "https://localhost:18443/${GITHUB_REPOSITORY}.git" || true - # Route actions/github-script Octokit calls through the proxy. - # Save the originals so stop_difc_proxy.sh can restore them. 
- echo "GH_AW_ORIGINAL_GITHUB_API_URL=${GITHUB_API_URL:-}" >> "$GITHUB_ENV" - echo "GH_AW_ORIGINAL_GITHUB_GRAPHQL_URL=${GITHUB_GRAPHQL_URL:-}" >> "$GITHUB_ENV" - echo "GITHUB_API_URL=https://localhost:18443/api/v3" >> "$GITHUB_ENV" - echo "GITHUB_GRAPHQL_URL=https://localhost:18443/api/graphql" >> "$GITHUB_ENV" - # Trust the proxy TLS certificate from Node.js (used by actions/github-script). - echo "NODE_EXTRA_CA_CERTS=$PROXY_LOG_DIR/proxy-tls/ca.crt" >> "$GITHUB_ENV" PROXY_READY=true break fi diff --git a/actions/setup/sh/stop_difc_proxy.sh b/actions/setup/sh/stop_difc_proxy.sh index 22dd5b42dff..4f3d3a43985 100644 --- a/actions/setup/sh/stop_difc_proxy.sh +++ b/actions/setup/sh/stop_difc_proxy.sh @@ -1,11 +1,12 @@ #!/usr/bin/env bash # Stop DIFC proxy for pre-agent gh CLI steps -# This script stops the awmg proxy container, removes the proxy CA certificate from the -# system trust store (if it was installed), and clears the GH_HOST environment variable. +# This script stops the awmg proxy container and removes the proxy CA certificate +# from the system trust store (if it was installed). # The proxy must be stopped before the MCP gateway starts to avoid double-filtering traffic. # -# Environment: -# GITHUB_ENV - Path to GitHub Actions environment file +# This script does NOT modify $GITHUB_ENV. The proxy routing env vars (GH_HOST, +# GITHUB_API_URL, etc.) are injected as step-level env by the compiler and are +# never written to $GITHUB_ENV, so no restore/clear is needed here. set -e @@ -22,21 +23,4 @@ if [ -f "$DIFC_PROXY_CA_CERT" ]; then fi fi -# Only clear GH_HOST if it was set to the proxy address; preserve any pre-existing -# GH_HOST value (e.g., from configure_gh_for_ghe.sh on GitHub Enterprise runners). -if [ "${GH_HOST:-}" = "localhost:18443" ]; then - echo "GH_HOST=" >> "$GITHUB_ENV" -fi - -# Restore GITHUB_API_URL and GITHUB_GRAPHQL_URL to their original values. 
-if [ "${GITHUB_API_URL:-}" = "https://localhost:18443/api/v3" ]; then - echo "GITHUB_API_URL=${GH_AW_ORIGINAL_GITHUB_API_URL:-}" >> "$GITHUB_ENV" -fi -if [ "${GITHUB_GRAPHQL_URL:-}" = "https://localhost:18443/api/graphql" ]; then - echo "GITHUB_GRAPHQL_URL=${GH_AW_ORIGINAL_GITHUB_GRAPHQL_URL:-}" >> "$GITHUB_ENV" -fi - -# Clear the Node.js CA certs override set by start_difc_proxy.sh. -echo "NODE_EXTRA_CA_CERTS=" >> "$GITHUB_ENV" - echo "DIFC proxy stopped" diff --git a/docs/adr/26322-step-scoped-env-injection-for-difc-proxy.md b/docs/adr/26322-step-scoped-env-injection-for-difc-proxy.md new file mode 100644 index 00000000000..c4504e136b5 --- /dev/null +++ b/docs/adr/26322-step-scoped-env-injection-for-difc-proxy.md @@ -0,0 +1,76 @@ +# ADR-26322: Step-Scoped Env Injection for DIFC Proxy Routing + +**Date**: 2026-04-15 +**Status**: Draft +**Deciders**: lpcox, Copilot + +--- + +## Part 1 — Narrative (Human-Friendly) + +### Context + +The DIFC proxy (`awmg-proxy`) intercepts pre-agent `gh` CLI calls through integrity filtering when guard policies are configured. To route `gh` CLI through the proxy, several env vars must be set on each step that uses the `gh` CLI: `GH_HOST`, `GH_REPO`, `GITHUB_API_URL`, `GITHUB_GRAPHQL_URL`, and `NODE_EXTRA_CA_CERTS`. Previously, `start_difc_proxy.sh` wrote these vars to `$GITHUB_ENV`, which applies them as job-level state for the entire remainder of the job. On GitHub Enterprise (GHE) runners, `configure_gh_for_ghe.sh` runs earlier in the job and sets `GH_HOST` to the enterprise hostname in `$GITHUB_ENV`; the proxy's subsequent write overwrote that value and left the job without a valid GHE host after the proxy stopped. This required fragile save/restore logic in the shell scripts and a dedicated `Set GH_REPO` workaround step, both of which had to be replicated for every new `gh` CLI variable added. 
+ +### Decision + +We will inject DIFC proxy routing env vars as **step-level `env:` blocks** on each custom step, rather than writing them to `$GITHUB_ENV`. A new compiler function, `injectProxyEnvIntoCustomSteps()`, uses YAML parse-and-reserialize (via `goccy/go-yaml`) to merge proxy vars into each step's existing `env:` block while preserving user-defined vars (e.g. `GH_TOKEN`). Step-level env overrides `$GITHUB_ENV` for that step only, so the GHE host and other job-level env values are left intact for non-proxied steps (including post-stop cleanup). The shell scripts `start_difc_proxy.sh` and `stop_difc_proxy.sh` are simplified to contain only container lifecycle logic; all env-routing responsibility moves to the compiler. + +### Alternatives Considered + +#### Alternative 1: Save/Restore `$GITHUB_ENV` in Shell Scripts + +Before starting the proxy, save the existing `GH_HOST` (and related vars) to `GH_AW_ORIGINAL_*` placeholders in `$GITHUB_ENV`; restore them in `stop_difc_proxy.sh`. This was the prior implementation. It was rejected because the restore logic was timing-sensitive and silently failed on some GHES runner configurations, leaving the job with a corrupted `GH_HOST` after the proxy stopped. Every new proxy variable required a matching save/restore pair, creating ongoing maintenance burden. + +#### Alternative 2: Standalone Env-Override Step + Cleanup Step + +Emit a compiler-generated step just before the custom steps block that writes proxy vars to `$GITHUB_ENV`, and a matching cleanup step after. This has the same root problem as Alternative 1: `$GITHUB_ENV` is global, so the cleanup must accurately restore all prior values or non-proxied steps break. It also introduces two extra visible steps per workflow invocation and does not compose cleanly with post-failure cleanup (the cleanup step runs only if prior steps succeed, unless wrapped with `if: always()`). 
+ +#### Alternative 3: Wrapper Script That Sets Env Per-Invocation + +Wrap each custom step's `run:` block in a shell preamble that sets and unsets the proxy vars for that one invocation. This avoids `$GITHUB_ENV` mutation but requires rewriting `run:` content, which breaks `uses:` steps (action steps do not have a `run:` field) and is incompatible with multi-line scripts that rely on shell inheritance. + +### Consequences + +#### Positive +- GHE host values set by `configure_gh_for_ghe.sh` are preserved for all non-proxied steps (post-stop steps, MCP gateway setup, secret redaction, artifact upload). +- Eliminates the standalone `Set GH_REPO` workaround step and all save/restore shell logic. +- The proxy's env scope is automatically limited to the steps that need it; no cleanup required. +- Adding new proxy vars in future requires only updating `proxyEnvVars()` in Go, not shell scripts. + +#### Negative +- The compiler must parse custom steps YAML at compile time using `goccy/go-yaml`; a parse failure silently falls back to unmodified steps (proxy env not injected), which could cause runtime failures that are hard to diagnose. +- Compiled workflow lock files become slightly larger because each custom step now carries explicit `env:` entries for five proxy vars. +- The `injectProxyEnvIntoCustomSteps()` abstraction must be maintained and kept consistent if the custom-steps YAML structure changes (e.g. if steps are wrapped in an outer document key other than `steps:`). + +#### Neutral +- `start_difc_proxy.sh` and `stop_difc_proxy.sh` are now purely container lifecycle scripts with no env-management responsibility; teams maintaining these scripts no longer need to understand the env-routing design. +- Four existing workflow lock files were recompiled to apply the new step-level `env:` blocks; the diff is mechanical and does not represent a behavioral change for github.com-hosted runners. 
+ +--- + +## Part 2 — Normative Specification (RFC 2119) + +> The key words **MUST**, **MUST NOT**, **REQUIRED**, **SHALL**, **SHALL NOT**, **SHOULD**, **SHOULD NOT**, **RECOMMENDED**, **MAY**, and **OPTIONAL** in this section are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119). + +### Proxy Env Injection + +1. When the DIFC proxy is active for a main-job workflow, implementations **MUST** inject proxy routing env vars (`GH_HOST`, `GH_REPO`, `GITHUB_API_URL`, `GITHUB_GRAPHQL_URL`, `NODE_EXTRA_CA_CERTS`) as step-level `env:` blocks on each custom step. +2. Implementations **MUST NOT** write proxy routing env vars to `$GITHUB_ENV` (the runner's persistent env file) for the purpose of routing custom steps through the proxy. +3. Implementations **MUST** preserve existing step-level env vars (e.g. `GH_TOKEN`) when injecting proxy vars; proxy vars **MUST NOT** overwrite user-defined env vars that share the same key. +4. If YAML parsing of the custom steps string fails, implementations **SHOULD** log the error and return the original custom steps string unchanged rather than aborting compilation. + +### Shell Script Responsibilities + +1. `start_difc_proxy.sh` **MUST** be responsible only for starting the proxy container, performing a health check, installing the CA certificate, and adding the `proxy` git remote. +2. `start_difc_proxy.sh` **MUST NOT** write any env vars to `$GITHUB_ENV`. +3. `stop_difc_proxy.sh` **MUST** be responsible only for stopping the proxy container and removing the CA certificate. +4. `stop_difc_proxy.sh` **MUST NOT** read from or write to `$GITHUB_ENV` for the purpose of restoring or clearing proxy routing vars. + +### Conformance + +An implementation is considered conformant with this ADR if it satisfies all **MUST** and **MUST NOT** requirements above. 
Specifically: proxy routing vars appear only as step-level `env:` blocks on custom steps (never in `$GITHUB_ENV` writes within the proxy scripts), and existing step env vars are preserved when proxy vars are merged. Failure to meet any **MUST** or **MUST NOT** requirement constitutes non-conformance. + +--- + +*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/24431428234) workflow. The PR author must review, complete, and finalize this document before the PR can merge.* diff --git a/pkg/workflow/compiler_difc_proxy.go b/pkg/workflow/compiler_difc_proxy.go index 1a1ad408d6f..80de1e55dda 100644 --- a/pkg/workflow/compiler_difc_proxy.go +++ b/pkg/workflow/compiler_difc_proxy.go @@ -26,8 +26,11 @@ package workflow // // Proxy lifecycle within the main job: // 1. Start proxy — after "Configure gh CLI" step, before custom steps -// 2. Custom steps run with GH_HOST=localhost:18443, GITHUB_API_URL, GITHUB_GRAPHQL_URL, -// and NODE_EXTRA_CA_CERTS set (via $GITHUB_ENV) +// 2. Custom steps run with step-level env blocks containing GH_HOST, GH_REPO, +// GITHUB_API_URL, GITHUB_GRAPHQL_URL, and NODE_EXTRA_CA_CERTS. These are +// injected by the compiler as step-level env (not via $GITHUB_ENV), so they +// take precedence over job-level env without mutating global state. GHE host +// values set by configure_gh_for_ghe.sh are preserved for non-proxied steps. // 3. Stop proxy — before MCP gateway starts (generateMCPSetup); always runs // even if earlier steps failed (if: always(), continue-on-error: true) // @@ -58,6 +61,7 @@ import ( "github.com/github/gh-aw/pkg/constants" "github.com/github/gh-aw/pkg/logger" + "github.com/goccy/go-yaml" ) var difcProxyLog = logger.New("workflow:difc_proxy") @@ -273,41 +277,74 @@ func (c *Compiler) generateStartDIFCProxyStep(yaml *strings.Builder, data *Workf } } -// buildSetGHRepoStepYAML returns the YAML for the "Set GH_REPO for proxied steps" step. 
+// proxyEnvVars returns the env vars to inject as step-level env on each custom step +// when the DIFC proxy is running. // -// start_difc_proxy.sh writes GH_HOST=localhost:18443 to GITHUB_ENV so that the gh CLI -// routes through the proxy. However, gh CLI infers the target repository from the git -// remote, which uses the original host (github.com / GHEC host). When GH_HOST is the -// proxy address, gh fails to resolve the repository because the host doesn't match. -// -// Rather than overwriting GH_HOST (which would bypass the DIFC proxy's integrity -// filtering), this step sets GH_REPO=$GITHUB_REPOSITORY. The gh CLI respects GH_REPO -// to determine the target repository without needing to match the git remote host. -// GH_HOST stays at localhost:18443 so all gh CLI traffic continues routing through -// the proxy for integrity filtering. -func buildSetGHRepoStepYAML() string { - var sb strings.Builder - sb.WriteString(" - name: Set GH_REPO for proxied steps\n") - sb.WriteString(" run: |\n") - sb.WriteString(" echo \"GH_REPO=${GITHUB_REPOSITORY}\" >> \"$GITHUB_ENV\"\n") - return sb.String() +// These override $GITHUB_ENV values (such as GH_HOST=myorg.ghe.com on GHE runners) +// without mutating global state. Steps that do not need the proxy (e.g., after +// stop_difc_proxy.sh) continue to see the original job-level env values. +func proxyEnvVars() map[string]string { + return map[string]string{ + "GH_HOST": "localhost:18443", + "GH_REPO": "${{ github.repository }}", + "GITHUB_API_URL": "https://localhost:18443/api/v3", + "GITHUB_GRAPHQL_URL": "https://localhost:18443/api/graphql", + "NODE_EXTRA_CA_CERTS": "/tmp/gh-aw/proxy-logs/proxy-tls/ca.crt", + } } -// generateSetGHRepoAfterDIFCProxyStep injects a step that sets GH_REPO=$GITHUB_REPOSITORY -// after start_difc_proxy.sh and before user-defined setup steps. +// injectProxyEnvIntoCustomSteps adds the DIFC proxy routing env vars to each step +// in the custom steps YAML string as step-level env. 
Step-level env takes precedence +// over $GITHUB_ENV values but does not mutate them, so GHE host values set by +// configure_gh_for_ghe.sh are preserved for steps that do not need the proxy. // -// The proxy sets GH_HOST=localhost:18443 in GITHUB_ENV, which causes gh CLI to fail -// resolving the repository from the git remote. Setting GH_REPO tells gh which repo -// to target without changing the proxy routing (GH_HOST stays at the proxy address). +// The proxy env vars injected are: +// - GH_HOST=localhost:18443 +// - GH_REPO=${{ github.repository }} +// - GITHUB_API_URL=https://localhost:18443/api/v3 +// - GITHUB_GRAPHQL_URL=https://localhost:18443/api/graphql +// - NODE_EXTRA_CA_CERTS=/tmp/gh-aw/proxy-logs/proxy-tls/ca.crt // -// The step is only emitted when hasDIFCProxyNeeded returns true. -func (c *Compiler) generateSetGHRepoAfterDIFCProxyStep(yaml *strings.Builder, data *WorkflowData) { - if !hasDIFCProxyNeeded(data) { - return +// If a step already has an env: block, the proxy vars are merged into it (existing +// vars like GH_TOKEN are preserved). If parsing or serialization fails, the original +// customSteps string is returned unchanged. 
+func injectProxyEnvIntoCustomSteps(customSteps string) string { + if customSteps == "" { + return customSteps + } + + var parsed struct { + Steps []map[string]any `yaml:"steps"` + } + if err := yaml.Unmarshal([]byte(customSteps), &parsed); err != nil || len(parsed.Steps) == 0 { + difcProxyLog.Printf("injectProxyEnvIntoCustomSteps: could not parse custom steps, returning as-is: %v", err) + return customSteps + } + + proxyEnv := proxyEnvVars() + for i, step := range parsed.Steps { + envMap, ok := step["env"].(map[string]any) + if !ok { + envMap = make(map[string]any) + } + for k, v := range proxyEnv { + envMap[k] = v + } + step["env"] = envMap + parsed.Steps[i] = step + } + + resultBytes, err := yaml.MarshalWithOptions( + map[string]any{"steps": parsed.Steps}, + yaml.Indent(2), + yaml.UseLiteralStyleIfMultiline(true), + ) + if err != nil { + difcProxyLog.Printf("injectProxyEnvIntoCustomSteps: failed to re-serialize, returning as-is: %v", err) + return customSteps } - difcProxyLog.Print("Generating Set GH_REPO step after DIFC proxy start") - yaml.WriteString(buildSetGHRepoStepYAML()) + return strings.TrimRight(string(resultBytes), "\n") } // generateStopDIFCProxyStep generates a step that stops the DIFC proxy container diff --git a/pkg/workflow/compiler_difc_proxy_test.go b/pkg/workflow/compiler_difc_proxy_test.go index 3e4f289c1ff..4e7e9a96305 100644 --- a/pkg/workflow/compiler_difc_proxy_test.go +++ b/pkg/workflow/compiler_difc_proxy_test.go @@ -441,25 +441,35 @@ Test that DIFC proxy is injected by default when min-integrity is set with custo assert.Contains(t, result, "Stop DIFC proxy", "compiled workflow should contain proxy stop step") - // Verify the "Set GH_REPO" step is present - assert.Contains(t, result, "Set GH_REPO for proxied steps", - "compiled workflow should contain Set GH_REPO step") + // Verify the standalone "Set GH_REPO" step is no longer emitted; + // GH_REPO is now injected as step-level env on each custom step. 
+ assert.NotContains(t, result, "Set GH_REPO for proxied steps", + "compiled workflow should NOT contain standalone Set GH_REPO step") + + // Verify proxy env vars are injected into the custom step as step-level env. + assert.Contains(t, result, "GH_HOST: localhost:18443", + "custom step should have GH_HOST in step-level env") + assert.Contains(t, result, "GH_REPO: ${{ github.repository }}", + "custom step should have GH_REPO in step-level env") + assert.Contains(t, result, "GITHUB_API_URL: https://localhost:18443/api/v3", + "custom step should have GITHUB_API_URL in step-level env") + assert.Contains(t, result, "GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql", + "custom step should have GITHUB_GRAPHQL_URL in step-level env") + assert.Contains(t, result, "NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt", + "custom step should have NODE_EXTRA_CA_CERTS in step-level env") // Verify step ordering: Start proxy must come before Stop proxy startIdx := strings.Index(result, "Start DIFC proxy for pre-agent gh calls") - setRepoIdx := strings.Index(result, "Set GH_REPO for proxied steps") stopIdx := strings.Index(result, "Stop DIFC proxy") require.Greater(t, startIdx, -1, "start proxy step should be in output") - require.Greater(t, setRepoIdx, -1, "set GH_REPO step should be in output") require.Greater(t, stopIdx, -1, "stop proxy step should be in output") - assert.Less(t, startIdx, setRepoIdx, "Start DIFC proxy must come before Set GH_REPO") assert.Less(t, startIdx, stopIdx, "Start DIFC proxy must come before Stop DIFC proxy") - // Verify "Set GH_REPO" step is before custom step ("Fetch repo data") + // Verify the custom step comes after proxy start and before proxy stop customStepIdx := strings.Index(result, "Fetch repo data") require.Greater(t, customStepIdx, -1, "custom step should be in output") assert.Less(t, startIdx, customStepIdx, "Start DIFC proxy must come before custom step") - assert.Less(t, setRepoIdx, customStepIdx, "Set GH_REPO must come 
before custom step") + assert.Less(t, customStepIdx, stopIdx, "custom step must come before Stop DIFC proxy") // Verify proxy stop is before MCP gateway start gatewayIdx := strings.Index(result, "Start MCP Gateway") @@ -635,64 +645,117 @@ func TestHasDIFCGuardsConfigured(t *testing.T) { } } -// TestBuildSetGHRepoStepYAML verifies the YAML generated for the "Set GH_REPO" step. -func TestBuildSetGHRepoStepYAML(t *testing.T) { - result := buildSetGHRepoStepYAML() - - assert.Contains(t, result, "Set GH_REPO for proxied steps", "step name should be present") - assert.Contains(t, result, "GH_REPO=${GITHUB_REPOSITORY}", "should set GH_REPO from GITHUB_REPOSITORY") - assert.Contains(t, result, "GITHUB_ENV", "should write GH_REPO to GITHUB_ENV") - assert.NotContains(t, result, "GH_HOST", "should not modify GH_HOST (proxy must keep routing)") +// TestProxyEnvVars verifies that all expected proxy routing env vars are returned. +func TestProxyEnvVars(t *testing.T) { + vars := proxyEnvVars() + + require.NotEmpty(t, vars, "proxyEnvVars should return a non-empty map") + assert.Equal(t, "localhost:18443", vars["GH_HOST"], "GH_HOST should be the proxy address") + assert.Equal(t, "${{ github.repository }}", vars["GH_REPO"], "GH_REPO should reference github.repository") + assert.Equal(t, "https://localhost:18443/api/v3", vars["GITHUB_API_URL"], "GITHUB_API_URL should point to proxy") + assert.Equal(t, "https://localhost:18443/api/graphql", vars["GITHUB_GRAPHQL_URL"], "GITHUB_GRAPHQL_URL should point to proxy") + assert.Equal(t, "/tmp/gh-aw/proxy-logs/proxy-tls/ca.crt", vars["NODE_EXTRA_CA_CERTS"], "NODE_EXTRA_CA_CERTS should be the proxy CA cert") + assert.Len(t, vars, 5, "proxyEnvVars should return exactly 5 vars") } -// TestGenerateSetGHRepoAfterDIFCProxyStep verifies that the step is emitted only when -// the DIFC proxy is needed (guard policies configured + pre-agent GH_TOKEN steps). 
-func TestGenerateSetGHRepoAfterDIFCProxyStep(t *testing.T) { - c := &Compiler{} - - t.Run("no step when guard policy not configured", func(t *testing.T) { - var yaml strings.Builder - data := &WorkflowData{ - Tools: map[string]any{ - "github": map[string]any{"toolsets": []string{"default"}}, +// TestInjectProxyEnvIntoCustomSteps verifies that proxy env vars are injected +// into each custom step as step-level env, preserving existing env vars. +func TestInjectProxyEnvIntoCustomSteps(t *testing.T) { + tests := []struct { + name string + customSteps string + expectedContains []string + expectedAbsent []string + desc string + }{ + { + name: "empty string returns empty", + customSteps: "", + desc: "empty input should return empty output", + }, + { + name: "step without env gets proxy env block added", + customSteps: "steps:\n- name: Step with no env\n run: echo hello\n", + expectedContains: []string{ + "GH_HOST: localhost:18443", + "GH_REPO: ${{ github.repository }}", + "GITHUB_API_URL: https://localhost:18443/api/v3", + "GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql", + "NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt", }, - CustomSteps: "steps:\n - name: Fetch\n env:\n GH_TOKEN: ${{ github.token }}\n run: gh issue list", - SandboxConfig: &SandboxConfig{}, - } - c.generateSetGHRepoAfterDIFCProxyStep(&yaml, data) - assert.Empty(t, yaml.String(), "should not generate step without guard policy") - }) - - t.Run("no step when no GH_TOKEN pre-agent steps", func(t *testing.T) { - var yaml strings.Builder - data := &WorkflowData{ - Tools: map[string]any{ - "github": map[string]any{"min-integrity": "approved"}, + desc: "step without env should get proxy env block added", + }, + { + name: "step with existing env preserves existing vars", + customSteps: "steps:\n- name: Step with env\n env:\n GH_TOKEN: ${{ github.token }}\n run: gh issue list\n", + expectedContains: []string{ + "GH_TOKEN: ${{ github.token }}", + "GH_HOST: localhost:18443", + "GH_REPO: ${{ 
github.repository }}", + "GITHUB_API_URL: https://localhost:18443/api/v3", + "GITHUB_GRAPHQL_URL: https://localhost:18443/api/graphql", + "NODE_EXTRA_CA_CERTS: /tmp/gh-aw/proxy-logs/proxy-tls/ca.crt", }, - SandboxConfig: &SandboxConfig{}, - } - c.generateSetGHRepoAfterDIFCProxyStep(&yaml, data) - assert.Empty(t, yaml.String(), "should not generate step without pre-agent GH_TOKEN steps") - }) - - t.Run("generates set GH_REPO step when guard policy and custom steps with GH_TOKEN", func(t *testing.T) { - var yaml strings.Builder - data := &WorkflowData{ - Tools: map[string]any{ - "github": map[string]any{"min-integrity": "approved"}, + desc: "existing env var GH_TOKEN should be preserved alongside proxy vars", + }, + { + name: "multiple steps each get proxy env", + customSteps: "steps:\n- name: Step 1\n run: echo one\n- name: Step 2\n env:\n MY_VAR: value\n run: echo two\n", + expectedContains: []string{ + "name: Step 1", + "name: Step 2", + "MY_VAR: value", + "GH_HOST: localhost:18443", + "GH_REPO: ${{ github.repository }}", }, - CustomSteps: "steps:\n - name: Fetch\n env:\n GH_TOKEN: ${{ github.token }}\n run: gh issue list", - SandboxConfig: &SandboxConfig{}, - } - c.generateSetGHRepoAfterDIFCProxyStep(&yaml, data) + desc: "all steps should have proxy env injected", + }, + { + name: "uses step gets proxy env", + customSteps: "steps:\n- name: Checkout\n uses: actions/checkout@v4\n with:\n token: ${{ github.token }}\n", + expectedContains: []string{ + "uses: actions/checkout@v4", + "GH_HOST: localhost:18443", + "GH_REPO: ${{ github.repository }}", + }, + desc: "uses: steps should also get proxy env injected", + }, + { + name: "multiline run is preserved", + customSteps: "steps:\n- name: Complex step\n env:\n GH_TOKEN: ${{ github.token }}\n run: |-\n cmd1\n cmd2\n cmd3\n", + expectedContains: []string{ + "cmd1", + "cmd2", + "cmd3", + "GH_TOKEN: ${{ github.token }}", + "GH_HOST: localhost:18443", + }, + desc: "multiline run content should be preserved after injection", 
+ }, + } - result := yaml.String() - require.NotEmpty(t, result, "should generate set GH_REPO step") - assert.Contains(t, result, "Set GH_REPO for proxied steps", "step name should be present") - assert.Contains(t, result, "GH_REPO=${GITHUB_REPOSITORY}", "should set GH_REPO from GITHUB_REPOSITORY") - assert.Contains(t, result, "GITHUB_ENV", "should write to GITHUB_ENV") - assert.NotContains(t, result, "GH_HOST", "should not touch GH_HOST") - }) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := injectProxyEnvIntoCustomSteps(tt.customSteps) + + if tt.customSteps == "" { + assert.Empty(t, result, "empty input should produce empty output: %s", tt.desc) + return + } + + require.NotEmpty(t, result, "result should not be empty: %s", tt.desc) + + for _, s := range tt.expectedContains { + assert.Contains(t, result, s, "result should contain %q: %s", s, tt.desc) + } + for _, s := range tt.expectedAbsent { + assert.NotContains(t, result, s, "result should NOT contain %q: %s", s, tt.desc) + } + + // Result should still start with "steps:" so addCustomStepsAsIs can process it + assert.True(t, strings.HasPrefix(result, "steps:"), "result should start with 'steps:': %s", tt.desc) + }) + } } // TestBuildStartCliProxyStepYAML verifies that the CLI proxy step always emits diff --git a/pkg/workflow/compiler_yaml_main_job.go b/pkg/workflow/compiler_yaml_main_job.go index 520cd199115..bf9acc9ead0 100644 --- a/pkg/workflow/compiler_yaml_main_job.go +++ b/pkg/workflow/compiler_yaml_main_job.go @@ -208,24 +208,24 @@ func (c *Compiler) generateMainJobSteps(yaml *strings.Builder, data *WorkflowDat // integrity filtering before the agent runs. Must start before custom steps. c.generateStartDIFCProxyStep(yaml, data) - // Set GH_REPO after the proxy starts so gh CLI can resolve the target repository. - // start_difc_proxy.sh writes GH_HOST=localhost:18443 to GITHUB_ENV, which causes gh - // CLI to fail resolving the repository from the git remote. 
Setting GH_REPO tells gh - // which repo to target while keeping GH_HOST pointed at the proxy for integrity - // filtering. Works on both github.com and GHEC. - c.generateSetGHRepoAfterDIFCProxyStep(yaml, data) - // Add custom steps if present if data.CustomSteps != "" { + // When the DIFC proxy is active, inject proxy routing env vars as step-level env + // on each custom step. Step-level env takes precedence over $GITHUB_ENV without + // mutating it, so GHE host values are preserved for non-proxied steps. + customStepsToEmit := data.CustomSteps + if hasDIFCProxyNeeded(data) { + customStepsToEmit = injectProxyEnvIntoCustomSteps(customStepsToEmit) + } if customStepsContainCheckout && len(runtimeSetupSteps) > 0 { // Custom steps contain checkout and we have runtime steps to insert // Insert runtime steps after the first checkout step compilerYamlLog.Printf("Calling addCustomStepsWithRuntimeInsertion: %d runtime steps to insert after checkout", len(runtimeSetupSteps)) - c.addCustomStepsWithRuntimeInsertion(yaml, data.CustomSteps, runtimeSetupSteps, data.ParsedTools) + c.addCustomStepsWithRuntimeInsertion(yaml, customStepsToEmit, runtimeSetupSteps, data.ParsedTools) } else { // No checkout in custom steps or no runtime steps, just add custom steps as-is compilerYamlLog.Printf("Calling addCustomStepsAsIs (customStepsContainCheckout=%t, runtimeStepsCount=%d)", customStepsContainCheckout, len(runtimeSetupSteps)) - c.addCustomStepsAsIs(yaml, data.CustomSteps) + c.addCustomStepsAsIs(yaml, customStepsToEmit) } }