Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions .github/scripts/deps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ tests:
test_bits.py:
- peft

test_lora.py:
- peft

test_mmlupro.py:
- peft

Expand All @@ -62,9 +65,6 @@ tests:
test_evalplus.py:
- evalplus

test_hymba.py:
- causal_conv1d

test_nemotron_ultra.py:
- causal_conv1d

Expand All @@ -77,3 +77,19 @@ tests/models:

test_gemma:
- jieba

test_hymba.py:
- causal_conv1d
- mamba_ssm

test_internlm.py:
- transformers<=4.44.2

test_ovis_1_6_llama.py:
- transformers<=4.44.2

test_phi_4.py:
- peft

test_phi_3_moe.py:
- transformers<=4.44.2
31 changes: 29 additions & 2 deletions .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -627,15 +627,42 @@ jobs:
fi
# XPU test scripts: set CUDA_VISIBLE_DEVICES to an empty string, remove vllm
# from the environment, then print the installed package list.
case "${{ matrix.test_script }}" in
  *xpu*)
    export CUDA_VISIBLE_DEVICES=""
    # source /etc/profile.d/pyenv.sh && pyenv activate xpu
    uv pip uninstall vllm -y
    uv pip list
    ;;
esac


# Keep-alive loop for the GPU used by this test run.
#
# Every 60 seconds it requests /gpu/keepalive on $XEON5, passing the run id,
# the test script name, the runner name, a timestamp, the exclusive-gpu
# workflow input and the current CUDA_VISIBLE_DEVICES value. If the reply,
# with all whitespace removed, is exactly "-1", the matching pytest process
# is killed and the loop exits with status 1. Any other reply, including an
# empty one from a failed request, is treated as "still alive".
#
# Meant to be started in the background (`start_monitor &`); in that case
# `exit 1` ends only the background subshell, and the job itself fails
# because pytest was killed.
#
# Globals: XEON5, RUNNER_NAME, CUDA_VISIBLE_DEVICES (read)
start_monitor() {
  local resp
  echo "start to keep alive..."
  while true; do
    # Bounded timeouts: without them a stalled connection would block this
    # loop forever, so no further keep-alives would be sent and a "-1" reply
    # would never be seen. A failed or timed-out request yields an empty
    # response and is retried on the next iteration.
    resp=$(curl -fsSL --connect-timeout 10 --max-time 30 "http://$XEON5/gpu/keepalive?runid=${{ github.run_id }}&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&timestamp=$(date +%s)&exclusive=${{ github.event.inputs['exclusive-gpu'] }}&gpu=${CUDA_VISIBLE_DEVICES}" 2>/dev/null || true)

    # Compare with all whitespace stripped so a trailing newline in the
    # response body does not defeat the match.
    if [[ "${resp//[[:space:]]/}" == "-1" ]]; then
      echo "Server returned -1, terminating job..."
      # SIGKILL cannot be caught, so pytest gets no chance to clean up.
      pkill -9 -f "pytest.*${{ matrix.test_script }}" 2>/dev/null || true
      exit 1
    else
      echo "gpu is kept alive..."
    fi

    sleep 60
  done
}

# Run the keep-alive loop as a background job and capture its PID right away
# ($! refers to the most recently started background job) so it can be
# stopped later.
start_monitor &
MONITOR_PID=$!

# EXIT-trap handler: stops the background keep-alive monitor and reaps it.
#
# Globals: MONITOR_PID (read) - PID of the background monitor job.
# Errors from kill/wait are ignored on purpose: the monitor may already have
# exited on its own by the time the trap fires.
cleanup() {
  echo "trap cleanup EXIT..."
  # Nothing to stop if no PID was recorded. An empty operand must never reach
  # `wait`: with no operand it blocks until every background job finishes.
  [[ -n "${MONITOR_PID:-}" ]] || return 0
  kill "$MONITOR_PID" 2>/dev/null || true
  wait "$MONITOR_PID" 2>/dev/null || true
}
# Stop the keep-alive monitor on every exit path of this script.
trap cleanup EXIT

# Run the selected test file and time it. On failure, append ERROR=1 to the
# file named by $GITHUB_ENV and fail the step.
start_time=$(date +%s)
pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> "$GITHUB_ENV"; exit 1; }
execution_time=$(( $(date +%s) - start_time ))
echo "$((execution_time / 60))m $((execution_time % 60))s"

# Report the test duration (in seconds) to the VRAM logging endpoint.
# NOTE(review): the gpu parameter here comes from the workflow `env` context,
# which is filled in before the script runs, while the keep-alive request uses
# the shell variable CUDA_VISIBLE_DEVICES. Confirm the two are meant to match,
# e.g. for xpu scripts, where the shell variable is re-exported as empty.
curl "http://$XEON5/gpu/log_test_vram?runid=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&test=${{ matrix.test_script }}"

- name: Release GPU
Expand Down
Loading