Skip to content

Commit b7155eb

Browse files
author
cloudforge1
committed
fix: rename benchmark for CI discovery, bump to 10k iterations, env-gate large/bench tests
- Renamed benchmark_ngram_kernel.py → test_benchmark_ngram_kernel.py so pytest discovers it (test_*.py pattern)
- Bumped NUM_ITERS 10→10000, WARMUP 2→5 for noise-free profiling
- Gated benchmark class with RUN_NGRAM_BENCHMARKS=1 (won't bloat CI)
- Gated test_large_batch_long_seq with RUN_LARGE_NGRAM_TESTS=1 (OOM risk)
- Gated test_latency with RUN_NGRAM_BENCHMARKS=1 (no assertions)
1 parent 65f609b commit b7155eb

2 files changed

Lines changed: 23 additions & 3 deletions

File tree

tests/spec_decode/benchmark_ngram_kernel.py renamed to tests/spec_decode/test_benchmark_ngram_kernel.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,8 @@
4040

4141
MAX_NGRAM_SIZE = 3
4242
MAX_DRAFT_TOKENS = 10
43-
NUM_ITERS = 10
44-
WARMUP = 2
43+
NUM_ITERS = 10000
44+
WARMUP = 5
4545

4646

4747
def _build_data(batch_size, seq_len, hit_type="low_input", seed=42):
@@ -206,6 +206,10 @@ def _print_table(title, header, rows):
206206
print(f"{'=' * 80}")
207207

208208

209+
@unittest.skipUnless(
210+
os.environ.get("RUN_NGRAM_BENCHMARKS", "0") == "1",
211+
"Set RUN_NGRAM_BENCHMARKS=1 to run multi-group profiling (slow)",
212+
)
209213
class TestNgramBenchmarkGroups(unittest.TestCase):
210214
"""Multi-dimension benchmark matching NKNaN's 5-group methodology."""
211215

tests/spec_decode/test_ngram_gpu_kernel.py

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -418,7 +418,12 @@ def test_large_batch_long_seq(self):
418418
419419
Uses high threshold to ensure all batches exercise the parallel search
420420
path (default threshold=128 would skip all batches at bsz=256).
421+
422+
Gated by RUN_LARGE_NGRAM_TESTS=1 to avoid OOM / timeout on
423+
memory-constrained CI nodes.
421424
"""
425+
if not os.environ.get("RUN_LARGE_NGRAM_TESTS", ""):
426+
self.skipTest("Large-scale test skipped. Set RUN_LARGE_NGRAM_TESTS=1 to enable.")
422427
high_threshold = 100000
423428
data = _make_ngram_test_data(batch_size=256, input_len=131072, max_model_len=131072 + 64, seed=77)
424429
cpu_draft = data["draft_tokens"].copy()
@@ -558,7 +563,13 @@ def test_many_short_seqs(self):
558563
np.testing.assert_array_equal(gpu_data["draft_tokens"].numpy(), cpu_draft)
559564

560565
def test_latency(self):
561-
"""Benchmark: GPU kernel latency vs CPU transfer overhead."""
566+
"""Benchmark: GPU kernel latency vs CPU transfer overhead.
567+
568+
Pure benchmark with no assertions — skipped in CI by default.
569+
Set RUN_NGRAM_BENCHMARKS=1 to enable.
570+
"""
571+
if not os.environ.get("RUN_NGRAM_BENCHMARKS", ""):
572+
self.skipTest("Benchmark skipped. Set RUN_NGRAM_BENCHMARKS=1 to enable.")
562573
# Pre-create tensors on GPU (data creation excluded from timing)
563574
gpu_data = _to_gpu(_make_ngram_test_data(batch_size=32, input_len=512, seed=42))
564575
cpu_data = _make_ngram_test_data(batch_size=32, input_len=512, seed=42)
@@ -732,7 +743,12 @@ def test_large_batch_long_seq(self):
732743
733744
Uses high threshold to ensure all batches exercise the parallel search
734745
path (default threshold=1024 would skip many batches at bsz=256).
746+
747+
Gated by RUN_LARGE_NGRAM_TESTS=1 to avoid OOM / timeout on
748+
memory-constrained CI nodes.
735749
"""
750+
if not os.environ.get("RUN_LARGE_NGRAM_TESTS", ""):
751+
self.skipTest("Large-scale test skipped. Set RUN_LARGE_NGRAM_TESTS=1 to enable.")
736752
high_threshold = 100000
737753
data = _make_mixed_test_data(batch_size=256, input_len=131072, pre_ids_len=131072 + 64, seed=77)
738754
cpu_draft = data["draft_tokens"].copy()

0 commit comments

Comments
 (0)