File tree Expand file tree Collapse file tree 1 file changed +2
-2
lines changed
Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Original file line number Diff line number Diff line change 3131# quant.h as a single translation unit — no sync issues.
3232# Phi-3.5: ~1.15 tok/s (CPU NEON), ~6.5 tok/s reported in PR #79.
3333# Q8_0 is 2x faster than Q4_K_M on NEON (simpler dequant, 3.0 vs 1.5 tok/s).
34- DEFAULT_MODEL = REPO / "models" / "Qwen3.5-4B-Q4_K_M.gguf"
34+ DEFAULT_MODEL = REPO / "models" / "Phi-3.5-mini-instruct-Q8_0.gguf"
3535DEFAULT_SERVER_BINARY = REPO / "build_metal" / "quant-server-unified"
3636DEFAULT_SERVER_HOST = "127.0.0.1"
3737DEFAULT_SERVER_PORT = 8421 # arbitrary, avoid conflicts with 8080
4444CLIFF_BUDGET = {
4545 "models/Llama-3.2-3B-Instruct-Q8_0.gguf": 1024,
4646 "models/Llama-3.2-1B-Instruct-Q8_0.gguf": 512,
47- "models/Qwen3.5-4B-Q4_K_M.gguf": 1024,
47+ "models/Phi-3.5-mini-instruct-Q8_0.gguf": 1024,
4848 "models/Phi-3.5-mini-instruct-Q4_K_M.gguf" : 1024 ,
4949}
5050
You can’t perform that action at this time.
0 commit comments