|
7 | 7 | "tags": [ |
8 | 8 | "Coding" |
9 | 9 | ], |
10 | | - "lastUpdated": "2026-02-12", |
| 10 | + "lastUpdated": "2026-02-21", |
11 | 11 | "metrics": { |
12 | 12 | "unit": "Index", |
13 | 13 | "isBetterHigher": true |
|
19 | 19 | "initialWeight": 1000 |
20 | 20 | }, |
21 | 21 | "snapshot": [ |
| 22 | + { |
| 23 | + "modelRef": "google/gemini-3-1-pro-preview", |
| 24 | + "score": 55.5 |
| 25 | + }, |
| 26 | + { |
| 27 | + "modelRef": "anthropic/claude-sonnet-4-6-adaptive", |
| 28 | + "score": 50.9 |
| 29 | + }, |
22 | 30 | { |
23 | 31 | "modelRef": "openai/gpt-5-2", |
24 | 32 | "score": 48.7 |
|
43 | 51 | "modelRef": "google/gemini-3-pro", |
44 | 52 | "score": 46.5 |
45 | 53 | }, |
| 54 | + { |
| 55 | + "modelRef": "anthropic/claude-sonnet-4-6", |
| 56 | + "score": 46.4 |
| 57 | + }, |
46 | 58 | { |
47 | 59 | "modelRef": "openai/gpt-5-1", |
48 | 60 | "score": 44.7 |
|
59 | 71 | "modelRef": "openai/gpt-5-2-codex", |
60 | 72 | "score": 43.0 |
61 | 73 | }, |
| 74 | + { |
| 75 | + "modelRef": "anthropic/claude-sonnet-4-6-non-reasoning-low-effort", |
| 76 | + "score": 43.0 |
| 77 | + }, |
62 | 78 | { |
63 | 79 | "modelRef": "anthropic/claude-opus-4-5", |
64 | 80 | "score": 42.9 |
|
67 | 83 | "modelRef": "google/gemini-3-flash-reasoning", |
68 | 84 | "score": 42.6 |
69 | 85 | }, |
| 86 | + { |
| 87 | + "modelRef": "alibaba/qwen3-5-397b-a17b", |
| 88 | + "score": 41.3 |
| 89 | + }, |
70 | 90 | { |
71 | 91 | "modelRef": "xai/grok-4", |
72 | 92 | "score": 40.5 |
|
79 | 99 | "modelRef": "google/gemini-3-pro-low", |
80 | 100 | "score": 39.4 |
81 | 101 | }, |
| 102 | + { |
| 103 | + "modelRef": "zai/glm-5-non-reasoning", |
| 104 | + "score": 39.0 |
| 105 | + }, |
82 | 106 | { |
83 | 107 | "modelRef": "openai/gpt-5-medium", |
84 | 108 | "score": 39.0 |
|
103 | 127 | "modelRef": "google/gemini-3-flash", |
104 | 128 | "score": 37.8 |
105 | 129 | }, |
| 130 | + { |
| 131 | + "modelRef": "minimax/minimax-m2-5", |
| 132 | + "score": 37.4 |
| 133 | + }, |
| 134 | + { |
| 135 | + "modelRef": "alibaba/qwen3-5-397b-a17b-non-reasoning", |
| 136 | + "score": 37.4 |
| 137 | + }, |
106 | 138 | { |
107 | 139 | "modelRef": "deepseek/deepseek-v3-2-reasoning", |
108 | 140 | "score": 36.7 |
|
1003 | 1035 | "modelRef": "nvidia/nvidia-nemotron-nano-9b-v2", |
1004 | 1036 | "score": 7.5 |
1005 | 1037 | }, |
| 1038 | + { |
| 1039 | + "modelRef": "trillionlabs/tri-21b-think-preview", |
| 1040 | + "score": 7.4 |
| 1041 | + }, |
1006 | 1042 | { |
1007 | 1043 | "modelRef": "google/gemini-2-5-flash-lite", |
1008 | 1044 | "score": 7.4 |
|
1027 | 1063 | "modelRef": "meta/llama-4-scout", |
1028 | 1064 | "score": 6.7 |
1029 | 1065 | }, |
| 1066 | + { |
| 1067 | + "modelRef": "anthropic/claude-3-haiku", |
| 1068 | + "score": 6.7 |
| 1069 | + }, |
1030 | 1070 | { |
1031 | 1071 | "modelRef": "alibaba/qwen3-vl-4b-reasoning", |
1032 | 1072 | "score": 6.7 |
1033 | 1073 | }, |
| 1074 | + { |
| 1075 | + "modelRef": "trillionlabs/tri-21b-think-v0-5", |
| 1076 | + "score": 6.3 |
| 1077 | + }, |
1034 | 1078 | { |
1035 | 1079 | "modelRef": "google/gemma-3-12b", |
1036 | 1080 | "score": 6.3 |
|
1167 | 1211 | "modelRef": "alibaba/qwen3-0-6b-instruct", |
1168 | 1212 | "score": 1.4 |
1169 | 1213 | }, |
| 1214 | + { |
| 1215 | + "modelRef": "cohere/tiny-aya-global", |
| 1216 | + "score": 1.2 |
| 1217 | + }, |
1170 | 1218 | { |
1171 | 1219 | "modelRef": "ai2/olmo-2-7b", |
1172 | 1220 | "score": 1.2 |
|
0 commit comments