88 " Coding" ,
99 " Agent"
1010 ],
11- "lastUpdated" : " 2026-01-12 " ,
11+ "lastUpdated" : " 2026-03-17 " ,
1212 "metrics" : {
1313 "unit" : " % Resolved" ,
1414 "isBetterHigher" : true
1919 },
2020 "snapshot" : [
2121 {
22- "modelRef" : " anthropic/claude-opus- 4-5-thinking " ,
23- "score" : 74.4
22+ "modelRef" : " anthropic/claude-4-5-opus-high-reasoning " ,
23+ "score" : 76.8
2424 },
2525 {
26- "modelRef" : " google/gemini-3-pro" ,
27- "score" : 74.2
28- },
29- {
30- "modelRef" : " openai/gpt-5-2" ,
31- "score" : 71.8
32- },
33- {
34- "modelRef" : " anthropic/claude-4-5-sonnet" ,
35- "score" : 70.6
36- },
37- {
38- "modelRef" : " openai/gpt-5-2" ,
39- "score" : 69.0
40- },
41- {
42- "modelRef" : " anthropic/claude-4-opus" ,
43- "score" : 67.6
44- },
45- {
46- "modelRef" : " openai/gpt-5-1-codex" ,
47- "score" : 66.0
48- },
49- {
50- "modelRef" : " openai/gpt-5-1" ,
51- "score" : 66.0
52- },
53- {
54- "modelRef" : " openai/gpt-5-medium" ,
55- "score" : 65.0
56- },
57- {
58- "modelRef" : " anthropic/claude-4-sonnet" ,
59- "score" : 64.93
60- },
61- {
62- "modelRef" : " kimi/kimi-k2-thinking" ,
63- "score" : 63.4
64- },
65- {
66- "modelRef" : " minimax/minimax-m2" ,
67- "score" : 61.0
26+ "modelRef" : " google/gemini-3-flash-high-reasoning" ,
27+ "score" : 75.8
6828 },
6929 {
70- "modelRef" : " deepseek/deepseek-v3-2 -reasoning" ,
71- "score" : 60.0
30+ "modelRef" : " minimax/minimax-m2-5-high -reasoning" ,
31+ "score" : 75.8
7232 },
7333 {
74- "modelRef" : " openai/gpt-5-mini-medium " ,
75- "score" : 59.8
34+ "modelRef" : " anthropic/claude-opus-4-6 " ,
35+ "score" : 75.6
7636 },
7737 {
78- "modelRef" : " openai/o3 " ,
79- "score" : 58.4
38+ "modelRef" : " openai/gpt-5-2-codex " ,
39+ "score" : 72.8
8040 },
8141 {
82- "modelRef" : " mistral/devstral-small " ,
83- "score" : 56.4
42+ "modelRef" : " z-ai/glm-5-high-reasoning " ,
43+ "score" : 72.8
8444 },
8545 {
86- "modelRef" : " alibaba/qwen3-coder-480b-a35b-instruct" ,
87- "score" : 55.4
88- },
89- {
90- "modelRef" : " zai/glm-4-6" ,
91- "score" : 55.4
92- },
93- {
94- "modelRef" : " zai/glm-4-5" ,
95- "score" : 54.2
96- },
97- {
98- "modelRef" : " mistral/devstral-2" ,
99- "score" : 53.8
100- },
101- {
102- "modelRef" : " google/gemini-2-5-pro" ,
103- "score" : 53.6
104- },
105- {
106- "modelRef" : " anthropic/claude-3-7-sonnet" ,
107- "score" : 52.8
108- },
109- {
110- "modelRef" : " openai/o4-mini" ,
111- "score" : 45.0
112- },
113- {
114- "modelRef" : " kimi/kimi-k2-0905" ,
115- "score" : 43.8
116- },
117- {
118- "modelRef" : " openai/gpt-4-1" ,
119- "score" : 39.58
120- },
121- {
122- "modelRef" : " openai/gpt-5-nano-medium" ,
123- "score" : 34.8
124- },
125- {
126- "modelRef" : " google/gemini-2-5-flash" ,
127- "score" : 28.73
46+ "modelRef" : " openai/gpt-5-2" ,
47+ "score" : 72.8
12848 },
12949 {
130- "modelRef" : " openai/gpt-oss-120b " ,
131- "score" : 26.0
50+ "modelRef" : " openai/gpt-5-2-codex " ,
51+ "score" : 72.8
13252 },
13353 {
134- "modelRef" : " openai/gpt -4-1-mini " ,
135- "score" : 23.94
54+ "modelRef" : " anthropic/claude -4-5-sonnet-high-reasoning " ,
55+ "score" : 71.4
13656 },
13757 {
138- "modelRef" : " openai/gpt-4o " ,
139- "score" : 21.62
58+ "modelRef" : " kimi/kimi-k2-5-high-reasoning " ,
59+ "score" : 70.8
14060 },
14161 {
142- "modelRef" : " meta/llama-4-maverick " ,
143- "score" : 21.04
62+ "modelRef" : " deepseek/deepseek-v3-2-high-reasoning " ,
63+ "score" : 70.0
14464 },
14565 {
146- "modelRef" : " google/gemini-2-0-flash " ,
147- "score" : 13.52
66+ "modelRef" : " google/gemini-3-pro " ,
67+ "score" : 69.6
14868 },
14969 {
150- "modelRef" : " meta/llama -4-scout " ,
151- "score" : 9.06
70+ "modelRef" : " anthropic/claude -4-5-haiku-high-reasoning " ,
71+ "score" : 66.6
15272 },
15373 {
154- "modelRef" : " alibaba/qwen2 -5-coder-32b-instruct " ,
155- "score" : 9.0
74+ "modelRef" : " openai/gpt -5-mini " ,
75+ "score" : 56.2
15676 }
15777 ]
15878}
0 commit comments