ggcristian committed on
Commit
ea7f6d7
·
1 Parent(s): b4bb97f

Change name to NotSoTiny-25-12

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
app.py CHANGED
@@ -129,7 +129,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Default(primary_hue=colors.emeral
129
  # Main view
130
  with gr.Tabs() as tabs:
131
  # Leaderboard
132
- name_main = "Latest Leaderboard"
133
  (
134
  task_radio_main,
135
  benchmark_radio_main,
 
129
  # Main view
130
  with gr.Tabs() as tabs:
131
  # Leaderboard
132
+ name_main = "Leaderboard"
133
  (
134
  task_radio_main,
135
  benchmark_radio_main,
config/constants.py CHANGED
@@ -1,7 +1,7 @@
1
  RESULTS_DIR = "results"
2
  ICARUS_RESULTS = f"{RESULTS_DIR}/results_icarus.json"
3
  VERILATOR_RESULTS = f"{RESULTS_DIR}/results_verilator.json"
4
- YOSYS_RESULTS = f"{RESULTS_DIR}/results_NST.json"
5
  ICARUS_AGG = f"{RESULTS_DIR}/aggregated_scores_icarus.csv"
6
  VERILATOR_AGG = f"{RESULTS_DIR}/aggregated_scores_verilator.csv"
7
 
@@ -19,11 +19,11 @@ DISCARDED_MODELS = {
19
 
20
  }
21
 
22
- TASKS = ["Spec-to-RTL", "Code Completion", "Line Completion *", "Module Completion"]
23
  S2R_BENCHMARKS = ["VerilogEval S2R", "RTLLM"]
24
  CC_BENCHMARKS = ["VerilogEval MC", "VeriGen"]
25
  LC_BENCHMARKS = ["RTL-Repo"]
26
- MC_BENCHMARKS = ["NotSoTiny"]
27
 
28
  MODEL_TYPES = ["All", "General 🟢", "Coding 🔵", "RTL-Specific 🔴"]
29
  TYPE_EMOJI = {"RTL-Specific": "🔴", "General": "🟢", "Coding": "🔵"}
 
1
  RESULTS_DIR = "results"
2
  ICARUS_RESULTS = f"{RESULTS_DIR}/results_icarus.json"
3
  VERILATOR_RESULTS = f"{RESULTS_DIR}/results_verilator.json"
4
+ YOSYS_RESULTS = f"{RESULTS_DIR}/results_NotSoTiny-25-12.json"
5
  ICARUS_AGG = f"{RESULTS_DIR}/aggregated_scores_icarus.csv"
6
  VERILATOR_AGG = f"{RESULTS_DIR}/aggregated_scores_verilator.csv"
7
 
 
19
 
20
  }
21
 
22
+ TASKS = ["Spec-to-RTL", "Code Completion", "Line Completion", "Module Completion"]
23
  S2R_BENCHMARKS = ["VerilogEval S2R", "RTLLM"]
24
  CC_BENCHMARKS = ["VerilogEval MC", "VeriGen"]
25
  LC_BENCHMARKS = ["RTL-Repo"]
26
+ MC_BENCHMARKS = ["NotSoTiny-25-12"]
27
 
28
  MODEL_TYPES = ["All", "General 🟢", "Coding 🔵", "RTL-Specific 🔴"]
29
  TYPE_EMOJI = {"RTL-Specific": "🔴", "General": "🟢", "Coding": "🔵"}
data_processing.py CHANGED
@@ -109,7 +109,7 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params, st
109
  return filter_RTLRepo(subset, name=name)
110
  elif benchmark == "RTL-Repo":
111
  return filter_RTLRepo(subset, name=name)
112
- elif benchmark == "NotSoTiny":
113
  return filter_NotSoTiny(subset, name=name)
114
  else:
115
  agg_column = None
 
109
  return filter_RTLRepo(subset, name=name)
110
  elif benchmark == "RTL-Repo":
111
  return filter_RTLRepo(subset, name=name)
112
+ elif benchmark == "NotSoTiny-25-12":
113
  return filter_NotSoTiny(subset, name=name)
114
  else:
115
  agg_column = None
logo_bak.png ADDED

Git LFS Details

  • SHA256: f35b346cfe8c29b4c34d1fc73558e3e34c294da19e28b99aceb7407efa6945e5
  • Pointer size: 130 Bytes
  • Size of remote file: 33.9 kB
results/parse_nst.py CHANGED
@@ -81,7 +81,7 @@ def parse_nst_results(csv_path: str) -> list[dict]:
81
  record = {
82
  "Model": model,
83
  "Model Type": type,
84
- "Benchmark": "NotSoTiny",
85
  "Task": tt_column,
86
  "Result": score,
87
  "Model URL": url,
@@ -105,7 +105,7 @@ def write_json(data: list, path: str):
105
  if __name__ == "__main__":
106
  if len(sys.argv) < 2:
107
  print("Usage: python -m results.parse_nst <path_to_NST.csv>")
108
- print("Example: python -m results.parse_nst results/results_NST.csv")
109
  sys.exit(1)
110
 
111
  csv_path = sys.argv[1]
 
81
  record = {
82
  "Model": model,
83
  "Model Type": type,
84
+ "Benchmark": "NotSoTiny-25-12",
85
  "Task": tt_column,
86
  "Result": score,
87
  "Model URL": url,
 
105
  if __name__ == "__main__":
106
  if len(sys.argv) < 2:
107
  print("Usage: python -m results.parse_nst <path_to_NST.csv>")
108
+ print("Example: python -m results.parse_nst results/results_NST-YY-MM.csv")
109
  sys.exit(1)
110
 
111
  csv_path = sys.argv[1]
results/results_NotSoTiny-25-12.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,TT06,TT07,TT08,TT09,TT10 IHP 02,TT10 IHP 25a,TTSky25a
2
+ Kimi-K2-Instruct-0905,12.96,15.59,20.92,21.84,23.33,19.57,18.82
3
+ DeepSeek-R1-0528 ,14.63,20.56,19.69,23.84,24.67,20.65,21.07
4
+ Qwen3-Coder-480B-A35B-Instruct ,17.96,28.36,22.04,28.32,20.00,28.15,19.17
5
+ gpt-oss-120b,17.41,19.21,19.80,31.84,23.33,18.15,16.57
6
+ Qwen2.5-72B-Instruct,11.67,12.77,5.61,22.40,22.67,15.11,12.66
7
+ Qwen2.5-Coder-32B-Instruct,7.78,10.17,5.92,17.52,13.33,15.43,12.78
8
+ Qwen2.5-14B-Instruct-1M,9.81,15.71,15.51,15.92,11.33,12.07,7.10
9
+ Qwen2.5-14B-Instruct,5.56,12.88,4.69,15.04,8.00,9.02,7.57
10
+ Qwen2.5-7B-Instruct,0.74,6.21,2.86,3.84,7.33,1.74,1.66
11
+ HaVen-CodeQwen,2.78,3.39,1.73,7.20,10.67,0.43,3.20
12
+ OriGen,4.07,2.71,1.02,5.84,7.33,2.17,0.00
13
+
results/results_NotSoTiny-25-12.json ADDED
@@ -0,0 +1,849 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "Model": "Kimi K2 Instruct 0905",
4
+ "Model Type": "General",
5
+ "Benchmark": "NotSoTiny-25-12",
6
+ "Task": "TT06",
7
+ "Result": 12.96,
8
+ "Model URL": "https://huggingface.co/moonshotai/Kimi-K2-Instruct-0905",
9
+ "Params": 1000,
10
+ "Release": "V4",
11
+ "Thinking": "Dense"
12
+ },
13
+ {
14
+ "Model": "Kimi K2 Instruct 0905",
15
+ "Model Type": "General",
16
+ "Benchmark": "NotSoTiny-25-12",
17
+ "Task": "TT07",
18
+ "Result": 15.59,
19
+ "Model URL": "https://huggingface.co/moonshotai/Kimi-K2-Instruct-0905",
20
+ "Params": 1000,
21
+ "Release": "V4",
22
+ "Thinking": "Dense"
23
+ },
24
+ {
25
+ "Model": "Kimi K2 Instruct 0905",
26
+ "Model Type": "General",
27
+ "Benchmark": "NotSoTiny-25-12",
28
+ "Task": "TT08",
29
+ "Result": 20.92,
30
+ "Model URL": "https://huggingface.co/moonshotai/Kimi-K2-Instruct-0905",
31
+ "Params": 1000,
32
+ "Release": "V4",
33
+ "Thinking": "Dense"
34
+ },
35
+ {
36
+ "Model": "Kimi K2 Instruct 0905",
37
+ "Model Type": "General",
38
+ "Benchmark": "NotSoTiny-25-12",
39
+ "Task": "TT09",
40
+ "Result": 21.84,
41
+ "Model URL": "https://huggingface.co/moonshotai/Kimi-K2-Instruct-0905",
42
+ "Params": 1000,
43
+ "Release": "V4",
44
+ "Thinking": "Dense"
45
+ },
46
+ {
47
+ "Model": "Kimi K2 Instruct 0905",
48
+ "Model Type": "General",
49
+ "Benchmark": "NotSoTiny-25-12",
50
+ "Task": "TT10 IHP 02",
51
+ "Result": 23.33,
52
+ "Model URL": "https://huggingface.co/moonshotai/Kimi-K2-Instruct-0905",
53
+ "Params": 1000,
54
+ "Release": "V4",
55
+ "Thinking": "Dense"
56
+ },
57
+ {
58
+ "Model": "Kimi K2 Instruct 0905",
59
+ "Model Type": "General",
60
+ "Benchmark": "NotSoTiny-25-12",
61
+ "Task": "TT10 IHP 25a",
62
+ "Result": 19.57,
63
+ "Model URL": "https://huggingface.co/moonshotai/Kimi-K2-Instruct-0905",
64
+ "Params": 1000,
65
+ "Release": "V4",
66
+ "Thinking": "Dense"
67
+ },
68
+ {
69
+ "Model": "Kimi K2 Instruct 0905",
70
+ "Model Type": "General",
71
+ "Benchmark": "NotSoTiny-25-12",
72
+ "Task": "TTSky25a",
73
+ "Result": 18.82,
74
+ "Model URL": "https://huggingface.co/moonshotai/Kimi-K2-Instruct-0905",
75
+ "Params": 1000,
76
+ "Release": "V4",
77
+ "Thinking": "Dense"
78
+ },
79
+ {
80
+ "Model": "DeepSeek R1-0528",
81
+ "Model Type": "General",
82
+ "Benchmark": "NotSoTiny-25-12",
83
+ "Task": "TT06",
84
+ "Result": 14.63,
85
+ "Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
86
+ "Params": 685,
87
+ "Release": "V2",
88
+ "Thinking": "Reasoning"
89
+ },
90
+ {
91
+ "Model": "DeepSeek R1-0528",
92
+ "Model Type": "General",
93
+ "Benchmark": "NotSoTiny-25-12",
94
+ "Task": "TT07",
95
+ "Result": 20.56,
96
+ "Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
97
+ "Params": 685,
98
+ "Release": "V2",
99
+ "Thinking": "Reasoning"
100
+ },
101
+ {
102
+ "Model": "DeepSeek R1-0528",
103
+ "Model Type": "General",
104
+ "Benchmark": "NotSoTiny-25-12",
105
+ "Task": "TT08",
106
+ "Result": 19.69,
107
+ "Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
108
+ "Params": 685,
109
+ "Release": "V2",
110
+ "Thinking": "Reasoning"
111
+ },
112
+ {
113
+ "Model": "DeepSeek R1-0528",
114
+ "Model Type": "General",
115
+ "Benchmark": "NotSoTiny-25-12",
116
+ "Task": "TT09",
117
+ "Result": 23.84,
118
+ "Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
119
+ "Params": 685,
120
+ "Release": "V2",
121
+ "Thinking": "Reasoning"
122
+ },
123
+ {
124
+ "Model": "DeepSeek R1-0528",
125
+ "Model Type": "General",
126
+ "Benchmark": "NotSoTiny-25-12",
127
+ "Task": "TT10 IHP 02",
128
+ "Result": 24.67,
129
+ "Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
130
+ "Params": 685,
131
+ "Release": "V2",
132
+ "Thinking": "Reasoning"
133
+ },
134
+ {
135
+ "Model": "DeepSeek R1-0528",
136
+ "Model Type": "General",
137
+ "Benchmark": "NotSoTiny-25-12",
138
+ "Task": "TT10 IHP 25a",
139
+ "Result": 20.65,
140
+ "Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
141
+ "Params": 685,
142
+ "Release": "V2",
143
+ "Thinking": "Reasoning"
144
+ },
145
+ {
146
+ "Model": "DeepSeek R1-0528",
147
+ "Model Type": "General",
148
+ "Benchmark": "NotSoTiny-25-12",
149
+ "Task": "TTSky25a",
150
+ "Result": 21.07,
151
+ "Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
152
+ "Params": 685,
153
+ "Release": "V2",
154
+ "Thinking": "Reasoning"
155
+ },
156
+ {
157
+ "Model": "Qwen3 Coder 480B A35B",
158
+ "Model Type": "Coding",
159
+ "Benchmark": "NotSoTiny-25-12",
160
+ "Task": "TT06",
161
+ "Result": 17.96,
162
+ "Model URL": "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
163
+ "Params": 480,
164
+ "Release": "V2",
165
+ "Thinking": "Dense"
166
+ },
167
+ {
168
+ "Model": "Qwen3 Coder 480B A35B",
169
+ "Model Type": "Coding",
170
+ "Benchmark": "NotSoTiny-25-12",
171
+ "Task": "TT07",
172
+ "Result": 28.36,
173
+ "Model URL": "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
174
+ "Params": 480,
175
+ "Release": "V2",
176
+ "Thinking": "Dense"
177
+ },
178
+ {
179
+ "Model": "Qwen3 Coder 480B A35B",
180
+ "Model Type": "Coding",
181
+ "Benchmark": "NotSoTiny-25-12",
182
+ "Task": "TT08",
183
+ "Result": 22.04,
184
+ "Model URL": "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
185
+ "Params": 480,
186
+ "Release": "V2",
187
+ "Thinking": "Dense"
188
+ },
189
+ {
190
+ "Model": "Qwen3 Coder 480B A35B",
191
+ "Model Type": "Coding",
192
+ "Benchmark": "NotSoTiny-25-12",
193
+ "Task": "TT09",
194
+ "Result": 28.32,
195
+ "Model URL": "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
196
+ "Params": 480,
197
+ "Release": "V2",
198
+ "Thinking": "Dense"
199
+ },
200
+ {
201
+ "Model": "Qwen3 Coder 480B A35B",
202
+ "Model Type": "Coding",
203
+ "Benchmark": "NotSoTiny-25-12",
204
+ "Task": "TT10 IHP 02",
205
+ "Result": 20.0,
206
+ "Model URL": "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
207
+ "Params": 480,
208
+ "Release": "V2",
209
+ "Thinking": "Dense"
210
+ },
211
+ {
212
+ "Model": "Qwen3 Coder 480B A35B",
213
+ "Model Type": "Coding",
214
+ "Benchmark": "NotSoTiny-25-12",
215
+ "Task": "TT10 IHP 25a",
216
+ "Result": 28.15,
217
+ "Model URL": "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
218
+ "Params": 480,
219
+ "Release": "V2",
220
+ "Thinking": "Dense"
221
+ },
222
+ {
223
+ "Model": "Qwen3 Coder 480B A35B",
224
+ "Model Type": "Coding",
225
+ "Benchmark": "NotSoTiny-25-12",
226
+ "Task": "TTSky25a",
227
+ "Result": 19.17,
228
+ "Model URL": "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
229
+ "Params": 480,
230
+ "Release": "V2",
231
+ "Thinking": "Dense"
232
+ },
233
+ {
234
+ "Model": "gpt-oss-120b",
235
+ "Model Type": "General",
236
+ "Benchmark": "NotSoTiny-25-12",
237
+ "Task": "TT06",
238
+ "Result": 17.41,
239
+ "Model URL": "https://huggingface.co/openai/gpt-oss-120b",
240
+ "Params": 120,
241
+ "Release": "V3",
242
+ "Thinking": "Reasoning"
243
+ },
244
+ {
245
+ "Model": "gpt-oss-120b",
246
+ "Model Type": "General",
247
+ "Benchmark": "NotSoTiny-25-12",
248
+ "Task": "TT07",
249
+ "Result": 19.21,
250
+ "Model URL": "https://huggingface.co/openai/gpt-oss-120b",
251
+ "Params": 120,
252
+ "Release": "V3",
253
+ "Thinking": "Reasoning"
254
+ },
255
+ {
256
+ "Model": "gpt-oss-120b",
257
+ "Model Type": "General",
258
+ "Benchmark": "NotSoTiny-25-12",
259
+ "Task": "TT08",
260
+ "Result": 19.8,
261
+ "Model URL": "https://huggingface.co/openai/gpt-oss-120b",
262
+ "Params": 120,
263
+ "Release": "V3",
264
+ "Thinking": "Reasoning"
265
+ },
266
+ {
267
+ "Model": "gpt-oss-120b",
268
+ "Model Type": "General",
269
+ "Benchmark": "NotSoTiny-25-12",
270
+ "Task": "TT09",
271
+ "Result": 31.84,
272
+ "Model URL": "https://huggingface.co/openai/gpt-oss-120b",
273
+ "Params": 120,
274
+ "Release": "V3",
275
+ "Thinking": "Reasoning"
276
+ },
277
+ {
278
+ "Model": "gpt-oss-120b",
279
+ "Model Type": "General",
280
+ "Benchmark": "NotSoTiny-25-12",
281
+ "Task": "TT10 IHP 02",
282
+ "Result": 23.33,
283
+ "Model URL": "https://huggingface.co/openai/gpt-oss-120b",
284
+ "Params": 120,
285
+ "Release": "V3",
286
+ "Thinking": "Reasoning"
287
+ },
288
+ {
289
+ "Model": "gpt-oss-120b",
290
+ "Model Type": "General",
291
+ "Benchmark": "NotSoTiny-25-12",
292
+ "Task": "TT10 IHP 25a",
293
+ "Result": 18.15,
294
+ "Model URL": "https://huggingface.co/openai/gpt-oss-120b",
295
+ "Params": 120,
296
+ "Release": "V3",
297
+ "Thinking": "Reasoning"
298
+ },
299
+ {
300
+ "Model": "gpt-oss-120b",
301
+ "Model Type": "General",
302
+ "Benchmark": "NotSoTiny-25-12",
303
+ "Task": "TTSky25a",
304
+ "Result": 16.57,
305
+ "Model URL": "https://huggingface.co/openai/gpt-oss-120b",
306
+ "Params": 120,
307
+ "Release": "V3",
308
+ "Thinking": "Reasoning"
309
+ },
310
+ {
311
+ "Model": "Qwen2.5 72B",
312
+ "Model Type": "General",
313
+ "Benchmark": "NotSoTiny-25-12",
314
+ "Task": "TT06",
315
+ "Result": 11.67,
316
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
317
+ "Params": 72.7,
318
+ "Release": "V1",
319
+ "Thinking": "Dense"
320
+ },
321
+ {
322
+ "Model": "Qwen2.5 72B",
323
+ "Model Type": "General",
324
+ "Benchmark": "NotSoTiny-25-12",
325
+ "Task": "TT07",
326
+ "Result": 12.77,
327
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
328
+ "Params": 72.7,
329
+ "Release": "V1",
330
+ "Thinking": "Dense"
331
+ },
332
+ {
333
+ "Model": "Qwen2.5 72B",
334
+ "Model Type": "General",
335
+ "Benchmark": "NotSoTiny-25-12",
336
+ "Task": "TT08",
337
+ "Result": 5.61,
338
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
339
+ "Params": 72.7,
340
+ "Release": "V1",
341
+ "Thinking": "Dense"
342
+ },
343
+ {
344
+ "Model": "Qwen2.5 72B",
345
+ "Model Type": "General",
346
+ "Benchmark": "NotSoTiny-25-12",
347
+ "Task": "TT09",
348
+ "Result": 22.4,
349
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
350
+ "Params": 72.7,
351
+ "Release": "V1",
352
+ "Thinking": "Dense"
353
+ },
354
+ {
355
+ "Model": "Qwen2.5 72B",
356
+ "Model Type": "General",
357
+ "Benchmark": "NotSoTiny-25-12",
358
+ "Task": "TT10 IHP 02",
359
+ "Result": 22.67,
360
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
361
+ "Params": 72.7,
362
+ "Release": "V1",
363
+ "Thinking": "Dense"
364
+ },
365
+ {
366
+ "Model": "Qwen2.5 72B",
367
+ "Model Type": "General",
368
+ "Benchmark": "NotSoTiny-25-12",
369
+ "Task": "TT10 IHP 25a",
370
+ "Result": 15.11,
371
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
372
+ "Params": 72.7,
373
+ "Release": "V1",
374
+ "Thinking": "Dense"
375
+ },
376
+ {
377
+ "Model": "Qwen2.5 72B",
378
+ "Model Type": "General",
379
+ "Benchmark": "NotSoTiny-25-12",
380
+ "Task": "TTSky25a",
381
+ "Result": 12.66,
382
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
383
+ "Params": 72.7,
384
+ "Release": "V1",
385
+ "Thinking": "Dense"
386
+ },
387
+ {
388
+ "Model": "QwenCoder 2.5 32B",
389
+ "Model Type": "Coding",
390
+ "Benchmark": "NotSoTiny-25-12",
391
+ "Task": "TT06",
392
+ "Result": 7.78,
393
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
394
+ "Params": 32.5,
395
+ "Release": "V1",
396
+ "Thinking": "Dense"
397
+ },
398
+ {
399
+ "Model": "QwenCoder 2.5 32B",
400
+ "Model Type": "Coding",
401
+ "Benchmark": "NotSoTiny-25-12",
402
+ "Task": "TT07",
403
+ "Result": 10.17,
404
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
405
+ "Params": 32.5,
406
+ "Release": "V1",
407
+ "Thinking": "Dense"
408
+ },
409
+ {
410
+ "Model": "QwenCoder 2.5 32B",
411
+ "Model Type": "Coding",
412
+ "Benchmark": "NotSoTiny-25-12",
413
+ "Task": "TT08",
414
+ "Result": 5.92,
415
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
416
+ "Params": 32.5,
417
+ "Release": "V1",
418
+ "Thinking": "Dense"
419
+ },
420
+ {
421
+ "Model": "QwenCoder 2.5 32B",
422
+ "Model Type": "Coding",
423
+ "Benchmark": "NotSoTiny-25-12",
424
+ "Task": "TT09",
425
+ "Result": 17.52,
426
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
427
+ "Params": 32.5,
428
+ "Release": "V1",
429
+ "Thinking": "Dense"
430
+ },
431
+ {
432
+ "Model": "QwenCoder 2.5 32B",
433
+ "Model Type": "Coding",
434
+ "Benchmark": "NotSoTiny-25-12",
435
+ "Task": "TT10 IHP 02",
436
+ "Result": 13.33,
437
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
438
+ "Params": 32.5,
439
+ "Release": "V1",
440
+ "Thinking": "Dense"
441
+ },
442
+ {
443
+ "Model": "QwenCoder 2.5 32B",
444
+ "Model Type": "Coding",
445
+ "Benchmark": "NotSoTiny-25-12",
446
+ "Task": "TT10 IHP 25a",
447
+ "Result": 15.43,
448
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
449
+ "Params": 32.5,
450
+ "Release": "V1",
451
+ "Thinking": "Dense"
452
+ },
453
+ {
454
+ "Model": "QwenCoder 2.5 32B",
455
+ "Model Type": "Coding",
456
+ "Benchmark": "NotSoTiny-25-12",
457
+ "Task": "TTSky25a",
458
+ "Result": 12.78,
459
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
460
+ "Params": 32.5,
461
+ "Release": "V1",
462
+ "Thinking": "Dense"
463
+ },
464
+ {
465
+ "Model": "Qwen2.5 14B 1M",
466
+ "Model Type": "General",
467
+ "Benchmark": "NotSoTiny-25-12",
468
+ "Task": "TT06",
469
+ "Result": 9.81,
470
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M",
471
+ "Params": 14.8,
472
+ "Release": "V4",
473
+ "Thinking": "Dense"
474
+ },
475
+ {
476
+ "Model": "Qwen2.5 14B 1M",
477
+ "Model Type": "General",
478
+ "Benchmark": "NotSoTiny-25-12",
479
+ "Task": "TT07",
480
+ "Result": 15.71,
481
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M",
482
+ "Params": 14.8,
483
+ "Release": "V4",
484
+ "Thinking": "Dense"
485
+ },
486
+ {
487
+ "Model": "Qwen2.5 14B 1M",
488
+ "Model Type": "General",
489
+ "Benchmark": "NotSoTiny-25-12",
490
+ "Task": "TT08",
491
+ "Result": 15.51,
492
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M",
493
+ "Params": 14.8,
494
+ "Release": "V4",
495
+ "Thinking": "Dense"
496
+ },
497
+ {
498
+ "Model": "Qwen2.5 14B 1M",
499
+ "Model Type": "General",
500
+ "Benchmark": "NotSoTiny-25-12",
501
+ "Task": "TT09",
502
+ "Result": 15.92,
503
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M",
504
+ "Params": 14.8,
505
+ "Release": "V4",
506
+ "Thinking": "Dense"
507
+ },
508
+ {
509
+ "Model": "Qwen2.5 14B 1M",
510
+ "Model Type": "General",
511
+ "Benchmark": "NotSoTiny-25-12",
512
+ "Task": "TT10 IHP 02",
513
+ "Result": 11.33,
514
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M",
515
+ "Params": 14.8,
516
+ "Release": "V4",
517
+ "Thinking": "Dense"
518
+ },
519
+ {
520
+ "Model": "Qwen2.5 14B 1M",
521
+ "Model Type": "General",
522
+ "Benchmark": "NotSoTiny-25-12",
523
+ "Task": "TT10 IHP 25a",
524
+ "Result": 12.07,
525
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M",
526
+ "Params": 14.8,
527
+ "Release": "V4",
528
+ "Thinking": "Dense"
529
+ },
530
+ {
531
+ "Model": "Qwen2.5 14B 1M",
532
+ "Model Type": "General",
533
+ "Benchmark": "NotSoTiny-25-12",
534
+ "Task": "TTSky25a",
535
+ "Result": 7.1,
536
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M",
537
+ "Params": 14.8,
538
+ "Release": "V4",
539
+ "Thinking": "Dense"
540
+ },
541
+ {
542
+ "Model": "Qwen2.5 14B",
543
+ "Model Type": "General",
544
+ "Benchmark": "NotSoTiny-25-12",
545
+ "Task": "TT06",
546
+ "Result": 5.56,
547
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct",
548
+ "Params": 14.8,
549
+ "Release": "V4",
550
+ "Thinking": "Dense"
551
+ },
552
+ {
553
+ "Model": "Qwen2.5 14B",
554
+ "Model Type": "General",
555
+ "Benchmark": "NotSoTiny-25-12",
556
+ "Task": "TT07",
557
+ "Result": 12.88,
558
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct",
559
+ "Params": 14.8,
560
+ "Release": "V4",
561
+ "Thinking": "Dense"
562
+ },
563
+ {
564
+ "Model": "Qwen2.5 14B",
565
+ "Model Type": "General",
566
+ "Benchmark": "NotSoTiny-25-12",
567
+ "Task": "TT08",
568
+ "Result": 4.69,
569
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct",
570
+ "Params": 14.8,
571
+ "Release": "V4",
572
+ "Thinking": "Dense"
573
+ },
574
+ {
575
+ "Model": "Qwen2.5 14B",
576
+ "Model Type": "General",
577
+ "Benchmark": "NotSoTiny-25-12",
578
+ "Task": "TT09",
579
+ "Result": 15.04,
580
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct",
581
+ "Params": 14.8,
582
+ "Release": "V4",
583
+ "Thinking": "Dense"
584
+ },
585
+ {
586
+ "Model": "Qwen2.5 14B",
587
+ "Model Type": "General",
588
+ "Benchmark": "NotSoTiny-25-12",
589
+ "Task": "TT10 IHP 02",
590
+ "Result": 8.0,
591
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct",
592
+ "Params": 14.8,
593
+ "Release": "V4",
594
+ "Thinking": "Dense"
595
+ },
596
+ {
597
+ "Model": "Qwen2.5 14B",
598
+ "Model Type": "General",
599
+ "Benchmark": "NotSoTiny-25-12",
600
+ "Task": "TT10 IHP 25a",
601
+ "Result": 9.02,
602
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct",
603
+ "Params": 14.8,
604
+ "Release": "V4",
605
+ "Thinking": "Dense"
606
+ },
607
+ {
608
+ "Model": "Qwen2.5 14B",
609
+ "Model Type": "General",
610
+ "Benchmark": "NotSoTiny-25-12",
611
+ "Task": "TTSky25a",
612
+ "Result": 7.57,
613
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-14B-Instruct",
614
+ "Params": 14.8,
615
+ "Release": "V4",
616
+ "Thinking": "Dense"
617
+ },
618
+ {
619
+ "Model": "Qwen2.5 7B",
620
+ "Model Type": "General",
621
+ "Benchmark": "NotSoTiny-25-12",
622
+ "Task": "TT06",
623
+ "Result": 0.74,
624
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
625
+ "Params": 7.61,
626
+ "Release": "V4",
627
+ "Thinking": "Dense"
628
+ },
629
+ {
630
+ "Model": "Qwen2.5 7B",
631
+ "Model Type": "General",
632
+ "Benchmark": "NotSoTiny-25-12",
633
+ "Task": "TT07",
634
+ "Result": 6.21,
635
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
636
+ "Params": 7.61,
637
+ "Release": "V4",
638
+ "Thinking": "Dense"
639
+ },
640
+ {
641
+ "Model": "Qwen2.5 7B",
642
+ "Model Type": "General",
643
+ "Benchmark": "NotSoTiny-25-12",
644
+ "Task": "TT08",
645
+ "Result": 2.86,
646
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
647
+ "Params": 7.61,
648
+ "Release": "V4",
649
+ "Thinking": "Dense"
650
+ },
651
+ {
652
+ "Model": "Qwen2.5 7B",
653
+ "Model Type": "General",
654
+ "Benchmark": "NotSoTiny-25-12",
655
+ "Task": "TT09",
656
+ "Result": 3.84,
657
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
658
+ "Params": 7.61,
659
+ "Release": "V4",
660
+ "Thinking": "Dense"
661
+ },
662
+ {
663
+ "Model": "Qwen2.5 7B",
664
+ "Model Type": "General",
665
+ "Benchmark": "NotSoTiny-25-12",
666
+ "Task": "TT10 IHP 02",
667
+ "Result": 7.33,
668
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
669
+ "Params": 7.61,
670
+ "Release": "V4",
671
+ "Thinking": "Dense"
672
+ },
673
+ {
674
+ "Model": "Qwen2.5 7B",
675
+ "Model Type": "General",
676
+ "Benchmark": "NotSoTiny-25-12",
677
+ "Task": "TT10 IHP 25a",
678
+ "Result": 1.74,
679
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
680
+ "Params": 7.61,
681
+ "Release": "V4",
682
+ "Thinking": "Dense"
683
+ },
684
+ {
685
+ "Model": "Qwen2.5 7B",
686
+ "Model Type": "General",
687
+ "Benchmark": "NotSoTiny-25-12",
688
+ "Task": "TTSky25a",
689
+ "Result": 1.66,
690
+ "Model URL": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
691
+ "Params": 7.61,
692
+ "Release": "V4",
693
+ "Thinking": "Dense"
694
+ },
695
+ {
696
+ "Model": "HaVen-CodeQwen",
697
+ "Model Type": "RTL-Specific",
698
+ "Benchmark": "NotSoTiny-25-12",
699
+ "Task": "TT06",
700
+ "Result": 2.78,
701
+ "Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
702
+ "Params": 7.25,
703
+ "Release": "V1",
704
+ "Thinking": "Dense"
705
+ },
706
+ {
707
+ "Model": "HaVen-CodeQwen",
708
+ "Model Type": "RTL-Specific",
709
+ "Benchmark": "NotSoTiny-25-12",
710
+ "Task": "TT07",
711
+ "Result": 3.39,
712
+ "Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
713
+ "Params": 7.25,
714
+ "Release": "V1",
715
+ "Thinking": "Dense"
716
+ },
717
+ {
718
+ "Model": "HaVen-CodeQwen",
719
+ "Model Type": "RTL-Specific",
720
+ "Benchmark": "NotSoTiny-25-12",
721
+ "Task": "TT08",
722
+ "Result": 1.73,
723
+ "Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
724
+ "Params": 7.25,
725
+ "Release": "V1",
726
+ "Thinking": "Dense"
727
+ },
728
+ {
729
+ "Model": "HaVen-CodeQwen",
730
+ "Model Type": "RTL-Specific",
731
+ "Benchmark": "NotSoTiny-25-12",
732
+ "Task": "TT09",
733
+ "Result": 7.2,
734
+ "Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
735
+ "Params": 7.25,
736
+ "Release": "V1",
737
+ "Thinking": "Dense"
738
+ },
739
+ {
740
+ "Model": "HaVen-CodeQwen",
741
+ "Model Type": "RTL-Specific",
742
+ "Benchmark": "NotSoTiny-25-12",
743
+ "Task": "TT10 IHP 02",
744
+ "Result": 10.67,
745
+ "Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
746
+ "Params": 7.25,
747
+ "Release": "V1",
748
+ "Thinking": "Dense"
749
+ },
750
+ {
751
+ "Model": "HaVen-CodeQwen",
752
+ "Model Type": "RTL-Specific",
753
+ "Benchmark": "NotSoTiny-25-12",
754
+ "Task": "TT10 IHP 25a",
755
+ "Result": 0.43,
756
+ "Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
757
+ "Params": 7.25,
758
+ "Release": "V1",
759
+ "Thinking": "Dense"
760
+ },
761
+ {
762
+ "Model": "HaVen-CodeQwen",
763
+ "Model Type": "RTL-Specific",
764
+ "Benchmark": "NotSoTiny-25-12",
765
+ "Task": "TTSky25a",
766
+ "Result": 3.2,
767
+ "Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
768
+ "Params": 7.25,
769
+ "Release": "V1",
770
+ "Thinking": "Dense"
771
+ },
772
+ {
773
+ "Model": "OriGen",
774
+ "Model Type": "RTL-Specific",
775
+ "Benchmark": "NotSoTiny-25-12",
776
+ "Task": "TT06",
777
+ "Result": 4.07,
778
+ "Model URL": "https://huggingface.co/henryen/OriGen",
779
+ "Params": 6.74,
780
+ "Release": "V1",
781
+ "Thinking": "Dense"
782
+ },
783
+ {
784
+ "Model": "OriGen",
785
+ "Model Type": "RTL-Specific",
786
+ "Benchmark": "NotSoTiny-25-12",
787
+ "Task": "TT07",
788
+ "Result": 2.71,
789
+ "Model URL": "https://huggingface.co/henryen/OriGen",
790
+ "Params": 6.74,
791
+ "Release": "V1",
792
+ "Thinking": "Dense"
793
+ },
794
+ {
795
+ "Model": "OriGen",
796
+ "Model Type": "RTL-Specific",
797
+ "Benchmark": "NotSoTiny-25-12",
798
+ "Task": "TT08",
799
+ "Result": 1.02,
800
+ "Model URL": "https://huggingface.co/henryen/OriGen",
801
+ "Params": 6.74,
802
+ "Release": "V1",
803
+ "Thinking": "Dense"
804
+ },
805
+ {
806
+ "Model": "OriGen",
807
+ "Model Type": "RTL-Specific",
808
+ "Benchmark": "NotSoTiny-25-12",
809
+ "Task": "TT09",
810
+ "Result": 5.84,
811
+ "Model URL": "https://huggingface.co/henryen/OriGen",
812
+ "Params": 6.74,
813
+ "Release": "V1",
814
+ "Thinking": "Dense"
815
+ },
816
+ {
817
+ "Model": "OriGen",
818
+ "Model Type": "RTL-Specific",
819
+ "Benchmark": "NotSoTiny-25-12",
820
+ "Task": "TT10 IHP 02",
821
+ "Result": 7.33,
822
+ "Model URL": "https://huggingface.co/henryen/OriGen",
823
+ "Params": 6.74,
824
+ "Release": "V1",
825
+ "Thinking": "Dense"
826
+ },
827
+ {
828
+ "Model": "OriGen",
829
+ "Model Type": "RTL-Specific",
830
+ "Benchmark": "NotSoTiny-25-12",
831
+ "Task": "TT10 IHP 25a",
832
+ "Result": 2.17,
833
+ "Model URL": "https://huggingface.co/henryen/OriGen",
834
+ "Params": 6.74,
835
+ "Release": "V1",
836
+ "Thinking": "Dense"
837
+ },
838
+ {
839
+ "Model": "OriGen",
840
+ "Model Type": "RTL-Specific",
841
+ "Benchmark": "NotSoTiny-25-12",
842
+ "Task": "TTSky25a",
843
+ "Result": 0.0,
844
+ "Model URL": "https://huggingface.co/henryen/OriGen",
845
+ "Params": 6.74,
846
+ "Release": "V1",
847
+ "Thinking": "Dense"
848
+ }
849
+ ]
static/html_content.py CHANGED
@@ -86,7 +86,6 @@ LC_FOOTNOTE_HTML = """
86
  <div id="lc-footnote" style="font-size: 13px; opacity: 0.6; margin-top: -5px; z-index:999; text-align: left;">
87
  <span style="font-weight: 600; opacity: 1;">*</span>
88
  <em>Line Completion</em> excludes "reasoning" models since this task targets quick auto-completion<br/>
89
- Additionally, for <em>Line Completion</em> and <em>Code Completion</em> benchmarks we use <b>Base</b> model variant (if available), and for <em>Spec-to-RTL</em> we use <b>Instruct</b> model variant
90
  </div>
91
  """
92
 
 
86
  <div id="lc-footnote" style="font-size: 13px; opacity: 0.6; margin-top: -5px; z-index:999; text-align: left;">
87
  <span style="font-weight: 600; opacity: 1;">*</span>
88
  <em>Line Completion</em> excludes "reasoning" models since this task targets quick auto-completion<br/>
 
89
  </div>
90
  """
91
 
turtle_dark.svg ADDED
turtle_light.svg ADDED
uv.lock ADDED
The diff for this file is too large to render. See raw diff