Commit a13918e
Parent: 86a28a0

Remove \cross for an \ast

Files changed:
- config/constants.py (+1 -1)
- data_processing.py (+2 -2)
- handlers/leaderboard_handlers.py (+1 -1)
- static/html_content.py (+1 -1)
config/constants.py
@@ -18,7 +18,7 @@ DISCARDED_MODELS = {
 
 }
 
-TASKS = ["Spec-to-RTL", "Code Completion", "Line Completion †", "Module Completion"]
+TASKS = ["Spec-to-RTL", "Code Completion", "Line Completion *", "Module Completion"]
 S2R_BENCHMARKS = ["VerilogEval S2R", "RTLLM"]
 CC_BENCHMARKS = ["VerilogEval MC", "VeriGen"]
 LC_BENCHMARKS = ["RTL-Repo"]
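The footnote marker is part of the task's display string itself, so every exact-string comparison downstream must carry it too (hence the matching edits in data_processing.py and handlers/leaderboard_handlers.py below). A minimal toy sketch, not code from this repo, of why the bare label no longer matches:

TASKS = ["Spec-to-RTL", "Code Completion", "Line Completion *", "Module Completion"]
LC_BENCHMARKS = ["RTL-Repo"]

def benchmarks_for(task):
    # Exact string equality: the "*" is baked into the label, so a
    # comparison against plain "Line Completion" silently falls through.
    if task == "Line Completion *":
        return LC_BENCHMARKS
    return []

assert benchmarks_for("Line Completion *") == ["RTL-Repo"]
assert benchmarks_for("Line Completion") == []  # unmarked label no longer matches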
data_processing.py
@@ -55,7 +55,7 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params, st
         valid_benchmarks = CC_BENCHMARKS
         if benchmark == "All":
             subset = subset[subset["Benchmark"].isin(valid_benchmarks)]
-    elif task == "Line Completion †":
+    elif task == "Line Completion *":
         valid_benchmarks = LC_BENCHMARKS
         if benchmark == "All":
             subset = subset[subset["Benchmark"].isin(valid_benchmarks)]
@@ -89,7 +89,7 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params, st
         return filter_bench_all(subset, state.get_current_agg(), agg_column="Agg S2R", name=name)
     elif task == "Code Completion":
         return filter_bench_all(subset, state.get_current_agg(), agg_column="Agg MC", name=name)
-    elif task == "Line Completion †":
+    elif task == "Line Completion *":
         return filter_RTLRepo(subset, name=name)
     elif benchmark == "RTL-Repo":
         return filter_RTLRepo(subset, name=name)
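Both hunks above are branches of filter_leaderboard, which narrows a pandas DataFrame by benchmark before dispatching to a task-specific aggregator. A self-contained sketch of the same .isin() pattern (the "Benchmark" column and benchmark names come from the diff; the rows and scores are invented):

import pandas as pd

LC_BENCHMARKS = ["RTL-Repo"]
df = pd.DataFrame({
    "Benchmark": ["RTL-Repo", "VeriGen", "RTLLM"],
    "Score": [41.2, 55.0, 38.7],  # invented example values
})
subset = df[df["Benchmark"].isin(LC_BENCHMARKS)]
print(subset)  # keeps only the RTL-Repo row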
handlers/leaderboard_handlers.py
@@ -38,7 +38,7 @@ def create_leaderboard_handlers(
     elif task == "Code Completion":
         new_benchmarks = ["All"] + CC_BENCHMARKS
         new_simulators = SIMULATORS
-    elif task == "Line Completion †":
+    elif task == "Line Completion *":
         new_benchmarks = LC_BENCHMARKS
         new_simulators = SIMULATORS
     elif task == "Module Completion":
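This handler swaps the benchmark (and simulator) choices whenever the selected task changes. A hedged sketch of what such a callback could look like in Gradio, the usual framework for Hugging Face leaderboard Spaces; the gr.update wiring here is an assumption for illustration, not the repo's actual code:

import gradio as gr

CC_BENCHMARKS = ["VerilogEval MC", "VeriGen"]
LC_BENCHMARKS = ["RTL-Repo"]

def on_task_change(task):
    # Return an update for the benchmark dropdown based on the new task.
    if task == "Code Completion":
        return gr.update(choices=["All"] + CC_BENCHMARKS, value="All")
    elif task == "Line Completion *":
        # Single-benchmark task, so no "All" option is offered.
        return gr.update(choices=LC_BENCHMARKS, value=LC_BENCHMARKS[0])
    return gr.update()

with gr.Blocks() as demo:
    task_dd = gr.Dropdown(["Code Completion", "Line Completion *"], label="Task")
    bench_dd = gr.Dropdown(["All"], label="Benchmark")
    task_dd.change(on_task_change, inputs=task_dd, outputs=bench_dd)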
static/html_content.py
@@ -81,7 +81,7 @@ INTRO_HTML = """
 
 LC_FOOTNOTE_HTML = """
 <div id="lc-footnote" style="font-size: 13px; opacity: 0.6; margin-top: -5px; z-index:999; text-align: left;">
-    <span style="font-weight: 600; opacity: 1;">†</span>
+    <span style="font-weight: 600; opacity: 1;">*</span>
     <em>Line Completion</em> excludes "reasoning" models since this task targets quick auto-completion<br/>
     Additionally, for <em>Line Completion</em> and <em>Code Completion</em> benchmarks we use <b>Base</b> model variant (if available), and for <em>Spec-to-RTL</em> we use <b>Instruct</b> model variant
 </div>
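The "*" this footnote renders is the same marker added to the task label in config/constants.py, so the two stay visually linked in the UI. A minimal sketch of dropping such a snippet into a Gradio page (gr.HTML is a real component; the placement here is illustrative, not the Space's actual layout):

import gradio as gr

FOOTNOTE = '<div style="font-size: 13px; opacity: 0.6;"><span style="font-weight: 600;">*</span> <em>Line Completion</em> excludes "reasoning" models</div>'

with gr.Blocks() as demo:
    gr.HTML(FOOTNOTE)  # static footnote shown beneath the leaderboard table

demo.launch()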