davanstrien (HF Staff) committed
Commit 8f9e935 Β· verified Β· 1 parent: 786e808

Upload app.py with huggingface_hub
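For context, a commit message like this usually comes from the huggingface_hub upload API. A minimal sketch of that call is below; the `repo_id` and `repo_type` are illustrative assumptions, not values read from this commit page.

```python
# Hedged sketch: pushing a local app.py to the Hub with huggingface_hub.
# repo_id and repo_type are assumptions for illustration only.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` or HF_TOKEN
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id="davanstrien/some-space",   # hypothetical target repo
    repo_type="space",                  # marimo apps are usually hosted as Spaces
    commit_message="Upload app.py with huggingface_hub",
)
```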

Files changed (1): app.py (+60 βˆ’105)
app.py CHANGED
@@ -30,6 +30,12 @@ def _(mo):
         |----------|-------------|
         | **VLM** | Send the cover image directly to a Vision-Language Model |
         | **Text** | Extract text from image first (OCR), then send to an LLM |
+
+        ---
+
+        ## Evaluation Results
+
+        Select a task below to see how different models performed:
         """
     )
     return
@@ -46,7 +52,6 @@ def _():
 @app.cell
 def _(evals_df, mo):
     # Load evaluation results with persistent caching
-    # First run downloads ~180MB, subsequent runs load from disk cache
     with mo.persistent_cache(name="doab_evals"):
         df_raw = evals_df("hf://datasets/davanstrien/doab-title-extraction-evals", quiet=True)
 
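This hunk only drops an explanatory comment, but the caching pattern it touches is worth seeing in isolation: everything assigned inside a `mo.persistent_cache` block is written to disk, so the expensive load runs once and later runs restore it. The sketch below is a hedged stand-in; `load_eval_results` is a placeholder for the app's `evals_df` call, not the real helper.

```python
import marimo as mo
import pandas as pd

# Hedged sketch of the caching pattern in this hunk: variables assigned inside the
# mo.persistent_cache block are cached to disk under the given name.
def load_eval_results() -> pd.DataFrame:
    # placeholder for the expensive download/parse of the evaluation logs
    return pd.DataFrame({"score_headline_value": [0.92, 0.71], "approach": ["VLM", "Text"]})

with mo.persistent_cache(name="doab_evals"):
    df_raw = load_eval_results()
```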
 
@@ -66,7 +71,7 @@ def _(evals_df, mo):
     # Convert score to percentage
     df_raw["accuracy"] = df_raw["score_headline_value"] * 100
 
-    # Parameter sizes and URLs (manual mapping)
+    # Parameter sizes and URLs
     model_info = {
         "hf-inference-providers/Qwen/Qwen3-VL-8B-Instruct": {
             "params": 8,
@@ -101,33 +106,22 @@ def _(evals_df, mo):
 
 
 @app.cell
-def _(df_raw, mo):
-    # Task selector
-    task_selector = mo.ui.dropdown(
-        options=["Title Extraction", "Full Metadata"],
-        value="Title Extraction",
-        label="Select task",
-    )
-    return (task_selector,)
-
-
-@app.cell
-def _(df_raw, mo, task_selector):
-    # Filter by selected task
-    df = df_raw[df_raw["task_category"] == task_selector.value].copy()
+def _(alt, df_raw, mo):
+    def make_task_content(task_name):
+        """Generate the complete results view for a task."""
+        df = df_raw[df_raw["task_category"] == task_name].copy()
 
-    # Calculate summary stats
-    vlm_avg = df[df["approach"] == "VLM"]["accuracy"].mean()
-    text_avg = df[df["approach"] == "Text"]["accuracy"].mean()
-    diff = vlm_avg - text_avg
+        # Calculate summary stats
+        vlm_avg = df[df["approach"] == "VLM"]["accuracy"].mean()
+        text_avg = df[df["approach"] == "Text"]["accuracy"].mean()
+        diff = vlm_avg - text_avg
 
-    task_desc = "book titles" if task_selector.value == "Title Extraction" else "full metadata (title, subtitle, publisher, year, ISBN)"
+        task_desc = "book titles" if task_name == "Title Extraction" else "full metadata (title, subtitle, publisher, year, ISBN)"
 
-    mo.vstack([
-        task_selector,
-        mo.md(
+        # Results summary
+        results_md = mo.md(
             f"""
-            ## Results: {task_selector.value}
+            ### Summary
 
             | Approach | Average Accuracy |
             |----------|-----------------|
@@ -136,98 +130,60 @@ def _(df_raw, mo, task_selector):
 
             **VLM advantage: +{diff:.0f} percentage points**
 
-            VLMs {'significantly ' if diff > 15 else ''}outperform text extraction for extracting {task_desc} from book covers.
+            VLMs {'significantly ' if diff > 15 else ''}outperform text extraction for extracting {task_desc}.
             """
         )
-    ])
-    return df, diff, task_desc, text_avg, vlm_avg
-
-
-@app.cell
-def _(mo):
-    mo.md("## Model Size vs Accuracy")
-    return
-
 
-@app.cell
-def _(alt, df, mo):
-    # Interactive scatter plot: model size vs accuracy
-    chart = alt.Chart(df).mark_circle(size=200, opacity=0.8).encode(
-        x=alt.X("param_size_b:Q", title="Parameters (Billions)", scale=alt.Scale(zero=False)),
-        y=alt.Y("accuracy:Q", title="Accuracy (%)", scale=alt.Scale(domain=[50, 105])),
-        color=alt.Color("approach:N", title="Approach", scale=alt.Scale(domain=["VLM", "Text"], range=["#1f77b4", "#ff7f0e"])),
-        tooltip=[
-            alt.Tooltip("model_short:N", title="Model"),
-            alt.Tooltip("approach:N", title="Approach"),
-            alt.Tooltip("param_size_b:Q", title="Params (B)"),
-            alt.Tooltip("accuracy:Q", title="Accuracy", format=".1f"),
-        ],
-    ).properties(
-        width=550,
-        height=350,
-    ).configure_axis(
-        labelFontSize=12,
-        titleFontSize=14,
-    )
-
-    mo.vstack([
-        mo.as_html(chart),
-        mo.md("*Hover over points to see model details*"),
-    ])
-    return (chart,)
-
-
-@app.cell
-def _(mo):
-    mo.md("## Model Leaderboard")
-    return
+        # Scatter plot
+        chart = alt.Chart(df).mark_circle(size=200, opacity=0.8).encode(
+            x=alt.X("param_size_b:Q", title="Parameters (Billions)", scale=alt.Scale(zero=False)),
+            y=alt.Y("accuracy:Q", title="Accuracy (%)", scale=alt.Scale(domain=[50, 105])),
+            color=alt.Color("approach:N", title="Approach", scale=alt.Scale(domain=["VLM", "Text"], range=["#1f77b4", "#ff7f0e"])),
+            tooltip=[
+                alt.Tooltip("model_short:N", title="Model"),
+                alt.Tooltip("approach:N", title="Approach"),
+                alt.Tooltip("param_size_b:Q", title="Params (B)"),
+                alt.Tooltip("accuracy:Q", title="Accuracy", format=".1f"),
+            ],
+        ).properties(
+            width=500,
+            height=300,
+            title="Model Size vs Accuracy"
+        ).configure_axis(
+            labelFontSize=12,
+            titleFontSize=14,
+        )
 
+        # Leaderboard
+        leaderboard_md = "### Model Leaderboard\n\n| Model | Approach | Params (B) | Accuracy (%) |\n|-------|----------|------------|-------------|\n"
+        for _, row in df.sort_values("accuracy", ascending=False).iterrows():
+            model_link = f"[{row['model_short']}]({row['model_url']})" if row['model_url'] else row['model_short']
+            leaderboard_md += f"| {model_link} | {row['approach']} | {row['param_size_b']} | {row['accuracy']:.1f} |\n"
 
-@app.cell
-def _(df, mo):
-    # Filter selector for approach
-    approach_filter = mo.ui.dropdown(
-        options=["All", "VLM", "Text"],
-        value="All",
-        label="Filter by approach",
-    )
-    return (approach_filter,)
+        return mo.vstack([
+            results_md,
+            mo.md("### Model Size vs Accuracy"),
+            mo.as_html(chart),
+            mo.md("*Hover over points to see model details*"),
+            mo.md(leaderboard_md),
+        ])
 
+    # Create tabs
+    tabs = mo.ui.tabs({
+        "πŸ“„ Title Extraction": make_task_content("Title Extraction"),
+        "πŸ“š Full Metadata": make_task_content("Full Metadata"),
+    })
 
-@app.cell
-def _(approach_filter, df, mo):
-    # Filter data based on selection
-    if approach_filter.value == "All":
-        filtered_df = df
-    else:
-        filtered_df = df[df["approach"] == approach_filter.value]
-
-    # Create leaderboard with clickable model links
-    leaderboard_data = []
-    for _, row in filtered_df.sort_values("accuracy", ascending=False).iterrows():
-        model_link = f"[{row['model_short']}]({row['model_url']})" if row['model_url'] else row['model_short']
-        leaderboard_data.append({
-            "Model": model_link,
-            "Approach": row["approach"],
-            "Params (B)": row["param_size_b"],
-            "Accuracy (%)": round(row["accuracy"], 1),
-        })
-
-    leaderboard_md = "| Model | Approach | Params (B) | Accuracy (%) |\n|-------|----------|------------|-------------|\n"
-    for row in leaderboard_data:
-        leaderboard_md += f"| {row['Model']} | {row['Approach']} | {row['Params (B)']} | {row['Accuracy (%)']} |\n"
-
-    mo.vstack([
-        approach_filter,
-        mo.md(leaderboard_md),
-    ])
-    return filtered_df, leaderboard_data, leaderboard_md
+    tabs
+    return make_task_content, tabs
 
 
 @app.cell
 def _(mo):
     mo.md(
         """
+        ---
+
        ## Why VLMs Win
 
        Book covers are **visually structured** documents:
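The substance of this hunk: the dropdown plus the separate chart and leaderboard cells are folded into one cell, where `make_task_content` builds the whole view for a task and `mo.ui.tabs` switches between the two tasks. A stripped-down sketch of that pattern follows; the placeholder body and labels are illustrative, not copied from app.py.

```python
import marimo as mo

# Hedged sketch of the tabs-per-task pattern introduced above: build one renderable
# per task, then key them by tab label in mo.ui.tabs.
def make_task_content(task_name):
    # stand-in body; the real helper filters df_raw and assembles the summary,
    # the Altair scatter plot, and the leaderboard for the given task
    return mo.vstack([
        mo.md(f"### {task_name}"),
        mo.md("(summary, scatter plot, and leaderboard would be built here)"),
    ])

tabs = mo.ui.tabs({
    "Title Extraction": make_task_content("Title Extraction"),
    "Full Metadata": make_task_content("Full Metadata"),
})
tabs  # in a marimo cell, the last expression is what gets rendered
```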
@@ -263,7 +219,6 @@ def _(mo):
 
 @app.cell
 def _(mo):
-    # Dataset viewer iframe
     mo.Html(
         """
         <iframe
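The final hunk only drops a comment above the dataset-viewer embed. For completeness, a hedged sketch of that embed pattern is below; the hunk cuts off before the iframe's attributes, so the `src` URL is assumed from the Hub's usual `/embed/viewer` convention rather than read from app.py.

```python
import marimo as mo

# Hedged sketch: embedding the Hugging Face dataset viewer in a marimo app.
# The src URL is an assumption based on the Hub's /embed/viewer pattern; the real
# attributes used in app.py are not visible in this hunk.
mo.Html(
    """
    <iframe
        src="https://huggingface.co/datasets/davanstrien/doab-title-extraction-evals/embed/viewer"
        width="100%"
        height="560"
        frameborder="0"
    ></iframe>
    """
)
```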