davanstrien HF Staff Claude Opus 4.5 committed on
Commit
afeeac0
·
1 Parent(s): fbc8ccb

Use separate tokens for inference and PR creation

Browse files

- HF_TOKEN: for inference API calls (davanstrien)
- LIBRARIAN_BOT_TOKEN: for opening PRs (librarian-bot)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>

Files changed (2) hide show
  1. .beads/issues.jsonl +1 -1
  2. app.py +27 -13
.beads/issues.jsonl CHANGED
@@ -1,2 +1,2 @@
1
- {"id":"dataset-card-drafter-ebu","title":"Add PR deduplication logic","description":"Multiple PRs being opened for same dataset. Need to check for existing open PRs before creating new ones.","status":"in_progress","priority":1,"issue_type":"bug","created_at":"2025-12-15T17:43:02.474669Z","updated_at":"2025-12-15T17:45:54.45933Z"}
2
  {"id":"dataset-card-drafter-wbd","title":"MVP implementation: WebhooksServer + DatasetCard + InferenceClient","description":"","status":"closed","priority":1,"issue_type":"feature","created_at":"2025-12-15T17:24:36.365733Z","updated_at":"2025-12-15T17:28:21.127763Z","closed_at":"2025-12-15T17:28:21.127763Z","close_reason":"MVP implemented with WebhooksServer, DatasetCard, and InferenceClient"}
 
1
+ {"id":"dataset-card-drafter-ebu","title":"Add PR deduplication logic","description":"Multiple PRs being opened for same dataset. Need to check for existing open PRs before creating new ones.","status":"closed","priority":1,"issue_type":"bug","created_at":"2025-12-15T17:43:02.474669Z","updated_at":"2025-12-15T17:48:03.770007Z","closed_at":"2025-12-15T17:48:03.770007Z","close_reason":"Added has_existing_pr() check using get_repo_discussions + improved PR description"}
2
  {"id":"dataset-card-drafter-wbd","title":"MVP implementation: WebhooksServer + DatasetCard + InferenceClient","description":"","status":"closed","priority":1,"issue_type":"feature","created_at":"2025-12-15T17:24:36.365733Z","updated_at":"2025-12-15T17:28:21.127763Z","closed_at":"2025-12-15T17:28:21.127763Z","close_reason":"MVP implemented with WebhooksServer, DatasetCard, and InferenceClient"}
app.py CHANGED
@@ -92,9 +92,14 @@ This PR was automatically generated by the [Dataset Card Drafter]({SPACE_URL}) S
92
  *Generated by [{BOT_NAME}]({SPACE_URL})*"""
93
 
94
 
95
- async def process_dataset(dataset_id: str, hf_token: str) -> dict:
96
  """Process a single dataset: check, generate, and open PR.
97
 
 
 
 
 
 
98
  Returns a status dict with results.
99
  """
100
  # Check for existing open PR first
@@ -111,16 +116,16 @@ async def process_dataset(dataset_id: str, hf_token: str) -> dict:
111
  if not should_generate(card):
112
  return {"status": "skipped", "reason": "description exists"}
113
 
114
- # Generate description
115
  try:
116
- description = generate_description(dataset_id, hf_token)
117
  except Exception as e:
118
  return {"status": "error", "reason": f"generation failed: {e}"}
119
 
120
  if not description:
121
  return {"status": "error", "reason": "empty description generated"}
122
 
123
- # Update card and push as PR
124
  card.text = description
125
 
126
  try:
@@ -130,7 +135,7 @@ async def process_dataset(dataset_id: str, hf_token: str) -> dict:
130
  commit_message=PR_TITLE,
131
  commit_description=build_pr_description(description),
132
  create_pr=True,
133
- token=hf_token,
134
  )
135
  pr_url = getattr(commit_info, "pr_url", str(commit_info))
136
  except Exception as e:
@@ -188,7 +193,8 @@ with gr.Blocks(title="Dataset Card Drafter") as demo:
188
  with gr.Tab("Trigger PR"):
189
  gr.Markdown(
190
  "Manually trigger description generation and PR creation.\n\n"
191
- "**Warning:** This will open a real PR!"
 
192
  )
193
  trigger_input = gr.Textbox(
194
  label="Dataset ID",
@@ -201,11 +207,15 @@ with gr.Blocks(title="Dataset Card Drafter") as demo:
201
  if not dataset_id:
202
  return {"status": "error", "reason": "no dataset ID provided"}
203
 
204
- hf_token = os.getenv("HF_TOKEN")
205
- if not hf_token:
 
 
206
  return {"status": "error", "reason": "HF_TOKEN not set"}
 
 
207
 
208
- result = await process_dataset(dataset_id, hf_token)
209
 
210
  # Save to processed log
211
  if result.get("status") == "pr_created":
@@ -244,13 +254,17 @@ async def handle_dataset_webhook(payload: WebhookPayload) -> dict:
244
 
245
  dataset_id = payload.repo.name
246
 
247
- # Get token
248
- hf_token = os.getenv("HF_TOKEN")
249
- if not hf_token:
 
 
250
  return {"status": "error", "reason": "HF_TOKEN not configured"}
 
 
251
 
252
  # Process the dataset
253
- result = await process_dataset(dataset_id, hf_token)
254
 
255
  # Save to processed log
256
  processed = load_processed()
 
92
  *Generated by [{BOT_NAME}]({SPACE_URL})*"""
93
 
94
 
95
+ async def process_dataset(dataset_id: str, inference_token: str, pr_token: str) -> dict:
96
  """Process a single dataset: check, generate, and open PR.
97
 
98
+ Args:
99
+ dataset_id: The dataset to process
100
+ inference_token: Token for inference API calls (e.g., davanstrien's token)
101
+ pr_token: Token for opening PRs (librarian-bot's token)
102
+
103
  Returns a status dict with results.
104
  """
105
  # Check for existing open PR first
 
116
  if not should_generate(card):
117
  return {"status": "skipped", "reason": "description exists"}
118
 
119
+ # Generate description using inference token
120
  try:
121
+ description = generate_description(dataset_id, inference_token)
122
  except Exception as e:
123
  return {"status": "error", "reason": f"generation failed: {e}"}
124
 
125
  if not description:
126
  return {"status": "error", "reason": "empty description generated"}
127
 
128
+ # Update card and push as PR using librarian-bot token
129
  card.text = description
130
 
131
  try:
 
135
  commit_message=PR_TITLE,
136
  commit_description=build_pr_description(description),
137
  create_pr=True,
138
+ token=pr_token,
139
  )
140
  pr_url = getattr(commit_info, "pr_url", str(commit_info))
141
  except Exception as e:
 
193
  with gr.Tab("Trigger PR"):
194
  gr.Markdown(
195
  "Manually trigger description generation and PR creation.\n\n"
196
+ "**Warning:** This will open a real PR!\n\n"
197
+ "Requires `HF_TOKEN` (for inference) and `LIBRARIAN_BOT_TOKEN` (for PRs)."
198
  )
199
  trigger_input = gr.Textbox(
200
  label="Dataset ID",
 
207
  if not dataset_id:
208
  return {"status": "error", "reason": "no dataset ID provided"}
209
 
210
+ inference_token = os.getenv("HF_TOKEN")
211
+ pr_token = os.getenv("LIBRARIAN_BOT_TOKEN")
212
+
213
+ if not inference_token:
214
  return {"status": "error", "reason": "HF_TOKEN not set"}
215
+ if not pr_token:
216
+ return {"status": "error", "reason": "LIBRARIAN_BOT_TOKEN not set"}
217
 
218
+ result = await process_dataset(dataset_id, inference_token, pr_token)
219
 
220
  # Save to processed log
221
  if result.get("status") == "pr_created":
 
254
 
255
  dataset_id = payload.repo.name
256
 
257
+ # Get tokens
258
+ inference_token = os.getenv("HF_TOKEN")
259
+ pr_token = os.getenv("LIBRARIAN_BOT_TOKEN")
260
+
261
+ if not inference_token:
262
  return {"status": "error", "reason": "HF_TOKEN not configured"}
263
+ if not pr_token:
264
+ return {"status": "error", "reason": "LIBRARIAN_BOT_TOKEN not configured"}
265
 
266
  # Process the dataset
267
+ result = await process_dataset(dataset_id, inference_token, pr_token)
268
 
269
  # Save to processed log
270
  processed = load_processed()