Commit afeeac0
1 Parent(s): fbc8ccb

Use separate tokens for inference and PR creation

- HF_TOKEN: for inference API calls (davanstrien)
- LIBRARIAN_BOT_TOKEN: for opening PRs (librarian-bot)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>

Files changed:
- .beads/issues.jsonl +1 -1
- app.py +27 -13
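
The commit message pins each secret to a single job: `HF_TOKEN` (davanstrien) is only used for inference calls, `LIBRARIAN_BOT_TOKEN` (librarian-bot) only for opening PRs. As a rough illustration of that split (not code from this commit; app.py validates the secrets per request, as the diffs below show), a startup-time check could look like:

```python
import os

# Illustration only: variable names mirror the commit, but this fail-fast
# startup check is an assumption; app.py checks the secrets per request.
inference_token = os.getenv("HF_TOKEN")         # davanstrien's token, inference API calls
pr_token = os.getenv("LIBRARIAN_BOT_TOKEN")     # librarian-bot's token, opening PRs

missing = [name for name, value in (("HF_TOKEN", inference_token),
                                    ("LIBRARIAN_BOT_TOKEN", pr_token)) if not value]
if missing:
    raise RuntimeError(f"Missing Space secrets: {', '.join(missing)}")
```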
.beads/issues.jsonl

@@ -1,2 +1,2 @@
-{"id":"dataset-card-drafter-ebu","title":"Add PR deduplication logic","description":"Multiple PRs being opened for same dataset. Need to check for existing open PRs before creating new ones.","status":"…
+{"id":"dataset-card-drafter-ebu","title":"Add PR deduplication logic","description":"Multiple PRs being opened for same dataset. Need to check for existing open PRs before creating new ones.","status":"closed","priority":1,"issue_type":"bug","created_at":"2025-12-15T17:43:02.474669Z","updated_at":"2025-12-15T17:48:03.770007Z","closed_at":"2025-12-15T17:48:03.770007Z","close_reason":"Added has_existing_pr() check using get_repo_discussions + improved PR description"}
 {"id":"dataset-card-drafter-wbd","title":"MVP implementation: WebhooksServer + DatasetCard + InferenceClient","description":"","status":"closed","priority":1,"issue_type":"feature","created_at":"2025-12-15T17:24:36.365733Z","updated_at":"2025-12-15T17:28:21.127763Z","closed_at":"2025-12-15T17:28:21.127763Z","close_reason":"MVP implemented with WebhooksServer, DatasetCard, and InferenceClient"}
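
The close_reason on the deduplication issue names a `has_existing_pr()` check built on `get_repo_discussions`. The commit does not include that helper's body; a minimal sketch of such a guard, assuming the bot matches open pull requests by title, might look like:

```python
from huggingface_hub import HfApi

PR_TITLE = "Add dataset description"  # placeholder; the real title is defined in app.py

def has_existing_pr(dataset_id: str, token: str | None = None) -> bool:
    """Return True if an open PR with the bot's title is already on the dataset repo."""
    api = HfApi(token=token)
    for discussion in api.get_repo_discussions(repo_id=dataset_id, repo_type="dataset"):
        if discussion.is_pull_request and discussion.status == "open" and discussion.title == PR_TITLE:
            return True
    return False
```

Matching on title is only one possible dedup criterion; filtering by the bot's author name would be another.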
app.py

@@ -92,9 +92,14 @@ This PR was automatically generated by the [Dataset Card Drafter]({SPACE_URL}) S
 *Generated by [{BOT_NAME}]({SPACE_URL})*"""


-async def process_dataset(dataset_id: str, hf_token: str) -> dict:
+async def process_dataset(dataset_id: str, inference_token: str, pr_token: str) -> dict:
     """Process a single dataset: check, generate, and open PR.

+    Args:
+        dataset_id: The dataset to process
+        inference_token: Token for inference API calls (e.g., davanstrien's token)
+        pr_token: Token for opening PRs (librarian-bot's token)
+
     Returns a status dict with results.
     """
     # Check for existing open PR first

@@ -111,16 +116,16 @@ async def process_dataset(dataset_id: str, hf_token: str) -> dict:
     if not should_generate(card):
         return {"status": "skipped", "reason": "description exists"}

-    # Generate description
+    # Generate description using inference token
     try:
-        description = generate_description(dataset_id, …
+        description = generate_description(dataset_id, inference_token)
     except Exception as e:
         return {"status": "error", "reason": f"generation failed: {e}"}

     if not description:
         return {"status": "error", "reason": "empty description generated"}

-    # Update card and push as PR
+    # Update card and push as PR using librarian-bot token
     card.text = description

     try:

@@ -130,7 +135,7 @@ async def process_dataset(dataset_id: str, hf_token: str) -> dict:
             commit_message=PR_TITLE,
             commit_description=build_pr_description(description),
             create_pr=True,
-            token=…
+            token=pr_token,
         )
         pr_url = getattr(commit_info, "pr_url", str(commit_info))
     except Exception as e:

@@ -188,7 +193,8 @@ with gr.Blocks(title="Dataset Card Drafter") as demo:
     with gr.Tab("Trigger PR"):
         gr.Markdown(
             "Manually trigger description generation and PR creation.\n\n"
-            "**Warning:** This will open a real PR …
+            "**Warning:** This will open a real PR!\n\n"
+            "Requires `HF_TOKEN` (for inference) and `LIBRARIAN_BOT_TOKEN` (for PRs)."
         )
         trigger_input = gr.Textbox(
             label="Dataset ID",

@@ -201,11 +207,15 @@ with gr.Blocks(title="Dataset Card Drafter") as demo:
         if not dataset_id:
             return {"status": "error", "reason": "no dataset ID provided"}

-        …
-        …
+        inference_token = os.getenv("HF_TOKEN")
+        pr_token = os.getenv("LIBRARIAN_BOT_TOKEN")
+
+        if not inference_token:
             return {"status": "error", "reason": "HF_TOKEN not set"}
+        if not pr_token:
+            return {"status": "error", "reason": "LIBRARIAN_BOT_TOKEN not set"}

-        result = await process_dataset(dataset_id, …
+        result = await process_dataset(dataset_id, inference_token, pr_token)

         # Save to processed log
         if result.get("status") == "pr_created":

@@ -244,13 +254,17 @@ async def handle_dataset_webhook(payload: WebhookPayload) -> dict:

     dataset_id = payload.repo.name

-    # Get …
-    …
-    …
+    # Get tokens
+    inference_token = os.getenv("HF_TOKEN")
+    pr_token = os.getenv("LIBRARIAN_BOT_TOKEN")
+
+    if not inference_token:
         return {"status": "error", "reason": "HF_TOKEN not configured"}
+    if not pr_token:
+        return {"status": "error", "reason": "LIBRARIAN_BOT_TOKEN not configured"}

     # Process the dataset
-    result = await process_dataset(dataset_id, …
+    result = await process_dataset(dataset_id, inference_token, pr_token)

     # Save to processed log
     processed = load_processed()
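
Taken together, the app.py changes make the token roles explicit: the inference token feeds `generate_description`, and the librarian-bot token signs the PR. A condensed, synchronous sketch of that flow (the model ID, prompt, and commit message are placeholders, and `generate_description` here only stands in for the function app.py already defines):

```python
import os

from huggingface_hub import DatasetCard, InferenceClient

inference_token = os.getenv("HF_TOKEN")        # used only for inference calls
pr_token = os.getenv("LIBRARIAN_BOT_TOKEN")    # used only to open the PR

def generate_description(dataset_id: str, token: str) -> str:
    """Hypothetical generator; stands in for the real function in app.py."""
    client = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct", token=token)  # model is a placeholder
    response = client.chat_completion(
        messages=[{"role": "user", "content": f"Write a one-paragraph description of the dataset {dataset_id}."}],
        max_tokens=300,
    )
    return response.choices[0].message.content.strip()

def open_description_pr(dataset_id: str) -> str:
    """Mirrors the push path in process_dataset(), with the token roles spelled out."""
    card = DatasetCard.load(dataset_id)
    card.text = generate_description(dataset_id, inference_token)
    commit_info = card.push_to_hub(
        dataset_id,
        repo_type="dataset",
        commit_message="Add dataset description",  # placeholder for PR_TITLE
        create_pr=True,
        token=pr_token,                            # PR is opened as librarian-bot
    )
    return getattr(commit_info, "pr_url", str(commit_info))
```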