# hci-assistant / app.py
# Arj2n's picture
# Update app.py
# 5979f83 verified
import gradio as gr
from PyPDF2 import PdfReader
from transformers import pipeline
# Shared summarization pipeline, built once at import time and reused by
# every request handled by this module.
SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"
summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)
# ---- Extract text from PDF ----
def extract_pdf_text(file) -> str:
    """Return the extractable text of every page in a PDF.

    Args:
        file: Whatever ``PdfReader`` accepts as its source (this module
            passes the value received from the Gradio file input).

    Returns:
        The text of all pages that yielded any text, joined with newlines.
        An empty string when no page produced text.
    """
    reader = PdfReader(file)
    # ``extract_text()`` may return nothing for a page; skip those pages.
    page_texts = [page.extract_text() or "" for page in reader.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next (the text is later split on whitespace).
    return "\n".join(page_text for page_text in page_texts if page_text)
# ---- Split text into smaller chunks ----
def chunk_text(text: str, max_length: int = 500) -> list:
    """Split text into consecutive chunks of at most ``max_length`` words.

    Words are whatever ``str.split()`` yields, so any run of whitespace is
    treated as a single separator and each chunk is re-joined with single
    spaces.

    Args:
        text: The text to split.
        max_length: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings in original order; empty when ``text``
        contains no words.

    Raises:
        ValueError: If ``max_length`` is less than 1.
    """
    # A negative step would make range() empty and silently drop all text,
    # and a zero step fails with an unrelated-looking message; reject both.
    if max_length < 1:
        raise ValueError(f"max_length must be at least 1, got {max_length}")

    words = text.split()
    return [
        " ".join(words[start:start + max_length])
        for start in range(0, len(words), max_length)
    ]
# ---- Summarization function ----
def summarize_research_paper(pdf_file, text_input, source_link, progress=gr.Progress()):
    """Summarize an uploaded PDF and/or pasted text and return Markdown.

    The PDF text (if any) and the pasted text (if any) are combined, split
    into word chunks, and the first few chunks are summarized one by one.

    Args:
        pdf_file: Value of the file input, or ``None`` when nothing was
            uploaded.
        text_input: Pasted text; may be empty.
        source_link: Optional URL appended to the result as a Markdown link.
        progress: Gradio progress tracker used to report per-chunk progress.

    Returns:
        The chunk summaries separated by blank lines, followed by a
        "View Source" link when ``source_link`` is given, or a warning
        message when there is no text to summarize.
    """
    max_chunks = 5  # cap the work per request; later chunks are not summarized

    parts = []
    if pdf_file is not None:
        parts.append(extract_pdf_text(pdf_file))
    if text_input:
        parts.append(text_input)
    text = "\n".join(parts)

    if not text.strip():
        return "⚠️ Please upload a PDF or paste some text to summarize."

    # Slice first so the progress total matches the chunks actually processed;
    # otherwise long documents report e.g. "1/20" and the bar never completes.
    chunks = chunk_text(text)[:max_chunks]
    total = len(chunks)

    summaries = []
    for index, chunk in enumerate(chunks, start=1):
        progress(index / total, desc=f"Summarizing section {index}/{total}...")
        result = summarizer(
            chunk,
            max_length=130,
            min_length=30,
            do_sample=False,
            # Chunks are sized in words, not tokens; truncate any chunk that
            # tokenizes to more than the model accepts instead of failing.
            truncation=True,
        )
        summaries.append(result[0]["summary_text"])

    final_summary = "\n\n".join(summaries)

    # Ignore a link that is only whitespace so no empty Markdown link is added.
    link = source_link.strip() if source_link else ""
    if link:
        final_summary += f"\n\n🔗 **[View Source]({link})**"
    return final_summary
# ---- Build the Gradio UI ----
# Three inputs (PDF upload, pasted text, optional source link) are passed
# positionally to summarize_research_paper; its result is shown as Markdown.
demo = gr.Interface(
    fn=summarize_research_paper,
    inputs=[
        gr.File(label="📄 Upload Research Paper (PDF)"),
        gr.Textbox(label="📝 Or Paste Research Text", placeholder="Paste abstract, intro, or text here..."),
        gr.Textbox(label="🔗 Add Source Link (Optional)", placeholder="https://arxiv.org/abs/2405.12345"),
    ],
    outputs=gr.Markdown(label="🧾 Summary with Source"),
    title="HCI Research Paper Summarizer",
    description="Upload a research paper or paste text to get a concise summary. Optionally include a source link (DOI, arXiv, or publication page).",
)
# Queue requests before launching; the handler reports progress through a
# gr.Progress tracker (NOTE(review): confirm queue requirement for the
# installed Gradio version).
demo.queue()
demo.launch()