"""Gradio app that summarizes a research paper from a PDF upload and/or pasted text."""

import gradio as gr
from PyPDF2 import PdfReader
from transformers import pipeline

# Only the first MAX_CHUNKS chunks of the input are summarized per request.
MAX_CHUNKS = 5

# UI glyphs are spelled as \N{...} escapes so this file stays pure ASCII and the
# symbols cannot be corrupted by an editor or upload step that mis-decodes UTF-8.
WARNING_ICON = "\N{WARNING SIGN}\N{VARIATION SELECTOR-16}"
LINK_ICON = "\N{LINK SYMBOL}"
PAGE_ICON = "\N{PAGE FACING UP}"
MEMO_ICON = "\N{MEMO}"
RECEIPT_ICON = "\N{RECEIPT}"

# Summarization pipeline (built once at import time and shared by all requests).
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")


# ---- Extract text from PDF ----
def extract_pdf_text(file) -> str:
    """Return the text of every page of a PDF, one page per line group.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts as its first argument
            (this app passes the value produced by the ``gr.File`` input).

    Returns:
        The extracted page texts joined with newlines. A page for which
        ``extract_text()`` yields nothing contributes an empty string.
    """
    reader = PdfReader(file)
    # Join with a newline so the last word of one page and the first word of
    # the next are not fused into a single token.
    return "\n".join(page.extract_text() or "" for page in reader.pages)


# ---- Split text into smaller chunks ----
def chunk_text(text: str, max_length: int = 500):
    """Split ``text`` into chunks of at most ``max_length`` whitespace-separated words.

    Args:
        text: The text to split. Runs of whitespace are collapsed, because the
            words are re-joined with single spaces.
        max_length: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of strings in original order. Empty or whitespace-only input
        gives an empty list.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")
    words = text.split()
    return [
        " ".join(words[start:start + max_length])
        for start in range(0, len(words), max_length)
    ]


# ---- Summarization function ----
# NOTE: the ``gr.Progress()`` default is deliberate; the signature is kept
# exactly as Gradio receives it so progress reporting keeps working.
def summarize_research_paper(pdf_file, text_input, source_link, progress=gr.Progress()):
    """Summarize an uploaded PDF and/or pasted text and return Markdown.

    Args:
        pdf_file: Value of the ``gr.File`` input, or ``None`` when nothing was
            uploaded.
        text_input: Text pasted by the user; may be empty.
        source_link: Optional link appended under the summary.
        progress: Gradio progress callback, called once per summarized chunk.

    Returns:
        A Markdown string: the chunk summaries separated by blank lines,
        followed by a "View Source" link when ``source_link`` is non-blank.
        If neither input contains any text, a warning message is returned
        instead.
    """
    parts = []
    if pdf_file is not None:
        parts.append(extract_pdf_text(pdf_file))
    if text_input:
        parts.append(text_input)
    text = "\n".join(parts)

    if not text.strip():
        return f"{WARNING_ICON} Please upload a PDF or paste some text to summarize."

    # Cap the work per request, and report progress against the number of
    # chunks that will actually be processed so the bar can reach 100%.
    selected = chunk_text(text)[:MAX_CHUNKS]
    total = len(selected)

    summaries = []
    for index, chunk in enumerate(selected, start=1):
        progress(index / total, desc=f"Summarizing section {index}/{total}...")
        result = summarizer(
            chunk,
            max_length=130,
            min_length=30,
            do_sample=False,
            # A 500-word chunk can tokenize to more than the model accepts;
            # ask the pipeline to truncate rather than fail on such input.
            truncation=True,
        )
        summaries.append(result[0]["summary_text"])

    final_summary = "\n\n".join(summaries)

    # Ignore a link that is only whitespace; it would render as a broken link.
    link = source_link.strip() if source_link else ""
    if link:
        final_summary += f"\n\n{LINK_ICON} **[View Source]({link})**"

    return final_summary


# ---- Build the Gradio UI ----
demo = gr.Interface(
    fn=summarize_research_paper,
    inputs=[
        gr.File(label=f"{PAGE_ICON} Upload Research Paper (PDF)"),
        gr.Textbox(
            label=f"{MEMO_ICON} Or Paste Research Text",
            placeholder="Paste abstract, intro, or text here...",
        ),
        gr.Textbox(
            label=f"{LINK_ICON} Add Source Link (Optional)",
            placeholder="https://arxiv.org/abs/2405.12345",
        ),
    ],
    outputs=gr.Markdown(label=f"{RECEIPT_ICON} Summary with Source"),
    title="HCI Research Paper Summarizer",
    description=(
        "Upload a research paper or paste text to get a concise summary. "
        "Optionally include a source link (DOI, arXiv, or publication page)."
    ),
)

# Queueing is enabled before launch, as in the original script.
demo.queue()
demo.launch()