Spaces:
Sleeping
Sleeping
File size: 2,121 Bytes
4d9e804 1153200 cbf8d31 5979f83 1153200 a6cc800 1153200 5979f83 1153200 a6cc800 1153200 5979f83 1153200 5979f83 1567874 5979f83 1153200 cbf8d31 1153200 5979f83 cbf8d31 1153200 4d9e804 5979f83 1153200 1567874 1153200 1567874 1153200 1567874 1153200 1567874 1153200 1567874 f4eb9f4 5979f83 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import gradio as gr
from PyPDF2 import PdfReader
from transformers import pipeline
# Summarization pipeline: constructed once, at import time, and reused by
# every call to summarize_research_paper. It is created through the
# transformers `pipeline` factory with the "summarization" task and the
# "sshleifer/distilbart-cnn-12-6" checkpoint.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
# ---- Extract text from PDF ----
def extract_pdf_text(file) -> str:
    """Return the text of every page of a PDF, one page per line group.

    Args:
        file: The PDF to read. It is passed to ``PdfReader`` unchanged, so it
            may be anything ``PdfReader`` accepts as its first argument.

    Returns:
        The extracted text of all pages joined with a newline. Pages for
        which ``extract_text()`` returns a falsy value (``None`` or ``""``)
        contribute an empty string.
    """
    reader = PdfReader(file)
    # Join with "\n" rather than "": without a separator the last word of one
    # page is glued to the first word of the next, which corrupts the
    # whitespace-based word splitting done downstream.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
# ---- Split text into smaller chunks ----
def chunk_text(text: str, max_length: int = 500) -> list:
    """Split *text* into chunks of at most *max_length* words.

    Words are the whitespace-separated tokens produced by ``str.split()``;
    each chunk is those words re-joined with single spaces, so the original
    spacing and line breaks are not preserved.

    Args:
        text: The text to split.
        max_length: Maximum number of words per chunk. Must be at least 1.

    Returns:
        A list of chunk strings in document order. Empty or whitespace-only
        input yields an empty list.

    Raises:
        ValueError: If *max_length* is less than 1.
    """
    # A zero step makes range() fail with an unrelated message, and a negative
    # step would silently return no chunks at all; reject both explicitly.
    if max_length < 1:
        raise ValueError(f"max_length must be a positive integer, got {max_length!r}")

    words = text.split()
    return [
        " ".join(words[start:start + max_length])
        for start in range(0, len(words), max_length)
    ]
# ---- Summarization function ----
def summarize_research_paper(pdf_file, text_input, source_link, progress=gr.Progress()):
    """Summarize an uploaded PDF and/or pasted text and return Markdown.

    The PDF text (if any) and the pasted text (if any) are concatenated,
    split into word chunks with ``chunk_text``, and the first few chunks are
    summarized one by one with the module-level ``summarizer`` pipeline.

    Args:
        pdf_file: The uploaded PDF as delivered by the file input, or ``None``
            when nothing was uploaded. Passed to ``extract_pdf_text`` as is.
        text_input: Text pasted by the user; may be empty or ``None``.
        source_link: Optional URL. When non-blank it is appended to the
            result as a "View Source" Markdown link.
        progress: Gradio progress tracker, updated once per summarized chunk.
            The ``gr.Progress()`` default is how Gradio injects the tracker.

    Returns:
        A Markdown string with the chunk summaries separated by blank lines,
        followed by the source link when one was given. If there is no text
        to summarize, a warning message is returned instead.
    """
    max_chunks = 5  # cap on summarized chunks; text beyond this is ignored

    text = ""
    if pdf_file is not None:
        text += extract_pdf_text(pdf_file)
    if text_input:
        text += "\n" + text_input

    if not text.strip():
        return "⚠️ Please upload a PDF or paste some text to summarize."

    # Apply the cap before the loop so the progress fraction and the "i/N"
    # label are computed against the number of chunks actually processed.
    chunks = chunk_text(text)[:max_chunks]
    total = len(chunks)

    summaries = []
    for index, chunk in enumerate(chunks, start=1):
        progress(index / total, desc=f"Summarizing section {index}/{total}...")
        # truncation=True: a 500-word chunk can tokenize to more tokens than
        # the model accepts; let the tokenizer clip it instead of failing.
        result = summarizer(
            chunk,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        summaries.append(result[0]["summary_text"])

    final_summary = "\n\n".join(summaries)

    # Strip the link so a whitespace-only value does not render an empty link
    # and stray spaces do not break the Markdown link syntax.
    link = source_link.strip() if source_link else ""
    if link:
        final_summary += f"\n\n🔗 **[View Source]({link})**"
    return final_summary
# ---- Build the Gradio UI ----
# The three inputs are passed positionally to summarize_research_paper, so
# their order must match its (pdf_file, text_input, source_link) parameters.
demo = gr.Interface(
    fn=summarize_research_paper,
    inputs=[
        gr.File(label="📄 Upload Research Paper (PDF)"),
        gr.Textbox(label="📝 Or Paste Research Text", placeholder="Paste abstract, intro, or text here..."),
        gr.Textbox(label="🔗 Add Source Link (Optional)", placeholder="https://arxiv.org/abs/2405.12345"),
    ],
    outputs=gr.Markdown(label="🧾 Summary with Source"),
    title="HCI Research Paper Summarizer",
    description="Upload a research paper or paste text to get a concise summary. Optionally include a source link (DOI, arXiv, or publication page).",
)
# Enable request queuing before launching.
# NOTE(review): the gr.Progress tracker used by the summarizer presumably
# depends on the queue in Gradio versions where it is not on by default.
demo.queue()
demo.launch()
|