File size: 2,121 Bytes
4d9e804
1153200
cbf8d31
 
5979f83
1153200
a6cc800
1153200
 
5979f83
1153200
 
 
 
a6cc800
1153200
5979f83
1153200
 
 
 
 
 
5979f83
1567874
5979f83
1153200
 
cbf8d31
1153200
5979f83
cbf8d31
1153200
 
4d9e804
5979f83
1153200
 
 
1567874
1153200
1567874
1153200
 
1567874
1153200
1567874
 
1153200
 
 
 
 
 
 
 
 
 
 
 
1567874
f4eb9f4
5979f83
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import gradio as gr
from PyPDF2 import PdfReader
from transformers import pipeline

# Shared summarization pipeline, constructed once at module import and
# reused by every request handled below.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)

# ---- Extract text from PDF ----
def extract_pdf_text(file):
    """Return the extractable text of every page in a PDF.

    Args:
        file: The PDF source; passed unchanged to ``PdfReader``.

    Returns:
        The text of all pages joined by newlines. Pages for which
        ``extract_text()`` yields nothing are skipped; the result is an
        empty string when no page has extractable text.
    """
    reader = PdfReader(file)
    page_texts = (page.extract_text() or "" for page in reader.pages)
    # Join with a newline so the last word of one page does not fuse with
    # the first word of the next (callers split this text on whitespace).
    return "\n".join(page_text for page_text in page_texts if page_text)

# ---- Split text into smaller chunks ----
def chunk_text(text, max_length=500):
    """Split *text* into consecutive chunks of at most *max_length* words.

    Words are whatever ``str.split()`` produces (whitespace-separated), and
    each chunk is re-joined with single spaces, so original spacing and
    line breaks are not preserved.

    Args:
        text: The text to split.
        max_length: Maximum number of words per chunk.

    Returns:
        A list of chunk strings, in order; empty when *text* has no words.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), max_length):
        window = tokens[start:start + max_length]
        pieces.append(" ".join(window))
    return pieces

# ---- Summarization function ----
def summarize_research_paper(pdf_file, text_input, source_link, progress=gr.Progress()):
    """Summarize an uploaded PDF and/or pasted text as Markdown.

    The PDF text (if any) and the pasted text (if any) are concatenated,
    split into word chunks, and the first few chunks are summarized one by
    one. The per-chunk summaries are joined with blank lines.

    Args:
        pdf_file: Uploaded PDF, or ``None``; forwarded to ``extract_pdf_text``.
        text_input: Pasted text; may be empty.
        source_link: Optional URL appended to the summary as a Markdown link.
        progress: Gradio progress tracker, called once per chunk.

    Returns:
        The combined summary as a Markdown string, or a warning message
        when neither input contains any text.
    """
    max_chunks = 5  # cap the work per request; chunks beyond this are ignored

    text = ""
    if pdf_file is not None:
        text += extract_pdf_text(pdf_file)
    if text_input:
        text += "\n" + text_input

    if not text.strip():
        return "⚠️ Please upload a PDF or paste some text to summarize."

    # Apply the cap before counting so the progress fraction and the
    # "i/N" label describe the chunks that are actually processed.
    chunks = chunk_text(text)[:max_chunks]
    total = len(chunks)
    summaries = []

    for index, chunk in enumerate(chunks, start=1):
        progress(index / total, desc=f"Summarizing section {index}/{total}...")
        result = summarizer(
            chunk,
            max_length=130,
            min_length=30,
            do_sample=False,
            # A 500-word chunk can tokenize to more than the model accepts;
            # truncate instead of failing on over-long input.
            truncation=True,
        )
        summaries.append(result[0]["summary_text"])

    final_summary = "\n\n".join(summaries)

    # Ignore a link that is missing or only whitespace.
    link = (source_link or "").strip()
    if link:
        final_summary += f"\n\n🔗 **[View Source]({link})**"

    return final_summary


# ---- Build the Gradio UI ----
# Three inputs map positionally onto summarize_research_paper's
# (pdf_file, text_input, source_link); the result is rendered as Markdown.
demo = gr.Interface(
    fn=summarize_research_paper,
    inputs=[
        gr.File(label="📄 Upload Research Paper (PDF)"),
        gr.Textbox(label="📝 Or Paste Research Text", placeholder="Paste abstract, intro, or text here..."),
        gr.Textbox(label="🔗 Add Source Link (Optional)", placeholder="https://arxiv.org/abs/2405.12345")
    ],
    outputs=gr.Markdown(label="🧾 Summary with Source"),
    title="HCI Research Paper Summarizer",
    description="Upload a research paper or paste text to get a concise summary. Optionally include a source link (DOI, arXiv, or publication page).",
)

# Enable the request queue before starting the server.
demo.queue()
demo.launch()