dhiraut committed on
Commit
1633f9d
·
verified ·
1 Parent(s): 53a4453

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -0
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from typing import List
4
+
5
+ import fitz # PyMuPDF
6
+ import requests
7
+ from transformers import pipeline
8
+ from gtts import gTTS
9
+ import streamlit as st
10
+
11
+ # ---------- CONFIG ----------
12
def summarize_text(text: str) -> str:
    """Return a short summary of *text* produced by a summarization pipeline.

    Only the first 2000 characters of *text* are summarized. This function
    does not raise: every failure is reported through the returned string so
    the caller can display it directly.

    Args:
        text: Raw text to summarize.

    Returns:
        The generated summary, or a human-readable placeholder when the text
        is empty, the model returns nothing usable, or summarization fails
        (in which case the message starts with ``"Summary failed: "``).
    """
    if not text.strip():
        return "Summary not available (empty text)."

    try:
        # Cap the input by characters to bound the amount of work.
        text = text[:2000]

        summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
        # A character cap does not bound the token count (dense or non-Latin
        # text can tokenize to many more tokens than characters suggest), so
        # also ask the tokenizer to truncate over-long input.
        result = summarizer(
            text,
            max_length=200,
            min_length=30,
            do_sample=False,
            truncation=True,
        )

        if result and isinstance(result, list) and "summary_text" in result[0]:
            return result[0]["summary_text"]
        return "Summary not available (model did not return text)."
    except Exception as e:
        # Deliberate best-effort: report the failure as text instead of raising.
        return f"Summary failed: {str(e)}"
29
+
30
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page of the PDF at *pdf_path*, concatenated.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts joined in page order; an empty string when no page
        yields any text.
    """
    # Open the document as a context manager so it is closed (and its file
    # handle released) even if text extraction raises part-way through.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)
36
+
37
def classify_topic(text: str, topics: List[str]) -> str:
    """Pick the candidate topic that best matches *text*.

    Only the first 1000 characters of *text* are sent to the zero-shot
    classification pipeline.

    Args:
        text: Text to classify.
        topics: Candidate topic labels. Blank or whitespace-only entries are
            ignored, and surrounding whitespace is stripped from the rest.

    Returns:
        The first label in the pipeline's ``labels`` result, or an
        ``"Unknown (...)"`` placeholder when there is no text, no usable
        topic, or the classifier returns no labels.
    """
    if not text.strip():
        return "Unknown (no text extracted)"

    # Drop blank labels so the model is never asked to score an empty topic.
    labels = [t.strip() for t in topics if t and t.strip()] if topics else []
    if not labels:
        return "Unknown (no topics provided)"

    classifier = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-3")
    result = classifier(text[:1000], candidate_labels=labels)

    ranked = result.get("labels") if isinstance(result, dict) else None
    if isinstance(ranked, list) and ranked:
        return ranked[0]
    return "Unknown (classification failed)"
49
+
50
def generate_audio(text: str, output_path: str) -> None:
    """Synthesize *text* to speech with gTTS and save it to *output_path*.

    Args:
        text: Text to be spoken.
        output_path: Destination path of the audio file to write.

    Raises:
        RuntimeError: If synthesis or saving fails; the original exception
            is attached as ``__cause__``.
    """
    try:
        tts = gTTS(text)
        tts.save(output_path)
    except Exception as e:
        # Chain explicitly so the underlying failure stays in the traceback.
        raise RuntimeError(f"Audio generation failed: {str(e)}") from e
56
+
57
# ---------- STREAMLIT UI ----------
# Page flow: show the upload form; on submit, save the PDF to a temporary
# directory, extract its text, classify and summarize it, then play an audio
# rendering of the summary.
st.set_page_config(page_title="Research Paper Summarizer", layout="centered")
st.title("📄 AI Research Paper Summarizer")

st.markdown("""
Upload a research paper (PDF) and a list of topics. The app will:
1. Extract and summarize the paper
2. Classify it into a topic
3. Generate an audio summary 🎧
""")

with st.form("upload_form"):
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    topic_input = st.text_input("Enter comma-separated topics")
    submitted = st.form_submit_button("Summarize and Generate Audio")

if submitted and uploaded_file and topic_input:
    with st.spinner("Processing paper..."):
        try:
            # The temporary directory is removed when this block exits, so
            # uploads and generated audio do not pile up on disk.
            with tempfile.TemporaryDirectory() as temp_dir:
                # Keep only the final path component of the client-supplied
                # name so the file cannot be written outside temp_dir.
                safe_name = os.path.basename(uploaded_file.name) or "upload.pdf"
                file_path = os.path.join(temp_dir, safe_name)

                with open(file_path, "wb") as f:
                    f.write(uploaded_file.read())

                text = extract_text_from_pdf(file_path)
                st.info(f"Extracted text length: {len(text)} characters")

                if not text.strip():
                    st.error("❌ No text could be extracted from the PDF. Try another file.")
                else:
                    topic_list = [t.strip() for t in topic_input.split(",") if t.strip()]
                    classified_topic = classify_topic(text, topic_list)
                    summary = summarize_text(text)

                    st.markdown(f"### 🧠 Classified Topic: `{classified_topic}`")
                    st.markdown("### ✍️ Summary:")
                    st.write(summary)

                    audio_path = os.path.join(temp_dir, "summary.mp3")
                    generate_audio(summary, audio_path)

                    # Load the audio into memory before the temporary
                    # directory is deleted; the player is fed the bytes.
                    with open(audio_path, "rb") as audio_file:
                        audio_bytes = audio_file.read()

                    st.markdown("### 🔊 Audio Summary")
                    st.audio(audio_bytes, format="audio/mpeg")
                    st.success("Done! Audio summary is ready.")

        except Exception as e:
            # Top-level UI boundary: show any failure to the user.
            st.error(f"❌ Error: {str(e)}")
elif submitted:
    # Tell the user why nothing happened instead of silently ignoring the click.
    st.warning("Please upload a PDF file and enter at least one topic.")