mrsk1883 committed on
Commit
c693e62
1 Parent(s): 85c0e81

Upload 2 files

Browse files
Files changed (2) hide show
  1. app (1).py +52 -0
  2. requirements (1).txt +14 -0
app (1).py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PyPDF2 import PdfReader
3
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
4
+ from gtts import gTTS
5
+ from io import BytesIO
6
+ import re
7
+
8
# Checkpoint identifier handed to both `from_pretrained` calls below.
model_name = "ArtifactAI/led_large_16384_arxiv_summarization"

# Both objects are created once at import time and shared by every request
# handled by `summarize_pdf_abstract`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
11
+
12
# Whitespace that follows sentence-ending punctuation (".", "?" or "!"),
# except after dotted abbreviations such as "e.g." (first lookbehind) or
# capitalised two-letter titles such as "Dr." (second lookbehind).
_SENTENCE_BOUNDARY = re.compile(
    r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=[.?!])\s'
)


def extract_first_sentence(text):
    """Return the first sentence of *text*.

    A sentence ends at the first whitespace character that follows ".",
    "?" or "!", unless that punctuation belongs to an abbreviation matched
    by the lookbehinds in ``_SENTENCE_BOUNDARY``.

    Args:
        text: The string to take the leading sentence from.

    Returns:
        The text up to and including the first sentence terminator, or
        *text* unchanged when no sentence boundary is found (this includes
        the empty string).
    """
    # The split result always has at least one element, so no fallback
    # branch is needed; maxsplit=1 stops scanning after the first boundary.
    return _SENTENCE_BOUNDARY.split(text, maxsplit=1)[0]
18
+
19
def summarize_pdf_abstract(pdf_file):
    """Summarize one page of a PDF and synthesize the summary as speech.

    The first page whose extracted text contains "Abstract" or
    "Introduction" is fed to the module-level ``model`` / ``tokenizer``.
    Only the first sentence of the generated summary is kept, and that
    sentence is converted to audio with gTTS.

    Args:
        pdf_file: The uploaded PDF, passed straight to ``PdfReader``.

    Returns:
        A ``(summary_sentence, audio_bytes)`` tuple: the first sentence of
        the summary and the bytes gTTS wrote for it.

    Raises:
        Exception: Any failure (unreadable PDF, no matching page, model or
            text-to-speech error) is re-raised as a plain ``Exception``
            carrying the original message, with the original error chained
            as its cause.
    """
    try:
        reader = PdfReader(pdf_file)

        abstract_text = ""
        for page in reader.pages:
            # Extract once per page; guard against an empty/None result so
            # the substring checks below cannot fail on it.
            page_text = page.extract_text() or ""
            if "Abstract" in page_text or "Introduction" in page_text:
                abstract_text = page_text
                break

        # Without a matching page there is nothing meaningful to summarize;
        # fail clearly instead of running the model on an empty string.
        if not abstract_text.strip():
            raise ValueError(
                "No page containing 'Abstract' or 'Introduction' was found in the PDF."
            )

        inputs = tokenizer(abstract_text, return_tensors="pt")
        outputs = model.generate(**inputs)
        # Drop special tokens so that markers such as "<s>"/"</s>" do not end
        # up in the displayed text or get read aloud.
        summary = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Extract only the first sentence
        summary_sentence = extract_first_sentence(summary)

        # Generate audio
        speech = gTTS(text=summary_sentence, lang="en")
        speech_bytes = BytesIO()
        speech.write_to_fp(speech_bytes)

        # Return individual output values
        return summary_sentence, speech_bytes.getvalue()

    except Exception as e:
        # Same exception type and message as before, but keep the original
        # error attached as the cause so the traceback stays useful.
        raise Exception(str(e)) from e
45
+
46
# Gradio front end: one PDF upload in; the summary text and its spoken
# version out, in the same order as the tuple returned by
# `summarize_pdf_abstract`.
interface = gr.Interface(
    fn=summarize_pdf_abstract,
    inputs=[gr.File(label="Upload PDF")],
    outputs=[gr.Textbox(label="Summary"), gr.Audio()],
)

# Start serving as soon as the module runs.
# NOTE(review): share=True presumably requests a public share link — confirm
# against Gradio's launch() documentation for the deployed environment.
interface.launch(share=True)
requirements (1).txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ PyPDF2
4
+ gtts
5
+ torch
6
+ numpy
7
+ pytest
8
+ sphinx
9
+ huggingface-hub
10
+ IPython
11
+ torchvision
12
+ torchaudio
13
+ tensorflow
14
+ flax