gigisan81 committed on
Commit
1a1e781
1 Parent(s): a808718

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -0
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://huggingface.co/spaces/gigisan81/lg_assessment3
2
+
3
+ # Here are the imports
4
+ import gradio as gr
5
+ from transformers import pipeline
6
+ from tempfile import NamedTemporaryFile
7
+ from PyPDF2 import PdfReader
8
+ from IPython.display import Audio
9
+ import numpy as np
10
+ from bark import SAMPLE_RATE, generate_audio, preload_models
11
+ from scipy.io.wavfile import write as write_wav
12
+ import torch
13
+ # Here is the code
14
# Lazily created summarization pipeline, shared across calls so the model is
# loaded once per process instead of on every request.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline(
            "summarization",
            "pszemraj/led-base-book-summary",
            device=0 if torch.cuda.is_available() else -1,
        )
    return _SUMMARIZER


def _extract_abstract(page_text):
    """Return the abstract contained in *page_text*, or "" if none is found.

    Collection starts after a line that consists solely of the word
    "abstract" (any letter case) and stops at a line that contains both "1"
    and "introduction" (any letter case).
    """
    collected = []
    in_abstract = False
    for raw_line in page_text.splitlines():
        line = raw_line.strip()
        lowered = line.lower()
        if lowered == "abstract":
            # Skip the heading itself so it never ends up in the summary input.
            in_abstract = True
            continue
        if "1" in lowered and "introduction" in lowered:
            in_abstract = False
            continue
        if in_abstract and line:
            collected.append(line)
    # Join with spaces: PDF text is hard-wrapped, and gluing lines together
    # directly would fuse the last word of one line with the first of the next.
    return " ".join(collected)


def summarize_abstract_from_pdf(pdf_file_path):
    """Summarize the abstract found on the first page of a PDF.

    The text of the first page is extracted, the abstract section is isolated,
    and the summarization model is run twice: once on the abstract and once on
    the resulting summary, to obtain a very short text.

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        The twice-summarized abstract as a string.

    Raises:
        ValueError: If the PDF has no pages or no abstract section is found
            on its first page.
    """
    # Read the PDF and extract text from the first page only.
    with open(pdf_file_path, "rb") as pdf_file:
        reader = PdfReader(pdf_file)
        if len(reader.pages) == 0:
            raise ValueError("The PDF file does not contain any pages.")
        first_page_text = reader.pages[0].extract_text() or ""

    abstract_text = _extract_abstract(first_page_text)
    if not abstract_text:
        # Feeding an empty string to the model would only yield noise.
        raise ValueError(
            "No abstract section was found on the first page of the PDF."
        )

    summarizer = _get_summarizer()

    # Generation settings shared by both summarization passes.
    generation_options = {
        "min_length": 16,
        "no_repeat_ngram_size": 3,
        "encoder_no_repeat_ngram_size": 3,
        "repetition_penalty": 3.5,
        "num_beams": 4,
        "early_stopping": True,
    }

    # First pass: condense the abstract.
    first_pass = summarizer(abstract_text, max_length=150, **generation_options)

    # Second pass: summarize the summary to get an even shorter text.
    second_pass = summarizer(
        first_pass[0]["summary_text"],
        max_length=25,
        **generation_options,
    )

    # Return the summarized abstract as a string.
    return second_pass[0]["summary_text"]
66
+
67
def generate_audio_func(pdf_file):
    """Summarize the abstract of an uploaded PDF and synthesize it as speech.

    Args:
        pdf_file: The upload handed over by Gradio. Either an object whose
            ``name`` attribute holds the file path, or the path itself.

    Returns:
        Path of a WAV file containing the spoken summary. The file is created
        with ``delete=False``, so it is not removed automatically.

    Raises:
        ValueError: If no file was supplied.
    """
    if pdf_file is None:
        raise ValueError("Please upload a PDF file.")

    # Accept both a file-like wrapper (with .name) and a plain path string.
    pdf_file_path = getattr(pdf_file, "name", pdf_file)

    # Summarize the abstract and generate audio from the resulting text.
    text_prompt = summarize_abstract_from_pdf(pdf_file_path)
    audio_array = generate_audio(text_prompt)

    # Convert to 16-bit PCM. Presumably audio_array holds floats in [-1, 1]
    # (TODO confirm); clipping prevents integer wrap-around on any overshoot.
    pcm_samples = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)

    # Reserve a temporary file name, then write after the handle is closed so
    # the file is not opened twice at the same time.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name

    # Use the sample rate exported by bark rather than a hard-coded value, so
    # the audio plays back at the correct speed and pitch.
    write_wav(wav_file_path, SAMPLE_RATE, pcm_samples)
    return wav_file_path
80
+
81
+
82
+
83
# Define the app name and description shown in the interface.
app_name = "PDF to Audio Converter"
app_description = "Convert text from a PDF file to audio. Upload a PDF file. We accept only PDF files with abstracts."

# Create the Gradio components.
# File extensions need a leading dot; a bare word such as "pdf" is treated as
# a file category rather than an extension.
input_component = gr.File(file_types=[".pdf"])
output_component = gr.Audio()

# Wire the PDF-to-audio function to the input and output components.
demo = gr.Interface(
    fn=generate_audio_func,
    inputs=input_component,
    outputs=output_component,
    title=app_name,
    description=app_description,
)

demo.launch()