Alioth86 committed on
Commit
912db67
1 Parent(s): 8c468f3

Add application file

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -14,6 +14,7 @@ import re
14
  import torch
15
  import transformers
16
  from transformers import pipeline
 
17
  from datasets import load_dataset
18
  import soundfile as sf
19
  from IPython.display import Audio
@@ -144,9 +145,15 @@ def main_function(uploaded_filepath):
144
  text_per_pagy[key] = cleaned_text
145
  abstract_text = extract_abstract(text_per_pagy)
146
 
 
 
147
  #abstract the summary with my pipeline and model, deciding the length
148
  summarizer = pipeline("summarization", model="pszemraj/long-t5-tglobal-base-sci-simplify")
149
- summary = summarizer(abstract_text, max_length=65, do_sample=False)[0]['summary_text']
 
 
 
 
150
 
151
  #generating the audio from the text, with my pipeline and model
152
  synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
@@ -159,7 +166,7 @@ def main_function(uploaded_filepath):
159
  sf.write(audio_file_path, speech["audio"], samplerate=speech["sampling_rate"])
160
 
161
  #the function returns the 2 pieces we need
162
- return summary, audio_file_path
163
 
164
  #let's communicate with gradio what it has to put in
165
  iface = gr.Interface(
 
14
  import torch
15
  import transformers
16
  from transformers import pipeline
17
+ import nltk
18
  from datasets import load_dataset
19
  import soundfile as sf
20
  from IPython.display import Audio
 
145
  text_per_pagy[key] = cleaned_text
146
  abstract_text = extract_abstract(text_per_pagy)
147
 
148
+ nltk.download('punkt')
149
+
150
  #abstract the summary with my pipeline and model, deciding the length
151
  summarizer = pipeline("summarization", model="pszemraj/long-t5-tglobal-base-sci-simplify")
152
+ summary = summarizer(abstract_text, max_length=100, do_sample=False)[0]['summary_text']
153
+
154
+ #keeping just the first sentence, to be sure.
155
+ sentences = nltk.tokenize.sent_tokenize(summary)
156
+ first_sentence = sentences[0]
157
 
158
  #generating the audio from the text, with my pipeline and model
159
  synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
 
166
  sf.write(audio_file_path, speech["audio"], samplerate=speech["sampling_rate"])
167
 
168
  #the function returns the 2 pieces we need
169
+ return first_sentence, audio_file_path
170
 
171
  #let's communicate with gradio what it has to put in
172
  iface = gr.Interface(