Xuratron committed on
Commit
1adb2ef
1 Parent(s): 0e0c910

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -47
app.py CHANGED
@@ -1,86 +1,124 @@
1
- #https://huggingface.co/spaces/Xuratron/abstract-speech-summarizer
2
-
3
  # Here are the imports
4
  import PyPDF2
5
  import re
6
  import torch
7
  from transformers import pipeline
8
- import soundfile as sf
9
  from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
10
  from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
11
  import gradio as gr
12
-
 
 
 
13
 
14
  # Here is the code
15
 
 
16
  def extract_and_clean_abstract(uploaded_file):
17
- """
18
- Extracts and cleans the abstract from the uploaded PDF file.
19
- """
20
- reader = PyPDF2.PdfReader(uploaded_file.file)
21
- text = ""
22
- for page in reader.pages:
23
- text += page.extract_text() or ""
 
 
24
 
25
- # Regular expression pattern to find the abstract
26
  pattern = r"(Abstract|ABSTRACT|abstract)(.*?)(Introduction|INTRODUCTION|introduction|1|Keywords|KEYWORDS|keywords)"
27
- match = re.search(pattern, text, re.DOTALL)
28
 
29
  if match:
30
  abstract = match.group(2).strip()
31
  else:
32
- abstract = "Abstract not found."
33
 
34
- # Clean the abstract text
35
  cleaned_abstract = abstract.replace('\n', ' ').replace('- ', '')
36
 
37
  return cleaned_abstract
38
 
39
- def summarize_text(hf_model_name, text):
40
- """
41
- Summarizes the given text using a Hugging Face model.
42
- """
43
- summarizer = pipeline("summarization", model=hf_model_name)
44
- summary = summarizer(text, max_length=130, min_length=30, do_sample=False)[0]['summary_text']
45
- return summary
 
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def text_to_speech(text):
48
- """
49
- Converts text to speech using a Hugging Face model.
50
- """
 
51
  models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
52
- "facebook/fastspeech2-en-ljspeech",
53
  arg_overrides={"vocoder": "hifigan", "fp16": False}
54
  )
55
- model = models[0]
 
 
 
 
56
  TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
 
 
57
  generator = task.build_generator([model], cfg)
 
 
58
  sample = TTSHubInterface.get_model_input(task, text)
 
 
 
 
59
  wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
60
-
61
- return wav, rate
62
-
63
- def process_pdf(uploaded_file, hf_model_name):
64
- if uploaded_file.name.lower().endswith('.pdf'):
65
- abstract = extract_and_clean_abstract(uploaded_file)
66
- summary = summarize_text(hf_model_name, abstract)
67
- wav, rate = text_to_speech(summary)
68
- sf.write('/tmp/speech_output.wav', wav, rate)
69
- return '/tmp/speech_output.wav'
70
- else:
71
- return "Error: Please upload a PDF file."
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  iface = gr.Interface(
74
  fn=process_pdf,
75
- inputs=[
76
- gr.File(label="Upload PDF"),
77
- gr.Textbox(label="Hugging Face Model Name for Summarization")
78
- ],
79
  outputs=gr.Audio(label="Audio Summary"),
80
  title="PDF Abstract to Speech",
81
- description="Extracts and summarizes the abstract from a PDF file and converts it to speech."
82
  )
83
 
84
- if __name__ == "__main__":
85
- iface.launch()
86
-
 
 
 
1
  # Here are the imports
2
  import PyPDF2
3
  import re
4
  import torch
5
  from transformers import pipeline
 
6
  from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
7
  from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
8
  import gradio as gr
9
+ import io
10
+ import numpy as np
11
+ import soundfile as sf
12
+ import tempfile
13
 
14
  # Here is the code
15
 
# Matches the text between an "Abstract" heading and the next section marker.
# Accepted terminators: "Introduction" (optionally numbered, e.g.
# "1. Introduction"), "Keywords", or a line that starts with a numbered
# heading such as "1 Background". A bare "1" is deliberately NOT a terminator:
# it would cut the abstract at the first digit 1 (as in "2019" or "10%").
_ABSTRACT_PATTERN = re.compile(
    r"(Abstract|ABSTRACT|abstract)"
    r"(.*?)"
    r"((?:\b1\.?\s*)?(?:Introduction|INTRODUCTION|introduction)"
    r"|Keywords|KEYWORDS|keywords"
    r"|^[ \t]*1\.?[ \t]+[A-Z])",
    re.DOTALL | re.MULTILINE,
)


# Function to extract and clean abstract from PDF
def extract_and_clean_abstract(uploaded_file):
    """Extract the abstract from an uploaded PDF and tidy its whitespace.

    Args:
        uploaded_file: The upload handed over by Gradio. Either an object
            exposing the temporary file path as ``.name`` or a plain path
            string; ``None`` when nothing was uploaded.

    Returns:
        The abstract as a single line of text, or one of the messages
        ``"No file uploaded."`` / ``"Abstract not found."`` when extraction
        is not possible.
    """
    if uploaded_file is None:
        return "No file uploaded."

    # Fall back to the value itself when it has no ``.name`` (path string).
    pdf_path = getattr(uploaded_file, "name", uploaded_file)

    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # ``or ""`` guards against pages for which no text is returned.
        full_text = "".join(page.extract_text() or "" for page in reader.pages)

    match = _ABSTRACT_PATTERN.search(full_text)
    if match is None:
        return "Abstract not found."

    abstract = match.group(2).strip()

    # Flatten line breaks, then drop the "- " left behind by words that were
    # hyphenated across a line break.
    return abstract.replace('\n', ' ').replace('- ', '')
41
 
# Summarization pipeline shared across calls, created on first use so the
# model is loaded once per process instead of once per request.
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _summarizer
    if _summarizer is None:
        _summarizer = pipeline(
            "summarization",
            "pszemraj/led-base-book-summary",
            device=0 if torch.cuda.is_available() else -1,
        )
    return _summarizer


# Function to summarize text
def summarize_text(text):
    """Summarize ``text`` and return the first sentence of the summary.

    Args:
        text: The text to summarize.

    Returns:
        The first sentence of the generated summary, or an empty string when
        ``text`` is empty or contains only whitespace (the model is not
        invoked in that case).
    """
    if not text or not text.strip():
        return ""

    # Generate the summary
    result = _get_summarizer()(
        text,
        min_length=8,
        max_length=25,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=False,
        early_stopping=True,
    )
    summary = result[0]['summary_text']

    # Split on whitespace that follows sentence-ending punctuation and keep
    # only the first piece.
    return re.split(r'(?<=[.:;!?])\s', summary)[0]
67
+
# (task, model, generator) shared across calls, created on first use so the
# TTS checkpoint is fetched and loaded once per process.
_tts_bundle = None


def _get_tts_bundle(device):
    """Return the shared (task, model, generator) triple, loading it on first use."""
    global _tts_bundle
    if _tts_bundle is None:
        # Load the TTS model and task from Hugging Face Hub
        models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
            "facebook/fastspeech2-en-ljspeech",  # Or another TTS model of your choice
            arg_overrides={"vocoder": "hifigan", "fp16": False}
        )
        # Ensure the model is on the correct device
        model = models[0].to(device)
        # Update the config with the data config from the task
        TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
        # Build the generator
        generator = task.build_generator([model], cfg)
        _tts_bundle = (task, model, generator)
    return _tts_bundle


# Function for text-to-speech
def text_to_speech(text):
    """Synthesize ``text`` to speech and write it to a temporary WAV file.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    # Use the GPU when one is available, otherwise the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    task, model, generator = _get_tts_bundle(device)

    # Get the model input from the text and move it next to the model
    sample = TTSHubInterface.get_model_input(task, text)
    net_input = sample["net_input"]
    net_input["src_tokens"] = net_input["src_tokens"].to(device)
    net_input["src_lengths"] = net_input["src_lengths"].to(device)

    # Generate the waveform
    wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)

    # ``cpu()`` is a no-op for CPU tensors; ``detach()`` makes the NumPy
    # conversion safe even if the tensor is attached to an autograd graph.
    samples = wav.detach().cpu().numpy()

    # Reserve a temp path and close the handle before writing, so the file is
    # not opened twice at the same time.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        wav_path = tmp_file.name
    sf.write(wav_path, samples, rate)
    return wav_path
104
+
def process_pdf(uploaded_file):
    """
    Process the uploaded PDF file to extract, summarize the abstract, and convert it to speech.

    Args:
        uploaded_file: The upload handed over by the Gradio file input.

    Returns:
        Path of the audio file containing the spoken summary.

    Raises:
        gr.Error: When no file was uploaded, no abstract could be found, or
            the summary came back empty. Gradio shows the message in the UI.
    """
    abstract = extract_and_clean_abstract(uploaded_file)

    # extract_and_clean_abstract reports failures as plain messages; surface
    # them as errors rather than summarizing and speaking the message itself.
    if abstract in ("No file uploaded.", "Abstract not found."):
        raise gr.Error(abstract)
    if not abstract.strip():
        raise gr.Error("Abstract not found.")

    summary = summarize_text(abstract)
    if not summary or not summary.strip():
        raise gr.Error("Could not produce a summary of the abstract.")

    return text_to_speech(summary)
113
+
# Build the Gradio UI: a single PDF upload in, a single audio clip out.
# Arguments are passed positionally in the order (fn, inputs, outputs).
iface = gr.Interface(
    process_pdf,
    gr.File(label="Upload PDF"),
    gr.Audio(label="Audio Summary"),
    title="PDF Abstract to Speech",
    description="Upload a PDF file to extract its abstract, summarize it, and convert the summary to speech.",
)

# Start serving the app
iface.launch()