camparchimedes committed on
Commit
b3ed824
·
verified ·
1 Parent(s): 440d6b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -18
app.py CHANGED
@@ -35,9 +35,8 @@ iface = gr.Interface(
35
  theme="default",
36
  layout="vertical",
37
  live=False
38
- )import gradio as gr
39
- import warnings
40
- import torch
41
  from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoModelForSeq2SeqLM
42
  from pydub import AudioSegment
43
  import soundfile as sf
@@ -49,23 +48,21 @@ import time
49
 
50
  nltk.download('punkt')
51
 
52
- warnings.filterwarnings("ignore")
53
-
54
- # Load processor and model for transcription
55
  processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
56
  transcription_model = AutoModelForSpeechSeq2Seq.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
57
 
58
- # Load tokenizer and model for summarization
59
  summarization_tokenizer = AutoTokenizer.from_pretrained("NbAiLab/norbert-summarization")
60
  summarization_model = AutoModelForSeq2SeqLM.from_pretrained("NbAiLab/norbert-summarization")
61
 
62
- # Set up the device
63
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
64
  torch_dtype = torch.float32
65
 
66
- # Move the models to the device
67
  transcription_model.to(device)
68
- summarization_model.to(device)
69
 
70
  def convert_to_wav(audio_file):
71
  audio = AudioSegment.from_file(audio_file, format="m4a")
@@ -116,7 +113,7 @@ def transcribe_audio(audio_file, batch_size=4):
116
  summary = summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
117
  return summary
118
 
119
- # HTML for banner and additional image
120
  banner_html = """
121
  <div style="text-align: center;">
122
  <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="87%" height="auto">
@@ -126,7 +123,7 @@ banner_html = """
126
  </div>
127
  """
128
 
129
- # Create Gradio interface
130
  iface = gr.Blocks()
131
 
132
  with iface:
@@ -151,13 +148,13 @@ def save_to_pdf(transcription, summary):
151
  pdf.add_page()
152
  pdf.set_font("Arial", size=12)
153
 
154
- # Add transcription
155
  pdf.multi_cell(0, 10, "Transcription:\n" + transcription)
156
 
157
- # Add a space between transcription and summary
158
  pdf.ln(10)
159
 
160
- # Add summary
161
  pdf.multi_cell(0, 10, "Summary:\n" + summary)
162
 
163
  pdf_output_path = "transcription_summary.pdf"
@@ -166,11 +163,9 @@ def save_to_pdf(transcription, summary):
166
 
167
 
168
 
169
- # Launch the interface
170
  iface.launch(share=True, debug=True)
171
 
172
- # Launch the interface
173
- iface.launch(share=True, debug=True)
174
 
175
 
176
 
 
35
  theme="default",
36
  layout="vertical",
37
  live=False
38
+
39
+
 
40
  from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoModelForSeq2SeqLM
41
  from pydub import AudioSegment
42
  import soundfile as sf
 
48
 
49
  nltk.download('punkt')
50
 
51
+ # transcription
 
 
52
  processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
53
  transcription_model = AutoModelForSpeechSeq2Seq.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
54
 
55
+ # summarization
56
  summarization_tokenizer = AutoTokenizer.from_pretrained("NbAiLab/norbert-summarization")
57
  summarization_model = AutoModelForSeq2SeqLM.from_pretrained("NbAiLab/norbert-summarization")
58
 
59
+ # setup
60
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
61
  torch_dtype = torch.float32
62
 
63
+ # move 'em
64
  transcription_model.to(device)
65
+ summarization_model.to(device) # PS. model needs to be told to use graph-based summary method (Lexname?)
66
 
67
  def convert_to_wav(audio_file):
68
  audio = AudioSegment.from_file(audio_file, format="m4a")
 
113
  summary = summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
114
  return summary
115
 
116
+ # HTML syntax for imagery
117
  banner_html = """
118
  <div style="text-align: center;">
119
  <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="87%" height="auto">
 
123
  </div>
124
  """
125
 
126
+ # Gradio UI
127
  iface = gr.Blocks()
128
 
129
  with iface:
 
148
  pdf.add_page()
149
  pdf.set_font("Arial", size=12)
150
 
151
+ # include transcription
152
  pdf.multi_cell(0, 10, "Transcription:\n" + transcription)
153
 
154
+ # paragraph space
155
  pdf.ln(10)
156
 
157
+ # include summary
158
  pdf.multi_cell(0, 10, "Summary:\n" + summary)
159
 
160
  pdf_output_path = "transcription_summary.pdf"
 
163
 
164
 
165
 
166
+ # run
167
  iface.launch(share=True, debug=True)
168
 
 
 
169
 
170
 
171