Update app.py
app.py CHANGED
@@ -35,9 +35,8 @@ iface = gr.Interface(
     theme="default",
     layout="vertical",
     live=False
-
-
-import torch
+
+
 from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoModelForSeq2SeqLM
 from pydub import AudioSegment
 import soundfile as sf
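Note: this hunk drops the import torch line, yet torch.device(...) and torch.float32 are still used further down, so the import presumably survives elsewhere in app.py or needs to be restored. For reference, a consolidated import block covering everything the setup below touches might look like the following sketch; not every line is visible in this diff, and the fpdf import is assumed from the later save_to_pdf hunk.

import torch
import nltk
import soundfile as sf
import gradio as gr
from pydub import AudioSegment
from transformers import (
    AutoProcessor,
    AutoModelForSpeechSeq2Seq,
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
)
from fpdf import FPDF  # assumed: used by save_to_pdf later in the file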
@@ -49,23 +48,21 @@ import time
 
 nltk.download('punkt')
 
-
-
-# Load processor and model for transcription
+# transcription
 processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
 transcription_model = AutoModelForSpeechSeq2Seq.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
 
-#
+# summarization
 summarization_tokenizer = AutoTokenizer.from_pretrained("NbAiLab/norbert-summarization")
 summarization_model = AutoModelForSeq2SeqLM.from_pretrained("NbAiLab/norbert-summarization")
 
-#
+# setup
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 torch_dtype = torch.float32
 
-#
+# move 'em
 transcription_model.to(device)
-summarization_model.to(device)
+summarization_model.to(device)  # PS. model needs to be told to use graph-based summary method (Lexname?)
 
 def convert_to_wav(audio_file):
     audio = AudioSegment.from_file(audio_file, format="m4a")
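The trailing context shows convert_to_wav opening an m4a with pydub; the rest of the function is outside this diff. A minimal sketch of what it likely does, assuming it resamples for the Whisper-style model and writes a temporary WAV (the output path here is hypothetical, not taken from the diff):

def convert_to_wav(audio_file):
    # Decode the uploaded m4a and normalise it to 16 kHz mono,
    # which Whisper-family checkpoints expect.
    audio = AudioSegment.from_file(audio_file, format="m4a")
    audio = audio.set_frame_rate(16000).set_channels(1)
    wav_path = "temp_audio.wav"  # hypothetical temp path
    audio.export(wav_path, format="wav")
    return wav_path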
@@ -116,7 +113,7 @@ def transcribe_audio(audio_file, batch_size=4):
     summary = summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
     return summary
 
-# HTML for
+# HTML syntax for imagery
 banner_html = """
 <div style="text-align: center;">
     <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="87%" height="auto">
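The hunk header references transcribe_audio(audio_file, batch_size=4), whose body is not part of this change. Given the processor, transcription_model, device and torch_dtype defined above, a minimal sketch of such a function could look like this; the chunking that would make batch_size meaningful is not shown and is an assumption:

def transcribe_audio(audio_file, batch_size=4):
    # Read the converted WAV and run it through the NB-Whisper model.
    audio, sample_rate = sf.read(convert_to_wav(audio_file))
    inputs = processor(audio, sampling_rate=sample_rate, return_tensors="pt")
    input_features = inputs.input_features.to(device, dtype=torch_dtype)
    predicted_ids = transcription_model.generate(input_features)
    # batch_size would apply once long audio is split into chunks; omitted here.
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]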
@@ -126,7 +123,7 @@ banner_html = """
 </div>
 """
 
-#
+# Gradio UI
 iface = gr.Blocks()
 
 with iface:
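The components declared inside the with iface: block are not visible in this diff. A hypothetical wiring consistent with the rest of the file (audio in, transcription and summary out) might look like the sketch below; the component names and the summarize_text helper are assumptions:

with iface:
    gr.HTML(banner_html)
    audio_input = gr.Audio(type="filepath", label="Upload audio")
    run_button = gr.Button("Transcribe and summarize")
    transcription_output = gr.Textbox(label="Transcription")
    summary_output = gr.Textbox(label="Summary")

    def run_pipeline(audio_file):
        # Chain the two models defined earlier in app.py.
        transcription = transcribe_audio(audio_file)
        return transcription, summarize_text(transcription)

    run_button.click(run_pipeline, inputs=audio_input, outputs=[transcription_output, summary_output])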
@@ -151,13 +148,13 @@ def save_to_pdf(transcription, summary):
     pdf.add_page()
     pdf.set_font("Arial", size=12)
 
-    #
+    # include transcription
     pdf.multi_cell(0, 10, "Transcription:\n" + transcription)
 
-    #
+    # paragraph space
     pdf.ln(10)
 
-    #
+    # include summary
     pdf.multi_cell(0, 10, "Summary:\n" + summary)
 
     pdf_output_path = "transcription_summary.pdf"
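Apart from the new comments, save_to_pdf is visible almost in full here; only the FPDF() construction at the top and the final write-out fall outside the hunk. Reassembled, with those two assumed lines filled in, the function reads roughly as:

def save_to_pdf(transcription, summary):
    pdf = FPDF()  # assumed; the constructor line is outside this hunk
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    # include transcription
    pdf.multi_cell(0, 10, "Transcription:\n" + transcription)

    # paragraph space
    pdf.ln(10)

    # include summary
    pdf.multi_cell(0, 10, "Summary:\n" + summary)

    pdf_output_path = "transcription_summary.pdf"
    pdf.output(pdf_output_path)  # assumed; the write-out line is outside this hunk
    return pdf_output_path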
@@ -166,11 +163,9 @@ def save_to_pdf(transcription, summary):
 
 
 
-#
+# run
 iface.launch(share=True, debug=True)
 
-# Launch the interface
-iface.launch(share=True, debug=True)
 
 
 
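A closing note on the comment added at summarization_model.to(device): a seq2seq checkpoint loaded via AutoModelForSeq2SeqLM cannot simply be told to use a graph-based summary method; LexRank or TextRank style summarization is a separate, extractive step. If that route is wanted, a small sketch built on the nltk punkt tokenizer already downloaded in app.py could look like this (networkx and scikit-learn would be new dependencies; none of this is part of the commit):

import networkx as nx
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def graph_summarize(text, num_sentences=3):
    # Rank sentences with PageRank over a TF-IDF cosine-similarity graph,
    # then return the top sentences in their original order.
    sentences = sent_tokenize(text)
    if len(sentences) <= num_sentences:
        return text
    tfidf = TfidfVectorizer().fit_transform(sentences)
    similarity = cosine_similarity(tfidf)
    scores = nx.pagerank(nx.from_numpy_array(similarity))
    top = sorted(range(len(sentences)), key=lambda i: scores[i], reverse=True)[:num_sentences]
    return " ".join(sentences[i] for i in sorted(top))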