Spaces: Build error
ahmedghani committed · Commit 39e4af1 · 1 Parent(s): c65d563
added clean whisper asr implementation
Browse files:
- app.py (+28 / -24)
- requirements.txt (+1 / -5)
app.py
CHANGED
@@ -4,7 +4,8 @@ from scipy.io.wavfile import write
 import gradio as gr
 import os
 from transformers import AutoProcessor, pipeline
-from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
+# from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
+import whisper
 from glob import glob
 load_model()
 
@@ -12,27 +13,29 @@ BASE_PATH = os.path.dirname(os.path.abspath(__file__))
 os.makedirs('input', exist_ok=True)
 os.makedirs('separated', exist_ok=True)
 
-print("Loading ASR model...")
-processor = AutoProcessor.from_pretrained("openai/whisper-small")
-if not os.path.exists("whisper_checkpoint"):
-    model = ORTModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small", from_transformers=True)
-    speech_recognition_pipeline = pipeline(
-        "automatic-speech-recognition",
-        model=model,
-        feature_extractor=processor.feature_extractor,
-        tokenizer=processor.tokenizer,
-    )
-    os.makedirs('whisper_checkpoint', exist_ok=True)
-    model.save_pretrained("whisper_checkpoint")
-else:
-    model = ORTModelForSpeechSeq2Seq.from_pretrained("whisper_checkpoint", from_transformers=False)
-    speech_recognition_pipeline = pipeline(
-        "automatic-speech-recognition",
-        model=model,
-        feature_extractor=processor.feature_extractor,
-        tokenizer=processor.tokenizer,
-    )
-print("Whisper ASR model loaded.")
+# print("Loading ASR model...")
+# processor = AutoProcessor.from_pretrained("openai/whisper-small")
+# if not os.path.exists("whisper_checkpoint"):
+#     model = ORTModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small", from_transformers=True)
+#     speech_recognition_pipeline = pipeline(
+#         "automatic-speech-recognition",
+#         model=model,
+#         feature_extractor=processor.feature_extractor,
+#         tokenizer=processor.tokenizer,
+#     )
+#     os.makedirs('whisper_checkpoint', exist_ok=True)
+#     model.save_pretrained("whisper_checkpoint")
+# else:
+#     model = ORTModelForSpeechSeq2Seq.from_pretrained("whisper_checkpoint", from_transformers=False)
+#     speech_recognition_pipeline = pipeline(
+#         "automatic-speech-recognition",
+#         model=model,
+#         feature_extractor=processor.feature_extractor,
+#         tokenizer=processor.tokenizer,
+#     )
+# print("Whisper ASR model loaded.")
+
+model = whisper.load_model("base")
 
 def separator(audio, rec_audio, example):
     outputs= {}
@@ -51,8 +54,9 @@ def separator(audio, rec_audio, example):
     separated_files = [f for f in separated_files if "original.wav" not in f]
     outputs['transcripts'] = []
     for file in sorted(separated_files):
-        separated_audio = sio.wavfile.read(file)
-        outputs['transcripts'].append(speech_recognition_pipeline(separated_audio[1])['text'])
+        # separated_audio = sio.wavfile.read(file)
+        # outputs['transcripts'].append(speech_recognition_pipeline(separated_audio[1])['text'])
+        outputs['transcripts'].append(whisper.transcribe(file)["text"])
     return sorted(separated_files) + outputs['transcripts']
 
 def set_example_audio(example: list) -> dict:
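For context, a minimal sketch (not part of the commit) of the call pattern the openai-whisper replacement path typically uses: whisper.load_model() returns a model whose transcribe() method accepts a file path and returns a dict with a "text" field, while the module-level whisper.transcribe() expects the model as its first argument. The separated_files argument below is a stand-in for the stems produced by separator().

# Sketch only, assuming the openai-whisper package: typical transcription loop
import whisper

model = whisper.load_model("base")  # same checkpoint name as in the diff

def transcribe_files(separated_files):
    transcripts = []
    for path in sorted(separated_files):
        result = model.transcribe(path)   # accepts a path, returns a dict
        transcripts.append(result["text"])
    return transcripts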
requirements.txt
CHANGED
@@ -6,13 +6,9 @@ pystoi==0.3.3
 librosa==0.7.1
 numba==0.48
 numpy
-flask
-flask-cors
-uvicorn[standard]
 asgiref
 gradio
-transformers==4.24.0
 torch
 torchvision
 torchaudio
-
+whisper
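A note on the added dependency: OpenAI's Whisper library is published on PyPI as openai-whisper, and the bare name whisper may resolve to an unrelated package, so the requirements entry that provides the import whisper module used in app.py would typically look like the sketch below. This is an assumption about the intended dependency, not part of the commit.

# requirements.txt (sketch, assuming the OpenAI Whisper library is intended)
openai-whisper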