ahmedghani committed on
Commit: b3d591c
1 Parent(s): 6c1ce5e

Update app.py

Files changed (1)
  1. app.py +22 -37
app.py CHANGED
@@ -1,41 +1,25 @@
  from svoice.separate import *
- import scipy.io as sio
- from scipy.io.wavfile import write
+ import scipy.io.wavfile as wav
  import gradio as gr
  import os
- # from transformers import AutoProcessor, pipeline
- # from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
- import whisper
+ import torch
+ import soundfile as sf
+ from transformers import pipeline
  from glob import glob
  load_model()

+ device = "cuda" if torch.cuda.is_available() else "cpu"
  BASE_PATH = os.path.dirname(os.path.abspath(__file__))
  os.makedirs('input', exist_ok=True)
  os.makedirs('separated', exist_ok=True)

- # print("Loading ASR model...")
- # processor = AutoProcessor.from_pretrained("openai/whisper-small")
- # if not os.path.exists("whisper_checkpoint"):
- # model = ORTModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small", from_transformers=True)
- # speech_recognition_pipeline = pipeline(
- # "automatic-speech-recognition",
- # model=model,
- # feature_extractor=processor.feature_extractor,
- # tokenizer=processor.tokenizer,
- # )
- # os.makedirs('whisper_checkpoint', exist_ok=True)
- # model.save_pretrained("whisper_checkpoint")
- # else:
- # model = ORTModelForSpeechSeq2Seq.from_pretrained("whisper_checkpoint", from_transformers=False)
- # speech_recognition_pipeline = pipeline(
- # "automatic-speech-recognition",
- # model=model,
- # feature_extractor=processor.feature_extractor,
- # tokenizer=processor.tokenizer,
- # )
- # print("Whisper ASR model loaded.")
+ print("Loading ASR model...")
+ pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=0 if device == "cuda" else -1)
+ print("ASR model loaded!")

- model = whisper.load_model("base")
+ def transcribe_audio(audiopath):
+     audio_input, sr = sf.read(audiopath)
+     return pipe(audio_input, sampling_rate=sr, return_tensors=False, padding=True, max_new_tokens=500)['text']

  def separator(audio, rec_audio, example):
      outputs= {}
@@ -44,21 +28,22 @@ def separator(audio, rec_audio, example):
      for f in glob('separated/*'):
          os.remove(f)
      if audio:
-         write('input/original.wav', audio[0], audio[1])
+         wav.write('input/original.wav', audio[0], audio[1])
      elif rec_audio:
-         write('input/original.wav', rec_audio[0], rec_audio[1])
+         wav.write('input/original.wav', rec_audio[0], rec_audio[1])
      else:
          os.system(f'cp {example} input/original.wav')
      separate_demo(mix_dir="./input")
      separated_files = glob(os.path.join('separated', "*.wav"))
-     separated_files = [f for f in separated_files if "original.wav" not in f]
-     outputs['transcripts'] = []
-     for file in sorted(separated_files):
-         # separated_audio = sio.wavfile.read(file)
-         # outputs['transcripts'].append(speech_recognition_pipeline(separated_audio[1])['text'])
-         outputs['transcripts'].append(whisper.transcribe(file)["text"])
-     return sorted(separated_files) + outputs['transcripts']
+     separated_files = sorted([f for f in separated_files if "original.wav" not in f])
+     outputs["transcripts"] = []

+     for i, f in enumerate(separated_files):
+         print(f"Transcribing separated audio {i+1} ...")
+         outputs["transcripts"].append(transcribe_audio(f))
+         print("Text:", outputs["transcripts"][-1])
+     return separated_files + outputs['transcripts']
+
  def set_example_audio(example: list) -> dict:
      return gr.Audio.update(value=example[0])

@@ -108,7 +93,7 @@ with demo:
          "samples/mixture2.wav",
          "samples/mixture3.wav"
      ]
-     example_selector = gr.inputs.Dropdown(examples, label="Example Audio", default="samples/mixture1.wav")
+     example_selector = gr.inputs.Radio(examples, label="Example Audio")
      button.click(separator, inputs=[input_audio, rec_audio, example_selector], outputs=outputs_audio + outputs_text)

  demo.launch()
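
For readers skimming the diff, the functional change is that transcription now goes through the Hugging Face transformers ASR pipeline with the openai/whisper-base checkpoint instead of the standalone whisper package. Below is a minimal standalone sketch of that pipeline usage, not the commit's own code: the wav path and device selection are illustrative placeholders.

# Minimal sketch of the ASR path this commit switches to: the transformers
# "automatic-speech-recognition" pipeline loaded with openai/whisper-base.
# "separated/example_source.wav" is a placeholder path, not a file from this repo.
import torch
from transformers import pipeline

device = 0 if torch.cuda.is_available() else -1  # GPU index, or -1 for CPU
asr = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)

# The pipeline accepts a path to an audio file and returns a dict containing the text.
result = asr("separated/example_source.wav")
print(result["text"])

Passing a file path lets the pipeline decode and resample the audio itself, which keeps the sketch independent of soundfile; the committed transcribe_audio helper instead reads the waveform with soundfile and hands the array to the pipeline.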