clr committed on
Commit 56f1ec9 · 1 Parent(s): 149c35c

Update app.py

Files changed (1): app.py (+24 -5)
app.py CHANGED
@@ -1,11 +1,18 @@
 import gradio as gr
+import soundfile as sf
+import torch, torchaudio
 from datasets import load_dataset
+import matplotlib.pyplot as plt
 
 
 MODEL_NAME="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"
 model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME).to(device)
 processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME) # do i need this? can't remember
 
+torch.random.manual_seed(0)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
 #def greet(name):
 #    return "Hello " + name + "!!"
 #iface = gr.Interface(fn=greet, inputs="text", outputs="text")
@@ -15,17 +22,28 @@ processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME) # do i need this? can't remember
 
 #ds = load_dataset("language-and-voice-lab/samromur_asr",split='train',streaming=True)
 #ds = load_dataset("language-and-voice-lab/samromur_asr",split='test')
-
+#ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
 
 def show_ex(exnum):
     #return(ds['audio_id'][exnum])
     return(exnum)
+
 
-def recc(ul):
-
-    return(ul,api(ul))
-    #wait_for_model set true??
-    #anyway in a minute it timed out....
+def recc(a_f):
+    wav, sr = sf.read(a_f, dtype=np.float32)
+    if len(wav.shape) == 2:
+        wav = wav.mean(1)
+    if sr != 16000:
+        wlen = int(wav.shape[0] / sr * 16000)
+        wav = signal.resample(wav, wlen)
+
+    with torch.inference_mode():
+        wav = torch.from_numpy(wav).unsqueeze(0)
+        if torch.cuda.is_available():
+            wav = wav.cuda()
+        input_values = processor(wav).input_values
+        return input_values
+
 
 bl = gr.Blocks()
 with bl:
@@ -35,6 +53,7 @@ with bl:
     #text_button.click(show_ex, inputs=text_input, outputs=text_output)
 
     audio_file = gr.Audio(type="filepath")
+    #ipt =
     text_button.click(recc, inputs=audio_file, outputs=text_output)
 
 
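
Note: even after this commit, app.py will not run as-is. Wav2Vec2ForCTC and Wav2Vec2Processor are used without being imported from transformers, device is referenced on the model-loading line before the new line that defines it, np and signal are never imported, and recc returns raw input_values instead of a transcript. The following is a minimal runnable sketch of where the diff appears to be heading, assuming the standard transformers wav2vec2 CTC workflow; the forward pass and greedy decode are not in the commit, and scipy.signal.resample is swapped for torchaudio.functional.resample since torchaudio is already imported.

import soundfile as sf
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

MODEL_NAME = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # defined before first use
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME).to(device)
# Answering the in-code question: yes, the processor is needed. It turns raw
# audio into input_values on the way in and token ids back into text on the way out.
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)

def recc(a_f):
    wav, sr = sf.read(a_f, dtype="float32")
    if wav.ndim == 2:            # mix stereo down to mono
        wav = wav.mean(1)
    if sr != 16000:              # the model expects 16 kHz input
        wav = torchaudio.functional.resample(torch.from_numpy(wav), sr, 16000).numpy()
    inputs = processor(wav, sampling_rate=16000, return_tensors="pt")
    with torch.inference_mode():
        logits = model(inputs.input_values.to(device)).logits
    pred_ids = torch.argmax(logits, dim=-1)       # greedy CTC decode
    return processor.batch_decode(pred_ids)[0]    # transcript string

With recc returning a string, the existing text_button.click(recc, inputs=audio_file, outputs=text_output) wiring would display the transcript directly.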
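
The commented-out Samrómur lines point at the same 16 kHz requirement on the dataset side. If they are revived, the datasets library can resample during decoding via Audio casting, exactly as the commented cast_column line suggests; a sketch, assuming the split name from the diff:

from datasets import load_dataset, Audio

ds = load_dataset("language-and-voice-lab/samromur_asr", split="test")
# Decode every clip at 16 kHz so examples can be fed straight to the processor.
ds = ds.cast_column("audio", Audio(sampling_rate=16_000))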