yashsrivastava committed on
Commit
21d4f8b
1 Parent(s): 72f1770

Delete aap.py

Browse files
Files changed (1) hide show
  1. aap.py +0 -63
aap.py DELETED
@@ -1,63 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- # In[ ]:
5
-
6
-
7
- import soundfile as sf
8
- import torch
9
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
10
- import argparse
11
- from glob import glob
12
- import torchaudio
13
- import subprocess
14
- import gradio as gr
15
-
16
- resampler = torchaudio.transforms.Resample(48_000, 16_000)  # 48 kHz -> 16 kHz resampler; in this view it is only referenced by a commented-out line in parse_transcription
17
-
18
def get_filename(wav_file):
    """Convert an audio file to a 16 kHz, 16-bit, mono WAV using ``sox``.

    Args:
        wav_file: Path of the source audio file.

    Returns:
        Path of the converted file, ``/tmp/<stem>_16.wav``, where ``<stem>``
        is the source file name without its extension.

    Raises:
        FileNotFoundError: If the ``sox`` executable cannot be found.
        subprocess.CalledProcessError: If ``sox`` exits with a non-zero
            status (e.g. unreadable or missing input file).
    """
    import os  # local import so this block does not rely on file-level changes

    # splitext instead of a fixed [:-4] slice: works for any extension length.
    stem = os.path.splitext(os.path.basename(wav_file))[0]
    filename_new = '/tmp/' + stem + '_16.wav'

    # Pass the arguments as a list and do not go through a shell, so spaces
    # or shell metacharacters in the path cannot break or hijack the command.
    subprocess.run(
        ["sox", wav_file, "-r", "16000", "-b", "16", "-c", "1", filename_new],
        check=True,  # fail here instead of returning a path that was never written
    )
    return filename_new
25
-
26
_MODEL_NAME = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
_PIPELINE = {}  # filled on first use with the shared "processor" and "model"


def _load_pipeline():
    """Return the (processor, model) pair, loading it on first use only."""
    if not _PIPELINE:
        # Load both into locals first so a failure on the second load does
        # not leave a half-initialised cache behind.
        processor = Wav2Vec2Processor.from_pretrained(_MODEL_NAME)
        model = Wav2Vec2ForCTC.from_pretrained(_MODEL_NAME)
        _PIPELINE["processor"] = processor
        _PIPELINE["model"] = model
    return _PIPELINE["processor"], _PIPELINE["model"]


def parse_transcription(wav_file):
    """Transcribe an audio clip with the pretrained Wav2Vec2 CTC model.

    Args:
        wav_file: Object whose ``name`` attribute is the path of the audio
            file to transcribe.

    Returns:
        The decoded transcription produced by the processor for the clip.
    """
    # Reuse the pretrained processor/model across calls instead of
    # re-loading them for every request.
    processor, model = _load_pipeline()

    # Convert to 16 kHz mono WAV, then read the samples.
    converted_path = get_filename(wav_file.name)
    audio_input, _ = sf.read(converted_path)

    # Pad input values and return a PyTorch tensor.
    input_values = processor(
        audio_input, sampling_rate=16_000, return_tensors="pt"
    ).input_values

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)

    # Decode the most likely token ids of the single item in the batch.
    return processor.decode(predicted_ids[0], skip_special_tokens=True)
49
-
50
-
51
- # In[ ]:
52
-
53
-
54
- import gradio as gr
55
# Text displayed on the Gradio page.
title = "Speech-to-Text-English"
description = "Upload a English audio clip, and let AI do the hard work of transcribing."

# Audio input component, recorded from the microphone.
microphone_input = gr.inputs.Audio(
    source="microphone",
    type="file",
    label="Record Audio File",
)

# Wire the transcription function to the UI and start serving it.
demo = gr.Interface(
    fn=parse_transcription,
    inputs=microphone_input,
    outputs="text",
    title=title,
    description=description,
)
demo.launch(inline=False)
63
-