abnerh committed on
Commit
1221d26
1 Parent(s): e9c72a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -7
app.py CHANGED
@@ -13,10 +13,6 @@ import gradio as gr
13
  model = "facebook/wav2vec2-large-960h-lv60-self"
14
  tokenizer = Wav2Vec2Tokenizer.from_pretrained(model)
15
  asr_model = Wav2Vec2ForCTC.from_pretrained(model)#.to('cuda')
16
- vocab_dict = tokenizer.get_vocab()
17
- sort_vocab = sorted((value, key) for (key,value) in vocab_dict.items())
18
- vocab = ([x[1].replace("|", " ") if x[1] not in tokenizer.all_special_tokens else "_" for x in sort_vocab])
19
-
20
 
21
  # Line count for SRT file
22
  line_count = 0
@@ -27,7 +23,6 @@ def sort_alphanumeric(data):
27
 
28
  return sorted(data, key = alphanum_key)
29
 
30
-
31
  def transcribe_audio(tokenizer, asr_model, audio_file, file_handle):
32
  # Run Wav2Vec2.0 inference on each audio file generated after VAD segmentation.
33
  global line_count
@@ -49,8 +44,7 @@ def transcribe_audio(tokenizer, asr_model, audio_file, file_handle):
49
  if len(infered_text) > 1:
50
  line_count += 1
51
  write_to_file(file_handle, infered_text, line_count, limits)
52
-
53
-
54
  def get_subs(input_file):
55
  # Get directory for audio
56
  base_directory = os.getcwd()
 
13
  model = "facebook/wav2vec2-large-960h-lv60-self"
14
  tokenizer = Wav2Vec2Tokenizer.from_pretrained(model)
15
  asr_model = Wav2Vec2ForCTC.from_pretrained(model)#.to('cuda')
 
 
 
 
16
 
17
  # Line count for SRT file
18
  line_count = 0
 
23
 
24
  return sorted(data, key = alphanum_key)
25
 
 
26
  def transcribe_audio(tokenizer, asr_model, audio_file, file_handle):
27
  # Run Wav2Vec2.0 inference on each audio file generated after VAD segmentation.
28
  global line_count
 
44
  if len(infered_text) > 1:
45
  line_count += 1
46
  write_to_file(file_handle, infered_text, line_count, limits)
47
+
 
48
  def get_subs(input_file):
49
  # Get directory for audio
50
  base_directory = os.getcwd()