Irpan committed on
Commit
18f99c6
·
1 Parent(s): f6cde70
Files changed (1) hide show
  1. util.py +17 -19
util.py CHANGED
@@ -1,10 +1,8 @@
1
  import random
2
  from umsc import UgMultiScriptConverter
3
- import torchaudio
4
  import string
5
  import epitran
6
  from difflib import SequenceMatcher
7
- import numpy as np
8
 
9
  # Lists of Uyghur short and long texts
10
  short_texts = [
@@ -36,23 +34,23 @@ def generate_long_text(script_choice):
36
  return text
37
 
38
  # ASR Utils
39
- def load_and_resample_audio(audio_data, target_rate):
40
- """Load audio and resample based on target sample rate"""
41
- if isinstance(audio_data, tuple):
42
- # microphone
43
- sampling_rate, audio_input = audio_data
44
- audio_input = (audio_input / 32768.0).astype(np.float32)
45
- elif isinstance(audio_data, str):
46
- # file upload
47
- audio_input, sampling_rate = torchaudio.load(audio_data)
48
- else:
49
- return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data))
50
- # Resample if needed
51
- if sampling_rate != target_rate:
52
- resampler = torchaudio.transforms.Resample(sampling_rate, target_rate)
53
- audio_input = resampler(audio_input)
54
-
55
- return audio_input, target_rate
56
 
57
  def calculate_pronunciation_accuracy(reference_text, output_text, language_code='uig-Arab'):
58
  """
 
1
  import random
2
  from umsc import UgMultiScriptConverter
 
3
  import string
4
  import epitran
5
  from difflib import SequenceMatcher
 
6
 
7
  # Lists of Uyghur short and long texts
8
  short_texts = [
 
34
  return text
35
 
36
  # ASR Utils
37
+ # def load_and_resample_audio(audio_data, target_rate):
38
+ # """Load audio and resample based on target sample rate"""
39
+ # if isinstance(audio_data, tuple):
40
+ # # microphone
41
+ # sampling_rate, audio_input = audio_data
42
+ # audio_input = (audio_input / 32768.0).astype(np.float32)
43
+ # elif isinstance(audio_data, str):
44
+ # # file upload
45
+ # audio_input, sampling_rate = torchaudio.load(audio_data)
46
+ # else:
47
+ # return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data))
48
+ # # Resample if needed
49
+ # if sampling_rate != target_rate:
50
+ # resampler = torchaudio.transforms.Resample(sampling_rate, target_rate)
51
+ # audio_input = resampler(audio_input)
52
+
53
+ # return audio_input, target_rate
54
 
55
  def calculate_pronunciation_accuracy(reference_text, output_text, language_code='uig-Arab'):
56
  """