helloWorld199 committed
Commit b69ba0c
1 Parent(s): d5095bd

Update app.py

Files changed (1)
app.py +47 -12
app.py CHANGED
@@ -47,6 +47,22 @@ CACHE_EXAMPLES = os.getenv('CACHE_EXAMPLES', '1') == '1'
 
 base_dir = "/tmp/gradio/"
 
+# Define the sample rate for voice activity detection (must use a multiple of 8k)
+SAMPLING_RATE = 32000
+torch.set_num_threads(1)
+
+# Load the Silero VAD model used for voice detection
+model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
+                              model='silero_vad',
+                              force_reload=True,
+                              onnx=USE_ONNX)
+
+(get_speech_timestamps,
+ save_audio,
+ read_audio,
+ VADIterator,
+ collect_chunks) = utils
+
 def analyze(path):
     # Measure time for inference
     start = time.time()
@@ -64,6 +80,8 @@ def analyze(path):
         for file_path in files:
             json_structure_output = os.path.join(root, file_path)
             print(json_structure_output)
+
+            add_voice_label(json_structure_output, path)
 
     fig = allin1.visualize(
         result,
@@ -107,9 +125,14 @@ def analyze(path):
 
 def add_voice_label(json_file, audio_path):
     # Load the JSON file
-    file_path = 'path_to_your_json_file.json'
-    with open(file_path, 'r') as f:
+    with open(json_file, 'r') as f:
         data = json.load(f)
+
+    # Create the VAD iterator object
+    vad_iterator = VADIterator(model)
+
+    # Read the input audio file
+    wav = read_audio(audio_path, sampling_rate=SAMPLING_RATE)
 
     # Access the segments
     segments = data['segments']
@@ -118,18 +141,30 @@ def add_voice_label(json_file, audio_path):
     for segment in segments:
         start = segment['start']
         end = segment['end']
-
-        audio_segment = get_audio_segment()
 
+        start_sample = int(start * SAMPLING_RATE)
+        end_sample = int(end * SAMPLING_RATE)
 
-
-
-    # Add the "voice" label to each segment. It contains either Yes or No.
-    for segment in segments:
-        segment['voice'] = contains_voice(segment)
-
-
-    def get_audio_segment(audio_path, ):
+        speech_probs = []
+        window_size_samples = 1536
+        for i in range(0, len(wav), window_size_samples):
+            chunk = wav[i : i + window_size_samples]
+            if len(chunk) < window_size_samples:
+                break
+            speech_prob = model(chunk, SAMPLING_RATE).item()
+            speech_probs.append(speech_prob)
+        vad_iterator.reset_states()  # reset model states after each audio
+
+        mean_probability = np.mean(speech_probs)
+        print(mean_probability)
+
+        if mean_probability >= 0.7:
+            segment['voice'] = "Yes"
+        else:
+            segment['voice'] = "No"
+
+    with open(json_file, 'w') as f:
+        json.dump(data, f, indent=4)
 
 
 
 
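For reference, the Silero VAD pieces this commit wires into app.py can be exercised on their own. The sketch below is not part of the commit: 'sample.wav' is a placeholder path, ONNX is disabled for simplicity, and it uses the repo's documented 16 kHz rate together with the bundled get_speech_timestamps helper rather than the manual windowing loop in the diff.

import torch

# Load the Silero VAD model plus its helper utilities from torch.hub
model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                              model='silero_vad',
                              onnx=False)
(get_speech_timestamps, save_audio, read_audio,
 VADIterator, collect_chunks) = utils

# Read a (placeholder) audio file at the officially supported 16 kHz rate
wav = read_audio('sample.wav', sampling_rate=16000)

# Returns a list like [{'start': ..., 'end': ...}, ...] measured in samples
speech_timestamps = get_speech_timestamps(wav, model, sampling_rate=16000)
print(speech_timestamps)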
 
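A note on the windowing loop: as committed, start_sample and end_sample are computed but never used, and the inner for loop scans the full wav for every segment, so each segment receives the same whole-file mean probability. Below is a hedged sketch of scoring one segment at a time; score_segment is a hypothetical helper name, not something in the commit, and it assumes model, wav, and SAMPLING_RATE are defined as above.

import numpy as np

# Hypothetical helper (not in the commit): run VAD windows over one
# segment's samples only, instead of over the entire file.
def score_segment(wav, start_sample, end_sample, model, sampling_rate,
                  window_size_samples=1536):
    segment_wav = wav[start_sample:end_sample]  # restrict VAD to this segment
    speech_probs = []
    for i in range(0, len(segment_wav), window_size_samples):
        chunk = segment_wav[i : i + window_size_samples]
        if len(chunk) < window_size_samples:
            break  # drop the trailing partial window, as the commit does
        speech_probs.append(model(chunk, sampling_rate).item())
    # A segment shorter than one window yields no probabilities at all
    return float(np.mean(speech_probs)) if speech_probs else 0.0

Inside the per-segment loop, mean_probability = score_segment(wav, start_sample, end_sample, model, SAMPLING_RATE) would then feed the same 0.7 threshold check.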