XDHDD committed on
Commit
4e71ba8
1 Parent(s): d5af255

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -2
app.py CHANGED
@@ -24,6 +24,9 @@ from PLCMOS.plc_mos import PLCMOSEstimator
24
  from speechmos import dnsmos
25
  from speechmos import plcmos
26
 
 
 
 
27
 
28
  @st.cache
29
  def load_model():
@@ -272,10 +275,10 @@ if st.button('Сгенерировать потери'):
272
 
273
  PLC_massv2 = [plcmos.run("target.wav", sr=16000)['plcmos'], plcmos.run("lossy.wav", sr=16000)['plcmos'], plcmos.run("enhanced.wav", sr=16000)['plcmos']]
274
 
275
- DNS = [dnsmos.run("target.wav", sr=16000)['ovrl_mos'], dnsmos.run("lossy.wav", sr=16000)['ovrl_mos'], dnsmos.run("enhanced.wav", sr=16000)['ovrl_mos']]
276
 
277
  df_1['PLCMOSv2'] = PLC_massv2
278
- df_1['DNSMOS'] = DNS
279
 
280
 
281
  #df_2 = pd.DataFrame(columns=['DNSMOS', 'PLCMOSv2'])
@@ -290,6 +293,33 @@ if st.button('Сгенерировать потери'):
290
  #df_2.columns = new_columns
291
  #df_merged = df_1.merge(df_2, left_index=True, right_index=True)
292
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
  st.dataframe(df_1)
294
 
295
 
 
24
  from speechmos import dnsmos
25
  from speechmos import plcmos
26
 
27
+ import speech_recognition as sr
28
+ from jiwer import wer
29
+
30
 
31
  @st.cache
32
  def load_model():
 
275
 
276
  PLC_massv2 = [plcmos.run("target.wav", sr=16000)['plcmos'], plcmos.run("lossy.wav", sr=16000)['plcmos'], plcmos.run("enhanced.wav", sr=16000)['plcmos']]
277
 
278
+ #DNS = [dnsmos.run("target.wav", sr=16000)['ovrl_mos'], dnsmos.run("lossy.wav", sr=16000)['ovrl_mos'], dnsmos.run("enhanced.wav", sr=16000)['ovrl_mos']]
279
 
280
  df_1['PLCMOSv2'] = PLC_massv2
281
+ #df_1['DNSMOS'] = DNS
282
 
283
 
284
  #df_2 = pd.DataFrame(columns=['DNSMOS', 'PLCMOSv2'])
 
293
  #df_2.columns = new_columns
294
  #df_merged = df_1.merge(df_2, left_index=True, right_index=True)
295
 
296
+
297
+ r = sr.Recognizer ()
298
+
299
+ harvard = sr.AudioFile('target.wav')
300
+ with harvard as source:
301
+ audio = r.record(source)
302
+
303
+ orig = r.recognize_google(audio, language = "ru-RU")
304
+
305
+ harvard = sr.AudioFile('lossy.wav')
306
+ with harvard as source:
307
+ audio = r.record(source)
308
+ lossy = r.recognize_google(audio, language = "ru-RU")
309
+
310
+
311
+ harvard = sr.AudioFile('enhanced.wav')
312
+ with harvard as source:
313
+ audio = r.record(source)
314
+ enhanced = r.recognize_google(audio, language = "ru-RU")
315
+
316
+ error1 = wer(orig, orig)
317
+ error2 = wer(orig, lossy)
318
+ error3 = wer(orig, enhanced)
319
+ WER_mass=[error1, error2, error3]
320
+
321
+ df_1['WER'] = WER_mass
322
+
323
  st.dataframe(df_1)
324
 
325