wietsedv committed
Commit
e7f4530
1 Parent(s): 39a0f1b

Update neural_acoustic_distance.py

Files changed (1)
  1. neural_acoustic_distance.py +24 -24
neural_acoustic_distance.py CHANGED
@@ -112,11 +112,11 @@ def main():
 
     st.write(
         "This tool visualizes pronunciation differences between two recordings of the same word. The two recordings have to be wave files containing a single spoken word. \n\n\
-        Choose any wav2vec 2.0 compatible model identifier on the [Hugging Face Model Hub](https://huggingface.co/models?filter=wav2vec2) and select the output layer you want to use.\n\n\
-        To upload your own recordings select 'custom upload' in the audio file selection step. The first recording is put on the x-axis of the plot and the second one will be the reference recording for computing distance.\n\
-        You should already see an example plot of two sample recordings.\n\n\
-        This visualization tool is part of [neural representations for modeling variation in speech](https://doi.org/10.1016/j.wocn.2022.101137). \n\
-        Please see our paper for further details.")
+        Choose any wav2vec 2.0 compatible model identifier on the [Hugging Face Model Hub](https://huggingface.co/models?filter=wav2vec2) and select the output layer you want to use.\n\n\
+        To upload your own recordings select 'custom upload' in the audio file selection step. The first recording is put on the x-axis of the plot and the second one will be the reference recording for computing distance.\n\
+        You should already see an example plot of two sample recordings.\n\n\
+        This visualization tool is part of [neural representations for modeling variation in speech](https://doi.org/10.1016/j.wocn.2022.101137). \n\
+        Please see our paper for further details.")
 
     st.subheader("Model selection:")
 
@@ -208,28 +208,28 @@ def main():
     plt_id = randrange(0, 10)
     plt.savefig("./output/plot" + str(plt_id) + ".pdf")
     st.pyplot(fig)
-
-    print('7. Plot filled', datetime.now().strftime('%d-%m-%Y %H:%M:%S'))  # test
-
-    if os.path.isfile("./output/plot.pdf"):
-        st.caption(" Visualization of neural acoustic distances\
-            per frame (based on wav2vec 2.0) with the pronunciation of\
-            the first filename on the x-axis and distances to the pronunciation\
-            of second filename on the y-axis. The horizontal line represents\
-            the global distance value (i.e. the average of all individual frames).\
-            The blue continuous line represents the moving average distance based on 9 frames,\
-            corresponding to 180ms. As a result of the moving average, the blue line does not cover the entire duration of\
-            the sample. Larger bullet sizes indicate that multiple\
-            frames in the pronunciation on the y-axis are aligned to a single frame in the pronunciation on the x-axis.")
-
-        with open("./output/plot.pdf", "rb") as file:
-            btn = st.download_button(label="Download plot", data=file, file_name="plot.pdf", mime="image/pdf")
-
-    print('8. End', datetime.now().strftime('%d-%m-%Y %H:%M:%S'))  # test
-    print(f"9. RAM used: {psutil.Process().memory_info().rss / (1024 * 1024):.2f} MB")  # test
 
 main()
 
+print('7. Plot filled', datetime.now().strftime('%d-%m-%Y %H:%M:%S'))  # test
+
+if os.path.isfile("./output/plot.pdf"):
+    st.caption(" Visualization of neural acoustic distances\
+        per frame (based on wav2vec 2.0) with the pronunciation of\
+        the first filename on the x-axis and distances to the pronunciation\
+        of second filename on the y-axis. The horizontal line represents\
+        the global distance value (i.e. the average of all individual frames).\
+        The blue continuous line represents the moving average distance based on 9 frames,\
+        corresponding to 180ms. As a result of the moving average, the blue line does not cover the entire duration of\
+        the sample. Larger bullet sizes indicate that multiple\
+        frames in the pronunciation on the y-axis are aligned to a single frame in the pronunciation on the x-axis.")
+
+    with open("./output/plot.pdf", "rb") as file:
+        btn = st.download_button(label="Download plot", data=file, file_name="plot.pdf", mime="image/pdf")
+
+print('8. End', datetime.now().strftime('%d-%m-%Y %H:%M:%S'))  # test
+print(f"9. RAM used: {psutil.Process().memory_info().rss / (1024 * 1024):.2f} MB")  # test
+
 for name in dir():
     if not name.startswith('_'):
         del globals()[name]
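The tool description in the first hunk assumes a wav2vec 2.0 model identifier from the Hugging Face Model Hub and a selectable output layer. As a minimal, hypothetical sketch of that step (not the app's actual code: the model identifier, the layer index, the helper name, and the 16 kHz mono input are all assumptions), frame-wise hidden states for one recording could be extracted like this:

```python
# Hypothetical sketch only: MODEL_ID, LAYER, and frame_features are placeholders,
# not values or code taken from neural_acoustic_distance.py.
import soundfile as sf
import torch
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model

MODEL_ID = "facebook/wav2vec2-large-960h"  # any wav2vec 2.0 model id from the Hub
LAYER = 10                                 # output layer selected in the app (assumed index)

feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_ID)
model = Wav2Vec2Model.from_pretrained(MODEL_ID)
model.eval()

def frame_features(wav_path: str) -> torch.Tensor:
    """Return hidden states of the chosen layer, one row per ~20 ms frame."""
    audio, sr = sf.read(wav_path)  # a 16 kHz mono wave file is assumed
    inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt")
    with torch.no_grad():
        out = model(inputs.input_values, output_hidden_states=True)
    # hidden_states holds the transformer input embeddings followed by the
    # output of each transformer layer, so LAYER indexes into that tuple.
    return out.hidden_states[LAYER].squeeze(0)
```

Distances between the two recordings would then be computed frame by frame after aligning the two feature sequences, as the caption notes (multiple frames on the y-axis may align to a single frame on the x-axis).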
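The caption text in the second hunk states that the global distance is the average over all frames and that the blue line is a moving average over 9 frames (180 ms), which is why it does not span the entire recording. A minimal sketch of that smoothing step, assuming a NumPy array `distances` of per-frame distance values (the app's own variable names and implementation may differ):

```python
import numpy as np

def moving_average(distances: np.ndarray, window: int = 9) -> np.ndarray:
    """Centered moving average over `window` frames (9 frames = 180 ms at 20 ms per frame).

    mode="valid" drops (window - 1) // 2 frames at each edge, which is why the
    smoothed (blue) line is shorter than the full recording.
    """
    return np.convolve(distances, np.ones(window) / window, mode="valid")

# Toy example: the global distance (horizontal line) is the mean over all frames.
distances = np.array([0.8, 0.6, 0.7, 0.9, 1.1, 0.5, 0.4, 0.6, 0.7, 0.8, 0.9, 1.0])
global_distance = distances.mean()
smoothed = moving_average(distances)  # len(smoothed) == len(distances) - 8
```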