HoneyTian committed on
Commit 6f86585 · 1 Parent(s): 459dab4
This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitignore +2 -1
  2. examples/download_wav/Temp Query 5_20251008-093912.csv +101 -0
  3. examples/download_wav/step_1_download_wav.py +12 -37
  4. examples/download_wav/step_2_to_1ch.py +12 -8
  5. examples/download_wav/step_3_split_two_second_wav.py +14 -7
  6. examples/lstm_badcase_filter/step_1_badcase_filter.py +233 -0
  7. examples/online_model_test/step_1_predict.py +6 -4
  8. examples/online_model_test/step_2_audio_filter.py +6 -2
  9. examples/online_model_test/step_3_make_test.py +2 -2
  10. examples/online_model_test/test.py +84 -0
  11. examples/{vm_sound_classification → sound_classification_by_cnn}/requirements.txt +0 -0
  12. examples/{vm_sound_classification → sound_classification_by_cnn}/run.sh +4 -4
  13. examples/{vm_sound_classification → sound_classification_by_cnn}/run_batch.sh +66 -66
  14. examples/{vm_sound_classification → sound_classification_by_cnn}/step_1_prepare_data.py +0 -0
  15. examples/{vm_sound_classification → sound_classification_by_cnn}/step_2_make_vocabulary.py +0 -0
  16. examples/{vm_sound_classification → sound_classification_by_cnn}/step_3_train_model.py +1 -1
  17. examples/{vm_sound_classification → sound_classification_by_cnn}/step_4_evaluation_model.py +0 -0
  18. examples/{vm_sound_classification → sound_classification_by_cnn}/step_5_export_models.py +0 -0
  19. examples/{vm_sound_classification → sound_classification_by_cnn}/step_6_infer.py +0 -0
  20. examples/{vm_sound_classification → sound_classification_by_cnn}/step_7_test_model.py +0 -0
  21. examples/{vm_sound_classification → sound_classification_by_cnn}/stop.sh +0 -0
  22. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch16.yaml +0 -0
  23. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch32.yaml +0 -0
  24. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch4.yaml +0 -0
  25. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch8.yaml +0 -0
  26. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch16.yaml +0 -0
  27. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch32.yaml +0 -0
  28. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch4.yaml +0 -0
  29. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch8.yaml +0 -0
  30. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch16.yaml +0 -0
  31. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch32.yaml +0 -0
  32. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch4.yaml +0 -0
  33. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch8.yaml +0 -0
  34. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch16.yaml +0 -0
  35. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch32.yaml +0 -0
  36. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch4.yaml +0 -0
  37. examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch8.yaml +0 -0
  38. examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/requirements.txt +0 -0
  39. examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/run.sh +0 -0
  40. examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_1_prepare_data.py +0 -0
  41. examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_2_make_vocabulary.py +0 -0
  42. examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_3_train_global_model.py +0 -0
  43. examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_4_train_country_model.py +0 -0
  44. examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_5_train_union.py +0 -0
  45. examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/stop.sh +0 -0
  46. examples/sound_classification_by_lstm/run.sh +197 -0
  47. examples/sound_classification_by_lstm/step_1_prepare_data.py +193 -0
  48. examples/sound_classification_by_lstm/step_2_make_vocabulary.py +50 -0
  49. examples/sound_classification_by_lstm/step_3_train_model.py +367 -0
  50. examples/sound_classification_by_lstm/yaml/lstm_classifier-4-ch64.yaml +27 -0
.gitignore CHANGED
@@ -15,6 +15,7 @@
 /trained_models/
 /temp/
 
+**/*.csv
+**/*.onnx
 #**/*.wav
 **/*.xlsx
-**/*.onnx
examples/download_wav/Temp Query 5_20251008-093912.csv ADDED
@@ -0,0 +1,101 @@
1
+ date,overdue_term,id,case_id,credit_user_id,call_start_timestamp,call_end_timestamp,thirdpart_download_url
2
+ 11/10/2025,M3,201577107,62145483,2.05158E+18,1760156453,1760156464,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/6b76d306-b767-44e5-be9a-0a15d1165113.mp3
3
+ 11/10/2025,M3,201552895,61647547,2.04871E+18,1760150223,1760150235,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/91eb4d93-aaaf-4a22-b1b5-93f90790f360.mp3
4
+ 11/10/2025,M1,201571248,64869969,1.63814E+18,1760154872,1760154878,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/9feab432-a05f-4c12-a7a5-1de81c5c5552.mp3
5
+ 10/10/2025,M5,201481243,57774660,1.86995E+18,1760093720,1760093736,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/8ca27707-73e9-41a9-a531-f84011a2d021.mp3
6
+ 11/10/2025,M6,201602065,56556981,1.96434E+18,1760162403,1760162411,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/23edb55b-d7d7-496d-92d9-27be9a8d0f06.mp3
7
+ 10/10/2025,M3,201432876,62937736,1.71926E+18,1760081217,1760081223,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/d64b9511-1ada-435c-bf1d-7ff8edd194d1.mp3
8
+ 10/10/2025,M2,201418064,63818662,2.06059E+18,1760078017,1760078023,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/7a53b0cf-d4a8-496b-8533-578e1b3c8050.mp3
9
+ 11/10/2025,M1,201546922,65604125,1.86304E+18,1760149167,1760149175,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/e6adf4e4-269c-4668-955d-7b3dd1c60736.mp3
10
+ 10/10/2025,M3,201430098,61807602,1.85118E+18,1760080774,1760080785,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/b2b8e1e5-d92d-424d-9150-7af7506305c4.mp3
11
+ 10/10/2025,M1,201448566,64796208,1.65278E+18,1760085408,1760085415,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/09d6248c-111d-4b73-910f-e218049185d8.mp3
12
+ 11/10/2025,M4,201571566,60538522,1.88122E+18,1760154923,1760154930,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/944211c2-e492-4889-b655-e508fdd5879d.mp3
13
+ 11/10/2025,M1,201566967,65843234,2.02107E+18,1760154065,1760154073,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/bfa8591b-e527-45e9-ae57-7f74f9e7302b.mp3
14
+ 10/10/2025,M2,201447321,64267309,1.56498E+18,1760085020,1760085033,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/4bb8e69a-4c6c-4828-857a-ce0e43cc75a1.mp3
15
+ 11/10/2025,M1,201568415,65114574,4883832,1760154398,1760154405,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/51e805c9-5cc5-490b-bae2-7d5eab6f343c.mp3
16
+ 11/10/2025,M2,201605984,63943082,1.8374E+18,1760163009,1760163030,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/98af45d4-16cd-4eaf-9b91-05c7143a26bc.mp3
17
+ 10/10/2025,M1,201419656,66515322,1.49814E+17,1760078339,1760078345,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/91f43558-6567-43d9-a709-ffa3173c81b4.mp3
18
+ 10/10/2025,M2,201427406,63880041,1.56918E+18,1760080267,1760080275,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/375fdc20-428e-4658-aeb7-cb9cca17c534.mp3
19
+ 11/10/2025,M1,201575782,64887894,1.73042E+18,1760156066,1760156082,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/b2d6b5a2-3d7f-4fc7-abf6-258cd7a7de5f.mp3
20
+ 10/10/2025,M3,201418794,62368390,1.94558E+18,1760078142,1760078156,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/ba673b33-8496-4901-8f2f-8083802a5213.mp3
21
+ 10/10/2025,M1,201424572,66395236,1203507,1760079797,1760079804,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/a95596c0-8b46-4333-b126-6c4c11ca41fc.mp3
22
+ 11/10/2025,M2,201571228,64248917,1.88019E+18,1760154871,1760154880,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/e4eb3384-2d75-4563-b9a7-7bb3256a2aae.mp3
23
+ 11/10/2025,M4,201570642,60447265,1.98507E+18,1760154782,1760154787,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/a5440c2e-3fc5-43b2-8ecf-d4bde44f62e8.mp3
24
+ 10/10/2025,M5,201453357,58652419,1.85737E+18,1760086342,1760086350,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/68def6c9-130e-4705-b4b6-19d5d8b4b27d.mp3
25
+ 11/10/2025,M6,201573623,57234397,1.97251E+18,1760155303,1760155313,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/0ba67a44-c94a-4d68-a6ba-d003cf4b57c8.mp3
26
+ 10/10/2025,M5,201424683,57553385,1.86241E+18,1760079814,1760079831,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/50c26933-40a1-45bc-baf0-eb7b4c268ffb.mp3
27
+ 11/10/2025,M1,201570171,66334366,1.8276E+18,1760154717,1760154723,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/ecbb5b43-19cf-44e2-8ee4-ea131c706421.mp3
28
+ 10/10/2025,M4,201451276,59840709,2.04014E+18,1760085947,1760085952,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/6059e9c2-2f3c-4cef-a536-4796436b9765.mp3
29
+ 10/10/2025,M4,201432508,59867441,1.91396E+18,1760081156,1760081172,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/c0814466-4e13-4ef1-b4be-2fbe66eebfd8.mp3
30
+ 11/10/2025,M5,201612109,58418373,1.87946E+18,1760163975,1760163983,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/c7c54616-575a-4dc9-9820-fea245211933.mp3
31
+ 10/10/2025,M2,201432653,64650851,1.74295E+18,1760081176,1760081192,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/e8cc82f1-ded6-484f-9e6a-b9bf514eda04.mp3
32
+ 11/10/2025,M1,201580231,65755142,1.96545E+18,1760157408,1760157415,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/d2ed8a99-d8a4-4c0a-9b30-0a4f97e8db6e.mp3
33
+ 10/10/2025,M3,201430023,61812734,7400607,1760080766,1760080780,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/f693b1d5-dc99-43d9-a730-2040ca645f17.mp3
34
+ 10/10/2025,M3,201450322,62009884,2.007E+18,1760085762,1760085768,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/4915b87b-1169-41f7-af8c-6509a66dfbe6.mp3
35
+ 10/10/2025,M3,201431281,62172812,2.05076E+18,1760080963,1760080975,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/dc9c5331-04bb-4cbf-b789-61b0b811d6b6.mp3
36
+ 10/10/2025,M5,201430080,58314791,1.99801E+18,1760080772,1760080777,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/e571b6f4-6471-4311-81ed-cd1af7e55e07.mp3
37
+ 11/10/2025,M4,201538284,59471661,2.03412E+18,1760147632,1760147638,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/cfbe5f77-02b3-4a04-8948-9ca4237c3abc.mp3
38
+ 10/10/2025,M1,201447219,65817559,2.02777E+18,1760085001,1760085007,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/49dad1dd-d681-410a-87df-649eca036ff0.mp3
39
+ 10/10/2025,M1,201481818,66043196,1.93698E+18,1760093888,1760093894,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/93c19c1f-93fa-41ec-8f57-54a606a7f4a4.mp3
40
+ 10/10/2025,M1,201485519,66563695,2.074E+18,1760095020,1760095034,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/e8c214bd-6261-42a7-863b-3bcbd82f081e.mp3
41
+ 11/10/2025,M5,201595914,59014301,1.5286E+18,1760161399,1760161415,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/095bcf3f-1807-4c71-a997-ce6806b4da99.mp3
42
+ 11/10/2025,M2,201533318,63403949,1.88317E+18,1760146862,1760146871,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/8ae1702d-eca8-4e77-b39d-fd119c72499e.mp3
43
+ 11/10/2025,M4,201576553,60295505,1.80115E+18,1760156314,1760156320,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/47bad51d-65fb-4795-9211-a8a0434b95ad.mp3
44
+ 10/10/2025,M1,201485741,65280144,2.07517E+18,1760095107,1760095114,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/87d8ccb1-f7e9-4377-a1d8-7bf9228e0c3f.mp3
45
+ 10/10/2025,M1,201431349,64854591,1.58546E+18,1760080970,1760080978,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/6f983610-0fd3-4da6-be49-1cc50f205618.mp3
46
+ 10/10/2025,M5,201487648,57318618,1.64505E+18,1760095790,1760095797,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/254f796e-b955-4a5e-a190-1d3579474645.mp3
47
+ 11/10/2025,M1,201577796,64963614,1.86777E+18,1760156648,1760156657,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/b382a477-933e-43b1-8759-a2b4e17e92b2.mp3
48
+ 11/10/2025,M3,201533254,62192769,2.023E+18,1760146812,1760146817,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/ac78c5f9-7109-45fc-87da-5e275e0159f4.mp3
49
+ 10/10/2025,M1,201428974,65690748,1.89974E+18,1760080550,1760080557,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/b4e53f33-4c55-4b16-b786-6619332f47fc.mp3
50
+ 11/10/2025,M2,201546294,64684883,1.93667E+18,1760149074,1760149090,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/337edca5-80fa-4f9d-8820-3fb333b384d5.mp3
51
+ 10/10/2025,M1,201475668,66214001,1.80957E+18,1760092690,1760092695,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/21e2b2ec-2475-4692-9b67-2453e262e77b.mp3
52
+ 10/10/2025,M5,201459599,57741938,6633631,1760087612,1760087617,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/02bd6e79-efc1-4cfd-bee9-12967c844735.mp3
53
+ 11/10/2025,M3,201551060,61444707,1.94443E+18,1760149891,1760149899,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/700c2e32-c5bd-48f3-8085-0617694963dd.mp3
54
+ 10/10/2025,M4,201453055,59345041,1.68365E+18,1760086275,1760086282,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/4f1ef95b-3432-41d3-8970-1698504ba010.mp3
55
+ 10/10/2025,M1,201426891,66520128,1728931,1760080190,1760080196,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/ec7180ac-2948-49a3-b709-f60b80dfee27.mp3
56
+ 12/10/2025,M1,201704571,66237684,1.77477E+18,1760229906,1760229911,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251012/21962/b70ed0f3-145f-49a6-9dbb-aa695e21d7de.mp3
57
+ 10/10/2025,M1,201457899,65109188,1.58483E+18,1760087330,1760087339,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/c5f910f6-6a5e-4bd0-8a64-f2805837558d.mp3
58
+ 11/10/2025,M2,201537965,63920995,2.04332E+18,1760147578,1760147593,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/38ff4d8b-4db3-4410-902e-3953699bf4eb.mp3
59
+ 11/10/2025,M1,201568081,66644507,1.92267E+18,1760154313,1760154318,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/a801cfcc-40d2-4dde-9994-b709836df856.mp3
60
+ 11/10/2025,M3,201539641,62112487,1.75162E+18,1760147868,1760147875,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/56ec2c07-3bf5-47ba-82c6-039bf094d6ca.mp3
61
+ 10/10/2025,M1,201483514,65958944,1.96386E+18,1760094328,1760094335,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/edd080b5-2439-4079-a27f-2f0941217825.mp3
62
+ 10/10/2025,M5,201417598,57494166,1.59238E+18,1760077922,1760077928,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/65750a23-6edc-479a-bea4-14ba9e43648e.mp3
63
+ 11/10/2025,M1,201528466,65224705,1.75014E+18,1760145272,1760145278,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/5b63572f-79d2-4d22-9ded-4fa2e3cd372b.mp3
64
+ 10/10/2025,M5,201453641,58921447,1.92301E+18,1760086392,1760086405,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/4a9b93d4-9878-4023-8dd2-40fcf9502a49.mp3
65
+ 11/10/2025,M1,201611955,65789335,1.88401E+18,1760163939,1760163946,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/6fb4f49d-fe9c-4a64-8c74-b5649d7e8175.mp3
66
+ 10/10/2025,M1,201459093,66318002,1.61088E+18,1760087527,1760087538,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/8a608622-e3d7-4a96-89ac-1300b7653c6f.mp3
67
+ 10/10/2025,M2,201418416,63100145,1.85044E+18,1760078077,1760078083,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/f6ab0a88-2055-4b88-9fbd-7af4aef5731f.mp3
68
+ 11/10/2025,M3,201537163,61356706,2.04189E+18,1760147448,1760147453,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/f1f6ad6a-7399-4fb2-8073-c83ef7093b6e.mp3
69
+ 10/10/2025,M3,201480897,61752670,1.99088E+18,1760093653,1760093662,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/cdf7552c-a3a8-4cfc-a4b1-651f84141090.mp3
70
+ 11/10/2025,M2,201605821,63901708,2.06357E+18,1760162987,1760162993,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/70b3816a-2e43-403a-97e0-dd3288596c71.mp3
71
+ 10/10/2025,M3,201457652,61356706,2.04189E+18,1760087292,1760087299,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/d5f5598a-b93c-4fb8-8bb1-56acc9bb0033.mp3
72
+ 10/10/2025,M2,201480118,64077815,1.99009E+18,1760093518,1760093526,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/8cac232e-2ed4-4506-8bcf-2a8e4bca91b5.mp3
73
+ 11/10/2025,M1,201551882,65617862,1.65598E+18,1760150016,1760150024,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/e71c8472-c3f7-486e-a388-1196836899bf.mp3
74
+ 10/10/2025,M5,201417451,58993884,1.18898E+16,1760077893,1760077905,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/2965b989-cf94-4dc4-afaa-5fda4ac4bb34.mp3
75
+ 11/10/2025,M5,201547506,58539902,1.92196E+18,1760149254,1760149265,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/1cb5b911-69c9-4b5a-ab2f-e4bad61dd0be.mp3
76
+ 11/10/2025,M1,201606566,66579640,1.92316E+18,1760163098,1760163103,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/2e5ff340-8197-4069-8533-0d47a221dc57.mp3
77
+ 11/10/2025,M2,201545849,63976411,2.01076E+18,1760148993,1760148999,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/e7592b1f-d24d-4fb5-afb9-0d64f23719d5.mp3
78
+ 10/10/2025,M1,201487535,66304049,1.96729E+18,1760095749,1760095754,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/03b31201-d6f7-4246-80ef-ec902f93e6bf.mp3
79
+ 10/10/2025,M1,201458971,66590224,2.06508E+18,1760087508,1760087515,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/01013ca6-1659-4613-8ae7-5d79372d4464.mp3
80
+ 11/10/2025,M4,201548032,59720355,1.54183E+18,1760149338,1760149343,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/61bafa17-b411-4d1a-b9bd-1a76c5087b9e.mp3
81
+ 10/10/2025,M5,201430001,57789932,1.99701E+18,1760080762,1760080768,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/aa25ffd1-82a2-4048-938d-0adc335cec41.mp3
82
+ 11/10/2025,M1,201596095,66001014,1.94845E+18,1760161430,1760161436,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/76c67081-af66-4a80-b531-25b14a8e0443.mp3
83
+ 11/10/2025,M3,201549933,62873165,1.87383E+18,1760149739,1760149747,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/fd0caad7-72fd-4152-a8ed-58253e946567.mp3
84
+ 10/10/2025,M5,201447596,58417708,1.9626E+18,1760085087,1760085092,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/0f8f64c4-2587-4501-abb8-23e0e5389635.mp3
85
+ 11/10/2025,M1,201596157,65991243,1.88004E+18,1760161446,1760161452,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/8f731992-cbc6-45e1-8203-5628d51a6e45.mp3
86
+ 10/10/2025,M2,201391828,63462209,1.1835E+16,1760067513,1760067535,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/2c641568-ebf6-4989-abeb-e2bc404d79e8.mp3
87
+ 11/10/2025,M4,201579066,60241526,1.95443E+18,1760157061,1760157070,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/34c79266-7c59-4e26-b936-05d64716fa79.mp3
88
+ 11/10/2025,M3,201539123,61552513,1.65655E+18,1760147755,1760147762,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/1dc2d33a-f080-4cb4-ad66-fb50761b500a.mp3
89
+ 11/10/2025,M5,201607636,57899370,1.93241E+18,1760163267,1760163287,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/d8e13804-692a-4cf7-b4e2-781384a1d559.mp3
90
+ 10/10/2025,M4,201426209,60181850,1.79511E+18,1760080075,1760080081,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/8f81c559-7392-46ff-8ebb-1a6edc41381c.mp3
91
+ 11/10/2025,M1,201535197,66655594,1.66159E+18,1760147133,1760147139,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/d793a0e3-bd7b-44f3-a1e2-bc1eb0908c1a.mp3
92
+ 11/10/2025,M4,201613127,61191667,1.78852E+18,1760164234,1760164240,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/4e794fb2-01bf-41ff-843f-945b2b1ec9df.mp3
93
+ 10/10/2025,M3,201456582,61353852,1.70556E+18,1760087088,1760087093,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/5fe67f28-5f86-43bb-a87f-589b75118d56.mp3
94
+ 11/10/2025,M2,201536974,63373730,2.02636E+18,1760147420,1760147433,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/cf2bf8cc-e2af-478a-96e4-686c59a98d4d.mp3
95
+ 11/10/2025,M6,201598303,57270639,1.57805E+18,1760161833,1760161850,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/07e2de01-8040-4942-8586-77d6eb38b64f.mp3
96
+ 11/10/2025,M4,201577614,60545450,1.95248E+18,1760156607,1760156635,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/53f6d6eb-c22e-4935-9d3c-941969a0241f.mp3
97
+ 10/10/2025,M1,201451460,66451819,1406890,1760085972,1760085978,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/43061915-f8e0-405b-9be0-bd3826d0aa69.mp3
98
+ 11/10/2025,M5,201566778,57480954,1.79569E+18,1760154035,1760154041,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/f737520a-d62f-4c71-a81c-bfbcbd2887d3.mp3
99
+ 11/10/2025,M5,201579474,58000396,1.17763E+16,1760157205,1760157215,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/b8d5a9b1-13cf-4667-8271-1b562de5dd22.mp3
100
+ 11/10/2025,M1,201547564,66391023,987018,1760149263,1760149269,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/434cea47-98a9-4f24-b29e-bee36e9f9832.mp3
101
+ 11/10/2025,M5,201538978,57340689,1.62497E+18,1760147736,1760147755,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/a74e253e-34cf-42df-9c3c-c8f6a45b994e.mp3
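
The added CSV is the raw call-log export that replaces the earlier AIAgent-CallLog Excel files: the timestamp columns appear to be Unix epoch seconds, and each recording is referenced by its thirdpart_download_url. A quick sanity-check sketch, assuming only pandas and the column names shown above:

    # Compute call durations from the epoch-second timestamp columns
    # (a sketch only, not part of the pipeline).
    import pandas as pd

    df = pd.read_csv("Temp Query 5_20251008-093912.csv")
    df["duration_s"] = df["call_end_timestamp"] - df["call_start_timestamp"]
    print(df[["id", "overdue_term", "duration_s"]].head())
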
examples/download_wav/step_1_download_wav.py CHANGED
@@ -31,7 +31,7 @@ def get_args():
31
  )
32
  parser.add_argument(
33
  "--output_dir",
34
- default=(project_path / "data/calling/358/wav_2ch").as_posix(),
35
  type=str
36
  )
37
  args = parser.parse_args()
@@ -39,37 +39,7 @@ def get_args():
39
 
40
 
41
  excel_file_str = """
42
- AIAgent-CallLog-20250929100824.xlsx
43
- AIAgent-CallLog-20250929134959.xlsx
44
- AIAgent-CallLog-20250929135030.xlsx
45
- AIAgent-CallLog-20250929135052.xlsx
46
- AIAgent-CallLog-20250929135122.xlsx
47
- AIAgent-CallLog-20250929135134.xlsx
48
- AIAgent-CallLog-20250929135209.xlsx
49
- AIAgent-CallLog-20250929135219.xlsx
50
- AIAgent-CallLog-20250929135247.xlsx
51
- AIAgent-CallLog-20250929135300.xlsx
52
- AIAgent-CallLog-20250929135311.xlsx
53
- AIAgent-CallLog-20250929135335.xlsx
54
- AIAgent-CallLog-20250929135344.xlsx
55
- AIAgent-CallLog-20250929135355.xlsx
56
- AIAgent-CallLog-20250929135443.xlsx
57
- AIAgent-CallLog-20250929135452.xlsx
58
- AIAgent-CallLog-20250929135501.xlsx
59
- AIAgent-CallLog-20250929135537.xlsx
60
- AIAgent-CallLog-20250929135544.xlsx
61
- AIAgent-CallLog-20250929135554.xlsx
62
- AIAgent-CallLog-20250929135630.xlsx
63
- AIAgent-CallLog-20250929135701.xlsx
64
- AIAgent-CallLog-20250929135710.xlsx
65
- AIAgent-CallLog-20250929135716.xlsx
66
- AIAgent-CallLog-20250929135755.xlsx
67
- AIAgent-CallLog-20250929135800.xlsx
68
- AIAgent-CallLog-20250929135809.xlsx
69
- AIAgent-CallLog-20250929135842.xlsx
70
- AIAgent-CallLog-20250929135849.xlsx
71
- AIAgent-CallLog-20250929135858.xlsx
72
- AIAgent-CallLog-20250929135909.xlsx
73
  """
74
 
75
 
@@ -101,11 +71,16 @@ def main():
101
  continue
102
  excel_file = excel_file_dir / name
103
 
104
- df = pd.read_excel(excel_file.as_posix())
 
105
  for i, row in tqdm(df.iterrows()):
106
- call_date = row["Attempt time"]
107
- call_id = row["Call ID"]
108
- record_url = row["Recording file"]
 
 
 
 
109
  if pd.isna(record_url):
110
  continue
111
 
@@ -137,7 +112,7 @@ def main():
137
  if resp.status_code != 200:
138
  raise AssertionError("status_code: {}; text: {}".format(resp.status_code, resp.text))
139
 
140
- filename = output_dir / "{}.wav".format(call_id)
141
  with open(filename.as_posix(), "wb") as f:
142
  f.write(resp.content)
143
 
 
31
  )
32
  parser.add_argument(
33
  "--output_dir",
34
+ default=(project_path / "data/calling/62/wav_2ch").as_posix(),
35
  type=str
36
  )
37
  args = parser.parse_args()
 
39
 
40
 
41
  excel_file_str = """
42
+ Temp Query 5_20251008-093912.csv
43
  """
44
 
45
 
 
71
  continue
72
  excel_file = excel_file_dir / name
73
 
74
+ # df = pd.read_excel(excel_file.as_posix())
75
+ df = pd.read_csv(excel_file.as_posix())
76
  for i, row in tqdm(df.iterrows()):
77
+ call_date = "2025-10-12 00:00:00"
78
+ record_url = row["thirdpart_download_url"]
79
+ call_id = Path(record_url).stem
80
+
81
+ # call_date = row["Attempt time"]
82
+ # call_id = row["Call ID"]
83
+ # record_url = row["Recording file"]
84
  if pd.isna(record_url):
85
  continue
86
 
 
112
  if resp.status_code != 200:
113
  raise AssertionError("status_code: {}; text: {}".format(resp.status_code, resp.text))
114
 
115
+ filename = output_dir / "{}.mp3".format(call_id)
116
  with open(filename.as_posix(), "wb") as f:
117
  f.write(resp.content)
118
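
In short, step_1_download_wav.py now reads the CSV export instead of the Excel call logs, derives the call id from the URL stem, and saves each recording as <call_id>.mp3. A condensed sketch of the revised loop (the output path follows the new default; the requests/tqdm usage is an assumption, since the hunks only show resp.status_code and resp.content):

    # Download each recording listed in the CSV and store it as "<call_id>.mp3".
    from pathlib import Path

    import pandas as pd
    import requests
    from tqdm import tqdm

    output_dir = Path("data/calling/62/wav_2ch")
    output_dir.mkdir(parents=True, exist_ok=True)

    df = pd.read_csv("Temp Query 5_20251008-093912.csv")
    for _, row in tqdm(df.iterrows(), total=len(df)):
        record_url = row["thirdpart_download_url"]
        if pd.isna(record_url):
            continue
        call_id = Path(record_url).stem  # e.g. 6b76d306-b767-44e5-be9a-0a15d1165113

        resp = requests.get(record_url)
        if resp.status_code != 200:
            raise AssertionError(f"status_code: {resp.status_code}; text: {resp.text}")

        with open((output_dir / f"{call_id}.mp3").as_posix(), "wb") as f:
            f.write(resp.content)
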
 
examples/download_wav/step_2_to_1ch.py CHANGED
@@ -3,8 +3,9 @@
3
  import argparse
4
  import os
5
  from pathlib import Path
6
- import time
7
 
 
 
8
  from scipy.io import wavfile
9
  from tqdm import tqdm
10
 
@@ -16,12 +17,12 @@ def get_args():
16
 
17
  parser.add_argument(
18
  "--audio_dir",
19
- default=(project_path / "data/calling/358/wav_2ch").as_posix(),
20
  type=str
21
  )
22
  parser.add_argument(
23
  "--output_dir",
24
- default=(project_path / "data/calling/358/wav_1ch").as_posix(),
25
  type=str
26
  )
27
  args = parser.parse_args()
@@ -36,13 +37,13 @@ def main():
36
  output_dir.mkdir(parents=True, exist_ok=True)
37
 
38
  finished = set()
39
- for filename in tqdm(list(output_dir.glob("*.wav"))):
40
  splits = filename.stem.split("_")
41
  call_id = splits[3]
42
  finished.add(call_id)
43
  print(f"finished count: {len(finished)}")
44
 
45
- for filename in tqdm(list(audio_dir.glob("*.wav"))):
46
  call_id = filename.stem
47
 
48
  if call_id in finished:
@@ -51,16 +52,19 @@ def main():
51
  finished.add(call_id)
52
 
53
  try:
54
- sample_rate, signal = wavfile.read(filename.as_posix())
 
 
55
  except UnboundLocalError as error:
56
  print(f"wavfile read failed. error type: {type(error)}, text: {str(error)}, filename: {filename.as_posix()}")
57
  raise error
58
  if sample_rate != 8000:
59
  raise AssertionError
60
 
61
- signal = signal[:, 0]
 
62
 
63
- to_filename = output_dir / f"active_media_r_{call_id}_fi-FI_none.wav"
64
  try:
65
  wavfile.write(
66
  to_filename.as_posix(),
 
3
  import argparse
4
  import os
5
  from pathlib import Path
 
6
 
7
+ import librosa
8
+ import numpy as np
9
  from scipy.io import wavfile
10
  from tqdm import tqdm
11
 
 
17
 
18
  parser.add_argument(
19
  "--audio_dir",
20
+ default=(project_path / "data/calling/62/wav_2ch").as_posix(),
21
  type=str
22
  )
23
  parser.add_argument(
24
  "--output_dir",
25
+ default=(project_path / "data/calling/62/wav_1ch").as_posix(),
26
  type=str
27
  )
28
  args = parser.parse_args()
 
37
  output_dir.mkdir(parents=True, exist_ok=True)
38
 
39
  finished = set()
40
+ for filename in tqdm(list(output_dir.glob("*.mp3"))):
41
  splits = filename.stem.split("_")
42
  call_id = splits[3]
43
  finished.add(call_id)
44
  print(f"finished count: {len(finished)}")
45
 
46
+ for filename in tqdm(list(audio_dir.glob("*.mp3"))):
47
  call_id = filename.stem
48
 
49
  if call_id in finished:
 
52
  finished.add(call_id)
53
 
54
  try:
55
+ # sample_rate, signal = wavfile.read(filename.as_posix())
56
+ signal, sample_rate = librosa.load(filename.as_posix(), sr=8000, mono=False)
57
+ signal = np.array(signal * (1 << 15), dtype=np.int16)
58
  except UnboundLocalError as error:
59
  print(f"wavfile read failed. error type: {type(error)}, text: {str(error)}, filename: {filename.as_posix()}")
60
  raise error
61
  if sample_rate != 8000:
62
  raise AssertionError
63
 
64
+ # signal = signal[:, 0]
65
+ signal = signal[0, :]
66
 
67
+ to_filename = output_dir / f"active_media_r_{call_id}_id-ID_none.wav"
68
  try:
69
  wavfile.write(
70
  to_filename.as_posix(),
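
step_2_to_1ch.py now ingests the downloaded mp3 files with librosa instead of scipy's wavfile, keeps the first channel, rescales to int16, and writes an 8 kHz mono wav tagged with the id-ID locale. A self-contained sketch of that conversion (the channel choice and scaling mirror the diff; reading mp3 requires an ffmpeg/audioread backend for librosa):

    # Convert one downloaded mp3 to a mono 8 kHz int16 wav, as in step_2_to_1ch.py.
    from pathlib import Path

    import librosa
    import numpy as np
    from scipy.io import wavfile

    def to_1ch(mp3_file: Path, output_dir: Path) -> Path:
        call_id = mp3_file.stem
        # librosa returns float32 in [-1, 1]; with mono=False a stereo file has shape [2, n].
        signal, sample_rate = librosa.load(mp3_file.as_posix(), sr=8000, mono=False)
        if signal.ndim == 2:
            signal = signal[0, :]  # keep the first channel only
        signal = np.array(signal * (1 << 15), dtype=np.int16)

        to_filename = output_dir / f"active_media_r_{call_id}_id-ID_none.wav"
        wavfile.write(to_filename.as_posix(), sample_rate, signal)
        return to_filename
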
examples/download_wav/step_3_split_two_second_wav.py CHANGED
@@ -15,17 +15,21 @@ def get_args():
15
 
16
  parser.add_argument(
17
  "--audio_dir",
18
- default=(project_path / "data/calling/358/wav_2ch").as_posix(),
 
 
 
19
  type=str
20
  )
21
  parser.add_argument(
22
  "--output_dir",
23
- default=(project_path / "data/calling/358/wav_segmented").as_posix(),
 
24
  type=str
25
  )
26
  parser.add_argument(
27
  "--first_n_seconds",
28
- default=8,
29
  type=int
30
  )
31
  args = parser.parse_args()
@@ -40,12 +44,16 @@ def main():
40
  output_dir.mkdir(parents=True, exist_ok=True)
41
 
42
  for filename in tqdm(list(audio_dir.glob("*.wav"))):
43
- call_id = filename.stem
 
 
 
 
44
  sample_rate, signal = wavfile.read(filename.as_posix())
45
  if sample_rate != 8000:
46
  raise AssertionError
47
 
48
- signal = signal[:, 0]
49
  signal_length = len(signal) - sample_rate * 2
50
  if signal_length <= 0:
51
  continue
@@ -56,8 +64,7 @@ def main():
56
  end = begin + sample_rate * 2
57
  sub_signal = signal[begin: end]
58
 
59
- ts = int(time.time() * 1000)
60
- to_filename = output_dir / "{}_fi-FI_none_{}.wav".format(call_id, ts)
61
  wavfile.write(
62
  to_filename.as_posix(),
63
  sample_rate,
 
15
 
16
  parser.add_argument(
17
  "--audio_dir",
18
+ # default=(project_path / "data/calling/66/wav_1ch").as_posix(),
19
+ # default=(project_path / "data/calling/358/wav_1ch/finished/voicemail_annotation").as_posix(),
20
+ # default=(project_path / "data/calling/358/wav_1ch/finished/voicemail_annotation").as_posix(),
21
+ default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\52\music",
22
  type=str
23
  )
24
  parser.add_argument(
25
  "--output_dir",
26
+ # default=(project_path / "data/calling/358/wav_segmented").as_posix(),
27
+ default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\52\music\wav_segmented",
28
  type=str
29
  )
30
  parser.add_argument(
31
  "--first_n_seconds",
32
+ default=1000,
33
  type=int
34
  )
35
  args = parser.parse_args()
 
44
  output_dir.mkdir(parents=True, exist_ok=True)
45
 
46
  for filename in tqdm(list(audio_dir.glob("*.wav"))):
47
+ splits = filename.stem.split("_")
48
+ call_id = splits[3]
49
+ language = splits[4]
50
+ scene_id = splits[5]
51
+
52
  sample_rate, signal = wavfile.read(filename.as_posix())
53
  if sample_rate != 8000:
54
  raise AssertionError
55
 
56
+ # signal = signal[:, 0]
57
  signal_length = len(signal) - sample_rate * 2
58
  if signal_length <= 0:
59
  continue
 
64
  end = begin + sample_rate * 2
65
  sub_signal = signal[begin: end]
66
 
67
+ to_filename = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_{begin}.wav"
 
68
  wavfile.write(
69
  to_filename.as_posix(),
70
  sample_rate,
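
step_3_split_two_second_wav.py now parses call_id, language, and scene_id from the 1-channel filenames and names each 2-second segment by its start offset instead of a millisecond timestamp, so segment names are deterministic. A minimal sketch of the segmentation for one file (the loop bounds are an assumption; the hunks only show the slice and the output name):

    # Split one 8 kHz wav into consecutive 2-second segments.
    from pathlib import Path

    from scipy.io import wavfile

    def split_two_second(wav_file: Path, output_dir: Path) -> None:
        splits = wav_file.stem.split("_")
        call_id, language, scene_id = splits[3], splits[4], splits[5]

        sample_rate, signal = wavfile.read(wav_file.as_posix())
        if sample_rate != 8000:
            raise AssertionError

        window = sample_rate * 2
        for begin in range(0, len(signal) - window + 1, window):
            sub_signal = signal[begin: begin + window]
            to_filename = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_{begin}.wav"
            wavfile.write(to_filename.as_posix(), sample_rate, sub_signal)
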
examples/lstm_badcase_filter/step_1_badcase_filter.py ADDED
@@ -0,0 +1,233 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ from pathlib import Path
5
+
6
+ from tqdm import tqdm
7
+
8
+ from gradio_client import Client, handle_file
9
+ import librosa
10
+ import numpy as np
11
+ import onnxruntime as ort
12
+ from scipy.io import wavfile
13
+ import torch
14
+ import torchaudio
15
+ import shutil
16
+
17
+ from project_settings import project_path
18
+
19
+
20
+ def get_args():
21
+ parser = argparse.ArgumentParser()
22
+ parser.add_argument(
23
+ "--audio_dir",
24
+ # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\pt-BR2",
25
+ # default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\pt-BR",
26
+ default=r"D:\Users\tianx\HuggingDatasets\calling_analysis\data\pt-BR\bell_and_di_then_mute",
27
+ type=str,
28
+ )
29
+ parser.add_argument(
30
+ "--onnx_model_file",
31
+ # default=(project_path / "examples/online_model_test/models/pt-BR.onnx").as_posix(),
32
+ default="../online_model_test/models/pt-BR.onnx",
33
+ type=str
34
+ )
35
+ parser.add_argument(
36
+ "--output_dir",
37
+ default=(project_path / "data/badcase").as_posix(),
38
+ type=str,
39
+ )
40
+ args = parser.parse_args()
41
+ return args
42
+
43
+
44
+ class OnlineModelConfig(object):
45
+ def __init__(self,
46
+ sample_rate: int = 8000,
47
+ n_fft: int = 1024,
48
+ hop_size: int = 512,
49
+ n_mels: int = 80,
50
+ f_min: float = 10.0,
51
+ f_max: float = 3800.0,
52
+ ):
53
+ self.sample_rate = sample_rate
54
+ self.n_fft = n_fft
55
+ self.hop_size = hop_size
56
+ self.n_mels = n_mels
57
+ self.f_min = f_min
58
+ self.f_max = f_max
59
+
60
+
61
+ class OnlineModelInference(object):
62
+ def __init__(self,
63
+ model_path: str,
64
+ ):
65
+ self.model_path = model_path
66
+
67
+ providers = [
68
+ "CUDAExecutionProvider", "CPUExecutionProvider"
69
+ ] if torch.cuda.is_available() else [
70
+ "CPUExecutionProvider"
71
+ ]
72
+ self.session = ort.InferenceSession(self.model_path, providers=providers)
73
+
74
+ self.config = OnlineModelConfig()
75
+
76
+ self.mel_transform = torchaudio.transforms.MelSpectrogram(
77
+ sample_rate=self.config.sample_rate,
78
+ n_fft=self.config.n_fft,
79
+ hop_length=self.config.hop_size,
80
+ n_mels=self.config.n_mels,
81
+ f_min=self.config.f_min,
82
+ f_max=self.config.f_max,
83
+ window_fn=torch.hamming_window
84
+ )
85
+
86
+ def predict_by_ndarray(self,
87
+ sub_signal: np.ndarray,
88
+ h: np.ndarray = None,
89
+ c: np.ndarray = None,
90
+ ):
91
+ # sub_signal, shape: [num_samples,]
92
+ sub_signal = torch.tensor(sub_signal, dtype=torch.float32)
93
+
94
+ sub_signal = sub_signal.unsqueeze(0)
95
+ # sub_signal, shape: [1, num_samples]
96
+ mel_spec = self.mel_transform.forward(sub_signal)
97
+ # mel_spec, shape: [1, n_mels, n_frames]
98
+ mel_spec = torch.transpose(mel_spec, dim0=1, dim1=2)
99
+ # mel_spec, shape: [1, n_frames, n_mels]
100
+
101
+ h = torch.tensor(h) if h is not None else None
102
+ c = torch.tensor(c) if c is not None else None
103
+ label, prob, h, c = self.predict_by_mel_spec(mel_spec, h=h, c=c)
104
+ # h, c: torch.Tensor
105
+ h = h.numpy()
106
+ c = c.numpy()
107
+ return label, prob, h, c
108
+
109
+ def predict_by_mel_spec(self,
110
+ mel_spec: torch.Tensor,
111
+ h: torch.Tensor = None,
112
+ c: torch.Tensor = None,
113
+ ):
114
+ # mel_spec, shape: [1, n_frames, n_mels]
115
+
116
+ if h is None:
117
+ h = np.zeros((3, 1, 64), dtype=np.float32)  # 3-layer LSTM, batch size 1, hidden size 64
118
+ else:
119
+ h = h.numpy()
120
+ if c is None:
121
+ c = np.zeros((3, 1, 64), dtype=np.float32)  # 3-layer LSTM, batch size 1, hidden size 64
122
+ else:
123
+ c = c.numpy()
124
+
125
+ mel_spec_np = mel_spec.numpy()
126
+ outputs = self.session.run(
127
+ input_feed={
128
+ "input": mel_spec_np,
129
+ "h": h,
130
+ "c": c
131
+ },
132
+ output_names=[
133
+ "output", "h_out", "c_out"
134
+ ],
135
+ )
136
+ logits, h, c = outputs
137
+ # logits, np.ndarray, shape: [b, num_labels]
138
+ # h, c: np.ndarray
139
+ h = torch.tensor(h)
140
+ c = torch.tensor(c)
141
+
142
+ probs = torch.softmax(torch.tensor(logits), dim=1)
143
+ max_prob, predicted_label_index = torch.max(probs, dim=1)
144
+
145
+ label = self.get_label_by_index(predicted_label_index.item())
146
+ prob = max_prob.item()
147
+ return label, prob, h, c
148
+
149
+ @staticmethod
150
+ def get_label_by_index(index: int):
151
+ label_map = {
152
+ 0: "voice",
153
+ 1: "voicemail",
154
+ 2: "mute",
155
+ 3: "noise"
156
+ }
157
+ result = label_map[index]
158
+ return result
159
+
160
+
161
+ def main():
162
+ args = get_args()
163
+
164
+ client = Client("http://127.0.0.1:7864/")
165
+ # client = Client("http://10.75.27.247:7864/")
166
+
167
+ audio_dir = Path(args.audio_dir)
168
+ output_dir = Path(args.output_dir)
169
+ output_dir.mkdir(parents=True, exist_ok=True)
170
+
171
+ model = OnlineModelInference(model_path=args.onnx_model_file)
172
+
173
+ for filename in tqdm(audio_dir.glob("**/active_media_r_*.wav")):
174
+ splits = filename.stem.split("_")
175
+ call_id = splits[3]
176
+ language = splits[4]
177
+ scene_id = splits[5]
178
+
179
+ signal, sample_rate = librosa.load(filename.as_posix(), sr=8000)
180
+ duration = librosa.get_duration(y=signal, sr=sample_rate)
181
+ signal_length = len(signal)
182
+ if signal_length == 0:
183
+ continue
184
+
185
+ begin = 0
186
+ end = begin + sample_rate * 2
187
+ sub_signal = signal[begin: end]
188
+ if sub_signal.shape[0] != 16000:
189
+ continue
190
+
191
+ h = None
192
+ c = None
193
+ label1, prob1, h, c = model.predict_by_ndarray(sub_signal, h=h, c=c)
194
+
195
+ sub_signal_ = np.array(sub_signal * (1 << 15), dtype=np.int16)
196
+ temp_file = "temp.wav"
197
+
198
+ wavfile.write(
199
+ temp_file,
200
+ 8000,
201
+ sub_signal_,
202
+ )
203
+
204
+ # label2, prob2 = client.predict(
205
+ # audio_t=handle_file(temp_file),
206
+ # model_name="voicemail-pt-br-2-ch4",
207
+ # ground_true="Hello!!",
208
+ # api_name="/when_click_cls_button"
209
+ # )
210
+ label2, prob2 = client.predict(
211
+ audio_t=handle_file(temp_file),
212
+ model_name="sound-8-ch4",
213
+ ground_true="Hello!!",
214
+ api_name="/when_click_cls_button"
215
+ )
216
+
217
+ print(label1)
218
+ print(label2)
219
+ label2 = "voicemail"
220
+ label1 = "non_voicemail"
221
+ if label2 in ("voicemail", "bell") and label1 != "voicemail":
222
+ tgt_file = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_0.wav"
223
+ if not tgt_file.exists():
224
+ shutil.move(
225
+ temp_file,
226
+ tgt_file.as_posix(),
227
+ )
228
+
229
+ return
230
+
231
+
232
+ if __name__ == "__main__":
233
+ main()
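
The new badcase filter wraps the exported ONNX LSTM in OnlineModelInference and compares its prediction on the first two seconds against a second classifier served through gradio_client, with the intent of moving disagreements into data/badcase. A hedged usage sketch of the streaming side only (input path and chunking are assumptions; it reuses the class defined above):

    # Run the ONNX LSTM classifier over consecutive 2-second chunks, carrying the
    # hidden/cell state between calls so the prediction stays streaming-consistent.
    import librosa

    model = OnlineModelInference(model_path="../online_model_test/models/pt-BR.onnx")

    signal, sample_rate = librosa.load("some_call.wav", sr=8000)  # hypothetical input file
    h = c = None
    for begin in range(0, len(signal), sample_rate * 2):
        sub_signal = signal[begin: begin + sample_rate * 2]
        if len(sub_signal) < sample_rate * 2:
            break
        label, prob, h, c = model.predict_by_ndarray(sub_signal, h=h, c=c)
        print(begin // sample_rate, label, round(prob, 3))
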
examples/online_model_test/step_1_predict.py CHANGED
@@ -19,13 +19,13 @@ def get_args():
19
  parser = argparse.ArgumentParser()
20
  parser.add_argument(
21
  "--audio_dir",
22
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\zh-TW",
23
  type=str,
24
  )
25
- parser.add_argument("--onnx_model_file", default="zh-TW.onnx", type=str)
26
  parser.add_argument("--target_duration", default=8.0, type=float)
27
 
28
- parser.add_argument("--output_file", default="zh_tw_predict.xlsx", type=str)
29
 
30
  args = parser.parse_args()
31
  return args
@@ -177,13 +177,15 @@ def main():
177
  for begin in range(0, target_duration, sample_rate*2):
178
  end = begin + sample_rate*2
179
  sub_signal = signal[begin: end]
180
- if len(sub_signal) == 0:
181
  break
182
  label, prob, h, c = model.predict_by_ndarray(sub_signal, h=h, c=c)
183
  predict_result.append({
184
  "label": label,
185
  "prob": prob,
186
  })
 
 
187
  label_list = [p["label"] for p in predict_result]
188
  predict_result_ = json.dumps(predict_result, ensure_ascii=False, indent=4)
189
  label2 = predict_result[0]["label"]
 
19
  parser = argparse.ArgumentParser()
20
  parser.add_argument(
21
  "--audio_dir",
22
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\th-TH\th-TH\early_media_no_voice",
23
  type=str,
24
  )
25
+ parser.add_argument("--onnx_model_file", default="models/th-TH.onnx", type=str)
26
  parser.add_argument("--target_duration", default=8.0, type=float)
27
 
28
+ parser.add_argument("--output_file", default="th-TH_predict.xlsx", type=str)
29
 
30
  args = parser.parse_args()
31
  return args
 
177
  for begin in range(0, target_duration, sample_rate*2):
178
  end = begin + sample_rate*2
179
  sub_signal = signal[begin: end]
180
+ if len(sub_signal) < 0.5 * sample_rate:
181
  break
182
  label, prob, h, c = model.predict_by_ndarray(sub_signal, h=h, c=c)
183
  predict_result.append({
184
  "label": label,
185
  "prob": prob,
186
  })
187
+ if len(predict_result) == 0:
188
+ continue
189
  label_list = [p["label"] for p in predict_result]
190
  predict_result_ = json.dumps(predict_result, ensure_ascii=False, indent=4)
191
  label2 = predict_result[0]["label"]
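
The guard above addresses two failure modes seen with shorter recordings: a truncated tail chunk under half a second no longer reaches the model, and files that produce no usable chunk at all are skipped instead of indexing into an empty predict_result. A small sketch of the chunking with that guard (chunk length and sample rate follow the script):

    # Yield only 2-second chunks that are long enough to classify (>= 0.5 s).
    def usable_chunks(signal, sample_rate=8000, target_duration_s=8):
        chunks = []
        for begin in range(0, target_duration_s * sample_rate, sample_rate * 2):
            sub_signal = signal[begin: begin + sample_rate * 2]
            if len(sub_signal) < 0.5 * sample_rate:
                break
            chunks.append(sub_signal)
        return chunks
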
examples/online_model_test/step_2_audio_filter.py CHANGED
@@ -10,10 +10,10 @@ import pandas as pd
10
  def get_args():
11
  parser = argparse.ArgumentParser()
12
 
13
- parser.add_argument("--predict_file", default="zh_tw_predict.xlsx", type=str)
14
  parser.add_argument(
15
  "--output_dir",
16
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\886",
17
  type=str,
18
  )
19
  args = parser.parse_args()
@@ -24,12 +24,16 @@ def main():
24
  args = get_args()
25
 
26
  output_dir = Path(args.output_dir)
 
27
 
28
  df = pd.read_excel(args.predict_file)
29
  for i, row in df.iterrows():
30
  filename = row["filename"]
31
  ground_truth_ = row["ground_truth_"]
 
32
 
 
 
33
  if ground_truth_ == "voicemail":
34
  shutil.copy(
35
  filename,
 
10
  def get_args():
11
  parser = argparse.ArgumentParser()
12
 
13
+ parser.add_argument("--predict_file", default="th-TH_predict.xlsx", type=str)
14
  parser.add_argument(
15
  "--output_dir",
16
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\th-TH\th-TH\early_media_no_voice\bad_case",
17
  type=str,
18
  )
19
  args = parser.parse_args()
 
24
  args = get_args()
25
 
26
  output_dir = Path(args.output_dir)
27
+ output_dir.mkdir(parents=True, exist_ok=True)
28
 
29
  df = pd.read_excel(args.predict_file)
30
  for i, row in df.iterrows():
31
  filename = row["filename"]
32
  ground_truth_ = row["ground_truth_"]
33
+ flag = row["flag"]
34
 
35
+ if flag == 1:
36
+ continue
37
  if ground_truth_ == "voicemail":
38
  shutil.copy(
39
  filename,
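
step_2_audio_filter.py now also creates the output directory and honours a manual flag column in the prediction sheet. A short sketch of the filtering loop (treating flag == 1 as "already handled, skip" is an assumption; column names follow the script):

    # Copy predicted-voicemail files out of the prediction sheet, skipping flagged rows.
    from pathlib import Path
    import shutil

    import pandas as pd

    output_dir = Path("bad_case")  # hypothetical target directory
    output_dir.mkdir(parents=True, exist_ok=True)

    df = pd.read_excel("th-TH_predict.xlsx")
    for _, row in df.iterrows():
        if row["flag"] == 1:
            continue
        if row["ground_truth_"] == "voicemail":
            shutil.copy(row["filename"], output_dir.as_posix())
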
examples/online_model_test/step_3_make_test.py CHANGED
@@ -15,12 +15,12 @@ def get_args():
15
 
16
  parser.add_argument(
17
  "--src_dir",
18
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\65\voicemail",
19
  type=str,
20
  )
21
  parser.add_argument(
22
  "--tgt_dir",
23
- default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\voice_test_examples\65\95",
24
  type=str,
25
  )
26
  parser.add_argument(
 
15
 
16
  parser.add_argument(
17
  "--src_dir",
18
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\63\voicemail",
19
  type=str,
20
  )
21
  parser.add_argument(
22
  "--tgt_dir",
23
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\voice_test_examples\63\96",
24
  type=str,
25
  )
26
  parser.add_argument(
examples/online_model_test/test.py ADDED
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ from collections import defaultdict
5
+ from pathlib import Path
6
+ import shutil
7
+
8
+ from gradio_client import Client, handle_file
9
+ import librosa
10
+ import pandas as pd
11
+ from tqdm import tqdm
12
+
13
+
14
+ def get_args():
15
+ parser = argparse.ArgumentParser()
16
+ parser.add_argument(
17
+ "--finished_dir",
18
+ default=r"D:\Users\tianx\HuggingSpaces\cc_audio_8\data\calling\66\wav_1ch",
19
+ type=str,
20
+ )
21
+ parser.add_argument(
22
+ "--src_dir",
23
+ default=r"D:/Users/tianx/HuggingDatasets/international_voice/data/sea-idn/audio_lib_hkg_1/audio_lib_hkg_1/th-TH/th-TH/",
24
+ type=str,
25
+ )
26
+ parser.add_argument(
27
+ "--tgt_dir",
28
+ default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\th-TH\bad_case",
29
+ type=str,
30
+ )
31
+ args = parser.parse_args()
32
+ return args
33
+
34
+
35
+ def main():
36
+ args = get_args()
37
+
38
+ finished_dir = Path(args.finished_dir)
39
+ src_dir = Path(args.src_dir)
40
+ tgt_dir = Path(args.tgt_dir)
41
+ tgt_dir.mkdir(parents=True, exist_ok=True)
42
+
43
+ # finished
44
+ finished = set()
45
+ for filename in finished_dir.glob("*.wav"):
46
+ splits = filename.stem.split("_")
47
+ call_id = splits[3]
48
+ if call_id in ("27521940-feef-4bfa-ba55-b1f00a10c64d",):
49
+ print(f"call_id: {call_id}")
50
+
51
+ finished.add(call_id)
52
+ print(f"finished count: {len(finished)}")
53
+
54
+ # call_id_to_wav_file_list
55
+ call_id_to_wav_file_list = defaultdict(list)
56
+ for filename in src_dir.glob("**/*.wav"):
57
+ splits = filename.stem.split("_")
58
+ call_id = splits[3]
59
+ language = splits[4]
60
+ scene_id = splits[5]
61
+ if call_id in ("27521940-feef-4bfa-ba55-b1f00a10c64d",):
62
+ print(f"call_id: {call_id}")
63
+
64
+ call_id_to_wav_file_list[call_id].append(filename.as_posix())
65
+ print(f"src count: {len(call_id_to_wav_file_list)}")
66
+
67
+ for filename in tqdm(src_dir.glob("**/active_media_r_*.wav")):
68
+ splits = filename.stem.split("_")
69
+ call_id = splits[3]
70
+ if call_id in ("27521940-feef-4bfa-ba55-b1f00a10c64d",):
71
+ print(f"call_id: {call_id}")
72
+
73
+ if call_id in finished:
74
+ wav_file_list = call_id_to_wav_file_list[call_id]
75
+ for wav_file in wav_file_list:
76
+ shutil.move(
77
+ wav_file,
78
+ tgt_dir.as_posix(),
79
+ )
80
+ return
81
+
82
+
83
+ if __name__ == "__main__":
84
+ main()
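
test.py cross-references two directories by call id: the id is the fourth underscore-separated field of the active_media_r_* filenames, segments are grouped per call, and every segment of a call that already appears in the finished directory is moved to the bad_case folder. The grouping step as a small reusable sketch:

    # Group segment wav files by the call id embedded in their filenames.
    from collections import defaultdict
    from pathlib import Path

    def group_by_call_id(src_dir: Path) -> dict:
        call_id_to_wav_file_list = defaultdict(list)
        for filename in src_dir.glob("**/active_media_r_*.wav"):
            call_id = filename.stem.split("_")[3]
            call_id_to_wav_file_list[call_id].append(filename.as_posix())
        return call_id_to_wav_file_list
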
examples/{vm_sound_classification → sound_classification_by_cnn}/requirements.txt RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/run.sh RENAMED
@@ -2,22 +2,22 @@
2
 
3
  : <<'END'
4
 
5
- sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32 \
6
  --filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
7
  E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
8
  --label_plan 4
9
 
10
- sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32 \
11
  --filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
12
  E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
13
  --label_plan 4
14
 
15
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32 \
16
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
17
  --label_plan 3 \
18
  --config_file "yaml/conv2d-classifier-3-ch4.yaml"
19
 
20
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32 \
21
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
22
  --label_plan 2-voicemail \
23
  --config_file "yaml/conv2d-classifier-2-ch32.yaml"
 
2
 
3
  : <<'END'
4
 
5
+ sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32-cnn \
6
  --filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
7
  E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
8
  --label_plan 4
9
 
10
+ sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32-cnn \
11
  --filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
12
  E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
13
  --label_plan 4
14
 
15
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32-cnn \
16
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
17
  --label_plan 3 \
18
  --config_file "yaml/conv2d-classifier-3-ch4.yaml"
19
 
20
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32-cnn \
21
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
22
  --label_plan 2-voicemail \
23
  --config_file "yaml/conv2d-classifier-2-ch32.yaml"
examples/{vm_sound_classification → sound_classification_by_cnn}/run_batch.sh RENAMED
@@ -3,25 +3,25 @@
3
 
4
  # sound ch4
5
 
6
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch4 \
7
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
8
  #--label_plan 2 \
9
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml"
10
  #
11
  #
12
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch4 \
13
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
14
  #--label_plan 3 \
15
  #--config_file "yaml/conv2d-classifier-3-ch4.yaml"
16
  #
17
  #
18
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch4 \
19
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
20
  #--label_plan 4 \
21
  #--config_file "yaml/conv2d-classifier-4-ch4.yaml"
22
  #
23
  #
24
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch4 \
25
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
26
  #--label_plan 8 \
27
  #--config_file "yaml/conv2d-classifier-8-ch4.yaml"
@@ -29,25 +29,25 @@
29
 
30
  # sound ch8
31
 
32
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch8 \
33
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
34
  #--label_plan 2 \
35
  #--config_file "yaml/conv2d-classifier-2-ch8.yaml"
36
  #
37
  #
38
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch8 \
39
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
40
  #--label_plan 3 \
41
  #--config_file "yaml/conv2d-classifier-3-ch8.yaml"
42
  #
43
  #
44
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch8 \
45
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
46
  #--label_plan 4 \
47
  #--config_file "yaml/conv2d-classifier-4-ch8.yaml"
48
  #
49
  #
50
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch8 \
51
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
52
  #--label_plan 8 \
53
  #--config_file "yaml/conv2d-classifier-8-ch8.yaml"
@@ -55,25 +55,25 @@
55
 
56
  # sound ch16
57
 
58
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch16 \
59
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
60
  #--label_plan 2 \
61
  #--config_file "yaml/conv2d-classifier-2-ch16.yaml"
62
 
63
 
64
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch16 \
65
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
66
  #--label_plan 3 \
67
  #--config_file "yaml/conv2d-classifier-3-ch16.yaml"
68
  #
69
  #
70
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch16 \
71
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
72
  #--label_plan 4 \
73
  #--config_file "yaml/conv2d-classifier-4-ch16.yaml"
74
  #
75
  #
76
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch16 \
77
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
78
  #--label_plan 8 \
79
  #--config_file "yaml/conv2d-classifier-8-ch16.yaml"
@@ -81,25 +81,25 @@
81
 
82
  # sound ch32
83
 
84
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch32 \
85
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
86
  #--label_plan 2 \
87
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml"
88
  #
89
  #
90
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32 \
91
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
92
  #--label_plan 3 \
93
  #--config_file "yaml/conv2d-classifier-3-ch32.yaml"
94
  #
95
  #
96
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch32 \
97
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
98
  #--label_plan 4 \
99
  #--config_file "yaml/conv2d-classifier-4-ch32.yaml"
100
 
101
 
102
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch32 \
103
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
104
  #--label_plan 8 \
105
  #--config_file "yaml/conv2d-classifier-8-ch32.yaml"
@@ -107,12 +107,12 @@
107
 
108
  # pretrained voicemail
109
 
110
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch4 \
111
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
112
  --label_plan 2-voicemail \
113
  --config_file "yaml/conv2d-classifier-2-ch4.yaml"
114
 
115
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch32 \
116
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
117
  --label_plan 2-voicemail \
118
  --config_file "yaml/conv2d-classifier-2-ch32.yaml"
@@ -120,149 +120,149 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
120
 
121
  # voicemail ch4
122
 
123
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch4 \
124
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
125
  #--label_plan 2-voicemail \
126
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
127
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
128
 
129
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch4 \
130
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
131
  #--label_plan 2-voicemail \
132
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
133
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
134
  #
135
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch4 \
136
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
137
  #--label_plan 2-voicemail \
138
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
139
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
140
  #
141
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch4 \
142
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
143
  #--label_plan 2-voicemail \
144
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
145
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
146
  #
147
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch4 \
148
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
149
  #--label_plan 2-voicemail \
150
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
151
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
152
  #
153
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch4 \
154
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
155
  --label_plan 2-voicemail \
156
  --config_file "yaml/conv2d-classifier-2-ch4.yaml" \
157
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
158
 
159
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch4 \
160
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
161
  #--label_plan 2-voicemail \
162
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
163
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
164
  #
165
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch4 \
166
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
167
  #--label_plan 2-voicemail \
168
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
169
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
170
  #
171
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch4 \
172
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
173
  #--label_plan 2-voicemail \
174
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
175
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
176
  #
177
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch4 \
178
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
179
  #--label_plan 2-voicemail \
180
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
181
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
182
  #
183
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch4 \
184
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
185
  #--label_plan 2-voicemail \
186
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
187
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
188
  #
189
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch4 \
190
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
191
  #--label_plan 2-voicemail \
192
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
193
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
194
 
195
 
196
  # voicemail ch32
197
 
198
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch32 \
199
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
200
  #--label_plan 2-voicemail \
201
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
202
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
203
 
204
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch32 \
205
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
206
  #--label_plan 2-voicemail \
207
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
208
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
209
  #
210
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch32 \
211
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
212
  #--label_plan 2-voicemail \
213
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
214
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
215
  #
216
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch32 \
217
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
218
  #--label_plan 2-voicemail \
219
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
220
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
221
  #
222
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch32 \
223
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
224
  #--label_plan 2-voicemail \
225
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
226
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
227
  #
228
- sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch32 \
229
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
230
  --label_plan 2-voicemail \
231
  --config_file "yaml/conv2d-classifier-2-ch32.yaml" \
232
- --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
233
 
234
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch32 \
235
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
236
  #--label_plan 2-voicemail \
237
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
238
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
239
  #
240
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch32 \
241
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
242
  #--label_plan 2-voicemail \
243
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
244
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
245
  #
246
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32 \
247
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
248
  #--label_plan 2-voicemail \
249
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
250
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
251
  #
252
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch32 \
253
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
254
  #--label_plan 2-voicemail \
255
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
256
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
257
  #
258
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch32 \
259
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
260
  #--label_plan 2-voicemail \
261
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
262
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
263
  #
264
- #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch32 \
265
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
266
  #--label_plan 2-voicemail \
267
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
268
- #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
 
3
 
4
  # sound ch4
5
 
6
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch4-cnn \
7
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
8
  #--label_plan 2 \
9
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml"
10
  #
11
  #
12
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch4-cnn \
13
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
14
  #--label_plan 3 \
15
  #--config_file "yaml/conv2d-classifier-3-ch4.yaml"
16
  #
17
  #
18
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch4-cnn \
19
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
20
  #--label_plan 4 \
21
  #--config_file "yaml/conv2d-classifier-4-ch4.yaml"
22
  #
23
  #
24
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch4-cnn \
25
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
26
  #--label_plan 8 \
27
  #--config_file "yaml/conv2d-classifier-8-ch4.yaml"
 
29
 
30
  # sound ch8
31
 
32
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch8-cnn \
33
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
34
  #--label_plan 2 \
35
  #--config_file "yaml/conv2d-classifier-2-ch8.yaml"
36
  #
37
  #
38
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch8-cnn \
39
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
40
  #--label_plan 3 \
41
  #--config_file "yaml/conv2d-classifier-3-ch8.yaml"
42
  #
43
  #
44
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch8-cnn \
45
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
46
  #--label_plan 4 \
47
  #--config_file "yaml/conv2d-classifier-4-ch8.yaml"
48
  #
49
  #
50
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch8-cnn \
51
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
52
  #--label_plan 8 \
53
  #--config_file "yaml/conv2d-classifier-8-ch8.yaml"
 
55
 
56
  # sound ch16
57
 
58
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch16-cnn \
59
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
60
  #--label_plan 2 \
61
  #--config_file "yaml/conv2d-classifier-2-ch16.yaml"
62
 
63
 
64
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch16-cnn \
65
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
66
  #--label_plan 3 \
67
  #--config_file "yaml/conv2d-classifier-3-ch16.yaml"
68
  #
69
  #
70
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch16-cnn \
71
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
72
  #--label_plan 4 \
73
  #--config_file "yaml/conv2d-classifier-4-ch16.yaml"
74
  #
75
  #
76
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch16-cnn \
77
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
78
  #--label_plan 8 \
79
  #--config_file "yaml/conv2d-classifier-8-ch16.yaml"
 
81
 
82
  # sound ch32
83
 
84
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch32-cnn \
85
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
86
  #--label_plan 2 \
87
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml"
88
  #
89
  #
90
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32-cnn \
91
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
92
  #--label_plan 3 \
93
  #--config_file "yaml/conv2d-classifier-3-ch32.yaml"
94
  #
95
  #
96
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch32-cnn \
97
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
98
  #--label_plan 4 \
99
  #--config_file "yaml/conv2d-classifier-4-ch32.yaml"
100
 
101
 
102
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch32-cnn \
103
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
104
  #--label_plan 8 \
105
  #--config_file "yaml/conv2d-classifier-8-ch32.yaml"
 
107
 
108
  # pretrained voicemail
109
 
110
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch4-cnn \
111
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
112
  --label_plan 2-voicemail \
113
  --config_file "yaml/conv2d-classifier-2-ch4.yaml"
114
 
115
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch32-cnn \
116
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
117
  --label_plan 2-voicemail \
118
  --config_file "yaml/conv2d-classifier-2-ch32.yaml"
 
120
 
121
  # voicemail ch4
122
 
123
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch4-cnn \
124
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
125
  #--label_plan 2-voicemail \
126
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
127
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
128
 
129
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch4-cnn \
130
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
131
  #--label_plan 2-voicemail \
132
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
133
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
134
  #
135
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch4-cnn \
136
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
137
  #--label_plan 2-voicemail \
138
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
139
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
140
  #
141
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch4-cnn \
142
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
143
  #--label_plan 2-voicemail \
144
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
145
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
146
  #
147
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch4-cnn \
148
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
149
  #--label_plan 2-voicemail \
150
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
151
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
152
  #
153
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch4-cnn \
154
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
155
  --label_plan 2-voicemail \
156
  --config_file "yaml/conv2d-classifier-2-ch4.yaml" \
157
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
158
 
159
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch4-cnn \
160
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
161
  #--label_plan 2-voicemail \
162
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
163
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
164
  #
165
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch4-cnn \
166
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
167
  #--label_plan 2-voicemail \
168
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
169
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
170
  #
171
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch4-cnn \
172
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
173
  #--label_plan 2-voicemail \
174
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
175
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
176
  #
177
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch4-cnn \
178
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
179
  #--label_plan 2-voicemail \
180
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
181
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
182
  #
183
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch4-cnn \
184
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
185
  #--label_plan 2-voicemail \
186
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
187
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
188
  #
189
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch4-cnn \
190
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
191
  #--label_plan 2-voicemail \
192
  #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
193
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
194
 
195
 
196
  # voicemail ch32
197
 
198
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch32-cnn \
199
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
200
  #--label_plan 2-voicemail \
201
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
202
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
203
 
204
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch32-cnn \
205
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
206
  #--label_plan 2-voicemail \
207
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
208
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
209
  #
210
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch32-cnn \
211
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
212
  #--label_plan 2-voicemail \
213
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
214
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
215
  #
216
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch32-cnn \
217
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
218
  #--label_plan 2-voicemail \
219
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
220
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
221
  #
222
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch32-cnn \
223
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
224
  #--label_plan 2-voicemail \
225
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
226
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
227
  #
228
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch32-cnn \
229
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
230
  --label_plan 2-voicemail \
231
  --config_file "yaml/conv2d-classifier-2-ch32.yaml" \
232
+ --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
233
 
234
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch32-cnn \
235
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
236
  #--label_plan 2-voicemail \
237
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
238
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
239
  #
240
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch32-cnn \
241
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
242
  #--label_plan 2-voicemail \
243
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
244
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
245
  #
246
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32-cnn \
247
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
248
  #--label_plan 2-voicemail \
249
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
250
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
251
  #
252
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch32-cnn \
253
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
254
  #--label_plan 2-voicemail \
255
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
256
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
257
  #
258
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch32-cnn \
259
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
260
  #--label_plan 2-voicemail \
261
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
262
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
263
  #
264
+ #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch32-cnn \
265
  #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
266
  #--label_plan 2-voicemail \
267
  #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
268
+ #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
examples/{vm_sound_classification → sound_classification_by_cnn}/step_1_prepare_data.py RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/step_2_make_vocabulary.py RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/step_3_train_model.py RENAMED
@@ -50,7 +50,7 @@ def get_args():
50
  parser.add_argument("--config_file", default="conv2d_classifier.yaml", type=str)
51
  parser.add_argument(
52
  "--pretrained_model",
53
- # default=(project_path / "trained_models/voicemail-en-sg-2-ch4.zip").as_posix(),
54
  default="null",
55
  type=str
56
  )
 
50
  parser.add_argument("--config_file", default="conv2d_classifier.yaml", type=str)
51
  parser.add_argument(
52
  "--pretrained_model",
53
+ # default=(project_path / "trained_models/voicemail-en-sg-2-ch4-cnn.zip").as_posix(),
54
  default="null",
55
  type=str
56
  )
examples/{vm_sound_classification → sound_classification_by_cnn}/step_4_evaluation_model.py RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/step_5_export_models.py RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/step_6_infer.py RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/step_7_test_model.py RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/stop.sh RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch16.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch32.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch4.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch8.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch16.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch32.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch4.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch8.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch16.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch32.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch4.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch8.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch16.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch32.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch4.yaml RENAMED
File without changes
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch8.yaml RENAMED
File without changes
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/requirements.txt RENAMED
File without changes
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/run.sh RENAMED
File without changes
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_1_prepare_data.py RENAMED
File without changes
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_2_make_vocabulary.py RENAMED
File without changes
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_3_train_global_model.py RENAMED
File without changes
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_4_train_country_model.py RENAMED
File without changes
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_5_train_union.py RENAMED
File without changes
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/stop.sh RENAMED
File without changes
examples/sound_classification_by_lstm/run.sh ADDED
@@ -0,0 +1,197 @@
1
+ #!/usr/bin/env bash
2
+
3
+ : <<'END'
4
+
5
+ sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32-lstm \
6
+ --filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
7
+ E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
8
+ --label_plan 4
9
+
10
+ sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32-lstm \
11
+ --filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
12
+ E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
13
+ --label_plan 4
14
+
15
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32-lstm \
16
+ --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
17
+ --label_plan 3 \
18
+ --config_file "yaml/lstm_classifier-3-ch64.yaml"
19
+
20
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32-lstm \
21
+ --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
22
+ --label_plan 2-voicemail \
23
+ --config_file "yaml/lstm_classifier-2-ch64.yaml"
24
+
25
+ END
26
+
27
+
28
+ # params
29
+ system_version="windows";
30
+ verbose=true;
31
+ stage=0 # start from 0 if you need to start from data preparation
32
+ stop_stage=9
33
+
34
+ work_dir="$(pwd)"
35
+ file_folder_name=file_folder_name
36
+ final_model_name=final_model_name
37
+ filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
38
+ label_plan=4
39
+ config_file="yaml/lstm_classifier-4-ch64.yaml"
40
+ pretrained_model=null
41
+ nohup_name=nohup.out
42
+
43
+ country=en-US
44
+
45
+ # model params
46
+ batch_size=64
47
+ max_epochs=200
48
+ save_top_k=10
49
+ patience=5
50
+
51
+
52
+ # parse options
53
+ while true; do
54
+ [ -z "${1:-}" ] && break; # break if there are no arguments
55
+ case "$1" in
56
+ --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
57
+ eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
58
+ old_value="$(eval echo \$$name)";
59
+ if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
60
+ was_bool=true;
61
+ else
62
+ was_bool=false;
63
+ fi
64
+
65
+ # Set the variable to the right value-- the escaped quotes make it work if
66
+ # the option had spaces, like --cmd "queue.pl -sync y"
67
+ eval "${name}=\"$2\"";
68
+
69
+ # Check that Boolean-valued arguments are really Boolean.
70
+ if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
71
+ echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
72
+ exit 1;
73
+ fi
74
+ shift 2;
75
+ ;;
76
+
77
+ *) break;
78
+ esac
79
+ done
80
+
81
+ file_dir="${work_dir}/${file_folder_name}"
82
+ final_model_dir="${work_dir}/../../trained_models/${final_model_name}";
83
+
84
+ dataset="${file_dir}/dataset.xlsx"
85
+ train_dataset="${file_dir}/train.xlsx"
86
+ valid_dataset="${file_dir}/valid.xlsx"
87
+ evaluation_file="${file_dir}/evaluation.xlsx"
88
+ vocabulary_dir="${file_dir}/vocabulary"
89
+
90
+ $verbose && echo "system_version: ${system_version}"
91
+ $verbose && echo "file_folder_name: ${file_folder_name}"
92
+
93
+ if [ $system_version == "windows" ]; then
94
+ alias python3='D:/Users/tianx/PycharmProjects/virtualenv/cc_audio_8/Scripts/python.exe'
95
+ elif [ $system_version == "centos" ] || [ $system_version == "ubuntu" ]; then
96
+ #source /data/local/bin/cc_audio_8/bin/activate
97
+ alias python3='/data/local/bin/cc_audio_8/bin/python3'
98
+ fi
99
+
100
+
101
+ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
102
+ $verbose && echo "stage 0: prepare data"
103
+ cd "${work_dir}" || exit 1
104
+ python3 step_1_prepare_data.py \
105
+ --file_dir "${file_dir}" \
106
+ --filename_patterns "${filename_patterns}" \
107
+ --train_dataset "${train_dataset}" \
108
+ --valid_dataset "${valid_dataset}" \
109
+ --label_plan "${label_plan}" \
110
+
111
+ fi
112
+
113
+
114
+ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
115
+ $verbose && echo "stage 1: make vocabulary"
116
+ cd "${work_dir}" || exit 1
117
+ python3 step_2_make_vocabulary.py \
118
+ --vocabulary_dir "${vocabulary_dir}" \
119
+ --train_dataset "${train_dataset}" \
120
+ --valid_dataset "${valid_dataset}" \
121
+
122
+ fi
123
+
124
+
125
+ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
126
+ $verbose && echo "stage 2: train model"
127
+ cd "${work_dir}" || exit 1
128
+ python3 step_3_train_model.py \
129
+ --vocabulary_dir "${vocabulary_dir}" \
130
+ --train_dataset "${train_dataset}" \
131
+ --valid_dataset "${valid_dataset}" \
132
+ --serialization_dir "${file_dir}" \
133
+ --config_file "${config_file}" \
134
+ --pretrained_model "${pretrained_model}" \
135
+
136
+ fi
137
+
138
+
139
+ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
140
+ $verbose && echo "stage 3: test model"
141
+ cd "${work_dir}" || exit 1
142
+ python3 step_4_evaluation_model.py \
143
+ --dataset "${dataset}" \
144
+ --vocabulary_dir "${vocabulary_dir}" \
145
+ --model_dir "${file_dir}/best" \
146
+ --output_file "${evaluation_file}" \
147
+
148
+ fi
149
+
150
+
151
+ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
152
+ $verbose && echo "stage 4: export model"
153
+ cd "${work_dir}" || exit 1
154
+ python3 step_5_export_models.py \
155
+ --vocabulary_dir "${vocabulary_dir}" \
156
+ --model_dir "${file_dir}/best" \
157
+ --serialization_dir "${file_dir}" \
158
+
159
+ fi
160
+
161
+
162
+ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
163
+ $verbose && echo "stage 5: collect files"
164
+ cd "${work_dir}" || exit 1
165
+
166
+ mkdir -p ${final_model_dir}
167
+
168
+ cp "${file_dir}/best"/* "${final_model_dir}"
169
+ cp -r "${file_dir}/vocabulary" "${final_model_dir}"
170
+
171
+ cp "${file_dir}/evaluation.xlsx" "${final_model_dir}/evaluation.xlsx"
172
+
173
+ cp "${file_dir}/trace_model.zip" "${final_model_dir}/trace_model.zip"
174
+ cp "${file_dir}/trace_quant_model.zip" "${final_model_dir}/trace_quant_model.zip"
175
+ cp "${file_dir}/script_model.zip" "${final_model_dir}/script_model.zip"
176
+ cp "${file_dir}/script_quant_model.zip" "${final_model_dir}/script_quant_model.zip"
177
+
178
+ cd "${final_model_dir}/.." || exit 1;
179
+
180
+ if [ -e "${final_model_name}.zip" ]; then
181
+ rm -rf "${final_model_name}_backup.zip"
182
+ mv "${final_model_name}.zip" "${final_model_name}_backup.zip"
183
+ fi
184
+
185
+ zip -r "${final_model_name}.zip" "${final_model_name}"
186
+ rm -rf "${final_model_name}"
187
+
188
+ fi
189
+
190
+
191
+ if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
192
+ $verbose && echo "stage 6: clear file_dir"
193
+ cd "${work_dir}" || exit 1
194
+
195
+ rm -rf "${file_dir}";
196
+
197
+ fi
examples/sound_classification_by_lstm/step_1_prepare_data.py ADDED
@@ -0,0 +1,193 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ from glob import glob
5
+ import os
6
+ from pathlib import Path
7
+ import random
8
+ import sys
9
+
10
+ pwd = os.path.abspath(os.path.dirname(__file__))
11
+ sys.path.append(os.path.join(pwd, "../../"))
12
+
13
+ import pandas as pd
14
+ from scipy.io import wavfile
15
+ from tqdm import tqdm
16
+
17
+
18
+ def get_args():
19
+ parser = argparse.ArgumentParser()
20
+ parser.add_argument("--file_dir", default="./", type=str)
21
+ parser.add_argument("--filename_patterns", type=str)
22
+
23
+ parser.add_argument("--train_dataset", default="train.xlsx", type=str)
24
+ parser.add_argument("--valid_dataset", default="valid.xlsx", type=str)
25
+
26
+ parser.add_argument("--label_plan", default="4", type=str)
27
+
28
+ args = parser.parse_args()
29
+ return args
30
+
31
+
32
+ def get_dataset(args):
33
+ filename_patterns = args.filename_patterns
34
+ filename_patterns = filename_patterns.split(" ")
35
+ print(filename_patterns)
36
+
37
+ file_dir = Path(args.file_dir)
38
+ file_dir.mkdir(exist_ok=True)
39
+
40
+ if args.label_plan == "2-voicemail":
41
+ label_map = {
42
+ "bell": "voicemail",
43
+ "white_noise": "non_voicemail",
44
+ "low_white_noise": "non_voicemail",
45
+ "high_white_noise": "non_voicemail",
46
+ # "music": "non_voicemail",
47
+ "mute": "non_voicemail",
48
+ "noise": "non_voicemail",
49
+ "noise_mute": "non_voicemail",
50
+ "voice": "non_voicemail",
51
+ "voicemail": "voicemail",
52
+ }
53
+ elif args.label_plan == "2":
54
+ label_map = {
55
+ "bell": "non_voice",
56
+ "white_noise": "non_voice",
57
+ "low_white_noise": "non_voice",
58
+ "high_white_noise": "non_voice",
59
+ "music": "non_voice",
60
+ "mute": "non_voice",
61
+ "noise": "non_voice",
62
+ "noise_mute": "non_voice",
63
+ "voice": "voice",
64
+ "voicemail": "voice",
65
+ }
66
+ elif args.label_plan == "3":
67
+ label_map = {
68
+ "bell": "voicemail",
69
+ "white_noise": "mute",
70
+ "low_white_noise": "mute",
71
+ "high_white_noise": "mute",
72
+ # "music": "music",
73
+ "mute": "mute",
74
+ "noise": "voice_or_noise",
75
+ "noise_mute": "voice_or_noise",
76
+ "voice": "voice_or_noise",
77
+ "voicemail": "voicemail",
78
+ }
79
+ elif args.label_plan == "4":
80
+ label_map = {
81
+ "bell": "voicemail",
82
+ "white_noise": "mute",
83
+ "low_white_noise": "mute",
84
+ "high_white_noise": "mute",
85
+ # "music": "music",
86
+ "mute": "mute",
87
+ "noise": "noise",
88
+ "noise_mute": "noise",
89
+ "voice": "voice",
90
+ "voicemail": "voicemail",
91
+ }
92
+ elif args.label_plan == "8":
93
+ label_map = {
94
+ "bell": "bell",
95
+ "white_noise": "white_noise",
96
+ "low_white_noise": "white_noise",
97
+ "high_white_noise": "white_noise",
98
+ "music": "music",
99
+ "mute": "mute",
100
+ "noise": "noise",
101
+ "noise_mute": "noise_mute",
102
+ "voice": "voice",
103
+ "voicemail": "voicemail",
104
+ }
105
+ else:
106
+ raise AssertionError
107
+
108
+ result = list()
109
+ for filename_pattern in filename_patterns:
110
+ filename_list = glob(filename_pattern)
111
+ for filename in tqdm(filename_list):
112
+ filename = Path(filename)
113
+ sample_rate, signal = wavfile.read(filename.as_posix())
114
+ if len(signal) < sample_rate * 2:
115
+ continue
116
+
117
+ folder = filename.parts[-2]
118
+ country = filename.parts[-4]
119
+
120
+ if folder not in label_map.keys():
121
+ continue
122
+
123
+ labels = label_map[folder]
124
+
125
+ random1 = random.random()
126
+ random2 = random.random()
127
+
128
+ result.append({
129
+ "filename": filename,
130
+ "folder": folder,
131
+ "category": country,
132
+ "labels": labels,
133
+ "random1": random1,
134
+ "random2": random2,
135
+ "flag": "TRAIN" if random2 < 0.8 else "TEST",
136
+ })
137
+
138
+ df = pd.DataFrame(result)
139
+ pivot_table = pd.pivot_table(df, index=["labels"], values=["filename"], aggfunc="count")
140
+ print(pivot_table)
141
+
142
+ df = df.sort_values(by=["random1"], ascending=False)
143
+ df.to_excel(
144
+ file_dir / "dataset.xlsx",
145
+ index=False,
146
+ # encoding="utf_8_sig"
147
+ )
148
+
149
+ return
150
+
151
+
152
+ def split_dataset(args):
153
+ """Split the dataset into training and test sets."""
154
+ file_dir = Path(args.file_dir)
155
+ file_dir.mkdir(exist_ok=True)
156
+
157
+ df = pd.read_excel(file_dir / "dataset.xlsx")
158
+
159
+ train = list()
160
+ test = list()
161
+
162
+ for i, row in df.iterrows():
163
+ flag = row["flag"]
164
+ if flag == "TRAIN":
165
+ train.append(row)
166
+ else:
167
+ test.append(row)
168
+
169
+ train = pd.DataFrame(train)
170
+ train.to_excel(
171
+ args.train_dataset,
172
+ index=False,
173
+ # encoding="utf_8_sig"
174
+ )
175
+ test = pd.DataFrame(test)
176
+ test.to_excel(
177
+ args.valid_dataset,
178
+ index=False,
179
+ # encoding="utf_8_sig"
180
+ )
181
+
182
+ return
183
+
184
+
185
+ def main():
186
+ args = get_args()
187
+ get_dataset(args)
188
+ split_dataset(args)
189
+ return
190
+
191
+
192
+ if __name__ == "__main__":
193
+ main()
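For context, stage 0 of the run.sh added above drives this script roughly as follows; this is a sketch assembled from that script's defaults, and the concrete file_dir paths are illustrative rather than taken from the commit:

python3 step_1_prepare_data.py \
  --file_dir "./file_dir" \
  --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
  --train_dataset "./file_dir/train.xlsx" \
  --valid_dataset "./file_dir/valid.xlsx" \
  --label_plan 4

Each matched wav shorter than two seconds is skipped, and the remaining rows are split roughly 80/20 into train.xlsx and valid.xlsx via the random2 column.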
examples/sound_classification_by_lstm/step_2_make_vocabulary.py ADDED
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import os
5
+ import sys
6
+
7
+ pwd = os.path.abspath(os.path.dirname(__file__))
8
+ sys.path.append(os.path.join(pwd, "../../"))
9
+
10
+ import pandas as pd
11
+
12
+ from toolbox.torch.utils.data.vocabulary import Vocabulary
13
+
14
+
15
+ def get_args():
16
+ parser = argparse.ArgumentParser()
17
+ parser.add_argument("--vocabulary_dir", default="vocabulary", type=str)
18
+
19
+ parser.add_argument("--train_dataset", default="train.xlsx", type=str)
20
+ parser.add_argument("--valid_dataset", default="valid.xlsx", type=str)
21
+
22
+ args = parser.parse_args()
23
+ return args
24
+
25
+
26
+ def main():
27
+ args = get_args()
28
+
29
+ train_dataset = pd.read_excel(args.train_dataset)
30
+ valid_dataset = pd.read_excel(args.valid_dataset)
31
+
32
+ vocabulary = Vocabulary()
33
+
34
+ # train
35
+ for i, row in train_dataset.iterrows():
36
+ label = row["labels"]
37
+ vocabulary.add_token_to_namespace(label, namespace="labels")
38
+
39
+ # valid
40
+ for i, row in valid_dataset.iterrows():
41
+ label = row["labels"]
42
+ vocabulary.add_token_to_namespace(label, namespace="labels")
43
+
44
+ vocabulary.save_to_files(args.vocabulary_dir)
45
+
46
+ return
47
+
48
+
49
+ if __name__ == "__main__":
50
+ main()
examples/sound_classification_by_lstm/step_3_train_model.py ADDED
@@ -0,0 +1,367 @@
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ from collections import defaultdict
5
+ import json
6
+ import logging
7
+ from logging.handlers import TimedRotatingFileHandler
8
+ import os
9
+ import platform
10
+ from pathlib import Path
11
+ import random
12
+ import sys
13
+ import shutil
14
+ import tempfile
15
+ from typing import List
16
+ import zipfile
17
+
18
+ pwd = os.path.abspath(os.path.dirname(__file__))
19
+ sys.path.append(os.path.join(pwd, "../../"))
20
+
21
+ import numpy as np
22
+ import torch
23
+ from torch.utils.data.dataloader import DataLoader
24
+ from tqdm import tqdm
25
+
26
+ from toolbox.torch.modules.loss import FocalLoss, HingeLoss, HingeLinear
27
+ from toolbox.torch.training.metrics.categorical_accuracy import CategoricalAccuracy
28
+ from toolbox.torch.utils.data.vocabulary import Vocabulary
29
+ from toolbox.torch.utils.data.dataset.wave_classifier_excel_dataset import WaveClassifierExcelDataset
30
+ from toolbox.torchaudio.models.lstm_audio_classifier.modeling_lstm_audio_classifier import LSTMClassifierPretrainedModel
31
+ from toolbox.torchaudio.models.lstm_audio_classifier.configuration_lstm_audio_classifier import LSTMClassifierConfig
32
+
33
+
34
+ def get_args():
35
+ parser = argparse.ArgumentParser()
36
+ parser.add_argument("--vocabulary_dir", default="vocabulary", type=str)
37
+
38
+ parser.add_argument("--train_dataset", default="train.xlsx", type=str)
39
+ parser.add_argument("--valid_dataset", default="valid.xlsx", type=str)
40
+
41
+ parser.add_argument("--max_epochs", default=100, type=int)
42
+
43
+ parser.add_argument("--batch_size", default=64, type=int)
44
+ parser.add_argument("--learning_rate", default=1e-3, type=float)
45
+ parser.add_argument("--num_serialized_models_to_keep", default=10, type=int)
46
+ parser.add_argument("--patience", default=5, type=int)
47
+ parser.add_argument("--serialization_dir", default="serialization_dir", type=str)
48
+ parser.add_argument("--seed", default=0, type=int)
49
+
50
+ parser.add_argument("--config_file", default="conv2d_classifier.yaml", type=str)
51
+ parser.add_argument(
52
+ "--pretrained_model",
53
+ # default=(project_path / "trained_models/voicemail-en-sg-2-ch4-cnn.zip").as_posix(),
54
+ default="null",
55
+ type=str
56
+ )
57
+
58
+ args = parser.parse_args()
59
+ return args
60
+
61
+
62
+ def logging_config(file_dir: str):
63
+ fmt = "%(asctime)s - %(name)s - %(levelname)s %(filename)s:%(lineno)d > %(message)s"
64
+
65
+ logging.basicConfig(format=fmt,
66
+ datefmt="%m/%d/%Y %H:%M:%S",
67
+ level=logging.DEBUG)
68
+ file_handler = TimedRotatingFileHandler(
69
+ filename=os.path.join(file_dir, "main.log"),
70
+ encoding="utf-8",
71
+ when="D",
72
+ interval=1,
73
+ backupCount=7
74
+ )
75
+ file_handler.setLevel(logging.INFO)
76
+ file_handler.setFormatter(logging.Formatter(fmt))
77
+ logger = logging.getLogger(__name__)
78
+ logger.addHandler(file_handler)
79
+
80
+ return logger
81
+
82
+
83
+ class CollateFunction(object):
84
+ def __init__(self):
85
+ pass
86
+
87
+ def __call__(self, batch: List[dict]):
88
+ array_list = list()
89
+ label_list = list()
90
+ for sample in batch:
91
+ array = sample["waveform"]
92
+ label = sample["label"]
93
+
94
+ l = len(array)
95
+ if l < 16000:
96
+ delta = int(16000 - l)
97
+ array = np.concatenate([array, np.zeros(shape=(delta,), dtype=np.float32)], axis=-1)
98
+ if l > 16000:
99
+ array = array[:16000]
100
+
101
+ array_list.append(array)
102
+ label_list.append(label)
103
+
104
+ array_list = torch.stack(array_list)
105
+ label_list = torch.stack(label_list)
106
+ return array_list, label_list
107
+
108
+
109
+ collate_fn = CollateFunction()
110
+
111
+
112
+ def main():
113
+ args = get_args()
114
+
115
+ serialization_dir = Path(args.serialization_dir)
116
+ serialization_dir.mkdir(parents=True, exist_ok=True)
117
+
118
+ logger = logging_config(serialization_dir)
119
+
120
+ random.seed(args.seed)
121
+ np.random.seed(args.seed)
122
+ torch.manual_seed(args.seed)
123
+ logger.info("set seed: {}".format(args.seed))
124
+
125
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
126
+ n_gpu = torch.cuda.device_count()
127
+ logger.info("GPU available count: {}; device: {}".format(n_gpu, device))
128
+
129
+ vocabulary = Vocabulary.from_files(args.vocabulary_dir)
130
+
131
+ # datasets
132
+ logger.info("prepare datasets")
133
+ train_dataset = WaveClassifierExcelDataset(
134
+ vocab=vocabulary,
135
+ excel_file=args.train_dataset,
136
+ category=None,
137
+ category_field="category",
138
+ label_field="labels",
139
+ expected_sample_rate=8000,
140
+ max_wave_value=32768.0,
141
+ )
142
+ valid_dataset = WaveClassifierExcelDataset(
143
+ vocab=vocabulary,
144
+ excel_file=args.valid_dataset,
145
+ category=None,
146
+ category_field="category",
147
+ label_field="labels",
148
+ expected_sample_rate=8000,
149
+ max_wave_value=32768.0,
150
+ )
151
+ train_data_loader = DataLoader(
152
+ dataset=train_dataset,
153
+ batch_size=args.batch_size,
154
+ shuffle=True,
155
+ # Linux 系统中可以使用多个子进程���载数据, 而在 Windows 系统中不能.
156
+ num_workers=0 if platform.system() == "Windows" else os.cpu_count() // 2,
157
+ collate_fn=collate_fn,
158
+ pin_memory=False,
159
+ # prefetch_factor=64,
160
+ )
161
+ valid_data_loader = DataLoader(
162
+ dataset=valid_dataset,
163
+ batch_size=args.batch_size,
164
+ shuffle=True,
165
+ # Linux 系统中可以使用多个子进程加载数据, 而在 Windows 系统中不能.
166
+ num_workers=0 if platform.system() == "Windows" else os.cpu_count() // 2,
167
+ collate_fn=collate_fn,
168
+ pin_memory=False,
169
+ # prefetch_factor=64,
170
+ )
171
+
172
+ # models
173
+ logger.info(f"prepare models. config_file: {args.config_file}")
174
+ config = LSTMClassifierConfig.from_pretrained(
175
+ pretrained_model_name_or_path=args.config_file,
176
+ # num_labels=vocabulary.get_vocab_size(namespace="labels")
177
+ )
178
+ if not config.cls_head_param["num_labels"] == vocabulary.get_vocab_size(namespace="labels"):
179
+ raise AssertionError("expected num labels: {} instead of {}.".format(
180
+ vocabulary.get_vocab_size(namespace="labels"),
181
+ config.cls_head_param["num_labels"],
182
+ ))
183
+ model = LSTMClassifierPretrainedModel(
184
+ config=config,
185
+ )
186
+
187
+ if args.pretrained_model is not None and os.path.exists(args.pretrained_model):
188
+ logger.info(f"load pretrained model state dict from: {args.pretrained_model}")
189
+ pretrained_model = Path(args.pretrained_model)
190
+ with zipfile.ZipFile(pretrained_model.as_posix(), "r") as f_zip:
191
+ out_root = Path(tempfile.gettempdir()) / "cc_audio_8"
192
+ # print(out_root.as_posix())
193
+ if out_root.exists():
194
+ shutil.rmtree(out_root.as_posix())
195
+ out_root.mkdir(parents=True, exist_ok=True)
196
+ f_zip.extractall(path=out_root)
197
+
198
+ tgt_path = out_root / pretrained_model.stem
199
+ model_pt_file = tgt_path / "model.pt"
200
+ with open(model_pt_file, "rb") as f:
201
+ state_dict = torch.load(f, map_location="cpu")
202
+ model.load_state_dict(state_dict=state_dict)
203
+
204
+ model.to(device)
205
+ model.train()
206
+
207
+ # optimizer
208
+ logger.info("prepare optimizer, lr_scheduler, loss_fn, categorical_accuracy")
209
+ param_optimizer = model.parameters()
210
+ optimizer = torch.optim.Adam(
211
+ param_optimizer,
212
+ lr=args.learning_rate,
213
+ )
214
+ # lr_scheduler = torch.optim.lr_scheduler.StepLR(
215
+ # optimizer,
216
+ # step_size=2000
217
+ # )
218
+ lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
219
+ optimizer,
220
+ milestones=[10000, 20000, 30000, 40000, 50000], gamma=0.5
221
+ )
222
+ focal_loss = FocalLoss(
223
+ num_classes=vocabulary.get_vocab_size(namespace="labels"),
224
+ reduction="mean",
225
+ )
226
+ categorical_accuracy = CategoricalAccuracy()
227
+
228
+ # training loop
229
+ logger.info("training")
230
+
231
+ training_loss = 10000000000
232
+ training_accuracy = 0.
233
+ evaluation_loss = 10000000000
234
+ evaluation_accuracy = 0.
235
+
236
+ model_list = list()
237
+ best_idx_epoch = None
238
+ best_accuracy = None
239
+ patience_count = 0
240
+
241
+ for idx_epoch in range(args.max_epochs):
242
+ categorical_accuracy.reset()
243
+ total_loss = 0.
244
+ total_examples = 0.
245
+ progress_bar = tqdm(
246
+ total=len(train_data_loader),
247
+ desc="Training; epoch: {}".format(idx_epoch),
248
+ )
249
+ for batch in train_data_loader:
250
+ input_ids, label_ids = batch
251
+ input_ids = input_ids.to(device)
252
+ label_ids: torch.LongTensor = label_ids.to(device).long()
253
+
254
+ logits = model.forward(input_ids)
255
+ loss = focal_loss.forward(logits, label_ids.view(-1))
256
+ categorical_accuracy(logits, label_ids)
257
+
258
+ total_loss += loss.item()
259
+ total_examples += input_ids.size(0)
260
+
261
+ optimizer.zero_grad()
262
+ loss.backward()
263
+ optimizer.step()
264
+ lr_scheduler.step()
265
+
266
+ training_loss = total_loss / total_examples
267
+ training_loss = round(training_loss, 4)
268
+ training_accuracy = categorical_accuracy.get_metric()["accuracy"]
269
+ training_accuracy = round(training_accuracy, 4)
270
+
271
+ progress_bar.update(1)
272
+ progress_bar.set_postfix({
273
+ "training_loss": training_loss,
274
+ "training_accuracy": training_accuracy,
275
+ })
276
+
277
+ categorical_accuracy.reset()
278
+ total_loss = 0.
279
+ total_examples = 0.
280
+ progress_bar = tqdm(
281
+ total=len(valid_data_loader),
282
+ desc="Evaluation; epoch: {}".format(idx_epoch),
283
+ )
284
+ for batch in valid_data_loader:
285
+ input_ids, label_ids = batch
286
+ input_ids = input_ids.to(device)
287
+ label_ids: torch.LongTensor = label_ids.to(device).long()
288
+
289
+ with torch.no_grad():
290
+ logits = model.forward(input_ids)
291
+ loss = focal_loss.forward(logits, label_ids.view(-1))
292
+ categorical_accuracy(logits, label_ids)
293
+
294
+ total_loss += loss.item()
295
+ total_examples += input_ids.size(0)
296
+
297
+ evaluation_loss = total_loss / total_examples
298
+ evaluation_loss = round(evaluation_loss, 4)
299
+ evaluation_accuracy = categorical_accuracy.get_metric()["accuracy"]
300
+ evaluation_accuracy = round(evaluation_accuracy, 4)
301
+
302
+ progress_bar.update(1)
303
+ progress_bar.set_postfix({
304
+ "evaluation_loss": evaluation_loss,
305
+ "evaluation_accuracy": evaluation_accuracy,
306
+ })
307
+
308
+ # save path
309
+ epoch_dir = serialization_dir / "epoch-{}".format(idx_epoch)
310
+ epoch_dir.mkdir(parents=True, exist_ok=False)
311
+
312
+ # save models
313
+ model.save_pretrained(epoch_dir.as_posix())
314
+
315
+ model_list.append(epoch_dir)
316
+ if len(model_list) >= args.num_serialized_models_to_keep:
317
+ model_to_delete: Path = model_list.pop(0)
318
+ shutil.rmtree(model_to_delete.as_posix())
319
+
320
+ # save metric
321
+ if best_accuracy is None:
322
+ best_idx_epoch = idx_epoch
323
+ best_accuracy = evaluation_accuracy
324
+ elif evaluation_accuracy > best_accuracy:
325
+ best_idx_epoch = idx_epoch
326
+ best_accuracy = evaluation_accuracy
327
+ else:
328
+ pass
329
+
330
+ metrics = {
331
+ "idx_epoch": idx_epoch,
332
+ "best_idx_epoch": best_idx_epoch,
333
+ "best_accuracy": best_accuracy,
334
+ "training_loss": training_loss,
335
+ "training_accuracy": training_accuracy,
336
+ "evaluation_loss": evaluation_loss,
337
+ "evaluation_accuracy": evaluation_accuracy,
338
+ "learning_rate": optimizer.param_groups[0]['lr'],
339
+ }
340
+ metrics_filename = epoch_dir / "metrics_epoch.json"
341
+ with open(metrics_filename, "w", encoding="utf-8") as f:
342
+ json.dump(metrics, f, indent=4, ensure_ascii=False)
343
+
344
+ # save best
345
+ best_dir = serialization_dir / "best"
346
+ if best_idx_epoch == idx_epoch:
347
+ if best_dir.exists():
348
+ shutil.rmtree(best_dir)
349
+ shutil.copytree(epoch_dir, best_dir)
350
+
351
+ # early stop
352
+ early_stop_flag = False
353
+ if best_idx_epoch == idx_epoch:
354
+ patience_count = 0
355
+ else:
356
+ patience_count += 1
357
+ if patience_count >= args.patience:
358
+ early_stop_flag = True
359
+
360
+ # early stop
361
+ if early_stop_flag:
362
+ break
363
+ return
364
+
365
+
366
+ if __name__ == "__main__":
367
+ main()
examples/sound_classification_by_lstm/yaml/lstm_classifier-4-ch64.yaml ADDED
@@ -0,0 +1,27 @@
1
+ model_name: "lstm_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ lstm_layer_param:
14
+ input_size: 80
15
+ hidden_size: 64
16
+ num_layers: 3
17
+ dropout: 0.2
18
+ pool_layer: last
19
+
20
+ cls_head_param:
21
+ input_dim: 64
22
+ num_layers: 1
23
+ hidden_dims:
24
+ - 32
25
+ activations: relu
26
+ dropout: 0.1
27
+ num_labels: 4
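A minimal sketch of how this config file is consumed, mirroring the calls in step_3_train_model.py above; the dummy waveform and the logits shape are assumptions for illustration and are not part of the commit:

import torch

from toolbox.torchaudio.models.lstm_audio_classifier.configuration_lstm_audio_classifier import LSTMClassifierConfig
from toolbox.torchaudio.models.lstm_audio_classifier.modeling_lstm_audio_classifier import LSTMClassifierPretrainedModel

# Load the YAML above; num_labels in cls_head_param must match the vocabulary size.
config = LSTMClassifierConfig.from_pretrained(
    pretrained_model_name_or_path="yaml/lstm_classifier-4-ch64.yaml",
)
model = LSTMClassifierPretrainedModel(config=config)
model.eval()

# Two seconds of 8 kHz audio, the shape produced by the training collate function.
waveform = torch.zeros(size=(1, 16000), dtype=torch.float32)
with torch.no_grad():
    logits = model.forward(waveform)  # expected: one score per label, here 4 labels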