Spaces:
Sleeping
Sleeping
update
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +2 -1
- examples/download_wav/Temp Query 5_20251008-093912.csv +101 -0
- examples/download_wav/step_1_download_wav.py +12 -37
- examples/download_wav/step_2_to_1ch.py +12 -8
- examples/download_wav/step_3_split_two_second_wav.py +14 -7
- examples/lstm_badcase_filter/step_1_badcase_filter.py +233 -0
- examples/online_model_test/step_1_predict.py +6 -4
- examples/online_model_test/step_2_audio_filter.py +6 -2
- examples/online_model_test/step_3_make_test.py +2 -2
- examples/online_model_test/test.py +84 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/requirements.txt +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/run.sh +4 -4
- examples/{vm_sound_classification → sound_classification_by_cnn}/run_batch.sh +66 -66
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_1_prepare_data.py +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_2_make_vocabulary.py +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_3_train_model.py +1 -1
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_4_evaluation_model.py +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_5_export_models.py +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_6_infer.py +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_7_test_model.py +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/stop.sh +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch16.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch32.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch4.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch8.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch16.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch32.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch4.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch8.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch16.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch32.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch4.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch8.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch16.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch32.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch4.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch8.yaml +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/requirements.txt +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/run.sh +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_1_prepare_data.py +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_2_make_vocabulary.py +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_3_train_global_model.py +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_4_train_country_model.py +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_5_train_union.py +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/stop.sh +0 -0
- examples/sound_classification_by_lstm/run.sh +197 -0
- examples/sound_classification_by_lstm/step_1_prepare_data.py +193 -0
- examples/sound_classification_by_lstm/step_2_make_vocabulary.py +50 -0
- examples/sound_classification_by_lstm/step_3_train_model.py +367 -0
- examples/sound_classification_by_lstm/yaml/lstm_classifier-4-ch64.yaml +27 -0
.gitignore
CHANGED
|
@@ -15,6 +15,7 @@
|
|
| 15 |
/trained_models/
|
| 16 |
/temp/
|
| 17 |
|
|
|
|
|
|
|
| 18 |
#**/*.wav
|
| 19 |
**/*.xlsx
|
| 20 |
-
**/*.onnx
|
|
|
|
| 15 |
/trained_models/
|
| 16 |
/temp/
|
| 17 |
|
| 18 |
+
**/*.csv
|
| 19 |
+
**/*.onnx
|
| 20 |
#**/*.wav
|
| 21 |
**/*.xlsx
|
|
|
examples/download_wav/Temp Query 5_20251008-093912.csv
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
date,overdue_term,id,case_id,credit_user_id,call_start_timestamp,call_end_timestamp,thirdpart_download_url
|
| 2 |
+
11/10/2025,M3,201577107,62145483,2.05158E+18,1760156453,1760156464,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/6b76d306-b767-44e5-be9a-0a15d1165113.mp3
|
| 3 |
+
11/10/2025,M3,201552895,61647547,2.04871E+18,1760150223,1760150235,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/91eb4d93-aaaf-4a22-b1b5-93f90790f360.mp3
|
| 4 |
+
11/10/2025,M1,201571248,64869969,1.63814E+18,1760154872,1760154878,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/9feab432-a05f-4c12-a7a5-1de81c5c5552.mp3
|
| 5 |
+
10/10/2025,M5,201481243,57774660,1.86995E+18,1760093720,1760093736,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/8ca27707-73e9-41a9-a531-f84011a2d021.mp3
|
| 6 |
+
11/10/2025,M6,201602065,56556981,1.96434E+18,1760162403,1760162411,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/23edb55b-d7d7-496d-92d9-27be9a8d0f06.mp3
|
| 7 |
+
10/10/2025,M3,201432876,62937736,1.71926E+18,1760081217,1760081223,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/d64b9511-1ada-435c-bf1d-7ff8edd194d1.mp3
|
| 8 |
+
10/10/2025,M2,201418064,63818662,2.06059E+18,1760078017,1760078023,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/7a53b0cf-d4a8-496b-8533-578e1b3c8050.mp3
|
| 9 |
+
11/10/2025,M1,201546922,65604125,1.86304E+18,1760149167,1760149175,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/e6adf4e4-269c-4668-955d-7b3dd1c60736.mp3
|
| 10 |
+
10/10/2025,M3,201430098,61807602,1.85118E+18,1760080774,1760080785,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/b2b8e1e5-d92d-424d-9150-7af7506305c4.mp3
|
| 11 |
+
10/10/2025,M1,201448566,64796208,1.65278E+18,1760085408,1760085415,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/09d6248c-111d-4b73-910f-e218049185d8.mp3
|
| 12 |
+
11/10/2025,M4,201571566,60538522,1.88122E+18,1760154923,1760154930,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/944211c2-e492-4889-b655-e508fdd5879d.mp3
|
| 13 |
+
11/10/2025,M1,201566967,65843234,2.02107E+18,1760154065,1760154073,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/bfa8591b-e527-45e9-ae57-7f74f9e7302b.mp3
|
| 14 |
+
10/10/2025,M2,201447321,64267309,1.56498E+18,1760085020,1760085033,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/4bb8e69a-4c6c-4828-857a-ce0e43cc75a1.mp3
|
| 15 |
+
11/10/2025,M1,201568415,65114574,4883832,1760154398,1760154405,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/51e805c9-5cc5-490b-bae2-7d5eab6f343c.mp3
|
| 16 |
+
11/10/2025,M2,201605984,63943082,1.8374E+18,1760163009,1760163030,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/98af45d4-16cd-4eaf-9b91-05c7143a26bc.mp3
|
| 17 |
+
10/10/2025,M1,201419656,66515322,1.49814E+17,1760078339,1760078345,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/91f43558-6567-43d9-a709-ffa3173c81b4.mp3
|
| 18 |
+
10/10/2025,M2,201427406,63880041,1.56918E+18,1760080267,1760080275,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/375fdc20-428e-4658-aeb7-cb9cca17c534.mp3
|
| 19 |
+
11/10/2025,M1,201575782,64887894,1.73042E+18,1760156066,1760156082,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/b2d6b5a2-3d7f-4fc7-abf6-258cd7a7de5f.mp3
|
| 20 |
+
10/10/2025,M3,201418794,62368390,1.94558E+18,1760078142,1760078156,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/ba673b33-8496-4901-8f2f-8083802a5213.mp3
|
| 21 |
+
10/10/2025,M1,201424572,66395236,1203507,1760079797,1760079804,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/a95596c0-8b46-4333-b126-6c4c11ca41fc.mp3
|
| 22 |
+
11/10/2025,M2,201571228,64248917,1.88019E+18,1760154871,1760154880,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/e4eb3384-2d75-4563-b9a7-7bb3256a2aae.mp3
|
| 23 |
+
11/10/2025,M4,201570642,60447265,1.98507E+18,1760154782,1760154787,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/a5440c2e-3fc5-43b2-8ecf-d4bde44f62e8.mp3
|
| 24 |
+
10/10/2025,M5,201453357,58652419,1.85737E+18,1760086342,1760086350,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/68def6c9-130e-4705-b4b6-19d5d8b4b27d.mp3
|
| 25 |
+
11/10/2025,M6,201573623,57234397,1.97251E+18,1760155303,1760155313,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/0ba67a44-c94a-4d68-a6ba-d003cf4b57c8.mp3
|
| 26 |
+
10/10/2025,M5,201424683,57553385,1.86241E+18,1760079814,1760079831,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/50c26933-40a1-45bc-baf0-eb7b4c268ffb.mp3
|
| 27 |
+
11/10/2025,M1,201570171,66334366,1.8276E+18,1760154717,1760154723,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/ecbb5b43-19cf-44e2-8ee4-ea131c706421.mp3
|
| 28 |
+
10/10/2025,M4,201451276,59840709,2.04014E+18,1760085947,1760085952,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/6059e9c2-2f3c-4cef-a536-4796436b9765.mp3
|
| 29 |
+
10/10/2025,M4,201432508,59867441,1.91396E+18,1760081156,1760081172,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/c0814466-4e13-4ef1-b4be-2fbe66eebfd8.mp3
|
| 30 |
+
11/10/2025,M5,201612109,58418373,1.87946E+18,1760163975,1760163983,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/c7c54616-575a-4dc9-9820-fea245211933.mp3
|
| 31 |
+
10/10/2025,M2,201432653,64650851,1.74295E+18,1760081176,1760081192,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/e8cc82f1-ded6-484f-9e6a-b9bf514eda04.mp3
|
| 32 |
+
11/10/2025,M1,201580231,65755142,1.96545E+18,1760157408,1760157415,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/d2ed8a99-d8a4-4c0a-9b30-0a4f97e8db6e.mp3
|
| 33 |
+
10/10/2025,M3,201430023,61812734,7400607,1760080766,1760080780,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/f693b1d5-dc99-43d9-a730-2040ca645f17.mp3
|
| 34 |
+
10/10/2025,M3,201450322,62009884,2.007E+18,1760085762,1760085768,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/4915b87b-1169-41f7-af8c-6509a66dfbe6.mp3
|
| 35 |
+
10/10/2025,M3,201431281,62172812,2.05076E+18,1760080963,1760080975,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/dc9c5331-04bb-4cbf-b789-61b0b811d6b6.mp3
|
| 36 |
+
10/10/2025,M5,201430080,58314791,1.99801E+18,1760080772,1760080777,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/e571b6f4-6471-4311-81ed-cd1af7e55e07.mp3
|
| 37 |
+
11/10/2025,M4,201538284,59471661,2.03412E+18,1760147632,1760147638,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/cfbe5f77-02b3-4a04-8948-9ca4237c3abc.mp3
|
| 38 |
+
10/10/2025,M1,201447219,65817559,2.02777E+18,1760085001,1760085007,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/49dad1dd-d681-410a-87df-649eca036ff0.mp3
|
| 39 |
+
10/10/2025,M1,201481818,66043196,1.93698E+18,1760093888,1760093894,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/93c19c1f-93fa-41ec-8f57-54a606a7f4a4.mp3
|
| 40 |
+
10/10/2025,M1,201485519,66563695,2.074E+18,1760095020,1760095034,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/e8c214bd-6261-42a7-863b-3bcbd82f081e.mp3
|
| 41 |
+
11/10/2025,M5,201595914,59014301,1.5286E+18,1760161399,1760161415,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/095bcf3f-1807-4c71-a997-ce6806b4da99.mp3
|
| 42 |
+
11/10/2025,M2,201533318,63403949,1.88317E+18,1760146862,1760146871,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/8ae1702d-eca8-4e77-b39d-fd119c72499e.mp3
|
| 43 |
+
11/10/2025,M4,201576553,60295505,1.80115E+18,1760156314,1760156320,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/47bad51d-65fb-4795-9211-a8a0434b95ad.mp3
|
| 44 |
+
10/10/2025,M1,201485741,65280144,2.07517E+18,1760095107,1760095114,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/87d8ccb1-f7e9-4377-a1d8-7bf9228e0c3f.mp3
|
| 45 |
+
10/10/2025,M1,201431349,64854591,1.58546E+18,1760080970,1760080978,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/6f983610-0fd3-4da6-be49-1cc50f205618.mp3
|
| 46 |
+
10/10/2025,M5,201487648,57318618,1.64505E+18,1760095790,1760095797,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/254f796e-b955-4a5e-a190-1d3579474645.mp3
|
| 47 |
+
11/10/2025,M1,201577796,64963614,1.86777E+18,1760156648,1760156657,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/b382a477-933e-43b1-8759-a2b4e17e92b2.mp3
|
| 48 |
+
11/10/2025,M3,201533254,62192769,2.023E+18,1760146812,1760146817,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/ac78c5f9-7109-45fc-87da-5e275e0159f4.mp3
|
| 49 |
+
10/10/2025,M1,201428974,65690748,1.89974E+18,1760080550,1760080557,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/b4e53f33-4c55-4b16-b786-6619332f47fc.mp3
|
| 50 |
+
11/10/2025,M2,201546294,64684883,1.93667E+18,1760149074,1760149090,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/337edca5-80fa-4f9d-8820-3fb333b384d5.mp3
|
| 51 |
+
10/10/2025,M1,201475668,66214001,1.80957E+18,1760092690,1760092695,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/21e2b2ec-2475-4692-9b67-2453e262e77b.mp3
|
| 52 |
+
10/10/2025,M5,201459599,57741938,6633631,1760087612,1760087617,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/02bd6e79-efc1-4cfd-bee9-12967c844735.mp3
|
| 53 |
+
11/10/2025,M3,201551060,61444707,1.94443E+18,1760149891,1760149899,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/700c2e32-c5bd-48f3-8085-0617694963dd.mp3
|
| 54 |
+
10/10/2025,M4,201453055,59345041,1.68365E+18,1760086275,1760086282,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/4f1ef95b-3432-41d3-8970-1698504ba010.mp3
|
| 55 |
+
10/10/2025,M1,201426891,66520128,1728931,1760080190,1760080196,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/ec7180ac-2948-49a3-b709-f60b80dfee27.mp3
|
| 56 |
+
12/10/2025,M1,201704571,66237684,1.77477E+18,1760229906,1760229911,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251012/21962/b70ed0f3-145f-49a6-9dbb-aa695e21d7de.mp3
|
| 57 |
+
10/10/2025,M1,201457899,65109188,1.58483E+18,1760087330,1760087339,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/c5f910f6-6a5e-4bd0-8a64-f2805837558d.mp3
|
| 58 |
+
11/10/2025,M2,201537965,63920995,2.04332E+18,1760147578,1760147593,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/38ff4d8b-4db3-4410-902e-3953699bf4eb.mp3
|
| 59 |
+
11/10/2025,M1,201568081,66644507,1.92267E+18,1760154313,1760154318,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/a801cfcc-40d2-4dde-9994-b709836df856.mp3
|
| 60 |
+
11/10/2025,M3,201539641,62112487,1.75162E+18,1760147868,1760147875,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/56ec2c07-3bf5-47ba-82c6-039bf094d6ca.mp3
|
| 61 |
+
10/10/2025,M1,201483514,65958944,1.96386E+18,1760094328,1760094335,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/edd080b5-2439-4079-a27f-2f0941217825.mp3
|
| 62 |
+
10/10/2025,M5,201417598,57494166,1.59238E+18,1760077922,1760077928,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/65750a23-6edc-479a-bea4-14ba9e43648e.mp3
|
| 63 |
+
11/10/2025,M1,201528466,65224705,1.75014E+18,1760145272,1760145278,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/5b63572f-79d2-4d22-9ded-4fa2e3cd372b.mp3
|
| 64 |
+
10/10/2025,M5,201453641,58921447,1.92301E+18,1760086392,1760086405,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/4a9b93d4-9878-4023-8dd2-40fcf9502a49.mp3
|
| 65 |
+
11/10/2025,M1,201611955,65789335,1.88401E+18,1760163939,1760163946,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/6fb4f49d-fe9c-4a64-8c74-b5649d7e8175.mp3
|
| 66 |
+
10/10/2025,M1,201459093,66318002,1.61088E+18,1760087527,1760087538,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/8a608622-e3d7-4a96-89ac-1300b7653c6f.mp3
|
| 67 |
+
10/10/2025,M2,201418416,63100145,1.85044E+18,1760078077,1760078083,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/f6ab0a88-2055-4b88-9fbd-7af4aef5731f.mp3
|
| 68 |
+
11/10/2025,M3,201537163,61356706,2.04189E+18,1760147448,1760147453,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/f1f6ad6a-7399-4fb2-8073-c83ef7093b6e.mp3
|
| 69 |
+
10/10/2025,M3,201480897,61752670,1.99088E+18,1760093653,1760093662,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/cdf7552c-a3a8-4cfc-a4b1-651f84141090.mp3
|
| 70 |
+
11/10/2025,M2,201605821,63901708,2.06357E+18,1760162987,1760162993,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/70b3816a-2e43-403a-97e0-dd3288596c71.mp3
|
| 71 |
+
10/10/2025,M3,201457652,61356706,2.04189E+18,1760087292,1760087299,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/d5f5598a-b93c-4fb8-8bb1-56acc9bb0033.mp3
|
| 72 |
+
10/10/2025,M2,201480118,64077815,1.99009E+18,1760093518,1760093526,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/8cac232e-2ed4-4506-8bcf-2a8e4bca91b5.mp3
|
| 73 |
+
11/10/2025,M1,201551882,65617862,1.65598E+18,1760150016,1760150024,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/e71c8472-c3f7-486e-a388-1196836899bf.mp3
|
| 74 |
+
10/10/2025,M5,201417451,58993884,1.18898E+16,1760077893,1760077905,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/2965b989-cf94-4dc4-afaa-5fda4ac4bb34.mp3
|
| 75 |
+
11/10/2025,M5,201547506,58539902,1.92196E+18,1760149254,1760149265,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/1cb5b911-69c9-4b5a-ab2f-e4bad61dd0be.mp3
|
| 76 |
+
11/10/2025,M1,201606566,66579640,1.92316E+18,1760163098,1760163103,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/2e5ff340-8197-4069-8533-0d47a221dc57.mp3
|
| 77 |
+
11/10/2025,M2,201545849,63976411,2.01076E+18,1760148993,1760148999,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/e7592b1f-d24d-4fb5-afb9-0d64f23719d5.mp3
|
| 78 |
+
10/10/2025,M1,201487535,66304049,1.96729E+18,1760095749,1760095754,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/03b31201-d6f7-4246-80ef-ec902f93e6bf.mp3
|
| 79 |
+
10/10/2025,M1,201458971,66590224,2.06508E+18,1760087508,1760087515,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/01013ca6-1659-4613-8ae7-5d79372d4464.mp3
|
| 80 |
+
11/10/2025,M4,201548032,59720355,1.54183E+18,1760149338,1760149343,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/61bafa17-b411-4d1a-b9bd-1a76c5087b9e.mp3
|
| 81 |
+
10/10/2025,M5,201430001,57789932,1.99701E+18,1760080762,1760080768,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/aa25ffd1-82a2-4048-938d-0adc335cec41.mp3
|
| 82 |
+
11/10/2025,M1,201596095,66001014,1.94845E+18,1760161430,1760161436,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/76c67081-af66-4a80-b531-25b14a8e0443.mp3
|
| 83 |
+
11/10/2025,M3,201549933,62873165,1.87383E+18,1760149739,1760149747,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/fd0caad7-72fd-4152-a8ed-58253e946567.mp3
|
| 84 |
+
10/10/2025,M5,201447596,58417708,1.9626E+18,1760085087,1760085092,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/0f8f64c4-2587-4501-abb8-23e0e5389635.mp3
|
| 85 |
+
11/10/2025,M1,201596157,65991243,1.88004E+18,1760161446,1760161452,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/8f731992-cbc6-45e1-8203-5628d51a6e45.mp3
|
| 86 |
+
10/10/2025,M2,201391828,63462209,1.1835E+16,1760067513,1760067535,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/2c641568-ebf6-4989-abeb-e2bc404d79e8.mp3
|
| 87 |
+
11/10/2025,M4,201579066,60241526,1.95443E+18,1760157061,1760157070,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/34c79266-7c59-4e26-b936-05d64716fa79.mp3
|
| 88 |
+
11/10/2025,M3,201539123,61552513,1.65655E+18,1760147755,1760147762,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/1dc2d33a-f080-4cb4-ad66-fb50761b500a.mp3
|
| 89 |
+
11/10/2025,M5,201607636,57899370,1.93241E+18,1760163267,1760163287,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/d8e13804-692a-4cf7-b4e2-781384a1d559.mp3
|
| 90 |
+
10/10/2025,M4,201426209,60181850,1.79511E+18,1760080075,1760080081,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/8f81c559-7392-46ff-8ebb-1a6edc41381c.mp3
|
| 91 |
+
11/10/2025,M1,201535197,66655594,1.66159E+18,1760147133,1760147139,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/d793a0e3-bd7b-44f3-a1e2-bc1eb0908c1a.mp3
|
| 92 |
+
11/10/2025,M4,201613127,61191667,1.78852E+18,1760164234,1760164240,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/4e794fb2-01bf-41ff-843f-945b2b1ec9df.mp3
|
| 93 |
+
10/10/2025,M3,201456582,61353852,1.70556E+18,1760087088,1760087093,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/5fe67f28-5f86-43bb-a87f-589b75118d56.mp3
|
| 94 |
+
11/10/2025,M2,201536974,63373730,2.02636E+18,1760147420,1760147433,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/cf2bf8cc-e2af-478a-96e4-686c59a98d4d.mp3
|
| 95 |
+
11/10/2025,M6,201598303,57270639,1.57805E+18,1760161833,1760161850,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/07e2de01-8040-4942-8586-77d6eb38b64f.mp3
|
| 96 |
+
11/10/2025,M4,201577614,60545450,1.95248E+18,1760156607,1760156635,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/53f6d6eb-c22e-4935-9d3c-941969a0241f.mp3
|
| 97 |
+
10/10/2025,M1,201451460,66451819,1406890,1760085972,1760085978,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/43061915-f8e0-405b-9be0-bd3826d0aa69.mp3
|
| 98 |
+
11/10/2025,M5,201566778,57480954,1.79569E+18,1760154035,1760154041,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/f737520a-d62f-4c71-a81c-bfbcbd2887d3.mp3
|
| 99 |
+
11/10/2025,M5,201579474,58000396,1.17763E+16,1760157205,1760157215,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/b8d5a9b1-13cf-4667-8271-1b562de5dd22.mp3
|
| 100 |
+
11/10/2025,M1,201547564,66391023,987018,1760149263,1760149269,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/434cea47-98a9-4f24-b29e-bee36e9f9832.mp3
|
| 101 |
+
11/10/2025,M5,201538978,57340689,1.62497E+18,1760147736,1760147755,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/a74e253e-34cf-42df-9c3c-c8f6a45b994e.mp3
|
examples/download_wav/step_1_download_wav.py
CHANGED
|
@@ -31,7 +31,7 @@ def get_args():
|
|
| 31 |
)
|
| 32 |
parser.add_argument(
|
| 33 |
"--output_dir",
|
| 34 |
-
default=(project_path / "data/calling/
|
| 35 |
type=str
|
| 36 |
)
|
| 37 |
args = parser.parse_args()
|
|
@@ -39,37 +39,7 @@ def get_args():
|
|
| 39 |
|
| 40 |
|
| 41 |
excel_file_str = """
|
| 42 |
-
|
| 43 |
-
AIAgent-CallLog-20250929134959.xlsx
|
| 44 |
-
AIAgent-CallLog-20250929135030.xlsx
|
| 45 |
-
AIAgent-CallLog-20250929135052.xlsx
|
| 46 |
-
AIAgent-CallLog-20250929135122.xlsx
|
| 47 |
-
AIAgent-CallLog-20250929135134.xlsx
|
| 48 |
-
AIAgent-CallLog-20250929135209.xlsx
|
| 49 |
-
AIAgent-CallLog-20250929135219.xlsx
|
| 50 |
-
AIAgent-CallLog-20250929135247.xlsx
|
| 51 |
-
AIAgent-CallLog-20250929135300.xlsx
|
| 52 |
-
AIAgent-CallLog-20250929135311.xlsx
|
| 53 |
-
AIAgent-CallLog-20250929135335.xlsx
|
| 54 |
-
AIAgent-CallLog-20250929135344.xlsx
|
| 55 |
-
AIAgent-CallLog-20250929135355.xlsx
|
| 56 |
-
AIAgent-CallLog-20250929135443.xlsx
|
| 57 |
-
AIAgent-CallLog-20250929135452.xlsx
|
| 58 |
-
AIAgent-CallLog-20250929135501.xlsx
|
| 59 |
-
AIAgent-CallLog-20250929135537.xlsx
|
| 60 |
-
AIAgent-CallLog-20250929135544.xlsx
|
| 61 |
-
AIAgent-CallLog-20250929135554.xlsx
|
| 62 |
-
AIAgent-CallLog-20250929135630.xlsx
|
| 63 |
-
AIAgent-CallLog-20250929135701.xlsx
|
| 64 |
-
AIAgent-CallLog-20250929135710.xlsx
|
| 65 |
-
AIAgent-CallLog-20250929135716.xlsx
|
| 66 |
-
AIAgent-CallLog-20250929135755.xlsx
|
| 67 |
-
AIAgent-CallLog-20250929135800.xlsx
|
| 68 |
-
AIAgent-CallLog-20250929135809.xlsx
|
| 69 |
-
AIAgent-CallLog-20250929135842.xlsx
|
| 70 |
-
AIAgent-CallLog-20250929135849.xlsx
|
| 71 |
-
AIAgent-CallLog-20250929135858.xlsx
|
| 72 |
-
AIAgent-CallLog-20250929135909.xlsx
|
| 73 |
"""
|
| 74 |
|
| 75 |
|
|
@@ -101,11 +71,16 @@ def main():
|
|
| 101 |
continue
|
| 102 |
excel_file = excel_file_dir / name
|
| 103 |
|
| 104 |
-
df = pd.read_excel(excel_file.as_posix())
|
|
|
|
| 105 |
for i, row in tqdm(df.iterrows()):
|
| 106 |
-
call_date =
|
| 107 |
-
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
if pd.isna(record_url):
|
| 110 |
continue
|
| 111 |
|
|
@@ -137,7 +112,7 @@ def main():
|
|
| 137 |
if resp.status_code != 200:
|
| 138 |
raise AssertionError("status_code: {}; text: {}".format(resp.status_code, resp.text))
|
| 139 |
|
| 140 |
-
filename = output_dir / "{}.
|
| 141 |
with open(filename.as_posix(), "wb") as f:
|
| 142 |
f.write(resp.content)
|
| 143 |
|
|
|
|
| 31 |
)
|
| 32 |
parser.add_argument(
|
| 33 |
"--output_dir",
|
| 34 |
+
default=(project_path / "data/calling/62/wav_2ch").as_posix(),
|
| 35 |
type=str
|
| 36 |
)
|
| 37 |
args = parser.parse_args()
|
|
|
|
| 39 |
|
| 40 |
|
| 41 |
excel_file_str = """
|
| 42 |
+
Temp Query 5_20251008-093912.csv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
"""
|
| 44 |
|
| 45 |
|
|
|
|
| 71 |
continue
|
| 72 |
excel_file = excel_file_dir / name
|
| 73 |
|
| 74 |
+
# df = pd.read_excel(excel_file.as_posix())
|
| 75 |
+
df = pd.read_csv(excel_file.as_posix())
|
| 76 |
for i, row in tqdm(df.iterrows()):
|
| 77 |
+
call_date = "2025-10-12 00:00:00"
|
| 78 |
+
record_url = row["thirdpart_download_url"]
|
| 79 |
+
call_id = Path(record_url).stem
|
| 80 |
+
|
| 81 |
+
# call_date = row["Attempt time"]
|
| 82 |
+
# call_id = row["Call ID"]
|
| 83 |
+
# record_url = row["Recording file"]
|
| 84 |
if pd.isna(record_url):
|
| 85 |
continue
|
| 86 |
|
|
|
|
| 112 |
if resp.status_code != 200:
|
| 113 |
raise AssertionError("status_code: {}; text: {}".format(resp.status_code, resp.text))
|
| 114 |
|
| 115 |
+
filename = output_dir / "{}.mp3".format(call_id)
|
| 116 |
with open(filename.as_posix(), "wb") as f:
|
| 117 |
f.write(resp.content)
|
| 118 |
|
examples/download_wav/step_2_to_1ch.py
CHANGED
|
@@ -3,8 +3,9 @@
|
|
| 3 |
import argparse
|
| 4 |
import os
|
| 5 |
from pathlib import Path
|
| 6 |
-
import time
|
| 7 |
|
|
|
|
|
|
|
| 8 |
from scipy.io import wavfile
|
| 9 |
from tqdm import tqdm
|
| 10 |
|
|
@@ -16,12 +17,12 @@ def get_args():
|
|
| 16 |
|
| 17 |
parser.add_argument(
|
| 18 |
"--audio_dir",
|
| 19 |
-
default=(project_path / "data/calling/
|
| 20 |
type=str
|
| 21 |
)
|
| 22 |
parser.add_argument(
|
| 23 |
"--output_dir",
|
| 24 |
-
default=(project_path / "data/calling/
|
| 25 |
type=str
|
| 26 |
)
|
| 27 |
args = parser.parse_args()
|
|
@@ -36,13 +37,13 @@ def main():
|
|
| 36 |
output_dir.mkdir(parents=True, exist_ok=True)
|
| 37 |
|
| 38 |
finished = set()
|
| 39 |
-
for filename in tqdm(list(output_dir.glob("*.
|
| 40 |
splits = filename.stem.split("_")
|
| 41 |
call_id = splits[3]
|
| 42 |
finished.add(call_id)
|
| 43 |
print(f"finished count: {len(finished)}")
|
| 44 |
|
| 45 |
-
for filename in tqdm(list(audio_dir.glob("*.
|
| 46 |
call_id = filename.stem
|
| 47 |
|
| 48 |
if call_id in finished:
|
|
@@ -51,16 +52,19 @@ def main():
|
|
| 51 |
finished.add(call_id)
|
| 52 |
|
| 53 |
try:
|
| 54 |
-
sample_rate, signal = wavfile.read(filename.as_posix())
|
|
|
|
|
|
|
| 55 |
except UnboundLocalError as error:
|
| 56 |
print(f"wavfile read failed. error type: {type(error)}, text: {str(error)}, filename: {filename.as_posix()}")
|
| 57 |
raise error
|
| 58 |
if sample_rate != 8000:
|
| 59 |
raise AssertionError
|
| 60 |
|
| 61 |
-
signal = signal[:, 0]
|
|
|
|
| 62 |
|
| 63 |
-
to_filename = output_dir / f"active_media_r_{call_id}
|
| 64 |
try:
|
| 65 |
wavfile.write(
|
| 66 |
to_filename.as_posix(),
|
|
|
|
| 3 |
import argparse
|
| 4 |
import os
|
| 5 |
from pathlib import Path
|
|
|
|
| 6 |
|
| 7 |
+
import librosa
|
| 8 |
+
import numpy as np
|
| 9 |
from scipy.io import wavfile
|
| 10 |
from tqdm import tqdm
|
| 11 |
|
|
|
|
| 17 |
|
| 18 |
parser.add_argument(
|
| 19 |
"--audio_dir",
|
| 20 |
+
default=(project_path / "data/calling/62/wav_2ch").as_posix(),
|
| 21 |
type=str
|
| 22 |
)
|
| 23 |
parser.add_argument(
|
| 24 |
"--output_dir",
|
| 25 |
+
default=(project_path / "data/calling/62/wav_1ch").as_posix(),
|
| 26 |
type=str
|
| 27 |
)
|
| 28 |
args = parser.parse_args()
|
|
|
|
| 37 |
output_dir.mkdir(parents=True, exist_ok=True)
|
| 38 |
|
| 39 |
finished = set()
|
| 40 |
+
for filename in tqdm(list(output_dir.glob("*.mp3"))):
|
| 41 |
splits = filename.stem.split("_")
|
| 42 |
call_id = splits[3]
|
| 43 |
finished.add(call_id)
|
| 44 |
print(f"finished count: {len(finished)}")
|
| 45 |
|
| 46 |
+
for filename in tqdm(list(audio_dir.glob("*.mp3"))):
|
| 47 |
call_id = filename.stem
|
| 48 |
|
| 49 |
if call_id in finished:
|
|
|
|
| 52 |
finished.add(call_id)
|
| 53 |
|
| 54 |
try:
|
| 55 |
+
# sample_rate, signal = wavfile.read(filename.as_posix())
|
| 56 |
+
signal, sample_rate = librosa.load(filename.as_posix(), sr=8000, mono=False)
|
| 57 |
+
signal = np.array(signal * (1 << 15), dtype=np.int16)
|
| 58 |
except UnboundLocalError as error:
|
| 59 |
print(f"wavfile read failed. error type: {type(error)}, text: {str(error)}, filename: {filename.as_posix()}")
|
| 60 |
raise error
|
| 61 |
if sample_rate != 8000:
|
| 62 |
raise AssertionError
|
| 63 |
|
| 64 |
+
# signal = signal[:, 0]
|
| 65 |
+
signal = signal[0, :]
|
| 66 |
|
| 67 |
+
to_filename = output_dir / f"active_media_r_{call_id}_id-ID_none.wav"
|
| 68 |
try:
|
| 69 |
wavfile.write(
|
| 70 |
to_filename.as_posix(),
|
examples/download_wav/step_3_split_two_second_wav.py
CHANGED
|
@@ -15,17 +15,21 @@ def get_args():
|
|
| 15 |
|
| 16 |
parser.add_argument(
|
| 17 |
"--audio_dir",
|
| 18 |
-
default=(project_path / "data/calling/
|
|
|
|
|
|
|
|
|
|
| 19 |
type=str
|
| 20 |
)
|
| 21 |
parser.add_argument(
|
| 22 |
"--output_dir",
|
| 23 |
-
default=(project_path / "data/calling/358/wav_segmented").as_posix(),
|
|
|
|
| 24 |
type=str
|
| 25 |
)
|
| 26 |
parser.add_argument(
|
| 27 |
"--first_n_seconds",
|
| 28 |
-
default=
|
| 29 |
type=int
|
| 30 |
)
|
| 31 |
args = parser.parse_args()
|
|
@@ -40,12 +44,16 @@ def main():
|
|
| 40 |
output_dir.mkdir(parents=True, exist_ok=True)
|
| 41 |
|
| 42 |
for filename in tqdm(list(audio_dir.glob("*.wav"))):
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
sample_rate, signal = wavfile.read(filename.as_posix())
|
| 45 |
if sample_rate != 8000:
|
| 46 |
raise AssertionError
|
| 47 |
|
| 48 |
-
signal = signal[:, 0]
|
| 49 |
signal_length = len(signal) - sample_rate * 2
|
| 50 |
if signal_length <= 0:
|
| 51 |
continue
|
|
@@ -56,8 +64,7 @@ def main():
|
|
| 56 |
end = begin + sample_rate * 2
|
| 57 |
sub_signal = signal[begin: end]
|
| 58 |
|
| 59 |
-
|
| 60 |
-
to_filename = output_dir / "{}_fi-FI_none_{}.wav".format(call_id, ts)
|
| 61 |
wavfile.write(
|
| 62 |
to_filename.as_posix(),
|
| 63 |
sample_rate,
|
|
|
|
| 15 |
|
| 16 |
parser.add_argument(
|
| 17 |
"--audio_dir",
|
| 18 |
+
# default=(project_path / "data/calling/66/wav_1ch").as_posix(),
|
| 19 |
+
# default=(project_path / "data/calling/358/wav_1ch/finished/voicemail_annotation").as_posix(),
|
| 20 |
+
# default=(project_path / "data/calling/358/wav_1ch/finished/voicemail_annotation").as_posix(),
|
| 21 |
+
default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\52\music",
|
| 22 |
type=str
|
| 23 |
)
|
| 24 |
parser.add_argument(
|
| 25 |
"--output_dir",
|
| 26 |
+
# default=(project_path / "data/calling/358/wav_segmented").as_posix(),
|
| 27 |
+
default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\52\music\wav_segmented",
|
| 28 |
type=str
|
| 29 |
)
|
| 30 |
parser.add_argument(
|
| 31 |
"--first_n_seconds",
|
| 32 |
+
default=1000,
|
| 33 |
type=int
|
| 34 |
)
|
| 35 |
args = parser.parse_args()
|
|
|
|
| 44 |
output_dir.mkdir(parents=True, exist_ok=True)
|
| 45 |
|
| 46 |
for filename in tqdm(list(audio_dir.glob("*.wav"))):
|
| 47 |
+
splits = filename.stem.split("_")
|
| 48 |
+
call_id = splits[3]
|
| 49 |
+
language = splits[4]
|
| 50 |
+
scene_id = splits[5]
|
| 51 |
+
|
| 52 |
sample_rate, signal = wavfile.read(filename.as_posix())
|
| 53 |
if sample_rate != 8000:
|
| 54 |
raise AssertionError
|
| 55 |
|
| 56 |
+
# signal = signal[:, 0]
|
| 57 |
signal_length = len(signal) - sample_rate * 2
|
| 58 |
if signal_length <= 0:
|
| 59 |
continue
|
|
|
|
| 64 |
end = begin + sample_rate * 2
|
| 65 |
sub_signal = signal[begin: end]
|
| 66 |
|
| 67 |
+
to_filename = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_{begin}.wav"
|
|
|
|
| 68 |
wavfile.write(
|
| 69 |
to_filename.as_posix(),
|
| 70 |
sample_rate,
|
examples/lstm_badcase_filter/step_1_badcase_filter.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import argparse
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
|
| 8 |
+
from gradio_client import Client, handle_file
|
| 9 |
+
import librosa
|
| 10 |
+
import numpy as np
|
| 11 |
+
import onnxruntime as ort
|
| 12 |
+
from scipy.io import wavfile
|
| 13 |
+
import torch
|
| 14 |
+
import torchaudio
|
| 15 |
+
import shutil
|
| 16 |
+
|
| 17 |
+
from project_settings import project_path
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def get_args():
    """Parse the command-line options of the bad-case filter script.

    Returns:
        argparse.Namespace: holds the string options ``audio_dir`` (root
        folder that is searched for recordings), ``onnx_model_file`` (path of
        the ONNX model to load) and ``output_dir`` (folder that receives the
        collected clips).
    """
    default_audio_dir = r"D:\Users\tianx\HuggingDatasets\calling_analysis\data\pt-BR\bell_and_di_then_mute"
    # NOTE: this default is a relative path, so it is resolved against the
    # current working directory, not against this script's location.
    default_onnx_model_file = "../online_model_test/models/pt-BR.onnx"
    default_output_dir = (project_path / "data/badcase").as_posix()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--audio_dir", type=str, default=default_audio_dir)
    arg_parser.add_argument("--onnx_model_file", type=str, default=default_onnx_model_file)
    arg_parser.add_argument("--output_dir", type=str, default=default_output_dir)
    return arg_parser.parse_args()
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class OnlineModelConfig:
    """Mel-spectrogram settings used by the online model's front end.

    ``OnlineModelInference`` reads these values to build its
    ``torchaudio.transforms.MelSpectrogram`` transform.

    Args:
        sample_rate: audio sample rate handed to the transform.
        n_fft: FFT size handed to the transform.
        hop_size: hop length (passed as ``hop_length``) between frames.
        n_mels: number of mel bins.
        f_min: lower frequency bound of the mel filter bank.
        f_max: upper frequency bound of the mel filter bank.
    """

    def __init__(
        self,
        sample_rate: int = 8000,
        n_fft: int = 1024,
        hop_size: int = 512,
        n_mels: int = 80,
        f_min: float = 10.0,
        f_max: float = 3800.0,
    ):
        # Framing parameters.
        self.sample_rate, self.n_fft, self.hop_size = sample_rate, n_fft, hop_size
        # Mel filter-bank parameters.
        self.n_mels, self.f_min, self.f_max = n_mels, f_min, f_max
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class OnlineModelInference(object):
    """Classify short audio windows with an ONNX model run by ONNX Runtime.

    The audio is converted to a mel spectrogram (settings taken from
    ``OnlineModelConfig``) and fed to the ONNX graph together with the
    recurrent state tensors ``h`` and ``c``. The updated state is returned
    so that consecutive windows of the same recording can be chained.
    """

    def __init__(self,
                 model_path: str,
                 ):
        """Load the ONNX model and build the mel-spectrogram transform.

        Args:
            model_path: path of the ``.onnx`` file to load.
        """
        self.model_path = model_path

        # Request the CUDA provider only when torch reports a usable GPU;
        # the CPU provider is always listed as a fallback.
        if torch.cuda.is_available():
            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
        else:
            providers = ["CPUExecutionProvider"]
        self.session = ort.InferenceSession(self.model_path, providers=providers)

        self.config = OnlineModelConfig()

        self.mel_transform = torchaudio.transforms.MelSpectrogram(
            sample_rate=self.config.sample_rate,
            n_fft=self.config.n_fft,
            hop_length=self.config.hop_size,
            n_mels=self.config.n_mels,
            f_min=self.config.f_min,
            f_max=self.config.f_max,
            window_fn=torch.hamming_window
        )

    def predict_by_ndarray(self,
                           sub_signal: np.ndarray,
                           h: np.ndarray = None,
                           c: np.ndarray = None,
                           ):
        """Classify one audio window given as a numpy array.

        Args:
            sub_signal: audio samples, shape ``[num_samples,]``.
            h: hidden state returned by a previous call, or ``None`` to start
                from a zero state.
            c: cell state returned by a previous call, or ``None`` to start
                from a zero state.

        Returns:
            tuple: ``(label, prob, h, c)`` where ``label`` is the predicted
            label name, ``prob`` its softmax probability and ``h`` / ``c``
            the updated state as numpy arrays.
        """
        # sub_signal, shape: [num_samples,]
        sub_signal = torch.tensor(sub_signal, dtype=torch.float32)

        sub_signal = sub_signal.unsqueeze(0)
        # sub_signal, shape: [1, num_samples]
        mel_spec = self.mel_transform.forward(sub_signal)
        # mel_spec, shape: [1, n_mels, n_frames]
        mel_spec = torch.transpose(mel_spec, dim0=1, dim1=2)
        # mel_spec, shape: [1, n_frames, n_mels]

        # Each state tensor is converted based on its own presence, so that
        # passing only one of them neither drops it nor crashes on None.
        h = torch.tensor(h) if h is not None else None
        c = torch.tensor(c) if c is not None else None
        label, prob, h, c = self.predict_by_mel_spec(mel_spec, h=h, c=c)
        # h, c: torch.Tensor
        h = h.numpy()
        c = c.numpy()
        return label, prob, h, c

    def predict_by_mel_spec(self,
                            mel_spec: torch.Tensor,
                            h: torch.Tensor = None,
                            c: torch.Tensor = None,
                            ):
        """Run the ONNX graph on a mel spectrogram.

        Args:
            mel_spec: features, shape ``[1, n_frames, n_mels]``.
            h: hidden state tensor, or ``None`` for a zero state.
            c: cell state tensor, or ``None`` for a zero state.

        Returns:
            tuple: ``(label, prob, h, c)`` with ``h`` / ``c`` as
            ``torch.Tensor``.
        """
        # mel_spec, shape: [1, n_frames, n_mels]

        if h is None:
            # 3 LSTM layers, batch size 1, hidden size 64.
            h = np.zeros((3, 1, 64), dtype=np.float32)
        else:
            h = h.numpy()
        if c is None:
            # 3 LSTM layers, batch size 1, hidden size 64.
            c = np.zeros((3, 1, 64), dtype=np.float32)
        else:
            c = c.numpy()

        mel_spec_np = mel_spec.numpy()
        outputs = self.session.run(
            input_feed={
                "input": mel_spec_np,
                "h": h,
                "c": c
            },
            output_names=[
                "output", "h_out", "c_out"
            ],
        )
        logits, h, c = outputs
        # logits, np.ndarray, shape: [b, num_labels]
        # h, c: np.ndarray
        h = torch.tensor(h)
        c = torch.tensor(c)

        probs = torch.softmax(torch.tensor(logits), dim=1)
        max_prob, predicted_label_index = torch.max(probs, dim=1)

        label = self.get_label_by_index(predicted_label_index.item())
        prob = max_prob.item()
        return label, prob, h, c

    @staticmethod
    def get_label_by_index(index: int):
        """Map a class index (0-3) to its label name.

        Raises:
            KeyError: if ``index`` is not a known class index.
        """
        label_map = {
            0: "voice",
            1: "voicemail",
            2: "mute",
            3: "noise"
        }
        result = label_map[index]
        return result
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def main():
    """Collect clips on which the local ONNX model and the served model disagree.

    For every ``active_media_r_*.wav`` found below ``--audio_dir`` the first
    two seconds are classified twice: by the local ONNX model and by the
    ``sound-8-ch4`` model of the Gradio app at ``http://127.0.0.1:7864/``.
    When the served model answers ``voicemail`` or ``bell`` while the local
    model does not answer ``voicemail``, the two-second clip is stored in
    ``--output_dir`` as ``active_media_r_{call_id}_{language}_{scene_id}_0.wav``.
    Existing target files are never overwritten.
    """
    # Local import: only this entry point needs it.
    import tempfile

    args = get_args()

    client = Client("http://127.0.0.1:7864/")

    audio_dir = Path(args.audio_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    model = OnlineModelInference(model_path=args.onnx_model_file)

    # The clip handed to the Gradio client lives in a private temporary
    # directory, so no stray ``temp.wav`` is left in the working directory.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_file = Path(temp_dir) / "temp.wav"

        for filename in tqdm(audio_dir.glob("**/active_media_r_*.wav")):
            splits = filename.stem.split("_")
            if len(splits) < 6:
                # The name does not carry call id / language / scene id.
                print(f"skip file with unexpected name: {filename.as_posix()}")
                continue
            call_id, language, scene_id = splits[3:6]

            signal, sample_rate = librosa.load(filename.as_posix(), sr=8000)
            sample_rate = int(sample_rate)

            # Only the first two seconds are classified; shorter (or empty)
            # recordings are skipped.
            window_size = sample_rate * 2
            sub_signal = signal[:window_size]
            if sub_signal.shape[0] != window_size:
                continue

            # Every file starts from a fresh recurrent state.
            label1, _, _, _ = model.predict_by_ndarray(sub_signal, h=None, c=None)

            # Float -> 16 bit PCM; clip first so a sample at +1.0 does not
            # wrap around to -32768.
            pcm = np.clip(sub_signal * (1 << 15), -32768, 32767).astype(np.int16)
            wavfile.write(
                temp_file.as_posix(),
                sample_rate,
                pcm,
            )

            label2, _ = client.predict(
                audio_t=handle_file(temp_file.as_posix()),
                model_name="sound-8-ch4",
                ground_true="Hello!!",
                api_name="/when_click_cls_button"
            )

            print(label1)
            print(label2)

            # Keep the clip only when the two models actually disagree; the
            # decision must use the predicted labels, not fixed values.
            if label2 in ("voicemail", "bell") and label1 != "voicemail":
                tgt_file = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_0.wav"
                if not tgt_file.exists():
                    shutil.move(
                        temp_file.as_posix(),
                        tgt_file.as_posix(),
                    )

    return
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
if __name__ == "__main__":
|
| 233 |
+
main()
|
examples/online_model_test/step_1_predict.py
CHANGED
|
@@ -19,13 +19,13 @@ def get_args():
|
|
| 19 |
parser = argparse.ArgumentParser()
|
| 20 |
parser.add_argument(
|
| 21 |
"--audio_dir",
|
| 22 |
-
default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\
|
| 23 |
type=str,
|
| 24 |
)
|
| 25 |
-
parser.add_argument("--onnx_model_file", default="
|
| 26 |
parser.add_argument("--target_duration", default=8.0, type=float)
|
| 27 |
|
| 28 |
-
parser.add_argument("--output_file", default="
|
| 29 |
|
| 30 |
args = parser.parse_args()
|
| 31 |
return args
|
|
@@ -177,13 +177,15 @@ def main():
|
|
| 177 |
for begin in range(0, target_duration, sample_rate*2):
|
| 178 |
end = begin + sample_rate*2
|
| 179 |
sub_signal = signal[begin: end]
|
| 180 |
-
if len(sub_signal)
|
| 181 |
break
|
| 182 |
label, prob, h, c = model.predict_by_ndarray(sub_signal, h=h, c=c)
|
| 183 |
predict_result.append({
|
| 184 |
"label": label,
|
| 185 |
"prob": prob,
|
| 186 |
})
|
|
|
|
|
|
|
| 187 |
label_list = [p["label"] for p in predict_result]
|
| 188 |
predict_result_ = json.dumps(predict_result, ensure_ascii=False, indent=4)
|
| 189 |
label2 = predict_result[0]["label"]
|
|
|
|
| 19 |
parser = argparse.ArgumentParser()
|
| 20 |
parser.add_argument(
|
| 21 |
"--audio_dir",
|
| 22 |
+
default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\th-TH\th-TH\early_media_no_voice",
|
| 23 |
type=str,
|
| 24 |
)
|
| 25 |
+
parser.add_argument("--onnx_model_file", default="models/th-TH.onnx", type=str)
|
| 26 |
parser.add_argument("--target_duration", default=8.0, type=float)
|
| 27 |
|
| 28 |
+
parser.add_argument("--output_file", default="th-TH_predict.xlsx", type=str)
|
| 29 |
|
| 30 |
args = parser.parse_args()
|
| 31 |
return args
|
|
|
|
| 177 |
for begin in range(0, target_duration, sample_rate*2):
|
| 178 |
end = begin + sample_rate*2
|
| 179 |
sub_signal = signal[begin: end]
|
| 180 |
+
if len(sub_signal) < 0.5 * sample_rate:
|
| 181 |
break
|
| 182 |
label, prob, h, c = model.predict_by_ndarray(sub_signal, h=h, c=c)
|
| 183 |
predict_result.append({
|
| 184 |
"label": label,
|
| 185 |
"prob": prob,
|
| 186 |
})
|
| 187 |
+
if len(predict_result) == 0:
|
| 188 |
+
continue
|
| 189 |
label_list = [p["label"] for p in predict_result]
|
| 190 |
predict_result_ = json.dumps(predict_result, ensure_ascii=False, indent=4)
|
| 191 |
label2 = predict_result[0]["label"]
|
examples/online_model_test/step_2_audio_filter.py
CHANGED
|
@@ -10,10 +10,10 @@ import pandas as pd
|
|
| 10 |
def get_args():
|
| 11 |
parser = argparse.ArgumentParser()
|
| 12 |
|
| 13 |
-
parser.add_argument("--predict_file", default="
|
| 14 |
parser.add_argument(
|
| 15 |
"--output_dir",
|
| 16 |
-
default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\
|
| 17 |
type=str,
|
| 18 |
)
|
| 19 |
args = parser.parse_args()
|
|
@@ -24,12 +24,16 @@ def main():
|
|
| 24 |
args = get_args()
|
| 25 |
|
| 26 |
output_dir = Path(args.output_dir)
|
|
|
|
| 27 |
|
| 28 |
df = pd.read_excel(args.predict_file)
|
| 29 |
for i, row in df.iterrows():
|
| 30 |
filename = row["filename"]
|
| 31 |
ground_truth_ = row["ground_truth_"]
|
|
|
|
| 32 |
|
|
|
|
|
|
|
| 33 |
if ground_truth_ == "voicemail":
|
| 34 |
shutil.copy(
|
| 35 |
filename,
|
|
|
|
| 10 |
def get_args():
|
| 11 |
parser = argparse.ArgumentParser()
|
| 12 |
|
| 13 |
+
parser.add_argument("--predict_file", default="th-TH_predict.xlsx", type=str)
|
| 14 |
parser.add_argument(
|
| 15 |
"--output_dir",
|
| 16 |
+
default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\th-TH\th-TH\early_media_no_voice\bad_case",
|
| 17 |
type=str,
|
| 18 |
)
|
| 19 |
args = parser.parse_args()
|
|
|
|
| 24 |
args = get_args()
|
| 25 |
|
| 26 |
output_dir = Path(args.output_dir)
|
| 27 |
+
output_dir.mkdir(parents=True, exist_ok=True)
|
| 28 |
|
| 29 |
df = pd.read_excel(args.predict_file)
|
| 30 |
for i, row in df.iterrows():
|
| 31 |
filename = row["filename"]
|
| 32 |
ground_truth_ = row["ground_truth_"]
|
| 33 |
+
flag = row["flag"]
|
| 34 |
|
| 35 |
+
if flag == 1:
|
| 36 |
+
continue
|
| 37 |
if ground_truth_ == "voicemail":
|
| 38 |
shutil.copy(
|
| 39 |
filename,
|
examples/online_model_test/step_3_make_test.py
CHANGED
|
@@ -15,12 +15,12 @@ def get_args():
|
|
| 15 |
|
| 16 |
parser.add_argument(
|
| 17 |
"--src_dir",
|
| 18 |
-
default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\
|
| 19 |
type=str,
|
| 20 |
)
|
| 21 |
parser.add_argument(
|
| 22 |
"--tgt_dir",
|
| 23 |
-
default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\voice_test_examples\
|
| 24 |
type=str,
|
| 25 |
)
|
| 26 |
parser.add_argument(
|
|
|
|
| 15 |
|
| 16 |
parser.add_argument(
|
| 17 |
"--src_dir",
|
| 18 |
+
default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\63\voicemail",
|
| 19 |
type=str,
|
| 20 |
)
|
| 21 |
parser.add_argument(
|
| 22 |
"--tgt_dir",
|
| 23 |
+
default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\voice_test_examples\63\96",
|
| 24 |
type=str,
|
| 25 |
)
|
| 26 |
parser.add_argument(
|
examples/online_model_test/test.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import argparse
|
| 4 |
+
from collections import defaultdict
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
import shutil
|
| 7 |
+
|
| 8 |
+
from gradio_client import Client, handle_file
|
| 9 |
+
import librosa
|
| 10 |
+
import pandas as pd
|
| 11 |
+
from tqdm import tqdm
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def get_args():
    """Parse the three directory options of this script.

    Returns:
        argparse.Namespace: holds the string options ``finished_dir``,
        ``src_dir`` and ``tgt_dir``.
    """
    # (flag, default value) for every directory option, in declaration order.
    directory_options = (
        ("--finished_dir", r"D:\Users\tianx\HuggingSpaces\cc_audio_8\data\calling\66\wav_1ch"),
        ("--src_dir", r"D:/Users/tianx/HuggingDatasets/international_voice/data/sea-idn/audio_lib_hkg_1/audio_lib_hkg_1/th-TH/th-TH/"),
        ("--tgt_dir", r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\th-TH\bad_case"),
    )

    arg_parser = argparse.ArgumentParser()
    for flag, default_path in directory_options:
        arg_parser.add_argument(flag, default=default_path, type=str)
    return arg_parser.parse_args()
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def main():
    """Move all recordings of already finished calls into ``--tgt_dir``.

    A call counts as finished when a ``*.wav`` file carrying its call id
    exists directly in ``--finished_dir``. The call id is the 4th
    ``_``-separated field of the file stem. For every finished call that has
    at least one ``active_media_r_*.wav`` below ``--src_dir``, every wav file
    of that call found below ``--src_dir`` is moved to ``--tgt_dir``.
    Files whose name already exists in ``--tgt_dir`` are left in place.
    """
    args = get_args()

    finished_dir = Path(args.finished_dir)
    src_dir = Path(args.src_dir)
    tgt_dir = Path(args.tgt_dir)
    tgt_dir.mkdir(parents=True, exist_ok=True)

    # Call ids that are echoed on stdout whenever they are encountered.
    traced_call_ids = ("27521940-feef-4bfa-ba55-b1f00a10c64d",)

    def call_id_of(path):
        """Return the call id encoded in *path*'s stem, or None if absent."""
        parts = path.stem.split("_")
        return parts[3] if len(parts) > 3 else None

    # finished
    finished = set()
    for filename in finished_dir.glob("*.wav"):
        call_id = call_id_of(filename)
        if call_id is None:
            continue
        if call_id in traced_call_ids:
            print(f"call_id: {call_id}")

        finished.add(call_id)
    print(f"finished count: {len(finished)}")

    # call_id_to_wav_file_list
    call_id_to_wav_file_list = defaultdict(list)
    for filename in src_dir.glob("**/*.wav"):
        call_id = call_id_of(filename)
        if call_id is None:
            continue
        if call_id in traced_call_ids:
            print(f"call_id: {call_id}")

        call_id_to_wav_file_list[call_id].append(filename.as_posix())
    print(f"src count: {len(call_id_to_wav_file_list)}")

    # Materialize the listing before moving anything: files are removed from
    # src_dir inside the loop, which must not disturb the directory scan.
    active_media_files = list(src_dir.glob("**/active_media_r_*.wav"))
    for filename in tqdm(active_media_files):
        call_id = call_id_of(filename)
        if call_id is None:
            continue
        if call_id in traced_call_ids:
            print(f"call_id: {call_id}")

        if call_id not in finished:
            continue

        # pop() hands out the files of a call exactly once, so a call with
        # several active_media_r_* files is not moved a second time.
        for wav_file in call_id_to_wav_file_list.pop(call_id, []):
            destination = tgt_dir / Path(wav_file).name
            if destination.exists():
                # shutil.move would raise on an existing destination.
                print(f"skip, already exists: {destination.as_posix()}")
                continue
            shutil.move(
                wav_file,
                tgt_dir.as_posix(),
            )
    return
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
if __name__ == "__main__":
|
| 84 |
+
main()
|
examples/{vm_sound_classification → sound_classification_by_cnn}/requirements.txt
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/run.sh
RENAMED
|
@@ -2,22 +2,22 @@
|
|
| 2 |
|
| 3 |
: <<'END'
|
| 4 |
|
| 5 |
-
sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32 \
|
| 6 |
--filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
|
| 7 |
E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
|
| 8 |
--label_plan 4
|
| 9 |
|
| 10 |
-
sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32 \
|
| 11 |
--filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
|
| 12 |
E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
|
| 13 |
--label_plan 4
|
| 14 |
|
| 15 |
-
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32 \
|
| 16 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 17 |
--label_plan 3 \
|
| 18 |
--config_file "yaml/conv2d-classifier-3-ch4.yaml"
|
| 19 |
|
| 20 |
-
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32 \
|
| 21 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
|
| 22 |
--label_plan 2-voicemail \
|
| 23 |
--config_file "yaml/conv2d-classifier-2-ch32.yaml"
|
|
|
|
| 2 |
|
| 3 |
: <<'END'
|
| 4 |
|
| 5 |
+
sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32-cnn \
|
| 6 |
--filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
|
| 7 |
E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
|
| 8 |
--label_plan 4
|
| 9 |
|
| 10 |
+
sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32-cnn \
|
| 11 |
--filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
|
| 12 |
E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
|
| 13 |
--label_plan 4
|
| 14 |
|
| 15 |
+
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32-cnn \
|
| 16 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 17 |
--label_plan 3 \
|
| 18 |
--config_file "yaml/conv2d-classifier-3-ch4.yaml"
|
| 19 |
|
| 20 |
+
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32-cnn \
|
| 21 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
|
| 22 |
--label_plan 2-voicemail \
|
| 23 |
--config_file "yaml/conv2d-classifier-2-ch32.yaml"
|
examples/{vm_sound_classification → sound_classification_by_cnn}/run_batch.sh
RENAMED
|
@@ -3,25 +3,25 @@
|
|
| 3 |
|
| 4 |
# sound ch4
|
| 5 |
|
| 6 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch4 \
|
| 7 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 8 |
#--label_plan 2 \
|
| 9 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml"
|
| 10 |
#
|
| 11 |
#
|
| 12 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch4 \
|
| 13 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 14 |
#--label_plan 3 \
|
| 15 |
#--config_file "yaml/conv2d-classifier-3-ch4.yaml"
|
| 16 |
#
|
| 17 |
#
|
| 18 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch4 \
|
| 19 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 20 |
#--label_plan 4 \
|
| 21 |
#--config_file "yaml/conv2d-classifier-4-ch4.yaml"
|
| 22 |
#
|
| 23 |
#
|
| 24 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch4 \
|
| 25 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 26 |
#--label_plan 8 \
|
| 27 |
#--config_file "yaml/conv2d-classifier-8-ch4.yaml"
|
|
@@ -29,25 +29,25 @@
|
|
| 29 |
|
| 30 |
# sound ch8
|
| 31 |
|
| 32 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch8 \
|
| 33 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 34 |
#--label_plan 2 \
|
| 35 |
#--config_file "yaml/conv2d-classifier-2-ch8.yaml"
|
| 36 |
#
|
| 37 |
#
|
| 38 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch8 \
|
| 39 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 40 |
#--label_plan 3 \
|
| 41 |
#--config_file "yaml/conv2d-classifier-3-ch8.yaml"
|
| 42 |
#
|
| 43 |
#
|
| 44 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch8 \
|
| 45 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 46 |
#--label_plan 4 \
|
| 47 |
#--config_file "yaml/conv2d-classifier-4-ch8.yaml"
|
| 48 |
#
|
| 49 |
#
|
| 50 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch8 \
|
| 51 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 52 |
#--label_plan 8 \
|
| 53 |
#--config_file "yaml/conv2d-classifier-8-ch8.yaml"
|
|
@@ -55,25 +55,25 @@
|
|
| 55 |
|
| 56 |
# sound ch16
|
| 57 |
|
| 58 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch16 \
|
| 59 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 60 |
#--label_plan 2 \
|
| 61 |
#--config_file "yaml/conv2d-classifier-2-ch16.yaml"
|
| 62 |
|
| 63 |
|
| 64 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch16 \
|
| 65 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 66 |
#--label_plan 3 \
|
| 67 |
#--config_file "yaml/conv2d-classifier-3-ch16.yaml"
|
| 68 |
#
|
| 69 |
#
|
| 70 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch16 \
|
| 71 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 72 |
#--label_plan 4 \
|
| 73 |
#--config_file "yaml/conv2d-classifier-4-ch16.yaml"
|
| 74 |
#
|
| 75 |
#
|
| 76 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch16 \
|
| 77 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 78 |
#--label_plan 8 \
|
| 79 |
#--config_file "yaml/conv2d-classifier-8-ch16.yaml"
|
|
@@ -81,25 +81,25 @@
|
|
| 81 |
|
| 82 |
# sound ch32
|
| 83 |
|
| 84 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch32 \
|
| 85 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 86 |
#--label_plan 2 \
|
| 87 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml"
|
| 88 |
#
|
| 89 |
#
|
| 90 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32 \
|
| 91 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 92 |
#--label_plan 3 \
|
| 93 |
#--config_file "yaml/conv2d-classifier-3-ch32.yaml"
|
| 94 |
#
|
| 95 |
#
|
| 96 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch32 \
|
| 97 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 98 |
#--label_plan 4 \
|
| 99 |
#--config_file "yaml/conv2d-classifier-4-ch32.yaml"
|
| 100 |
|
| 101 |
|
| 102 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch32 \
|
| 103 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 104 |
#--label_plan 8 \
|
| 105 |
#--config_file "yaml/conv2d-classifier-8-ch32.yaml"
|
|
@@ -107,12 +107,12 @@
|
|
| 107 |
|
| 108 |
# pretrained voicemail
|
| 109 |
|
| 110 |
-
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch4 \
|
| 111 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 112 |
--label_plan 2-voicemail \
|
| 113 |
--config_file "yaml/conv2d-classifier-2-ch4.yaml"
|
| 114 |
|
| 115 |
-
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch32 \
|
| 116 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 117 |
--label_plan 2-voicemail \
|
| 118 |
--config_file "yaml/conv2d-classifier-2-ch32.yaml"
|
|
@@ -120,149 +120,149 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
|
|
| 120 |
|
| 121 |
# voicemail ch4
|
| 122 |
|
| 123 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch4 \
|
| 124 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
|
| 125 |
#--label_plan 2-voicemail \
|
| 126 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 127 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
|
| 128 |
|
| 129 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch4 \
|
| 130 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
|
| 131 |
#--label_plan 2-voicemail \
|
| 132 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 133 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
|
| 134 |
#
|
| 135 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch4 \
|
| 136 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
|
| 137 |
#--label_plan 2-voicemail \
|
| 138 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 139 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
|
| 140 |
#
|
| 141 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch4 \
|
| 142 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
|
| 143 |
#--label_plan 2-voicemail \
|
| 144 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 145 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
|
| 146 |
#
|
| 147 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch4 \
|
| 148 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
|
| 149 |
#--label_plan 2-voicemail \
|
| 150 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 151 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
|
| 152 |
#
|
| 153 |
-
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch4 \
|
| 154 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
|
| 155 |
--label_plan 2-voicemail \
|
| 156 |
--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 157 |
-
--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
|
| 158 |
|
| 159 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch4 \
|
| 160 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
|
| 161 |
#--label_plan 2-voicemail \
|
| 162 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 163 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
|
| 164 |
#
|
| 165 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch4 \
|
| 166 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
|
| 167 |
#--label_plan 2-voicemail \
|
| 168 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 169 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
|
| 170 |
#
|
| 171 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch4 \
|
| 172 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
|
| 173 |
#--label_plan 2-voicemail \
|
| 174 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 175 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
|
| 176 |
#
|
| 177 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch4 \
|
| 178 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
|
| 179 |
#--label_plan 2-voicemail \
|
| 180 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 181 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
|
| 182 |
#
|
| 183 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch4 \
|
| 184 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
|
| 185 |
#--label_plan 2-voicemail \
|
| 186 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 187 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
|
| 188 |
#
|
| 189 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch4 \
|
| 190 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
|
| 191 |
#--label_plan 2-voicemail \
|
| 192 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 193 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
|
| 194 |
|
| 195 |
|
| 196 |
# voicemail ch32
|
| 197 |
|
| 198 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch32 \
|
| 199 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
|
| 200 |
#--label_plan 2-voicemail \
|
| 201 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 202 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
|
| 203 |
|
| 204 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch32 \
|
| 205 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
|
| 206 |
#--label_plan 2-voicemail \
|
| 207 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 208 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
|
| 209 |
#
|
| 210 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch32 \
|
| 211 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
|
| 212 |
#--label_plan 2-voicemail \
|
| 213 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 214 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
|
| 215 |
#
|
| 216 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch32 \
|
| 217 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
|
| 218 |
#--label_plan 2-voicemail \
|
| 219 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 220 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
|
| 221 |
#
|
| 222 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch32 \
|
| 223 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
|
| 224 |
#--label_plan 2-voicemail \
|
| 225 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 226 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
|
| 227 |
#
|
| 228 |
-
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch32 \
|
| 229 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
|
| 230 |
--label_plan 2-voicemail \
|
| 231 |
--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 232 |
-
--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
|
| 233 |
|
| 234 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch32 \
|
| 235 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
|
| 236 |
#--label_plan 2-voicemail \
|
| 237 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 238 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
|
| 239 |
#
|
| 240 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch32 \
|
| 241 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
|
| 242 |
#--label_plan 2-voicemail \
|
| 243 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 244 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
|
| 245 |
#
|
| 246 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32 \
|
| 247 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
|
| 248 |
#--label_plan 2-voicemail \
|
| 249 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 250 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
|
| 251 |
#
|
| 252 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch32 \
|
| 253 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
|
| 254 |
#--label_plan 2-voicemail \
|
| 255 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 256 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
|
| 257 |
#
|
| 258 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch32 \
|
| 259 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
|
| 260 |
#--label_plan 2-voicemail \
|
| 261 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 262 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
|
| 263 |
#
|
| 264 |
-
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch32 \
|
| 265 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
|
| 266 |
#--label_plan 2-voicemail \
|
| 267 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 268 |
-
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
|
|
|
|
| 3 |
|
| 4 |
# sound ch4
|
| 5 |
|
| 6 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch4-cnn \
|
| 7 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 8 |
#--label_plan 2 \
|
| 9 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml"
|
| 10 |
#
|
| 11 |
#
|
| 12 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch4-cnn \
|
| 13 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 14 |
#--label_plan 3 \
|
| 15 |
#--config_file "yaml/conv2d-classifier-3-ch4.yaml"
|
| 16 |
#
|
| 17 |
#
|
| 18 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch4-cnn \
|
| 19 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 20 |
#--label_plan 4 \
|
| 21 |
#--config_file "yaml/conv2d-classifier-4-ch4.yaml"
|
| 22 |
#
|
| 23 |
#
|
| 24 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch4-cnn \
|
| 25 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 26 |
#--label_plan 8 \
|
| 27 |
#--config_file "yaml/conv2d-classifier-8-ch4.yaml"
|
|
|
|
| 29 |
|
| 30 |
# sound ch8
|
| 31 |
|
| 32 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch8-cnn \
|
| 33 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 34 |
#--label_plan 2 \
|
| 35 |
#--config_file "yaml/conv2d-classifier-2-ch8.yaml"
|
| 36 |
#
|
| 37 |
#
|
| 38 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch8-cnn \
|
| 39 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 40 |
#--label_plan 3 \
|
| 41 |
#--config_file "yaml/conv2d-classifier-3-ch8.yaml"
|
| 42 |
#
|
| 43 |
#
|
| 44 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch8-cnn \
|
| 45 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 46 |
#--label_plan 4 \
|
| 47 |
#--config_file "yaml/conv2d-classifier-4-ch8.yaml"
|
| 48 |
#
|
| 49 |
#
|
| 50 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch8-cnn \
|
| 51 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 52 |
#--label_plan 8 \
|
| 53 |
#--config_file "yaml/conv2d-classifier-8-ch8.yaml"
|
|
|
|
| 55 |
|
| 56 |
# sound ch16
|
| 57 |
|
| 58 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch16-cnn \
|
| 59 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 60 |
#--label_plan 2 \
|
| 61 |
#--config_file "yaml/conv2d-classifier-2-ch16.yaml"
|
| 62 |
|
| 63 |
|
| 64 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch16-cnn \
|
| 65 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 66 |
#--label_plan 3 \
|
| 67 |
#--config_file "yaml/conv2d-classifier-3-ch16.yaml"
|
| 68 |
#
|
| 69 |
#
|
| 70 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch16-cnn \
|
| 71 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 72 |
#--label_plan 4 \
|
| 73 |
#--config_file "yaml/conv2d-classifier-4-ch16.yaml"
|
| 74 |
#
|
| 75 |
#
|
| 76 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch16-cnn \
|
| 77 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 78 |
#--label_plan 8 \
|
| 79 |
#--config_file "yaml/conv2d-classifier-8-ch16.yaml"
|
|
|
|
| 81 |
|
| 82 |
# sound ch32
|
| 83 |
|
| 84 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch32-cnn \
|
| 85 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 86 |
#--label_plan 2 \
|
| 87 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml"
|
| 88 |
#
|
| 89 |
#
|
| 90 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32-cnn \
|
| 91 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 92 |
#--label_plan 3 \
|
| 93 |
#--config_file "yaml/conv2d-classifier-3-ch32.yaml"
|
| 94 |
#
|
| 95 |
#
|
| 96 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch32-cnn \
|
| 97 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 98 |
#--label_plan 4 \
|
| 99 |
#--config_file "yaml/conv2d-classifier-4-ch32.yaml"
|
| 100 |
|
| 101 |
|
| 102 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch32-cnn \
|
| 103 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 104 |
#--label_plan 8 \
|
| 105 |
#--config_file "yaml/conv2d-classifier-8-ch32.yaml"
|
|
|
|
| 107 |
|
| 108 |
# pretrained voicemail
|
| 109 |
|
| 110 |
+
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch4-cnn \
|
| 111 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 112 |
--label_plan 2-voicemail \
|
| 113 |
--config_file "yaml/conv2d-classifier-2-ch4.yaml"
|
| 114 |
|
| 115 |
+
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch32-cnn \
|
| 116 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
|
| 117 |
--label_plan 2-voicemail \
|
| 118 |
--config_file "yaml/conv2d-classifier-2-ch32.yaml"
|
|
|
|
| 120 |
|
| 121 |
# voicemail ch4
|
| 122 |
|
| 123 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch4-cnn \
|
| 124 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
|
| 125 |
#--label_plan 2-voicemail \
|
| 126 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 127 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
|
| 128 |
|
| 129 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch4-cnn \
|
| 130 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
|
| 131 |
#--label_plan 2-voicemail \
|
| 132 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 133 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
|
| 134 |
#
|
| 135 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch4-cnn \
|
| 136 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
|
| 137 |
#--label_plan 2-voicemail \
|
| 138 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 139 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
|
| 140 |
#
|
| 141 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch4-cnn \
|
| 142 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
|
| 143 |
#--label_plan 2-voicemail \
|
| 144 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 145 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
|
| 146 |
#
|
| 147 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch4-cnn \
|
| 148 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
|
| 149 |
#--label_plan 2-voicemail \
|
| 150 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 151 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
|
| 152 |
#
|
| 153 |
+
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch4-cnn \
|
| 154 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
|
| 155 |
--label_plan 2-voicemail \
|
| 156 |
--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 157 |
+
--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
|
| 158 |
|
| 159 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch4-cnn \
|
| 160 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
|
| 161 |
#--label_plan 2-voicemail \
|
| 162 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 163 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
|
| 164 |
#
|
| 165 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch4-cnn \
|
| 166 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
|
| 167 |
#--label_plan 2-voicemail \
|
| 168 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 169 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
|
| 170 |
#
|
| 171 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch4-cnn \
|
| 172 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
|
| 173 |
#--label_plan 2-voicemail \
|
| 174 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 175 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
|
| 176 |
#
|
| 177 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch4-cnn \
|
| 178 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
|
| 179 |
#--label_plan 2-voicemail \
|
| 180 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 181 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
|
| 182 |
#
|
| 183 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch4-cnn \
|
| 184 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
|
| 185 |
#--label_plan 2-voicemail \
|
| 186 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 187 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
|
| 188 |
#
|
| 189 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch4-cnn \
|
| 190 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
|
| 191 |
#--label_plan 2-voicemail \
|
| 192 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 193 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
|
| 194 |
|
| 195 |
|
| 196 |
# voicemail ch32
|
| 197 |
|
| 198 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch32-cnn \
|
| 199 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
|
| 200 |
#--label_plan 2-voicemail \
|
| 201 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 202 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
|
| 203 |
|
| 204 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch32-cnn \
|
| 205 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
|
| 206 |
#--label_plan 2-voicemail \
|
| 207 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 208 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
|
| 209 |
#
|
| 210 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch32-cnn \
|
| 211 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
|
| 212 |
#--label_plan 2-voicemail \
|
| 213 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 214 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
|
| 215 |
#
|
| 216 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch32-cnn \
|
| 217 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
|
| 218 |
#--label_plan 2-voicemail \
|
| 219 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 220 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
|
| 221 |
#
|
| 222 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch32-cnn \
|
| 223 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
|
| 224 |
#--label_plan 2-voicemail \
|
| 225 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 226 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
|
| 227 |
#
|
| 228 |
+
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch32-cnn \
|
| 229 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
|
| 230 |
--label_plan 2-voicemail \
|
| 231 |
--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 232 |
+
--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
|
| 233 |
|
| 234 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch32-cnn \
|
| 235 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
|
| 236 |
#--label_plan 2-voicemail \
|
| 237 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 238 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
|
| 239 |
#
|
| 240 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch32-cnn \
|
| 241 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
|
| 242 |
#--label_plan 2-voicemail \
|
| 243 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 244 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
|
| 245 |
#
|
| 246 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32-cnn \
|
| 247 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
|
| 248 |
#--label_plan 2-voicemail \
|
| 249 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 250 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
|
| 251 |
#
|
| 252 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch32-cnn \
|
| 253 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
|
| 254 |
#--label_plan 2-voicemail \
|
| 255 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 256 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
|
| 257 |
#
|
| 258 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch32-cnn \
|
| 259 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
|
| 260 |
#--label_plan 2-voicemail \
|
| 261 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 262 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
|
| 263 |
#
|
| 264 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch32-cnn \
|
| 265 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
|
| 266 |
#--label_plan 2-voicemail \
|
| 267 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 268 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
|
examples/{vm_sound_classification → sound_classification_by_cnn}/step_1_prepare_data.py
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/step_2_make_vocabulary.py
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/step_3_train_model.py
RENAMED
|
@@ -50,7 +50,7 @@ def get_args():
|
|
| 50 |
parser.add_argument("--config_file", default="conv2d_classifier.yaml", type=str)
|
| 51 |
parser.add_argument(
|
| 52 |
"--pretrained_model",
|
| 53 |
-
# default=(project_path / "trained_models/voicemail-en-sg-2-ch4.zip").as_posix(),
|
| 54 |
default="null",
|
| 55 |
type=str
|
| 56 |
)
|
|
|
|
| 50 |
parser.add_argument("--config_file", default="conv2d_classifier.yaml", type=str)
|
| 51 |
parser.add_argument(
|
| 52 |
"--pretrained_model",
|
| 53 |
+
# default=(project_path / "trained_models/voicemail-en-sg-2-ch4-cnn.zip").as_posix(),
|
| 54 |
default="null",
|
| 55 |
type=str
|
| 56 |
)
|
examples/{vm_sound_classification → sound_classification_by_cnn}/step_4_evaluation_model.py
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/step_5_export_models.py
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/step_6_infer.py
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/step_7_test_model.py
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/stop.sh
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch16.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch32.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch4.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch8.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch16.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch32.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch4.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch8.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch16.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch32.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch4.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch8.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch16.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch32.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch4.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch8.yaml
RENAMED
|
File without changes
|
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/requirements.txt
RENAMED
|
File without changes
|
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/run.sh
RENAMED
|
File without changes
|
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_1_prepare_data.py
RENAMED
|
File without changes
|
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_2_make_vocabulary.py
RENAMED
|
File without changes
|
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_3_train_global_model.py
RENAMED
|
File without changes
|
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_4_train_country_model.py
RENAMED
|
File without changes
|
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_5_train_union.py
RENAMED
|
File without changes
|
examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/stop.sh
RENAMED
|
File without changes
|
examples/sound_classification_by_lstm/run.sh
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
#
# End-to-end pipeline for the LSTM sound classifier:
#   stage 0: prepare data      (step_1_prepare_data.py)
#   stage 1: make vocabulary   (step_2_make_vocabulary.py)
#   stage 2: train model       (step_3_train_model.py)
#   stage 3: evaluate model    (step_4_evaluation_model.py)
#   stage 4: export model      (step_5_export_models.py)
#   stage 5: collect artefacts into ../../trained_models/<final_model_name>.zip
#   stage 6: remove the working folder
#
# Every variable declared in the "params" sections below can be overridden on
# the command line as `--name value`.

: <<'END'

sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32-lstm \
--filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
--label_plan 4

sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32-lstm \
--filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
--label_plan 4

sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32-lstm \
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
--label_plan 3 \
--config_file "yaml/lstm_classifier-3-ch64.yaml"

sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32-lstm \
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
--label_plan 2-voicemail \
--config_file "yaml/lstm_classifier-2-ch64.yaml"

END


# params
system_version="windows";
verbose=true;
stage=0 # start from 0 if you need to start from data preparation
stop_stage=9

work_dir="$(pwd)"
file_folder_name=file_folder_name
final_model_name=final_model_name
filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
label_plan=4
config_file="yaml/lstm_classifier-4-ch64.yaml"
pretrained_model=null
nohup_name=nohup.out

country=en-US

# model params
# NOTE: these four values are accepted as options but are not forwarded to
# step_3_train_model.py by any stage below; the trainer uses its own defaults.
batch_size=64
max_epochs=200
save_top_k=10
patience=5


# parse options
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
      # Only variables that were declared above are valid option names.
      eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
      # Read the current value of the variable named by $name. The value is
      # quoted inside the eval so glob patterns such as filename_patterns are
      # not expanded while reading them.
      old_value="$(eval "printf '%s' \"\${${name}}\"")";
      if [ "${old_value}" = "true" ] || [ "${old_value}" = "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      eval "${name}=\"$2\"";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;

    *) break;
  esac
done

file_dir="${work_dir}/${file_folder_name}"
final_model_dir="${work_dir}/../../trained_models/${final_model_name}";

dataset="${file_dir}/dataset.xlsx"
train_dataset="${file_dir}/train.xlsx"
valid_dataset="${file_dir}/valid.xlsx"
evaluation_file="${file_dir}/evaluation.xlsx"
vocabulary_dir="${file_dir}/vocabulary"

$verbose && echo "system_version: ${system_version}"
$verbose && echo "file_folder_name: ${file_folder_name}"

# Non-interactive bash only expands aliases when expand_aliases is set (or in
# POSIX mode); enable it so the python3 alias below is honoured however the
# script is launched. Shells without `shopt` simply skip this line.
shopt -s expand_aliases 2>/dev/null || true

if [ "${system_version}" = "windows" ]; then
  alias python3='D:/Users/tianx/PycharmProjects/virtualenv/cc_audio_8/Scripts/python.exe'
elif [ "${system_version}" = "centos" ] || [ "${system_version}" = "ubuntu" ]; then
  #source /data/local/bin/cc_audio_8/bin/activate
  alias python3='/data/local/bin/cc_audio_8/bin/python3'
fi


if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
  $verbose && echo "stage 0: prepare data"
  cd "${work_dir}" || exit 1
  python3 step_1_prepare_data.py \
  --file_dir "${file_dir}" \
  --filename_patterns "${filename_patterns}" \
  --train_dataset "${train_dataset}" \
  --valid_dataset "${valid_dataset}" \
  --label_plan "${label_plan}"

fi


if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
  $verbose && echo "stage 1: make vocabulary"
  cd "${work_dir}" || exit 1
  python3 step_2_make_vocabulary.py \
  --vocabulary_dir "${vocabulary_dir}" \
  --train_dataset "${train_dataset}" \
  --valid_dataset "${valid_dataset}"

fi


if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
  $verbose && echo "stage 2: train model"
  cd "${work_dir}" || exit 1
  python3 step_3_train_model.py \
  --vocabulary_dir "${vocabulary_dir}" \
  --train_dataset "${train_dataset}" \
  --valid_dataset "${valid_dataset}" \
  --serialization_dir "${file_dir}" \
  --config_file "${config_file}" \
  --pretrained_model "${pretrained_model}"

fi


if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
  $verbose && echo "stage 3: test model"
  cd "${work_dir}" || exit 1
  python3 step_4_evaluation_model.py \
  --dataset "${dataset}" \
  --vocabulary_dir "${vocabulary_dir}" \
  --model_dir "${file_dir}/best" \
  --output_file "${evaluation_file}"

fi


if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
  $verbose && echo "stage 4: export model"
  cd "${work_dir}" || exit 1
  python3 step_5_export_models.py \
  --vocabulary_dir "${vocabulary_dir}" \
  --model_dir "${file_dir}/best" \
  --serialization_dir "${file_dir}"

fi


if [ "${stage}" -le 5 ] && [ "${stop_stage}" -ge 5 ]; then
  $verbose && echo "stage 5: collect files"
  cd "${work_dir}" || exit 1

  mkdir -p "${final_model_dir}"

  cp "${file_dir}/best"/* "${final_model_dir}"
  cp -r "${file_dir}/vocabulary" "${final_model_dir}"

  cp "${file_dir}/evaluation.xlsx" "${final_model_dir}/evaluation.xlsx"

  cp "${file_dir}/trace_model.zip" "${final_model_dir}/trace_model.zip"
  cp "${file_dir}/trace_quant_model.zip" "${final_model_dir}/trace_quant_model.zip"
  cp "${file_dir}/script_model.zip" "${final_model_dir}/script_model.zip"
  cp "${file_dir}/script_quant_model.zip" "${final_model_dir}/script_quant_model.zip"

  cd "${final_model_dir}/.." || exit 1;

  # Keep exactly one backup of a previously packaged model with the same name.
  if [ -e "${final_model_name}.zip" ]; then
    rm -rf "${final_model_name}_backup.zip"
    mv "${final_model_name}.zip" "${final_model_name}_backup.zip"
  fi

  zip -r "${final_model_name}.zip" "${final_model_name}"
  rm -rf "${final_model_name}"

fi


if [ "${stage}" -le 6 ] && [ "${stop_stage}" -ge 6 ]; then
  $verbose && echo "stage 6: clear file_dir"
  cd "${work_dir}" || exit 1

  rm -rf "${file_dir}";

fi
|
examples/sound_classification_by_lstm/step_1_prepare_data.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import argparse
|
| 4 |
+
from glob import glob
|
| 5 |
+
import os
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
import random
|
| 8 |
+
import sys
|
| 9 |
+
|
| 10 |
+
pwd = os.path.abspath(os.path.dirname(__file__))
|
| 11 |
+
sys.path.append(os.path.join(pwd, "../../"))
|
| 12 |
+
|
| 13 |
+
import pandas as pd
|
| 14 |
+
from scipy.io import wavfile
|
| 15 |
+
from tqdm import tqdm
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def get_args():
    """Parse the command-line options of the data-preparation step.

    Returns:
        argparse.Namespace with ``file_dir``, ``filename_patterns``,
        ``train_dataset``, ``valid_dataset`` and ``label_plan``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--file_dir", default="./", type=str,
        help="working folder; dataset.xlsx is written here",
    )
    # Required: get_dataset() splits this string, so a missing value used to
    # surface later as an AttributeError on None.
    parser.add_argument(
        "--filename_patterns", required=True, type=str,
        help="space-separated glob patterns of the wav files to index",
    )

    parser.add_argument("--train_dataset", default="train.xlsx", type=str)
    parser.add_argument("--valid_dataset", default="valid.xlsx", type=str)

    parser.add_argument(
        "--label_plan", default="4", type=str,
        help="one of: 2-voicemail, 2, 3, 4, 8",
    )

    args = parser.parse_args()
    return args
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _get_label_map(label_plan):
    """Return the folder-name -> training-label mapping for *label_plan*.

    Folders that are absent from the returned mapping are skipped by
    get_dataset() (e.g. "music" in every plan except "2" and "8").

    Raises:
        AssertionError: if *label_plan* is not a known plan.
    """
    label_maps = {
        "2-voicemail": {
            "bell": "voicemail",
            "white_noise": "non_voicemail",
            "low_white_noise": "non_voicemail",
            "high_white_noise": "non_voicemail",
            "mute": "non_voicemail",
            "noise": "non_voicemail",
            "noise_mute": "non_voicemail",
            "voice": "non_voicemail",
            "voicemail": "voicemail",
        },
        "2": {
            "bell": "non_voice",
            "white_noise": "non_voice",
            "low_white_noise": "non_voice",
            "high_white_noise": "non_voice",
            "music": "non_voice",
            "mute": "non_voice",
            "noise": "non_voice",
            "noise_mute": "non_voice",
            "voice": "voice",
            "voicemail": "voice",
        },
        "3": {
            "bell": "voicemail",
            "white_noise": "mute",
            "low_white_noise": "mute",
            "high_white_noise": "mute",
            "mute": "mute",
            "noise": "voice_or_noise",
            "noise_mute": "voice_or_noise",
            "voice": "voice_or_noise",
            "voicemail": "voicemail",
        },
        "4": {
            "bell": "voicemail",
            "white_noise": "mute",
            "low_white_noise": "mute",
            "high_white_noise": "mute",
            "mute": "mute",
            "noise": "noise",
            "noise_mute": "noise",
            "voice": "voice",
            "voicemail": "voicemail",
        },
        "8": {
            "bell": "bell",
            "white_noise": "white_noise",
            "low_white_noise": "white_noise",
            "high_white_noise": "white_noise",
            "music": "music",
            "mute": "mute",
            "noise": "noise",
            "noise_mute": "noise_mute",
            "voice": "voice",
            "voicemail": "voicemail",
        },
    }
    if label_plan not in label_maps:
        raise AssertionError(
            "unknown label_plan: {!r}; expected one of {}".format(
                label_plan, sorted(label_maps)
            )
        )
    return label_maps[label_plan]


def get_dataset(args):
    """Index the wav files matched by ``args.filename_patterns`` into dataset.xlsx.

    One row is written per wav file that is at least two seconds long and whose
    parent folder name is a key of the selected label plan. Columns:
    ``filename``, ``folder`` (parent folder name), ``category`` (the path
    component three levels above the file), ``labels``, ``random1`` (shuffle
    key), ``random2`` and ``flag`` ("TRAIN" when random2 < 0.8, else "TEST").

    Args:
        args: namespace providing ``filename_patterns`` (whitespace-separated
            glob patterns), ``file_dir`` and ``label_plan``.

    Raises:
        AssertionError: if ``label_plan`` is unknown, or no usable wav file
            was found.
    """
    # split() without an argument also tolerates repeated blanks / newlines
    # between patterns.
    filename_patterns = args.filename_patterns.split()
    print(filename_patterns)

    # Validate the plan before touching the filesystem.
    label_map = _get_label_map(args.label_plan)

    file_dir = Path(args.file_dir)
    file_dir.mkdir(parents=True, exist_ok=True)

    result = list()
    for filename_pattern in filename_patterns:
        filename_list = glob(filename_pattern)
        for filename in tqdm(filename_list):
            filename = Path(filename)

            # Cheap path checks first so unlabelled folders are never decoded.
            folder = filename.parts[-2]
            if folder not in label_map:
                continue
            country = filename.parts[-4]

            sample_rate, signal = wavfile.read(filename.as_posix())
            # Skip clips shorter than two seconds.
            if len(signal) < sample_rate * 2:
                continue

            labels = label_map[folder]

            random1 = random.random()
            random2 = random.random()

            result.append({
                # Stored as a plain string so the row is serialisable to xlsx.
                "filename": filename.as_posix(),
                "folder": folder,
                "category": country,
                "labels": labels,
                "random1": random1,
                "random2": random2,
                "flag": "TRAIN" if random2 < 0.8 else "TEST",
            })

    if not result:
        raise AssertionError(
            "no usable wav file matched filename_patterns: {}".format(filename_patterns)
        )

    df = pd.DataFrame(result)
    pivot_table = pd.pivot_table(df, index=["labels"], values=["filename"], aggfunc="count")
    print(pivot_table)

    # random1 acts as a shuffle key for the row order.
    df = df.sort_values(by=["random1"], ascending=False)
    df.to_excel(
        file_dir / "dataset.xlsx",
        index=False,
    )

    return
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def split_dataset(args):
    """Split dataset.xlsx into the training set and the test (validation) set.

    Rows whose ``flag`` column equals "TRAIN" are written to
    ``args.train_dataset``; every other row is written to
    ``args.valid_dataset``.

    Args:
        args: namespace providing ``file_dir``, ``train_dataset`` and
            ``valid_dataset``.
    """
    file_dir = Path(args.file_dir)
    file_dir.mkdir(parents=True, exist_ok=True)

    df = pd.read_excel(file_dir / "dataset.xlsx")

    # A boolean mask keeps the column headers even when one side is empty.
    is_train = df["flag"] == "TRAIN"

    df[is_train].to_excel(
        args.train_dataset,
        index=False,
    )
    df[~is_train].to_excel(
        args.valid_dataset,
        index=False,
    )

    return
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def main():
    """Build dataset.xlsx from the wav files, then split it into train/valid workbooks."""
    cli_args = get_args()
    for step in (get_dataset, split_dataset):
        step(cli_args)


if __name__ == "__main__":
    main()
|
examples/sound_classification_by_lstm/step_2_make_vocabulary.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import argparse
|
| 4 |
+
import os
|
| 5 |
+
import sys
|
| 6 |
+
|
| 7 |
+
pwd = os.path.abspath(os.path.dirname(__file__))
|
| 8 |
+
sys.path.append(os.path.join(pwd, "../../"))
|
| 9 |
+
|
| 10 |
+
import pandas as pd
|
| 11 |
+
|
| 12 |
+
from toolbox.torch.utils.data.vocabulary import Vocabulary
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def get_args():
    """Parse the options of the vocabulary step.

    Returns:
        argparse.Namespace with ``vocabulary_dir``, ``train_dataset`` and
        ``valid_dataset`` (all strings).
    """
    parser = argparse.ArgumentParser()
    option_defaults = (
        ("--vocabulary_dir", "vocabulary"),
        ("--train_dataset", "train.xlsx"),
        ("--valid_dataset", "valid.xlsx"),
    )
    for flag, default_value in option_defaults:
        parser.add_argument(flag, default=default_value, type=str)
    return parser.parse_args()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def main():
    """Collect every label of the train and valid workbooks into a Vocabulary and save it."""
    args = get_args()

    # Both workbooks are loaded up front; the train rows are registered first.
    frames = [
        pd.read_excel(excel_file)
        for excel_file in (args.train_dataset, args.valid_dataset)
    ]

    vocabulary = Vocabulary()
    for frame in frames:
        for _, record in frame.iterrows():
            vocabulary.add_token_to_namespace(record["labels"], namespace="labels")

    vocabulary.save_to_files(args.vocabulary_dir)


if __name__ == "__main__":
    main()
|
examples/sound_classification_by_lstm/step_3_train_model.py
ADDED
|
@@ -0,0 +1,367 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import argparse
|
| 4 |
+
from collections import defaultdict
|
| 5 |
+
import json
|
| 6 |
+
import logging
|
| 7 |
+
from logging.handlers import TimedRotatingFileHandler
|
| 8 |
+
import os
|
| 9 |
+
import platform
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
import random
|
| 12 |
+
import sys
|
| 13 |
+
import shutil
|
| 14 |
+
import tempfile
|
| 15 |
+
from typing import List
|
| 16 |
+
import zipfile
|
| 17 |
+
|
| 18 |
+
pwd = os.path.abspath(os.path.dirname(__file__))
|
| 19 |
+
sys.path.append(os.path.join(pwd, "../../"))
|
| 20 |
+
|
| 21 |
+
import numpy as np
|
| 22 |
+
import torch
|
| 23 |
+
from torch.utils.data.dataloader import DataLoader
|
| 24 |
+
from tqdm import tqdm
|
| 25 |
+
|
| 26 |
+
from toolbox.torch.modules.loss import FocalLoss, HingeLoss, HingeLinear
|
| 27 |
+
from toolbox.torch.training.metrics.categorical_accuracy import CategoricalAccuracy
|
| 28 |
+
from toolbox.torch.utils.data.vocabulary import Vocabulary
|
| 29 |
+
from toolbox.torch.utils.data.dataset.wave_classifier_excel_dataset import WaveClassifierExcelDataset
|
| 30 |
+
from toolbox.torchaudio.models.lstm_audio_classifier.modeling_lstm_audio_classifier import LSTMClassifierPretrainedModel
|
| 31 |
+
from toolbox.torchaudio.models.lstm_audio_classifier.configuration_lstm_audio_classifier import LSTMClassifierConfig
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def get_args():
    """Parse the options of the training step.

    Returns:
        argparse.Namespace holding the dataset paths, the optimisation
        settings, the output folder, the seed, the model config file and the
        optional pretrained model archive.
    """
    # (flag, default, type) in the order they are registered.
    # NOTE(review): the --config_file default names a conv2d config although
    # main() loads it with LSTMClassifierConfig - confirm it is intentional.
    # "--pretrained_model" uses the string "null" to mean "no pretrained model".
    option_table = (
        ("--vocabulary_dir", "vocabulary", str),
        ("--train_dataset", "train.xlsx", str),
        ("--valid_dataset", "valid.xlsx", str),
        ("--max_epochs", 100, int),
        ("--batch_size", 64, int),
        ("--learning_rate", 1e-3, float),
        ("--num_serialized_models_to_keep", 10, int),
        ("--patience", 5, int),
        ("--serialization_dir", "serialization_dir", str),
        ("--seed", 0, int),
        ("--config_file", "conv2d_classifier.yaml", str),
        ("--pretrained_model", "null", str),
    )

    parser = argparse.ArgumentParser()
    for flag, default_value, value_type in option_table:
        parser.add_argument(flag, default=default_value, type=value_type)
    return parser.parse_args()
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def logging_config(file_dir: str):
    """Configure root logging and attach a daily-rotating file log to this module's logger.

    The root logger is configured at DEBUG level through ``basicConfig``; the
    module logger additionally writes INFO-and-above records to
    ``<file_dir>/main.log``, rotated once a day with 7 backups kept.

    Calling the function again for the same folder returns the logger without
    attaching a second handler, so records are not written twice.

    Args:
        file_dir: folder that receives ``main.log``. Anything accepted by
            ``os.path.join`` works (``str`` or path-like).

    Returns:
        The ``logging.Logger`` named after this module.
    """
    fmt = "%(asctime)s - %(name)s - %(levelname)s %(filename)s:%(lineno)d > %(message)s"

    logging.basicConfig(format=fmt,
                        datefmt="%m/%d/%Y %H:%M:%S",
                        level=logging.DEBUG)

    logger = logging.getLogger(__name__)

    # The handler stores the absolute path in baseFilename, so compare on that.
    log_filename = os.path.abspath(os.path.join(file_dir, "main.log"))
    for handler in logger.handlers:
        if isinstance(handler, TimedRotatingFileHandler) and handler.baseFilename == log_filename:
            return logger

    file_handler = TimedRotatingFileHandler(
        filename=log_filename,
        encoding="utf-8",
        when="D",
        interval=1,
        backupCount=7
    )
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(logging.Formatter(fmt))
    logger.addHandler(file_handler)

    return logger
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
class CollateFunction(object):
    """Batch collator that forces every waveform to a fixed number of samples.

    Waveforms shorter than ``max_length`` are zero-padded at the end, longer
    ones are truncated, then waveforms and labels are stacked into two tensors.
    """

    def __init__(self, max_length: int = 16000):
        """
        Args:
            max_length: number of samples every waveform is padded / cut to.
                The default (16000) is the value this script has always used.
        """
        self.max_length = max_length

    def __call__(self, batch: List[dict]):
        """Collate samples carrying a 1-D ``"waveform"`` and a ``"label"``.

        Args:
            batch: list of dicts with the keys ``"waveform"`` and ``"label"``.

        Returns:
            Tuple ``(waveforms, labels)``: ``waveforms`` has shape
            ``(len(batch), max_length)``; ``labels`` is the stacked labels.
        """
        array_list = list()
        label_list = list()
        for sample in batch:
            # as_tensor is a no-op for tensors and also accepts NumPy input.
            array = torch.as_tensor(sample["waveform"])
            label = torch.as_tensor(sample["label"])

            length = len(array)
            if length < self.max_length:
                # Pad with torch so the result stays a tensor; padding through
                # NumPy produced an ndarray that torch.stack cannot handle.
                delta = int(self.max_length - length)
                array = torch.cat([array, array.new_zeros(delta)], dim=-1)
            elif length > self.max_length:
                array = array[:self.max_length]

            array_list.append(array)
            label_list.append(label)

        array_list = torch.stack(array_list)
        label_list = torch.stack(label_list)
        return array_list, label_list


collate_fn = CollateFunction()
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def main():
    """Train the LSTM audio classifier and keep the best checkpoint.

    Steps: parse options, seed the RNGs, load the vocabulary and the
    train/valid workbooks, build the model from ``--config_file`` (optionally
    initialised from a ``--pretrained_model`` zip), then run up to
    ``--max_epochs`` epochs. After every epoch the model and a
    ``metrics_epoch.json`` are written to ``<serialization_dir>/epoch-<n>``,
    the epoch with the highest validation accuracy is mirrored to
    ``<serialization_dir>/best``, and training stops once the best accuracy
    has not improved for ``--patience`` consecutive epochs.

    Raises:
        AssertionError: if ``cls_head_param["num_labels"]`` of the config does
            not equal the size of the ``labels`` vocabulary namespace.
    """
    args = get_args()

    serialization_dir = Path(args.serialization_dir)
    serialization_dir.mkdir(parents=True, exist_ok=True)

    logger = logging_config(serialization_dir)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    logger.info("set seed: {}".format(args.seed))

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    logger.info("GPU available count: {}; device: {}".format(n_gpu, device))

    vocabulary = Vocabulary.from_files(args.vocabulary_dir)

    # datasets
    logger.info("prepare datasets")
    train_dataset = WaveClassifierExcelDataset(
        vocab=vocabulary,
        excel_file=args.train_dataset,
        category=None,
        category_field="category",
        label_field="labels",
        expected_sample_rate=8000,
        max_wave_value=32768.0,
    )
    valid_dataset = WaveClassifierExcelDataset(
        vocab=vocabulary,
        excel_file=args.valid_dataset,
        category=None,
        category_field="category",
        label_field="labels",
        expected_sample_rate=8000,
        max_wave_value=32768.0,
    )

    # Data is loaded in the main process on Windows and with worker
    # subprocesses elsewhere. os.cpu_count() may return None, hence `or 0`.
    num_workers = 0 if platform.system() == "Windows" else (os.cpu_count() or 0) // 2

    train_data_loader = DataLoader(
        dataset=train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=collate_fn,
        pin_memory=False,
        # prefetch_factor=64,
    )
    valid_data_loader = DataLoader(
        dataset=valid_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=collate_fn,
        pin_memory=False,
        # prefetch_factor=64,
    )

    # models
    logger.info(f"prepare models. config_file: {args.config_file}")
    config = LSTMClassifierConfig.from_pretrained(
        pretrained_model_name_or_path=args.config_file,
        # num_labels=vocabulary.get_vocab_size(namespace="labels")
    )
    if not config.cls_head_param["num_labels"] == vocabulary.get_vocab_size(namespace="labels"):
        raise AssertionError("expected num labels: {} instead of {}.".format(
            vocabulary.get_vocab_size(namespace="labels"),
            config.cls_head_param["num_labels"],
        ))
    model = LSTMClassifierPretrainedModel(
        config=config,
    )

    # The CLI default for --pretrained_model is the string "null", which does
    # not exist on disk, so this branch is skipped unless a real path is given.
    if args.pretrained_model is not None and os.path.exists(args.pretrained_model):
        logger.info(f"load pretrained model state dict from: {args.pretrained_model}")
        pretrained_model = Path(args.pretrained_model)
        with zipfile.ZipFile(pretrained_model.as_posix(), "r") as f_zip:
            # Extract into a scratch folder that is wiped on every run.
            out_root = Path(tempfile.gettempdir()) / "cc_audio_8"
            if out_root.exists():
                shutil.rmtree(out_root.as_posix())
            out_root.mkdir(parents=True, exist_ok=True)
            f_zip.extractall(path=out_root)

        # The archive is expected to contain a folder named after the zip stem.
        tgt_path = out_root / pretrained_model.stem
        model_pt_file = tgt_path / "model.pt"
        with open(model_pt_file, "rb") as f:
            state_dict = torch.load(f, map_location="cpu")
        model.load_state_dict(state_dict=state_dict)

    model.to(device)
    model.train()

    # optimizer
    logger.info("prepare optimizer, lr_scheduler, loss_fn, categorical_accuracy")
    param_optimizer = model.parameters()
    optimizer = torch.optim.Adam(
        param_optimizer,
        lr=args.learning_rate,
    )
    # The scheduler is stepped once per batch below, so the milestones are
    # counted in optimisation steps, not epochs.
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[10000, 20000, 30000, 40000, 50000], gamma=0.5
    )
    focal_loss = FocalLoss(
        num_classes=vocabulary.get_vocab_size(namespace="labels"),
        reduction="mean",
    )
    categorical_accuracy = CategoricalAccuracy()

    # training loop
    logger.info("training")

    training_loss = 10000000000
    training_accuracy = 0.
    evaluation_loss = 10000000000
    evaluation_accuracy = 0.

    model_list = list()
    best_idx_epoch = None
    best_accuracy = None
    patience_count = 0

    for idx_epoch in range(args.max_epochs):
        # ---- train ----
        # Re-enable training mode: the evaluation pass below switches to eval().
        model.train()
        categorical_accuracy.reset()
        total_loss = 0.
        total_examples = 0.
        with tqdm(
            total=len(train_data_loader),
            desc="Training; epoch: {}".format(idx_epoch),
        ) as progress_bar:
            for batch in train_data_loader:
                input_ids, label_ids = batch
                input_ids = input_ids.to(device)
                label_ids: torch.LongTensor = label_ids.to(device).long()

                logits = model.forward(input_ids)
                loss = focal_loss.forward(logits, label_ids.view(-1))
                categorical_accuracy(logits, label_ids)

                total_loss += loss.item()
                total_examples += input_ids.size(0)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                lr_scheduler.step()

                # NOTE(review): the loss is created with reduction="mean" yet
                # the running sum is divided by the example count - confirm
                # the intended scale of the reported loss.
                training_loss = total_loss / total_examples
                training_loss = round(training_loss, 4)
                training_accuracy = categorical_accuracy.get_metric()["accuracy"]
                training_accuracy = round(training_accuracy, 4)

                progress_bar.update(1)
                progress_bar.set_postfix({
                    "training_loss": training_loss,
                    "training_accuracy": training_accuracy,
                })

        # ---- evaluate ----
        # eval() disables dropout so the validation accuracy that drives
        # best-checkpoint selection and early stopping is deterministic.
        model.eval()
        categorical_accuracy.reset()
        total_loss = 0.
        total_examples = 0.
        with tqdm(
            total=len(valid_data_loader),
            desc="Evaluation; epoch: {}".format(idx_epoch),
        ) as progress_bar:
            for batch in valid_data_loader:
                input_ids, label_ids = batch
                input_ids = input_ids.to(device)
                label_ids: torch.LongTensor = label_ids.to(device).long()

                with torch.no_grad():
                    logits = model.forward(input_ids)
                    loss = focal_loss.forward(logits, label_ids.view(-1))
                    categorical_accuracy(logits, label_ids)

                total_loss += loss.item()
                total_examples += input_ids.size(0)

                evaluation_loss = total_loss / total_examples
                evaluation_loss = round(evaluation_loss, 4)
                evaluation_accuracy = categorical_accuracy.get_metric()["accuracy"]
                evaluation_accuracy = round(evaluation_accuracy, 4)

                progress_bar.update(1)
                progress_bar.set_postfix({
                    "evaluation_loss": evaluation_loss,
                    "evaluation_accuracy": evaluation_accuracy,
                })

        # save path
        epoch_dir = serialization_dir / "epoch-{}".format(idx_epoch)
        epoch_dir.mkdir(parents=True, exist_ok=False)

        # save models
        model.save_pretrained(epoch_dir.as_posix())
        model_list.append(epoch_dir)

        # save metric
        if best_accuracy is None or evaluation_accuracy > best_accuracy:
            best_idx_epoch = idx_epoch
            best_accuracy = evaluation_accuracy

        metrics = {
            "idx_epoch": idx_epoch,
            "best_idx_epoch": best_idx_epoch,
            "best_accuracy": best_accuracy,
            "training_loss": training_loss,
            "training_accuracy": training_accuracy,
            "evaluation_loss": evaluation_loss,
            "evaluation_accuracy": evaluation_accuracy,
            "learning_rate": optimizer.param_groups[0]['lr'],
        }
        metrics_filename = epoch_dir / "metrics_epoch.json"
        with open(metrics_filename, "w", encoding="utf-8") as f:
            json.dump(metrics, f, indent=4, ensure_ascii=False)

        # save best
        best_dir = serialization_dir / "best"
        if best_idx_epoch == idx_epoch:
            if best_dir.exists():
                shutil.rmtree(best_dir)
            shutil.copytree(epoch_dir, best_dir)

        # Prune old checkpoints only after this epoch has been fully written
        # and mirrored, keeping exactly num_serialized_models_to_keep folders.
        if len(model_list) > args.num_serialized_models_to_keep:
            model_to_delete: Path = model_list.pop(0)
            shutil.rmtree(model_to_delete.as_posix())

        # early stop
        early_stop_flag = False
        if best_idx_epoch == idx_epoch:
            patience_count = 0
        else:
            patience_count += 1
            if patience_count >= args.patience:
                early_stop_flag = True

        if early_stop_flag:
            break
    return


if __name__ == "__main__":
    main()
|
examples/sound_classification_by_lstm/yaml/lstm_classifier-4-ch64.yaml
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LSTM audio classifier, 4 output labels, hidden size 64.
model_name: "lstm_audio_classifier"

mel_spectrogram_param:
  sample_rate: 8000
  n_fft: 512
  win_length: 200
  hop_length: 80
  f_min: 10
  f_max: 3800
  window_fn: hamming
  n_mels: 80

lstm_layer_param:
  # presumably input_size must equal mel_spectrogram_param.n_mels - confirm
  input_size: 80
  hidden_size: 64
  num_layers: 3
  dropout: 0.2
  pool_layer: last

cls_head_param:
  # presumably input_dim must equal lstm_layer_param.hidden_size - confirm
  input_dim: 64
  num_layers: 1
  hidden_dims:
    - 32
  activations: relu
  dropout: 0.1
  # step_3_train_model.py requires this to equal the size of the "labels"
  # vocabulary namespace and raises otherwise.
  num_labels: 4
|