Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	update
Browse filesThis view is limited to 50 files because it contains too many changes.  
							See raw diff
- .gitignore +2 -1
- examples/download_wav/Temp Query 5_20251008-093912.csv +101 -0
- examples/download_wav/step_1_download_wav.py +12 -37
- examples/download_wav/step_2_to_1ch.py +12 -8
- examples/download_wav/step_3_split_two_second_wav.py +14 -7
- examples/lstm_badcase_filter/step_1_badcase_filter.py +233 -0
- examples/online_model_test/step_1_predict.py +6 -4
- examples/online_model_test/step_2_audio_filter.py +6 -2
- examples/online_model_test/step_3_make_test.py +2 -2
- examples/online_model_test/test.py +84 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/requirements.txt +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/run.sh +4 -4
- examples/{vm_sound_classification → sound_classification_by_cnn}/run_batch.sh +66 -66
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_1_prepare_data.py +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_2_make_vocabulary.py +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_3_train_model.py +1 -1
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_4_evaluation_model.py +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_5_export_models.py +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_6_infer.py +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/step_7_test_model.py +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/stop.sh +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch16.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch32.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch4.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch8.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch16.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch32.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch4.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch8.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch16.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch32.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch4.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch8.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch16.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch32.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch4.yaml +0 -0
- examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch8.yaml +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/requirements.txt +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/run.sh +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_1_prepare_data.py +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_2_make_vocabulary.py +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_3_train_global_model.py +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_4_train_country_model.py +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_5_train_union.py +0 -0
- examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/stop.sh +0 -0
- examples/sound_classification_by_lstm/run.sh +197 -0
- examples/sound_classification_by_lstm/step_1_prepare_data.py +193 -0
- examples/sound_classification_by_lstm/step_2_make_vocabulary.py +50 -0
- examples/sound_classification_by_lstm/step_3_train_model.py +367 -0
- examples/sound_classification_by_lstm/yaml/lstm_classifier-4-ch64.yaml +27 -0
    	
        .gitignore
    CHANGED
    
    | @@ -15,6 +15,7 @@ | |
| 15 | 
             
            /trained_models/
         | 
| 16 | 
             
            /temp/
         | 
| 17 |  | 
|  | |
|  | |
| 18 | 
             
            #**/*.wav
         | 
| 19 | 
             
            **/*.xlsx
         | 
| 20 | 
            -
            **/*.onnx
         | 
|  | |
| 15 | 
             
            /trained_models/
         | 
| 16 | 
             
            /temp/
         | 
| 17 |  | 
| 18 | 
            +
            **/*.csv
         | 
| 19 | 
            +
            **/*.onnx
         | 
| 20 | 
             
            #**/*.wav
         | 
| 21 | 
             
            **/*.xlsx
         | 
|  | 
    	
        examples/download_wav/Temp Query 5_20251008-093912.csv
    ADDED
    
    | @@ -0,0 +1,101 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            date,overdue_term,id,case_id,credit_user_id,call_start_timestamp,call_end_timestamp,thirdpart_download_url
         | 
| 2 | 
            +
            11/10/2025,M3,201577107,62145483,2.05158E+18,1760156453,1760156464,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/6b76d306-b767-44e5-be9a-0a15d1165113.mp3
         | 
| 3 | 
            +
            11/10/2025,M3,201552895,61647547,2.04871E+18,1760150223,1760150235,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/91eb4d93-aaaf-4a22-b1b5-93f90790f360.mp3
         | 
| 4 | 
            +
            11/10/2025,M1,201571248,64869969,1.63814E+18,1760154872,1760154878,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/9feab432-a05f-4c12-a7a5-1de81c5c5552.mp3
         | 
| 5 | 
            +
            10/10/2025,M5,201481243,57774660,1.86995E+18,1760093720,1760093736,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/8ca27707-73e9-41a9-a531-f84011a2d021.mp3
         | 
| 6 | 
            +
            11/10/2025,M6,201602065,56556981,1.96434E+18,1760162403,1760162411,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/23edb55b-d7d7-496d-92d9-27be9a8d0f06.mp3
         | 
| 7 | 
            +
            10/10/2025,M3,201432876,62937736,1.71926E+18,1760081217,1760081223,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/d64b9511-1ada-435c-bf1d-7ff8edd194d1.mp3
         | 
| 8 | 
            +
            10/10/2025,M2,201418064,63818662,2.06059E+18,1760078017,1760078023,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/7a53b0cf-d4a8-496b-8533-578e1b3c8050.mp3
         | 
| 9 | 
            +
            11/10/2025,M1,201546922,65604125,1.86304E+18,1760149167,1760149175,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/e6adf4e4-269c-4668-955d-7b3dd1c60736.mp3
         | 
| 10 | 
            +
            10/10/2025,M3,201430098,61807602,1.85118E+18,1760080774,1760080785,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/b2b8e1e5-d92d-424d-9150-7af7506305c4.mp3
         | 
| 11 | 
            +
            10/10/2025,M1,201448566,64796208,1.65278E+18,1760085408,1760085415,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/09d6248c-111d-4b73-910f-e218049185d8.mp3
         | 
| 12 | 
            +
            11/10/2025,M4,201571566,60538522,1.88122E+18,1760154923,1760154930,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/944211c2-e492-4889-b655-e508fdd5879d.mp3
         | 
| 13 | 
            +
            11/10/2025,M1,201566967,65843234,2.02107E+18,1760154065,1760154073,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/bfa8591b-e527-45e9-ae57-7f74f9e7302b.mp3
         | 
| 14 | 
            +
            10/10/2025,M2,201447321,64267309,1.56498E+18,1760085020,1760085033,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/4bb8e69a-4c6c-4828-857a-ce0e43cc75a1.mp3
         | 
| 15 | 
            +
            11/10/2025,M1,201568415,65114574,4883832,1760154398,1760154405,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/51e805c9-5cc5-490b-bae2-7d5eab6f343c.mp3
         | 
| 16 | 
            +
            11/10/2025,M2,201605984,63943082,1.8374E+18,1760163009,1760163030,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/98af45d4-16cd-4eaf-9b91-05c7143a26bc.mp3
         | 
| 17 | 
            +
            10/10/2025,M1,201419656,66515322,1.49814E+17,1760078339,1760078345,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/91f43558-6567-43d9-a709-ffa3173c81b4.mp3
         | 
| 18 | 
            +
            10/10/2025,M2,201427406,63880041,1.56918E+18,1760080267,1760080275,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/375fdc20-428e-4658-aeb7-cb9cca17c534.mp3
         | 
| 19 | 
            +
            11/10/2025,M1,201575782,64887894,1.73042E+18,1760156066,1760156082,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/b2d6b5a2-3d7f-4fc7-abf6-258cd7a7de5f.mp3
         | 
| 20 | 
            +
            10/10/2025,M3,201418794,62368390,1.94558E+18,1760078142,1760078156,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/ba673b33-8496-4901-8f2f-8083802a5213.mp3
         | 
| 21 | 
            +
            10/10/2025,M1,201424572,66395236,1203507,1760079797,1760079804,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/a95596c0-8b46-4333-b126-6c4c11ca41fc.mp3
         | 
| 22 | 
            +
            11/10/2025,M2,201571228,64248917,1.88019E+18,1760154871,1760154880,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/e4eb3384-2d75-4563-b9a7-7bb3256a2aae.mp3
         | 
| 23 | 
            +
            11/10/2025,M4,201570642,60447265,1.98507E+18,1760154782,1760154787,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/a5440c2e-3fc5-43b2-8ecf-d4bde44f62e8.mp3
         | 
| 24 | 
            +
            10/10/2025,M5,201453357,58652419,1.85737E+18,1760086342,1760086350,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/68def6c9-130e-4705-b4b6-19d5d8b4b27d.mp3
         | 
| 25 | 
            +
            11/10/2025,M6,201573623,57234397,1.97251E+18,1760155303,1760155313,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/0ba67a44-c94a-4d68-a6ba-d003cf4b57c8.mp3
         | 
| 26 | 
            +
            10/10/2025,M5,201424683,57553385,1.86241E+18,1760079814,1760079831,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/50c26933-40a1-45bc-baf0-eb7b4c268ffb.mp3
         | 
| 27 | 
            +
            11/10/2025,M1,201570171,66334366,1.8276E+18,1760154717,1760154723,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/ecbb5b43-19cf-44e2-8ee4-ea131c706421.mp3
         | 
| 28 | 
            +
            10/10/2025,M4,201451276,59840709,2.04014E+18,1760085947,1760085952,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/6059e9c2-2f3c-4cef-a536-4796436b9765.mp3
         | 
| 29 | 
            +
            10/10/2025,M4,201432508,59867441,1.91396E+18,1760081156,1760081172,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/c0814466-4e13-4ef1-b4be-2fbe66eebfd8.mp3
         | 
| 30 | 
            +
            11/10/2025,M5,201612109,58418373,1.87946E+18,1760163975,1760163983,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/c7c54616-575a-4dc9-9820-fea245211933.mp3
         | 
| 31 | 
            +
            10/10/2025,M2,201432653,64650851,1.74295E+18,1760081176,1760081192,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/e8cc82f1-ded6-484f-9e6a-b9bf514eda04.mp3
         | 
| 32 | 
            +
            11/10/2025,M1,201580231,65755142,1.96545E+18,1760157408,1760157415,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/d2ed8a99-d8a4-4c0a-9b30-0a4f97e8db6e.mp3
         | 
| 33 | 
            +
            10/10/2025,M3,201430023,61812734,7400607,1760080766,1760080780,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/f693b1d5-dc99-43d9-a730-2040ca645f17.mp3
         | 
| 34 | 
            +
            10/10/2025,M3,201450322,62009884,2.007E+18,1760085762,1760085768,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/4915b87b-1169-41f7-af8c-6509a66dfbe6.mp3
         | 
| 35 | 
            +
            10/10/2025,M3,201431281,62172812,2.05076E+18,1760080963,1760080975,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/dc9c5331-04bb-4cbf-b789-61b0b811d6b6.mp3
         | 
| 36 | 
            +
            10/10/2025,M5,201430080,58314791,1.99801E+18,1760080772,1760080777,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/e571b6f4-6471-4311-81ed-cd1af7e55e07.mp3
         | 
| 37 | 
            +
            11/10/2025,M4,201538284,59471661,2.03412E+18,1760147632,1760147638,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/cfbe5f77-02b3-4a04-8948-9ca4237c3abc.mp3
         | 
| 38 | 
            +
            10/10/2025,M1,201447219,65817559,2.02777E+18,1760085001,1760085007,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/49dad1dd-d681-410a-87df-649eca036ff0.mp3
         | 
| 39 | 
            +
            10/10/2025,M1,201481818,66043196,1.93698E+18,1760093888,1760093894,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/93c19c1f-93fa-41ec-8f57-54a606a7f4a4.mp3
         | 
| 40 | 
            +
            10/10/2025,M1,201485519,66563695,2.074E+18,1760095020,1760095034,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/e8c214bd-6261-42a7-863b-3bcbd82f081e.mp3
         | 
| 41 | 
            +
            11/10/2025,M5,201595914,59014301,1.5286E+18,1760161399,1760161415,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/095bcf3f-1807-4c71-a997-ce6806b4da99.mp3
         | 
| 42 | 
            +
            11/10/2025,M2,201533318,63403949,1.88317E+18,1760146862,1760146871,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/8ae1702d-eca8-4e77-b39d-fd119c72499e.mp3
         | 
| 43 | 
            +
            11/10/2025,M4,201576553,60295505,1.80115E+18,1760156314,1760156320,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/47bad51d-65fb-4795-9211-a8a0434b95ad.mp3
         | 
| 44 | 
            +
            10/10/2025,M1,201485741,65280144,2.07517E+18,1760095107,1760095114,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/87d8ccb1-f7e9-4377-a1d8-7bf9228e0c3f.mp3
         | 
| 45 | 
            +
            10/10/2025,M1,201431349,64854591,1.58546E+18,1760080970,1760080978,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/6f983610-0fd3-4da6-be49-1cc50f205618.mp3
         | 
| 46 | 
            +
            10/10/2025,M5,201487648,57318618,1.64505E+18,1760095790,1760095797,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/254f796e-b955-4a5e-a190-1d3579474645.mp3
         | 
| 47 | 
            +
            11/10/2025,M1,201577796,64963614,1.86777E+18,1760156648,1760156657,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/b382a477-933e-43b1-8759-a2b4e17e92b2.mp3
         | 
| 48 | 
            +
            11/10/2025,M3,201533254,62192769,2.023E+18,1760146812,1760146817,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/ac78c5f9-7109-45fc-87da-5e275e0159f4.mp3
         | 
| 49 | 
            +
            10/10/2025,M1,201428974,65690748,1.89974E+18,1760080550,1760080557,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/b4e53f33-4c55-4b16-b786-6619332f47fc.mp3
         | 
| 50 | 
            +
            11/10/2025,M2,201546294,64684883,1.93667E+18,1760149074,1760149090,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/337edca5-80fa-4f9d-8820-3fb333b384d5.mp3
         | 
| 51 | 
            +
            10/10/2025,M1,201475668,66214001,1.80957E+18,1760092690,1760092695,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/21e2b2ec-2475-4692-9b67-2453e262e77b.mp3
         | 
| 52 | 
            +
            10/10/2025,M5,201459599,57741938,6633631,1760087612,1760087617,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/02bd6e79-efc1-4cfd-bee9-12967c844735.mp3
         | 
| 53 | 
            +
            11/10/2025,M3,201551060,61444707,1.94443E+18,1760149891,1760149899,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/700c2e32-c5bd-48f3-8085-0617694963dd.mp3
         | 
| 54 | 
            +
            10/10/2025,M4,201453055,59345041,1.68365E+18,1760086275,1760086282,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/4f1ef95b-3432-41d3-8970-1698504ba010.mp3
         | 
| 55 | 
            +
            10/10/2025,M1,201426891,66520128,1728931,1760080190,1760080196,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/ec7180ac-2948-49a3-b709-f60b80dfee27.mp3
         | 
| 56 | 
            +
            12/10/2025,M1,201704571,66237684,1.77477E+18,1760229906,1760229911,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251012/21962/b70ed0f3-145f-49a6-9dbb-aa695e21d7de.mp3
         | 
| 57 | 
            +
            10/10/2025,M1,201457899,65109188,1.58483E+18,1760087330,1760087339,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/c5f910f6-6a5e-4bd0-8a64-f2805837558d.mp3
         | 
| 58 | 
            +
            11/10/2025,M2,201537965,63920995,2.04332E+18,1760147578,1760147593,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/38ff4d8b-4db3-4410-902e-3953699bf4eb.mp3
         | 
| 59 | 
            +
            11/10/2025,M1,201568081,66644507,1.92267E+18,1760154313,1760154318,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/a801cfcc-40d2-4dde-9994-b709836df856.mp3
         | 
| 60 | 
            +
            11/10/2025,M3,201539641,62112487,1.75162E+18,1760147868,1760147875,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/56ec2c07-3bf5-47ba-82c6-039bf094d6ca.mp3
         | 
| 61 | 
            +
            10/10/2025,M1,201483514,65958944,1.96386E+18,1760094328,1760094335,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/edd080b5-2439-4079-a27f-2f0941217825.mp3
         | 
| 62 | 
            +
            10/10/2025,M5,201417598,57494166,1.59238E+18,1760077922,1760077928,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/65750a23-6edc-479a-bea4-14ba9e43648e.mp3
         | 
| 63 | 
            +
            11/10/2025,M1,201528466,65224705,1.75014E+18,1760145272,1760145278,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/5b63572f-79d2-4d22-9ded-4fa2e3cd372b.mp3
         | 
| 64 | 
            +
            10/10/2025,M5,201453641,58921447,1.92301E+18,1760086392,1760086405,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/4a9b93d4-9878-4023-8dd2-40fcf9502a49.mp3
         | 
| 65 | 
            +
            11/10/2025,M1,201611955,65789335,1.88401E+18,1760163939,1760163946,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/6fb4f49d-fe9c-4a64-8c74-b5649d7e8175.mp3
         | 
| 66 | 
            +
            10/10/2025,M1,201459093,66318002,1.61088E+18,1760087527,1760087538,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/8a608622-e3d7-4a96-89ac-1300b7653c6f.mp3
         | 
| 67 | 
            +
            10/10/2025,M2,201418416,63100145,1.85044E+18,1760078077,1760078083,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/f6ab0a88-2055-4b88-9fbd-7af4aef5731f.mp3
         | 
| 68 | 
            +
            11/10/2025,M3,201537163,61356706,2.04189E+18,1760147448,1760147453,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/f1f6ad6a-7399-4fb2-8073-c83ef7093b6e.mp3
         | 
| 69 | 
            +
            10/10/2025,M3,201480897,61752670,1.99088E+18,1760093653,1760093662,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/cdf7552c-a3a8-4cfc-a4b1-651f84141090.mp3
         | 
| 70 | 
            +
            11/10/2025,M2,201605821,63901708,2.06357E+18,1760162987,1760162993,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/70b3816a-2e43-403a-97e0-dd3288596c71.mp3
         | 
| 71 | 
            +
            10/10/2025,M3,201457652,61356706,2.04189E+18,1760087292,1760087299,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/d5f5598a-b93c-4fb8-8bb1-56acc9bb0033.mp3
         | 
| 72 | 
            +
            10/10/2025,M2,201480118,64077815,1.99009E+18,1760093518,1760093526,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/8cac232e-2ed4-4506-8bcf-2a8e4bca91b5.mp3
         | 
| 73 | 
            +
            11/10/2025,M1,201551882,65617862,1.65598E+18,1760150016,1760150024,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/e71c8472-c3f7-486e-a388-1196836899bf.mp3
         | 
| 74 | 
            +
            10/10/2025,M5,201417451,58993884,1.18898E+16,1760077893,1760077905,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/2965b989-cf94-4dc4-afaa-5fda4ac4bb34.mp3
         | 
| 75 | 
            +
            11/10/2025,M5,201547506,58539902,1.92196E+18,1760149254,1760149265,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/1cb5b911-69c9-4b5a-ab2f-e4bad61dd0be.mp3
         | 
| 76 | 
            +
            11/10/2025,M1,201606566,66579640,1.92316E+18,1760163098,1760163103,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/2e5ff340-8197-4069-8533-0d47a221dc57.mp3
         | 
| 77 | 
            +
            11/10/2025,M2,201545849,63976411,2.01076E+18,1760148993,1760148999,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/e7592b1f-d24d-4fb5-afb9-0d64f23719d5.mp3
         | 
| 78 | 
            +
            10/10/2025,M1,201487535,66304049,1.96729E+18,1760095749,1760095754,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/03b31201-d6f7-4246-80ef-ec902f93e6bf.mp3
         | 
| 79 | 
            +
            10/10/2025,M1,201458971,66590224,2.06508E+18,1760087508,1760087515,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/01013ca6-1659-4613-8ae7-5d79372d4464.mp3
         | 
| 80 | 
            +
            11/10/2025,M4,201548032,59720355,1.54183E+18,1760149338,1760149343,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/61bafa17-b411-4d1a-b9bd-1a76c5087b9e.mp3
         | 
| 81 | 
            +
            10/10/2025,M5,201430001,57789932,1.99701E+18,1760080762,1760080768,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/aa25ffd1-82a2-4048-938d-0adc335cec41.mp3
         | 
| 82 | 
            +
            11/10/2025,M1,201596095,66001014,1.94845E+18,1760161430,1760161436,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/76c67081-af66-4a80-b531-25b14a8e0443.mp3
         | 
| 83 | 
            +
            11/10/2025,M3,201549933,62873165,1.87383E+18,1760149739,1760149747,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/fd0caad7-72fd-4152-a8ed-58253e946567.mp3
         | 
| 84 | 
            +
            10/10/2025,M5,201447596,58417708,1.9626E+18,1760085087,1760085092,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/0f8f64c4-2587-4501-abb8-23e0e5389635.mp3
         | 
| 85 | 
            +
            11/10/2025,M1,201596157,65991243,1.88004E+18,1760161446,1760161452,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/8f731992-cbc6-45e1-8203-5628d51a6e45.mp3
         | 
| 86 | 
            +
            10/10/2025,M2,201391828,63462209,1.1835E+16,1760067513,1760067535,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/2c641568-ebf6-4989-abeb-e2bc404d79e8.mp3
         | 
| 87 | 
            +
            11/10/2025,M4,201579066,60241526,1.95443E+18,1760157061,1760157070,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/34c79266-7c59-4e26-b936-05d64716fa79.mp3
         | 
| 88 | 
            +
            11/10/2025,M3,201539123,61552513,1.65655E+18,1760147755,1760147762,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/1dc2d33a-f080-4cb4-ad66-fb50761b500a.mp3
         | 
| 89 | 
            +
            11/10/2025,M5,201607636,57899370,1.93241E+18,1760163267,1760163287,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/d8e13804-692a-4cf7-b4e2-781384a1d559.mp3
         | 
| 90 | 
            +
            10/10/2025,M4,201426209,60181850,1.79511E+18,1760080075,1760080081,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/8f81c559-7392-46ff-8ebb-1a6edc41381c.mp3
         | 
| 91 | 
            +
            11/10/2025,M1,201535197,66655594,1.66159E+18,1760147133,1760147139,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/d793a0e3-bd7b-44f3-a1e2-bc1eb0908c1a.mp3
         | 
| 92 | 
            +
            11/10/2025,M4,201613127,61191667,1.78852E+18,1760164234,1760164240,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/4e794fb2-01bf-41ff-843f-945b2b1ec9df.mp3
         | 
| 93 | 
            +
            10/10/2025,M3,201456582,61353852,1.70556E+18,1760087088,1760087093,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/5fe67f28-5f86-43bb-a87f-589b75118d56.mp3
         | 
| 94 | 
            +
            11/10/2025,M2,201536974,63373730,2.02636E+18,1760147420,1760147433,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/cf2bf8cc-e2af-478a-96e4-686c59a98d4d.mp3
         | 
| 95 | 
            +
            11/10/2025,M6,201598303,57270639,1.57805E+18,1760161833,1760161850,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/07e2de01-8040-4942-8586-77d6eb38b64f.mp3
         | 
| 96 | 
            +
            11/10/2025,M4,201577614,60545450,1.95248E+18,1760156607,1760156635,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/53f6d6eb-c22e-4935-9d3c-941969a0241f.mp3
         | 
| 97 | 
            +
            10/10/2025,M1,201451460,66451819,1406890,1760085972,1760085978,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251010/21962/43061915-f8e0-405b-9be0-bd3826d0aa69.mp3
         | 
| 98 | 
            +
            11/10/2025,M5,201566778,57480954,1.79569E+18,1760154035,1760154041,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/f737520a-d62f-4c71-a81c-bfbcbd2887d3.mp3
         | 
| 99 | 
            +
            11/10/2025,M5,201579474,58000396,1.17763E+16,1760157205,1760157215,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/b8d5a9b1-13cf-4667-8271-1b562de5dd22.mp3
         | 
| 100 | 
            +
            11/10/2025,M1,201547564,66391023,987018,1760149263,1760149269,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/434cea47-98a9-4f24-b29e-bee36e9f9832.mp3
         | 
| 101 | 
            +
            11/10/2025,M5,201538978,57340689,1.62497E+18,1760147736,1760147755,https://idn1.obs.ap-southeast-4.myhuaweicloud.com/20251011/21962/a74e253e-34cf-42df-9c3c-c8f6a45b994e.mp3
         | 
    	
        examples/download_wav/step_1_download_wav.py
    CHANGED
    
    | @@ -31,7 +31,7 @@ def get_args(): | |
| 31 | 
             
                )
         | 
| 32 | 
             
                parser.add_argument(
         | 
| 33 | 
             
                    "--output_dir",
         | 
| 34 | 
            -
                    default=(project_path / "data/calling/ | 
| 35 | 
             
                    type=str
         | 
| 36 | 
             
                )
         | 
| 37 | 
             
                args = parser.parse_args()
         | 
| @@ -39,37 +39,7 @@ def get_args(): | |
| 39 |  | 
| 40 |  | 
| 41 | 
             
            excel_file_str = """
         | 
| 42 | 
            -
             | 
| 43 | 
            -
            AIAgent-CallLog-20250929134959.xlsx
         | 
| 44 | 
            -
            AIAgent-CallLog-20250929135030.xlsx
         | 
| 45 | 
            -
            AIAgent-CallLog-20250929135052.xlsx
         | 
| 46 | 
            -
            AIAgent-CallLog-20250929135122.xlsx
         | 
| 47 | 
            -
            AIAgent-CallLog-20250929135134.xlsx
         | 
| 48 | 
            -
            AIAgent-CallLog-20250929135209.xlsx
         | 
| 49 | 
            -
            AIAgent-CallLog-20250929135219.xlsx
         | 
| 50 | 
            -
            AIAgent-CallLog-20250929135247.xlsx
         | 
| 51 | 
            -
            AIAgent-CallLog-20250929135300.xlsx
         | 
| 52 | 
            -
            AIAgent-CallLog-20250929135311.xlsx
         | 
| 53 | 
            -
            AIAgent-CallLog-20250929135335.xlsx
         | 
| 54 | 
            -
            AIAgent-CallLog-20250929135344.xlsx
         | 
| 55 | 
            -
            AIAgent-CallLog-20250929135355.xlsx
         | 
| 56 | 
            -
            AIAgent-CallLog-20250929135443.xlsx
         | 
| 57 | 
            -
            AIAgent-CallLog-20250929135452.xlsx
         | 
| 58 | 
            -
            AIAgent-CallLog-20250929135501.xlsx
         | 
| 59 | 
            -
            AIAgent-CallLog-20250929135537.xlsx
         | 
| 60 | 
            -
            AIAgent-CallLog-20250929135544.xlsx
         | 
| 61 | 
            -
            AIAgent-CallLog-20250929135554.xlsx
         | 
| 62 | 
            -
            AIAgent-CallLog-20250929135630.xlsx
         | 
| 63 | 
            -
            AIAgent-CallLog-20250929135701.xlsx
         | 
| 64 | 
            -
            AIAgent-CallLog-20250929135710.xlsx
         | 
| 65 | 
            -
            AIAgent-CallLog-20250929135716.xlsx
         | 
| 66 | 
            -
            AIAgent-CallLog-20250929135755.xlsx
         | 
| 67 | 
            -
            AIAgent-CallLog-20250929135800.xlsx
         | 
| 68 | 
            -
            AIAgent-CallLog-20250929135809.xlsx
         | 
| 69 | 
            -
            AIAgent-CallLog-20250929135842.xlsx
         | 
| 70 | 
            -
            AIAgent-CallLog-20250929135849.xlsx
         | 
| 71 | 
            -
            AIAgent-CallLog-20250929135858.xlsx
         | 
| 72 | 
            -
            AIAgent-CallLog-20250929135909.xlsx
         | 
| 73 | 
             
            """
         | 
| 74 |  | 
| 75 |  | 
| @@ -101,11 +71,16 @@ def main(): | |
| 101 | 
             
                        continue
         | 
| 102 | 
             
                    excel_file = excel_file_dir / name
         | 
| 103 |  | 
| 104 | 
            -
                    df = pd.read_excel(excel_file.as_posix())
         | 
|  | |
| 105 | 
             
                    for i, row in tqdm(df.iterrows()):
         | 
| 106 | 
            -
                        call_date =  | 
| 107 | 
            -
                         | 
| 108 | 
            -
                         | 
|  | |
|  | |
|  | |
|  | |
| 109 | 
             
                        if pd.isna(record_url):
         | 
| 110 | 
             
                            continue
         | 
| 111 |  | 
| @@ -137,7 +112,7 @@ def main(): | |
| 137 | 
             
                        if resp.status_code != 200:
         | 
| 138 | 
             
                            raise AssertionError("status_code: {}; text: {}".format(resp.status_code, resp.text))
         | 
| 139 |  | 
| 140 | 
            -
                        filename = output_dir / "{}. | 
| 141 | 
             
                        with open(filename.as_posix(), "wb") as f:
         | 
| 142 | 
             
                            f.write(resp.content)
         | 
| 143 |  | 
|  | |
| 31 | 
             
                )
         | 
| 32 | 
             
                parser.add_argument(
         | 
| 33 | 
             
                    "--output_dir",
         | 
| 34 | 
            +
                    default=(project_path / "data/calling/62/wav_2ch").as_posix(),
         | 
| 35 | 
             
                    type=str
         | 
| 36 | 
             
                )
         | 
| 37 | 
             
                args = parser.parse_args()
         | 
|  | |
| 39 |  | 
| 40 |  | 
| 41 | 
             
            excel_file_str = """
         | 
| 42 | 
            +
            Temp Query 5_20251008-093912.csv
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 43 | 
             
            """
         | 
| 44 |  | 
| 45 |  | 
|  | |
| 71 | 
             
                        continue
         | 
| 72 | 
             
                    excel_file = excel_file_dir / name
         | 
| 73 |  | 
| 74 | 
            +
                    # df = pd.read_excel(excel_file.as_posix())
         | 
| 75 | 
            +
                    df = pd.read_csv(excel_file.as_posix())
         | 
| 76 | 
             
                    for i, row in tqdm(df.iterrows()):
         | 
| 77 | 
            +
                        call_date = "2025-10-12 00:00:00"
         | 
| 78 | 
            +
                        record_url = row["thirdpart_download_url"]
         | 
| 79 | 
            +
                        call_id = Path(record_url).stem
         | 
| 80 | 
            +
             | 
| 81 | 
            +
                        # call_date = row["Attempt time"]
         | 
| 82 | 
            +
                        # call_id = row["Call ID"]
         | 
| 83 | 
            +
                        # record_url = row["Recording file"]
         | 
| 84 | 
             
                        if pd.isna(record_url):
         | 
| 85 | 
             
                            continue
         | 
| 86 |  | 
|  | |
| 112 | 
             
                        if resp.status_code != 200:
         | 
| 113 | 
             
                            raise AssertionError("status_code: {}; text: {}".format(resp.status_code, resp.text))
         | 
| 114 |  | 
| 115 | 
            +
                        filename = output_dir / "{}.mp3".format(call_id)
         | 
| 116 | 
             
                        with open(filename.as_posix(), "wb") as f:
         | 
| 117 | 
             
                            f.write(resp.content)
         | 
| 118 |  | 
    	
        examples/download_wav/step_2_to_1ch.py
    CHANGED
    
    | @@ -3,8 +3,9 @@ | |
| 3 | 
             
            import argparse
         | 
| 4 | 
             
            import os
         | 
| 5 | 
             
            from pathlib import Path
         | 
| 6 | 
            -
            import time
         | 
| 7 |  | 
|  | |
|  | |
| 8 | 
             
            from scipy.io import wavfile
         | 
| 9 | 
             
            from tqdm import tqdm
         | 
| 10 |  | 
| @@ -16,12 +17,12 @@ def get_args(): | |
| 16 |  | 
| 17 | 
             
                parser.add_argument(
         | 
| 18 | 
             
                    "--audio_dir",
         | 
| 19 | 
            -
                    default=(project_path / "data/calling/ | 
| 20 | 
             
                    type=str
         | 
| 21 | 
             
                )
         | 
| 22 | 
             
                parser.add_argument(
         | 
| 23 | 
             
                    "--output_dir",
         | 
| 24 | 
            -
                    default=(project_path / "data/calling/ | 
| 25 | 
             
                    type=str
         | 
| 26 | 
             
                )
         | 
| 27 | 
             
                args = parser.parse_args()
         | 
| @@ -36,13 +37,13 @@ def main(): | |
| 36 | 
             
                output_dir.mkdir(parents=True, exist_ok=True)
         | 
| 37 |  | 
| 38 | 
             
                finished = set()
         | 
| 39 | 
            -
                for filename in tqdm(list(output_dir.glob("*. | 
| 40 | 
             
                    splits = filename.stem.split("_")
         | 
| 41 | 
             
                    call_id = splits[3]
         | 
| 42 | 
             
                    finished.add(call_id)
         | 
| 43 | 
             
                print(f"finished count: {len(finished)}")
         | 
| 44 |  | 
| 45 | 
            -
                for filename in tqdm(list(audio_dir.glob("*. | 
| 46 | 
             
                    call_id = filename.stem
         | 
| 47 |  | 
| 48 | 
             
                    if call_id in finished:
         | 
| @@ -51,16 +52,19 @@ def main(): | |
| 51 | 
             
                    finished.add(call_id)
         | 
| 52 |  | 
| 53 | 
             
                    try:
         | 
| 54 | 
            -
                        sample_rate, signal = wavfile.read(filename.as_posix())
         | 
|  | |
|  | |
| 55 | 
             
                    except UnboundLocalError as error:
         | 
| 56 | 
             
                        print(f"wavfile read failed. error type: {type(error)}, text: {str(error)}, filename: {filename.as_posix()}")
         | 
| 57 | 
             
                        raise error
         | 
| 58 | 
             
                    if sample_rate != 8000:
         | 
| 59 | 
             
                        raise AssertionError
         | 
| 60 |  | 
| 61 | 
            -
                    signal = signal[:, 0]
         | 
|  | |
| 62 |  | 
| 63 | 
            -
                    to_filename = output_dir / f"active_media_r_{call_id} | 
| 64 | 
             
                    try:
         | 
| 65 | 
             
                        wavfile.write(
         | 
| 66 | 
             
                            to_filename.as_posix(),
         | 
|  | |
| 3 | 
             
            import argparse
         | 
| 4 | 
             
            import os
         | 
| 5 | 
             
            from pathlib import Path
         | 
|  | |
| 6 |  | 
| 7 | 
            +
            import librosa
         | 
| 8 | 
            +
            import numpy as np
         | 
| 9 | 
             
            from scipy.io import wavfile
         | 
| 10 | 
             
            from tqdm import tqdm
         | 
| 11 |  | 
|  | |
| 17 |  | 
| 18 | 
             
                parser.add_argument(
         | 
| 19 | 
             
                    "--audio_dir",
         | 
| 20 | 
            +
                    default=(project_path / "data/calling/62/wav_2ch").as_posix(),
         | 
| 21 | 
             
                    type=str
         | 
| 22 | 
             
                )
         | 
| 23 | 
             
                parser.add_argument(
         | 
| 24 | 
             
                    "--output_dir",
         | 
| 25 | 
            +
                    default=(project_path / "data/calling/62/wav_1ch").as_posix(),
         | 
| 26 | 
             
                    type=str
         | 
| 27 | 
             
                )
         | 
| 28 | 
             
                args = parser.parse_args()
         | 
|  | |
| 37 | 
             
                output_dir.mkdir(parents=True, exist_ok=True)
         | 
| 38 |  | 
| 39 | 
             
                finished = set()
         | 
| 40 | 
            +
                for filename in tqdm(list(output_dir.glob("*.mp3"))):
         | 
| 41 | 
             
                    splits = filename.stem.split("_")
         | 
| 42 | 
             
                    call_id = splits[3]
         | 
| 43 | 
             
                    finished.add(call_id)
         | 
| 44 | 
             
                print(f"finished count: {len(finished)}")
         | 
| 45 |  | 
| 46 | 
            +
                for filename in tqdm(list(audio_dir.glob("*.mp3"))):
         | 
| 47 | 
             
                    call_id = filename.stem
         | 
| 48 |  | 
| 49 | 
             
                    if call_id in finished:
         | 
|  | |
| 52 | 
             
                    finished.add(call_id)
         | 
| 53 |  | 
| 54 | 
             
                    try:
         | 
| 55 | 
            +
                        # sample_rate, signal = wavfile.read(filename.as_posix())
         | 
| 56 | 
            +
                        signal, sample_rate = librosa.load(filename.as_posix(), sr=8000, mono=False)
         | 
| 57 | 
            +
                        signal = np.array(signal * (1 << 15), dtype=np.int16)
         | 
| 58 | 
             
                    except UnboundLocalError as error:
         | 
| 59 | 
             
                        print(f"wavfile read failed. error type: {type(error)}, text: {str(error)}, filename: {filename.as_posix()}")
         | 
| 60 | 
             
                        raise error
         | 
| 61 | 
             
                    if sample_rate != 8000:
         | 
| 62 | 
             
                        raise AssertionError
         | 
| 63 |  | 
| 64 | 
            +
                    # signal = signal[:, 0]
         | 
| 65 | 
            +
                    signal = signal[0, :]
         | 
| 66 |  | 
| 67 | 
            +
                    to_filename = output_dir / f"active_media_r_{call_id}_id-ID_none.wav"
         | 
| 68 | 
             
                    try:
         | 
| 69 | 
             
                        wavfile.write(
         | 
| 70 | 
             
                            to_filename.as_posix(),
         | 
    	
        examples/download_wav/step_3_split_two_second_wav.py
    CHANGED
    
    | @@ -15,17 +15,21 @@ def get_args(): | |
| 15 |  | 
| 16 | 
             
                parser.add_argument(
         | 
| 17 | 
             
                    "--audio_dir",
         | 
| 18 | 
            -
                    default=(project_path / "data/calling/ | 
|  | |
|  | |
|  | |
| 19 | 
             
                    type=str
         | 
| 20 | 
             
                )
         | 
| 21 | 
             
                parser.add_argument(
         | 
| 22 | 
             
                    "--output_dir",
         | 
| 23 | 
            -
                    default=(project_path / "data/calling/358/wav_segmented").as_posix(),
         | 
|  | |
| 24 | 
             
                    type=str
         | 
| 25 | 
             
                )
         | 
| 26 | 
             
                parser.add_argument(
         | 
| 27 | 
             
                    "--first_n_seconds",
         | 
| 28 | 
            -
                    default= | 
| 29 | 
             
                    type=int
         | 
| 30 | 
             
                )
         | 
| 31 | 
             
                args = parser.parse_args()
         | 
| @@ -40,12 +44,16 @@ def main(): | |
| 40 | 
             
                output_dir.mkdir(parents=True, exist_ok=True)
         | 
| 41 |  | 
| 42 | 
             
                for filename in tqdm(list(audio_dir.glob("*.wav"))):
         | 
| 43 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
| 44 | 
             
                    sample_rate, signal = wavfile.read(filename.as_posix())
         | 
| 45 | 
             
                    if sample_rate != 8000:
         | 
| 46 | 
             
                        raise AssertionError
         | 
| 47 |  | 
| 48 | 
            -
                    signal = signal[:, 0]
         | 
| 49 | 
             
                    signal_length = len(signal) - sample_rate * 2
         | 
| 50 | 
             
                    if signal_length <= 0:
         | 
| 51 | 
             
                        continue
         | 
| @@ -56,8 +64,7 @@ def main(): | |
| 56 | 
             
                        end = begin + sample_rate * 2
         | 
| 57 | 
             
                        sub_signal = signal[begin: end]
         | 
| 58 |  | 
| 59 | 
            -
                         | 
| 60 | 
            -
                        to_filename = output_dir / "{}_fi-FI_none_{}.wav".format(call_id, ts)
         | 
| 61 | 
             
                        wavfile.write(
         | 
| 62 | 
             
                            to_filename.as_posix(),
         | 
| 63 | 
             
                            sample_rate,
         | 
|  | |
| 15 |  | 
| 16 | 
             
                parser.add_argument(
         | 
| 17 | 
             
                    "--audio_dir",
         | 
| 18 | 
            +
                    # default=(project_path / "data/calling/66/wav_1ch").as_posix(),
         | 
| 19 | 
            +
                    # default=(project_path / "data/calling/358/wav_1ch/finished/voicemail_annotation").as_posix(),
         | 
| 20 | 
            +
                    # default=(project_path / "data/calling/358/wav_1ch/finished/voicemail_annotation").as_posix(),
         | 
| 21 | 
            +
                    default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\52\music",
         | 
| 22 | 
             
                    type=str
         | 
| 23 | 
             
                )
         | 
| 24 | 
             
                parser.add_argument(
         | 
| 25 | 
             
                    "--output_dir",
         | 
| 26 | 
            +
                    # default=(project_path / "data/calling/358/wav_segmented").as_posix(),
         | 
| 27 | 
            +
                    default=r"D:\Users\tianx\HuggingSpaces\template_match_asr\data\wav\early_media\52\music\wav_segmented",
         | 
| 28 | 
             
                    type=str
         | 
| 29 | 
             
                )
         | 
| 30 | 
             
                parser.add_argument(
         | 
| 31 | 
             
                    "--first_n_seconds",
         | 
| 32 | 
            +
                    default=1000,
         | 
| 33 | 
             
                    type=int
         | 
| 34 | 
             
                )
         | 
| 35 | 
             
                args = parser.parse_args()
         | 
|  | |
| 44 | 
             
                output_dir.mkdir(parents=True, exist_ok=True)
         | 
| 45 |  | 
| 46 | 
             
                for filename in tqdm(list(audio_dir.glob("*.wav"))):
         | 
| 47 | 
            +
                    splits = filename.stem.split("_")
         | 
| 48 | 
            +
                    call_id = splits[3]
         | 
| 49 | 
            +
                    language = splits[4]
         | 
| 50 | 
            +
                    scene_id = splits[5]
         | 
| 51 | 
            +
             | 
| 52 | 
             
                    sample_rate, signal = wavfile.read(filename.as_posix())
         | 
| 53 | 
             
                    if sample_rate != 8000:
         | 
| 54 | 
             
                        raise AssertionError
         | 
| 55 |  | 
| 56 | 
            +
                    # signal = signal[:, 0]
         | 
| 57 | 
             
                    signal_length = len(signal) - sample_rate * 2
         | 
| 58 | 
             
                    if signal_length <= 0:
         | 
| 59 | 
             
                        continue
         | 
|  | |
| 64 | 
             
                        end = begin + sample_rate * 2
         | 
| 65 | 
             
                        sub_signal = signal[begin: end]
         | 
| 66 |  | 
| 67 | 
            +
                        to_filename = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_{begin}.wav"
         | 
|  | |
| 68 | 
             
                        wavfile.write(
         | 
| 69 | 
             
                            to_filename.as_posix(),
         | 
| 70 | 
             
                            sample_rate,
         | 
    	
        examples/lstm_badcase_filter/step_1_badcase_filter.py
    ADDED
    
    | @@ -0,0 +1,233 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/usr/bin/python3
         | 
| 2 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 3 | 
            +
            import argparse
         | 
| 4 | 
            +
            from pathlib import Path
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            from tqdm import tqdm
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            from gradio_client import Client, handle_file
         | 
| 9 | 
            +
            import librosa
         | 
| 10 | 
            +
            import numpy as np
         | 
| 11 | 
            +
            import onnxruntime as ort
         | 
| 12 | 
            +
            from scipy.io import wavfile
         | 
| 13 | 
            +
            import torch
         | 
| 14 | 
            +
            import torchaudio
         | 
| 15 | 
            +
            import shutil
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            from project_settings import project_path
         | 
| 18 | 
            +
             | 
| 19 | 
            +
             | 
| 20 | 
            +
            def get_args():
                """Parse the command-line options of the bad-case filter script.

                Registered options (all parsed as ``str``):
                    --audio_dir: directory that is searched for the input wav files.
                    --onnx_model_file: path of the ONNX model file to load.
                    --output_dir: output directory of the script.

                Returns:
                    The ``argparse.Namespace`` produced by ``parse_args()``.
                """
                # Earlier --audio_dir defaults, kept for reference:
                #   r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\pt-BR2"
                #   r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\pt-BR"
                # Earlier --onnx_model_file default, kept for reference:
                #   (project_path / "examples/online_model_test/models/pt-BR.onnx").as_posix()
                option_defaults = (
                    (
                        "--audio_dir",
                        r"D:\Users\tianx\HuggingDatasets\calling_analysis\data\pt-BR\bell_and_di_then_mute",
                    ),
                    (
                        "--onnx_model_file",
                        "../online_model_test/models/pt-BR.onnx",
                    ),
                    (
                        "--output_dir",
                        (project_path / "data/badcase").as_posix(),
                    ),
                )

                arg_parser = argparse.ArgumentParser()
                # Register the options in the same order as they are listed above.
                for flag, default_value in option_defaults:
                    arg_parser.add_argument(flag, default=default_value, type=str)
                return arg_parser.parse_args()
         | 
| 42 | 
            +
             | 
| 43 | 
            +
             | 
| 44 | 
            +
            class OnlineModelConfig(object):
                """Plain container for the audio front-end settings of the online model.

                Attributes mirror the constructor arguments one-to-one:
                ``sample_rate``, ``n_fft``, ``hop_size``, ``n_mels``, ``f_min``, ``f_max``.
                """

                def __init__(
                    self,
                    sample_rate: int = 8000,
                    n_fft: int = 1024,
                    hop_size: int = 512,
                    n_mels: int = 80,
                    f_min: float = 10.0,
                    f_max: float = 3800.0,
                ):
                    # Frequency range of the mel filter bank.
                    self.f_min, self.f_max = f_min, f_max
                    # Number of mel bins.
                    self.n_mels = n_mels
                    # STFT geometry.
                    self.n_fft, self.hop_size = n_fft, hop_size
                    # Sample rate the features are computed for.
                    self.sample_rate = sample_rate
         | 
| 59 | 
            +
             | 
| 60 | 
            +
             | 
| 61 | 
            +
            class OnlineModelInference(object):
                """Stateful ONNX inference wrapper for the LSTM sound classifier.

                The wrapper turns a waveform into a mel spectrogram, feeds it together
                with the recurrent state (``h``, ``c``) to the ONNX session and returns
                the predicted label, its probability and the updated state, so that
                consecutive chunks of one recording can be classified incrementally.
                """

                # Class index (argmax of the model output) -> label name.
                _LABEL_MAP = {
                    0: "voice",
                    1: "voicemail",
                    2: "mute",
                    3: "noise",
                }

                def __init__(self,
                             model_path: str,
                             ):
                    """Load the ONNX model and build the mel-spectrogram transform.

                    Args:
                        model_path: path of the ONNX model file.
                    """
                    self.model_path = model_path

                    # Use CUDA when torch reports a GPU; the CPU provider is always listed as fallback.
                    if torch.cuda.is_available():
                        providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
                    else:
                        providers = ["CPUExecutionProvider"]
                    self.session = ort.InferenceSession(self.model_path, providers=providers)

                    self.config = OnlineModelConfig()

                    self.mel_transform = torchaudio.transforms.MelSpectrogram(
                        sample_rate=self.config.sample_rate,
                        n_fft=self.config.n_fft,
                        hop_length=self.config.hop_size,
                        n_mels=self.config.n_mels,
                        f_min=self.config.f_min,
                        f_max=self.config.f_max,
                        window_fn=torch.hamming_window
                    )

                def predict_by_ndarray(self,
                                       sub_signal: np.ndarray,
                                       h: np.ndarray = None,
                                       c: np.ndarray = None,
                                       ):
                    """Classify one waveform chunk given as a numpy array.

                    Args:
                        sub_signal: waveform, shape ``[num_samples,]``.
                        h: previous hidden state, or ``None`` to start from zeros.
                        c: previous cell state, or ``None`` to start from zeros.

                    Returns:
                        ``(label, prob, h, c)`` where ``label`` is the predicted class name,
                        ``prob`` its softmax probability and ``h``/``c`` the updated state
                        as numpy arrays.
                    """
                    # sub_signal, shape: [num_samples,]
                    sub_signal = torch.tensor(sub_signal, dtype=torch.float32)

                    sub_signal = sub_signal.unsqueeze(0)
                    # sub_signal, shape: [1, num_samples]
                    mel_spec = self.mel_transform.forward(sub_signal)
                    # mel_spec, shape: [1, n_mels, n_frames]
                    mel_spec = torch.transpose(mel_spec, dim0=1, dim1=2)
                    # mel_spec, shape: [1, n_frames, n_mels]

                    # Each state is converted on its own: the previous code tested ``h`` for both,
                    # which crashed on (h given, c None) and dropped ``c`` on (h None, c given).
                    h = torch.tensor(h) if h is not None else None
                    c = torch.tensor(c) if c is not None else None
                    label, prob, h, c = self.predict_by_mel_spec(mel_spec, h=h, c=c)
                    # h, c: torch.Tensor
                    h = h.numpy()
                    c = c.numpy()
                    return label, prob, h, c

                def predict_by_mel_spec(self,
                                        mel_spec: torch.Tensor,
                                        h: torch.Tensor = None,
                                        c: torch.Tensor = None,
                                        ):
                    """Classify one mel spectrogram.

                    Args:
                        mel_spec: features, shape ``[1, n_frames, n_mels]``.
                        h: previous hidden state, or ``None`` to start from zeros.
                        c: previous cell state, or ``None`` to start from zeros.

                    Returns:
                        ``(label, prob, h, c)`` where ``h`` and ``c`` are the updated state
                        as ``torch.Tensor``.

                    Raises:
                        KeyError: if the predicted class index has no entry in the label map.
                    """
                    # mel_spec, shape: [1, n_frames, n_mels]

                    if h is None:
                        h = np.zeros((3, 1, 64), dtype=np.float32)  # 3 LSTM layers, batch size 1, hidden size 64
                    else:
                        h = h.numpy()
                    if c is None:
                        c = np.zeros((3, 1, 64), dtype=np.float32)  # 3 LSTM layers, batch size 1, hidden size 64
                    else:
                        c = c.numpy()

                    mel_spec_np = mel_spec.numpy()
                    outputs = self.session.run(
                        input_feed={
                            "input": mel_spec_np,
                            "h": h,
                            "c": c
                        },
                        output_names=[
                            "output", "h_out", "c_out"
                        ],
                    )
                    logits, h, c = outputs
                    # logits, np.ndarray, shape: [b, num_labels]
                    # h, c: np.ndarray
                    h = torch.tensor(h)
                    c = torch.tensor(c)

                    probs = torch.softmax(torch.tensor(logits), dim=1)
                    max_prob, predicted_label_index = torch.max(probs, dim=1)

                    # ``.item()`` requires a single element, i.e. a batch of exactly one.
                    label = self.get_label_by_index(predicted_label_index.item())
                    prob = max_prob.item()
                    return label, prob, h, c

                @staticmethod
                def get_label_by_index(index: int):
                    """Return the label name for a class index; raises ``KeyError`` if unknown."""
                    return OnlineModelInference._LABEL_MAP[index]
         | 
| 159 | 
            +
             | 
| 160 | 
            +
             | 
| 161 | 
            +
            def main():
                """
                Collect clips on which the remote classifier and the local ONNX model disagree.

                For every ``active_media_r_*.wav`` under ``args.audio_dir`` the first two
                seconds (at 8 kHz) are classified twice: locally through
                ``OnlineModelInference.predict_by_ndarray`` and remotely through the Gradio
                endpoint ``/when_click_cls_button`` with model ``sound-8-ch4``.  When the
                remote label is "voicemail" or "bell" while the local label is not
                "voicemail", the two-second clip is stored in ``args.output_dir``.

                Files that are empty, shorter than two seconds, or whose name does not have
                the ``active_media_r_<call_id>_<language>_<scene_id>...`` layout are skipped.
                """
                args = get_args()

                # Alternative endpoint used during development: http://10.75.27.247:7864/
                client = Client("http://127.0.0.1:7864/")

                audio_dir = Path(args.audio_dir)
                output_dir = Path(args.output_dir)
                output_dir.mkdir(parents=True, exist_ok=True)

                model = OnlineModelInference(model_path=args.onnx_model_file)

                # Scratch file handed to the remote service; overwritten on every iteration.
                temp_file = "temp.wav"

                for filename in tqdm(audio_dir.glob("**/active_media_r_*.wav")):
                    splits = filename.stem.split("_")
                    if len(splits) < 6:
                        # Name does not carry call_id / language / scene_id fields.
                        continue
                    call_id, language, scene_id = splits[3], splits[4], splits[5]

                    signal, sample_rate = librosa.load(filename.as_posix(), sr=8000)
                    if len(signal) == 0:
                        continue

                    # Only the first two seconds are evaluated; shorter files are skipped.
                    window_size = sample_rate * 2
                    sub_signal = signal[:window_size]
                    if sub_signal.shape[0] != window_size:
                        continue

                    # Fresh recurrent state (h=None, c=None) for every file.
                    label1, _, _, _ = model.predict_by_ndarray(sub_signal, h=None, c=None)

                    # Float samples -> 16-bit PCM.  Clip first so that a full-scale sample
                    # (1.0 * 32768) saturates instead of wrapping around to -32768.
                    pcm = np.clip(sub_signal * (1 << 15), -32768, 32767).astype(np.int16)
                    wavfile.write(temp_file, sample_rate, pcm)

                    # Earlier runs used model_name="voicemail-pt-br-2-ch4" on the same endpoint.
                    label2, _ = client.predict(
                        audio_t=handle_file(temp_file),
                        model_name="sound-8-ch4",
                        ground_true="Hello!!",
                        api_name="/when_click_cls_button"
                    )

                    print(label1)
                    print(label2)

                    # Bad case: remote model hears voicemail/bell, local model does not.
                    # NOTE(review): the previous revision overwrote label1/label2 with
                    # constants right here, which made this condition always true.
                    if label2 in ("voicemail", "bell") and label1 != "voicemail":
                        tgt_file = output_dir / f"active_media_r_{call_id}_{language}_{scene_id}_0.wav"
                        if not tgt_file.exists():
                            shutil.move(
                                temp_file,
                                tgt_file.as_posix(),
                            )

                return
         | 
| 230 | 
            +
             | 
| 231 | 
            +
             | 
| 232 | 
            +
            if __name__ == "__main__":
         | 
| 233 | 
            +
                main()
         | 
    	
        examples/online_model_test/step_1_predict.py
    CHANGED
    
    | @@ -19,13 +19,13 @@ def get_args(): | |
| 19 | 
             
                parser = argparse.ArgumentParser()
         | 
| 20 | 
             
                parser.add_argument(
         | 
| 21 | 
             
                    "--audio_dir",
         | 
| 22 | 
            -
                    default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\ | 
| 23 | 
             
                    type=str,
         | 
| 24 | 
             
                )
         | 
| 25 | 
            -
                parser.add_argument("--onnx_model_file", default=" | 
| 26 | 
             
                parser.add_argument("--target_duration", default=8.0, type=float)
         | 
| 27 |  | 
| 28 | 
            -
                parser.add_argument("--output_file", default=" | 
| 29 |  | 
| 30 | 
             
                args = parser.parse_args()
         | 
| 31 | 
             
                return args
         | 
| @@ -177,13 +177,15 @@ def main(): | |
| 177 | 
             
                    for begin in range(0, target_duration, sample_rate*2):
         | 
| 178 | 
             
                        end = begin + sample_rate*2
         | 
| 179 | 
             
                        sub_signal = signal[begin: end]
         | 
| 180 | 
            -
                        if len(sub_signal)  | 
| 181 | 
             
                            break
         | 
| 182 | 
             
                        label, prob, h, c = model.predict_by_ndarray(sub_signal, h=h, c=c)
         | 
| 183 | 
             
                        predict_result.append({
         | 
| 184 | 
             
                            "label": label,
         | 
| 185 | 
             
                            "prob": prob,
         | 
| 186 | 
             
                        })
         | 
|  | |
|  | |
| 187 | 
             
                    label_list = [p["label"] for p in predict_result]
         | 
| 188 | 
             
                    predict_result_ = json.dumps(predict_result, ensure_ascii=False, indent=4)
         | 
| 189 | 
             
                    label2 = predict_result[0]["label"]
         | 
|  | |
| 19 | 
             
                parser = argparse.ArgumentParser()
         | 
| 20 | 
             
                parser.add_argument(
         | 
| 21 | 
             
                    "--audio_dir",
         | 
| 22 | 
            +
                    default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\th-TH\th-TH\early_media_no_voice",
         | 
| 23 | 
             
                    type=str,
         | 
| 24 | 
             
                )
         | 
| 25 | 
            +
                parser.add_argument("--onnx_model_file", default="models/th-TH.onnx", type=str)
         | 
| 26 | 
             
                parser.add_argument("--target_duration", default=8.0, type=float)
         | 
| 27 |  | 
| 28 | 
            +
                parser.add_argument("--output_file", default="th-TH_predict.xlsx", type=str)
         | 
| 29 |  | 
| 30 | 
             
                args = parser.parse_args()
         | 
| 31 | 
             
                return args
         | 
|  | |
| 177 | 
             
                    for begin in range(0, target_duration, sample_rate*2):
         | 
| 178 | 
             
                        end = begin + sample_rate*2
         | 
| 179 | 
             
                        sub_signal = signal[begin: end]
         | 
| 180 | 
            +
                        if len(sub_signal) < 0.5 * sample_rate:
         | 
| 181 | 
             
                            break
         | 
| 182 | 
             
                        label, prob, h, c = model.predict_by_ndarray(sub_signal, h=h, c=c)
         | 
| 183 | 
             
                        predict_result.append({
         | 
| 184 | 
             
                            "label": label,
         | 
| 185 | 
             
                            "prob": prob,
         | 
| 186 | 
             
                        })
         | 
| 187 | 
            +
                    if len(predict_result) == 0:
         | 
| 188 | 
            +
                        continue
         | 
| 189 | 
             
                    label_list = [p["label"] for p in predict_result]
         | 
| 190 | 
             
                    predict_result_ = json.dumps(predict_result, ensure_ascii=False, indent=4)
         | 
| 191 | 
             
                    label2 = predict_result[0]["label"]
         | 
    	
        examples/online_model_test/step_2_audio_filter.py
    CHANGED
    
    | @@ -10,10 +10,10 @@ import pandas as pd | |
| 10 | 
             
            def get_args():
         | 
| 11 | 
             
                parser = argparse.ArgumentParser()
         | 
| 12 |  | 
| 13 | 
            -
                parser.add_argument("--predict_file", default=" | 
| 14 | 
             
                parser.add_argument(
         | 
| 15 | 
             
                    "--output_dir",
         | 
| 16 | 
            -
                    default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\ | 
| 17 | 
             
                    type=str,
         | 
| 18 | 
             
                )
         | 
| 19 | 
             
                args = parser.parse_args()
         | 
| @@ -24,12 +24,16 @@ def main(): | |
| 24 | 
             
                args = get_args()
         | 
| 25 |  | 
| 26 | 
             
                output_dir = Path(args.output_dir)
         | 
|  | |
| 27 |  | 
| 28 | 
             
                df = pd.read_excel(args.predict_file)
         | 
| 29 | 
             
                for i, row in df.iterrows():
         | 
| 30 | 
             
                    filename = row["filename"]
         | 
| 31 | 
             
                    ground_truth_ = row["ground_truth_"]
         | 
|  | |
| 32 |  | 
|  | |
|  | |
| 33 | 
             
                    if ground_truth_ == "voicemail":
         | 
| 34 | 
             
                        shutil.copy(
         | 
| 35 | 
             
                            filename,
         | 
|  | |
| 10 | 
             
            def get_args():
         | 
| 11 | 
             
                parser = argparse.ArgumentParser()
         | 
| 12 |  | 
| 13 | 
            +
                parser.add_argument("--predict_file", default="th-TH_predict.xlsx", type=str)
         | 
| 14 | 
             
                parser.add_argument(
         | 
| 15 | 
             
                    "--output_dir",
         | 
| 16 | 
            +
                    default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\th-TH\th-TH\early_media_no_voice\bad_case",
         | 
| 17 | 
             
                    type=str,
         | 
| 18 | 
             
                )
         | 
| 19 | 
             
                args = parser.parse_args()
         | 
|  | |
| 24 | 
             
                args = get_args()
         | 
| 25 |  | 
| 26 | 
             
                output_dir = Path(args.output_dir)
         | 
| 27 | 
            +
                output_dir.mkdir(parents=True, exist_ok=True)
         | 
| 28 |  | 
| 29 | 
             
                df = pd.read_excel(args.predict_file)
         | 
| 30 | 
             
                for i, row in df.iterrows():
         | 
| 31 | 
             
                    filename = row["filename"]
         | 
| 32 | 
             
                    ground_truth_ = row["ground_truth_"]
         | 
| 33 | 
            +
                    flag = row["flag"]
         | 
| 34 |  | 
| 35 | 
            +
                    if flag == 1:
         | 
| 36 | 
            +
                        continue
         | 
| 37 | 
             
                    if ground_truth_ == "voicemail":
         | 
| 38 | 
             
                        shutil.copy(
         | 
| 39 | 
             
                            filename,
         | 
    	
        examples/online_model_test/step_3_make_test.py
    CHANGED
    
    | @@ -15,12 +15,12 @@ def get_args(): | |
| 15 |  | 
| 16 | 
             
                parser.add_argument(
         | 
| 17 | 
             
                    "--src_dir",
         | 
| 18 | 
            -
                    default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\ | 
| 19 | 
             
                    type=str,
         | 
| 20 | 
             
                )
         | 
| 21 | 
             
                parser.add_argument(
         | 
| 22 | 
             
                    "--tgt_dir",
         | 
| 23 | 
            -
                    default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\voice_test_examples\ | 
| 24 | 
             
                    type=str,
         | 
| 25 | 
             
                )
         | 
| 26 | 
             
                parser.add_argument(
         | 
|  | |
| 15 |  | 
| 16 | 
             
                parser.add_argument(
         | 
| 17 | 
             
                    "--src_dir",
         | 
| 18 | 
            +
                    default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\calling\63\voicemail",
         | 
| 19 | 
             
                    type=str,
         | 
| 20 | 
             
                )
         | 
| 21 | 
             
                parser.add_argument(
         | 
| 22 | 
             
                    "--tgt_dir",
         | 
| 23 | 
            +
                    default=r"D:\Users\tianx\HuggingDatasets\international_voice\data\voice_test_examples\63\96",
         | 
| 24 | 
             
                    type=str,
         | 
| 25 | 
             
                )
         | 
| 26 | 
             
                parser.add_argument(
         | 
    	
        examples/online_model_test/test.py
    ADDED
    
    | @@ -0,0 +1,84 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/usr/bin/python3
         | 
| 2 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 3 | 
            +
            import argparse
         | 
| 4 | 
            +
            from collections import defaultdict
         | 
| 5 | 
            +
            from pathlib import Path
         | 
| 6 | 
            +
            import shutil
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            from gradio_client import Client, handle_file
         | 
| 9 | 
            +
            import librosa
         | 
| 10 | 
            +
            import pandas as pd
         | 
| 11 | 
            +
            from tqdm import tqdm
         | 
| 12 | 
            +
             | 
| 13 | 
            +
             | 
| 14 | 
            +
            def get_args():
                """
                Parse the command line options of this script.

                All three options are strings and fall back to the defaults below:

                --finished_dir: directory that is scanned for already finished ``*.wav`` files.
                --src_dir: directory tree that is scanned for source ``*.wav`` files.
                --tgt_dir: directory that receives the moved files.

                :return: argparse.Namespace with ``finished_dir``, ``src_dir`` and ``tgt_dir``.
                """
                option_defaults = {
                    "--finished_dir": r"D:\Users\tianx\HuggingSpaces\cc_audio_8\data\calling\66\wav_1ch",
                    "--src_dir": r"D:/Users/tianx/HuggingDatasets/international_voice/data/sea-idn/audio_lib_hkg_1/audio_lib_hkg_1/th-TH/th-TH/",
                    "--tgt_dir": r"D:\Users\tianx\HuggingDatasets\international_voice\data\sea-idn\audio_lib_hkg_1\audio_lib_hkg_1\th-TH\bad_case",
                }

                parser = argparse.ArgumentParser()
                # Dict order is insertion order, so options are registered as listed above.
                for option, default_value in option_defaults.items():
                    parser.add_argument(option, default=default_value, type=str)
                return parser.parse_args()
         | 
| 33 | 
            +
             | 
| 34 | 
            +
             | 
| 35 | 
            +
            def main():
                """
                Move every source recording whose call has already been processed.

                The call_id is the 4th ``_``-separated field of a file stem.  Steps:

                1. Collect the call_ids of all ``*.wav`` files directly in ``args.finished_dir``.
                2. Index all ``*.wav`` files below ``args.src_dir`` by call_id.
                3. For every ``active_media_r_*.wav`` below ``args.src_dir`` whose call_id is
                   finished, move all indexed files of that call_id into ``args.tgt_dir``.

                Files whose stem has fewer than four fields carry no call_id and are ignored.
                Each call_id is moved at most once.
                """
                args = get_args()

                finished_dir = Path(args.finished_dir)
                src_dir = Path(args.src_dir)
                tgt_dir = Path(args.tgt_dir)
                tgt_dir.mkdir(parents=True, exist_ok=True)

                # call_ids that are echoed to stdout whenever they are encountered.
                traced_call_ids = ("27521940-feef-4bfa-ba55-b1f00a10c64d",)

                def call_id_of(path):
                    """Return the call_id encoded in the stem of ``path``, or None if absent."""
                    fields = path.stem.split("_")
                    if len(fields) < 4:
                        return None
                    call_id = fields[3]
                    if call_id in traced_call_ids:
                        print(f"call_id: {call_id}")
                    return call_id

                # finished
                finished = set()
                for filename in finished_dir.glob("*.wav"):
                    call_id = call_id_of(filename)
                    if call_id is not None:
                        finished.add(call_id)
                print(f"finished count: {len(finished)}")

                # call_id_to_wav_file_list
                call_id_to_wav_file_list = defaultdict(list)
                for filename in src_dir.glob("**/*.wav"):
                    call_id = call_id_of(filename)
                    if call_id is not None:
                        call_id_to_wav_file_list[call_id].append(filename.as_posix())
                print(f"src count: {len(call_id_to_wav_file_list)}")

                # Materialize the listing first: files are moved out of this tree below,
                # and a lazy recursive glob must not be walked while it is being modified.
                active_files = list(src_dir.glob("**/active_media_r_*.wav"))

                moved_call_ids = set()
                for filename in tqdm(active_files):
                    call_id = call_id_of(filename)
                    if call_id is None or call_id not in finished:
                        continue
                    if call_id in moved_call_ids:
                        # A second active_media file of the same call: its files are gone
                        # already, moving them again would raise FileNotFoundError.
                        continue
                    moved_call_ids.add(call_id)

                    for wav_file in call_id_to_wav_file_list[call_id]:
                        shutil.move(
                            wav_file,
                            tgt_dir.as_posix(),
                        )
                return
         | 
| 81 | 
            +
             | 
| 82 | 
            +
             | 
| 83 | 
            +
            if __name__ == "__main__":
         | 
| 84 | 
            +
                main()
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/requirements.txt
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/run.sh
    RENAMED
    
    | @@ -2,22 +2,22 @@ | |
| 2 |  | 
| 3 | 
             
            : <<'END'
         | 
| 4 |  | 
| 5 | 
            -
            sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32 \
         | 
| 6 | 
             
            --filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
         | 
| 7 | 
             
            E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
         | 
| 8 | 
             
            --label_plan 4
         | 
| 9 |  | 
| 10 | 
            -
            sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32 \
         | 
| 11 | 
             
            --filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
         | 
| 12 | 
             
            E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
         | 
| 13 | 
             
            --label_plan 4
         | 
| 14 |  | 
| 15 | 
            -
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32 \
         | 
| 16 | 
             
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 17 | 
             
            --label_plan 3 \
         | 
| 18 | 
             
            --config_file "yaml/conv2d-classifier-3-ch4.yaml"
         | 
| 19 |  | 
| 20 | 
            -
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32 \
         | 
| 21 | 
             
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
         | 
| 22 | 
             
            --label_plan 2-voicemail \
         | 
| 23 | 
             
            --config_file "yaml/conv2d-classifier-2-ch32.yaml"
         | 
|  | |
| 2 |  | 
| 3 | 
             
            : <<'END'
         | 
| 4 |  | 
| 5 | 
            +
            sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32-cnn \
         | 
| 6 | 
             
            --filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
         | 
| 7 | 
             
            E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
         | 
| 8 | 
             
            --label_plan 4
         | 
| 9 |  | 
| 10 | 
            +
            sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32-cnn \
         | 
| 11 | 
             
            --filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
         | 
| 12 | 
             
            E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
         | 
| 13 | 
             
            --label_plan 4
         | 
| 14 |  | 
| 15 | 
            +
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32-cnn \
         | 
| 16 | 
             
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 17 | 
             
            --label_plan 3 \
         | 
| 18 | 
             
            --config_file "yaml/conv2d-classifier-3-ch4.yaml"
         | 
| 19 |  | 
| 20 | 
            +
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32-cnn \
         | 
| 21 | 
             
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
         | 
| 22 | 
             
            --label_plan 2-voicemail \
         | 
| 23 | 
             
            --config_file "yaml/conv2d-classifier-2-ch32.yaml"
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/run_batch.sh
    RENAMED
    
    | @@ -3,25 +3,25 @@ | |
| 3 |  | 
| 4 | 
             
            # sound ch4
         | 
| 5 |  | 
| 6 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch4 \
         | 
| 7 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 8 | 
             
            #--label_plan 2 \
         | 
| 9 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml"
         | 
| 10 | 
             
            #
         | 
| 11 | 
             
            #
         | 
| 12 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch4 \
         | 
| 13 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 14 | 
             
            #--label_plan 3 \
         | 
| 15 | 
             
            #--config_file "yaml/conv2d-classifier-3-ch4.yaml"
         | 
| 16 | 
             
            #
         | 
| 17 | 
             
            #
         | 
| 18 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch4 \
         | 
| 19 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 20 | 
             
            #--label_plan 4 \
         | 
| 21 | 
             
            #--config_file "yaml/conv2d-classifier-4-ch4.yaml"
         | 
| 22 | 
             
            #
         | 
| 23 | 
             
            #
         | 
| 24 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch4 \
         | 
| 25 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 26 | 
             
            #--label_plan 8 \
         | 
| 27 | 
             
            #--config_file "yaml/conv2d-classifier-8-ch4.yaml"
         | 
| @@ -29,25 +29,25 @@ | |
| 29 |  | 
| 30 | 
             
            # sound ch8
         | 
| 31 |  | 
| 32 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch8 \
         | 
| 33 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 34 | 
             
            #--label_plan 2 \
         | 
| 35 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch8.yaml"
         | 
| 36 | 
             
            #
         | 
| 37 | 
             
            #
         | 
| 38 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch8 \
         | 
| 39 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 40 | 
             
            #--label_plan 3 \
         | 
| 41 | 
             
            #--config_file "yaml/conv2d-classifier-3-ch8.yaml"
         | 
| 42 | 
             
            #
         | 
| 43 | 
             
            #
         | 
| 44 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch8 \
         | 
| 45 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 46 | 
             
            #--label_plan 4 \
         | 
| 47 | 
             
            #--config_file "yaml/conv2d-classifier-4-ch8.yaml"
         | 
| 48 | 
             
            #
         | 
| 49 | 
             
            #
         | 
| 50 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch8 \
         | 
| 51 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 52 | 
             
            #--label_plan 8 \
         | 
| 53 | 
             
            #--config_file "yaml/conv2d-classifier-8-ch8.yaml"
         | 
| @@ -55,25 +55,25 @@ | |
| 55 |  | 
| 56 | 
             
            # sound ch16
         | 
| 57 |  | 
| 58 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch16 \
         | 
| 59 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 60 | 
             
            #--label_plan 2 \
         | 
| 61 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch16.yaml"
         | 
| 62 |  | 
| 63 |  | 
| 64 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch16 \
         | 
| 65 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 66 | 
             
            #--label_plan 3 \
         | 
| 67 | 
             
            #--config_file "yaml/conv2d-classifier-3-ch16.yaml"
         | 
| 68 | 
             
            #
         | 
| 69 | 
             
            #
         | 
| 70 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch16 \
         | 
| 71 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 72 | 
             
            #--label_plan 4 \
         | 
| 73 | 
             
            #--config_file "yaml/conv2d-classifier-4-ch16.yaml"
         | 
| 74 | 
             
            #
         | 
| 75 | 
             
            #
         | 
| 76 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch16 \
         | 
| 77 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 78 | 
             
            #--label_plan 8 \
         | 
| 79 | 
             
            #--config_file "yaml/conv2d-classifier-8-ch16.yaml"
         | 
| @@ -81,25 +81,25 @@ | |
| 81 |  | 
| 82 | 
             
            # sound ch32
         | 
| 83 |  | 
| 84 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch32 \
         | 
| 85 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 86 | 
             
            #--label_plan 2 \
         | 
| 87 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml"
         | 
| 88 | 
             
            #
         | 
| 89 | 
             
            #
         | 
| 90 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32 \
         | 
| 91 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 92 | 
             
            #--label_plan 3 \
         | 
| 93 | 
             
            #--config_file "yaml/conv2d-classifier-3-ch32.yaml"
         | 
| 94 | 
             
            #
         | 
| 95 | 
             
            #
         | 
| 96 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch32 \
         | 
| 97 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 98 | 
             
            #--label_plan 4 \
         | 
| 99 | 
             
            #--config_file "yaml/conv2d-classifier-4-ch32.yaml"
         | 
| 100 |  | 
| 101 |  | 
| 102 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch32 \
         | 
| 103 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 104 | 
             
            #--label_plan 8 \
         | 
| 105 | 
             
            #--config_file "yaml/conv2d-classifier-8-ch32.yaml"
         | 
| @@ -107,12 +107,12 @@ | |
| 107 |  | 
| 108 | 
             
            # pretrained voicemail
         | 
| 109 |  | 
| 110 | 
            -
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch4 \
         | 
| 111 | 
             
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 112 | 
             
            --label_plan 2-voicemail \
         | 
| 113 | 
             
            --config_file "yaml/conv2d-classifier-2-ch4.yaml"
         | 
| 114 |  | 
| 115 | 
            -
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch32 \
         | 
| 116 | 
             
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 117 | 
             
            --label_plan 2-voicemail \
         | 
| 118 | 
             
            --config_file "yaml/conv2d-classifier-2-ch32.yaml"
         | 
| @@ -120,149 +120,149 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi | |
| 120 |  | 
| 121 | 
             
            # voicemail ch4
         | 
| 122 |  | 
| 123 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch4 \
         | 
| 124 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
         | 
| 125 | 
             
            #--label_plan 2-voicemail \
         | 
| 126 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 127 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
         | 
| 128 |  | 
| 129 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch4 \
         | 
| 130 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
         | 
| 131 | 
             
            #--label_plan 2-voicemail \
         | 
| 132 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 133 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
         | 
| 134 | 
             
            #
         | 
| 135 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch4 \
         | 
| 136 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
         | 
| 137 | 
             
            #--label_plan 2-voicemail \
         | 
| 138 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 139 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
         | 
| 140 | 
             
            #
         | 
| 141 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch4 \
         | 
| 142 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
         | 
| 143 | 
             
            #--label_plan 2-voicemail \
         | 
| 144 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 145 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
         | 
| 146 | 
             
            #
         | 
| 147 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch4 \
         | 
| 148 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
         | 
| 149 | 
             
            #--label_plan 2-voicemail \
         | 
| 150 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 151 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
         | 
| 152 | 
             
            #
         | 
| 153 | 
            -
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch4 \
         | 
| 154 | 
             
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
         | 
| 155 | 
             
            --label_plan 2-voicemail \
         | 
| 156 | 
             
            --config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 157 | 
            -
            --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
         | 
| 158 |  | 
| 159 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch4 \
         | 
| 160 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
         | 
| 161 | 
             
            #--label_plan 2-voicemail \
         | 
| 162 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 163 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
         | 
| 164 | 
             
            #
         | 
| 165 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch4 \
         | 
| 166 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
         | 
| 167 | 
             
            #--label_plan 2-voicemail \
         | 
| 168 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 169 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
         | 
| 170 | 
             
            #
         | 
| 171 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch4 \
         | 
| 172 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
         | 
| 173 | 
             
            #--label_plan 2-voicemail \
         | 
| 174 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 175 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
         | 
| 176 | 
             
            #
         | 
| 177 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch4 \
         | 
| 178 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
         | 
| 179 | 
             
            #--label_plan 2-voicemail \
         | 
| 180 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 181 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
         | 
| 182 | 
             
            #
         | 
| 183 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch4 \
         | 
| 184 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
         | 
| 185 | 
             
            #--label_plan 2-voicemail \
         | 
| 186 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 187 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
         | 
| 188 | 
             
            #
         | 
| 189 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch4 \
         | 
| 190 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
         | 
| 191 | 
             
            #--label_plan 2-voicemail \
         | 
| 192 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 193 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4.zip"
         | 
| 194 |  | 
| 195 |  | 
| 196 | 
             
            # voicemail ch32
         | 
| 197 |  | 
| 198 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch32 \
         | 
| 199 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
         | 
| 200 | 
             
            #--label_plan 2-voicemail \
         | 
| 201 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 202 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
         | 
| 203 |  | 
| 204 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch32 \
         | 
| 205 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
         | 
| 206 | 
             
            #--label_plan 2-voicemail \
         | 
| 207 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 208 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
         | 
| 209 | 
             
            #
         | 
| 210 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch32 \
         | 
| 211 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
         | 
| 212 | 
             
            #--label_plan 2-voicemail \
         | 
| 213 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 214 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
         | 
| 215 | 
             
            #
         | 
| 216 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch32 \
         | 
| 217 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
         | 
| 218 | 
             
            #--label_plan 2-voicemail \
         | 
| 219 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 220 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
         | 
| 221 | 
             
            #
         | 
| 222 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch32 \
         | 
| 223 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
         | 
| 224 | 
             
            #--label_plan 2-voicemail \
         | 
| 225 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 226 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
         | 
| 227 | 
             
            #
         | 
| 228 | 
            -
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch32 \
         | 
| 229 | 
             
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
         | 
| 230 | 
             
            --label_plan 2-voicemail \
         | 
| 231 | 
             
            --config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 232 | 
            -
            --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
         | 
| 233 |  | 
| 234 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch32 \
         | 
| 235 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
         | 
| 236 | 
             
            #--label_plan 2-voicemail \
         | 
| 237 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 238 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
         | 
| 239 | 
             
            #
         | 
| 240 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch32 \
         | 
| 241 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
         | 
| 242 | 
             
            #--label_plan 2-voicemail \
         | 
| 243 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 244 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
         | 
| 245 | 
             
            #
         | 
| 246 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32 \
         | 
| 247 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
         | 
| 248 | 
             
            #--label_plan 2-voicemail \
         | 
| 249 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 250 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
         | 
| 251 | 
             
            #
         | 
| 252 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch32 \
         | 
| 253 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
         | 
| 254 | 
             
            #--label_plan 2-voicemail \
         | 
| 255 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 256 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
         | 
| 257 | 
             
            #
         | 
| 258 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch32 \
         | 
| 259 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
         | 
| 260 | 
             
            #--label_plan 2-voicemail \
         | 
| 261 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 262 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
         | 
| 263 | 
             
            #
         | 
| 264 | 
            -
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch32 \
         | 
| 265 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
         | 
| 266 | 
             
            #--label_plan 2-voicemail \
         | 
| 267 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 268 | 
            -
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32.zip"
         | 
|  | |
| 3 |  | 
| 4 | 
             
            # sound ch4
         | 
| 5 |  | 
| 6 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch4-cnn \
         | 
| 7 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 8 | 
             
            #--label_plan 2 \
         | 
| 9 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml"
         | 
| 10 | 
             
            #
         | 
| 11 | 
             
            #
         | 
| 12 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch4-cnn \
         | 
| 13 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 14 | 
             
            #--label_plan 3 \
         | 
| 15 | 
             
            #--config_file "yaml/conv2d-classifier-3-ch4.yaml"
         | 
| 16 | 
             
            #
         | 
| 17 | 
             
            #
         | 
| 18 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch4-cnn \
         | 
| 19 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 20 | 
             
            #--label_plan 4 \
         | 
| 21 | 
             
            #--config_file "yaml/conv2d-classifier-4-ch4.yaml"
         | 
| 22 | 
             
            #
         | 
| 23 | 
             
            #
         | 
| 24 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch4-cnn \
         | 
| 25 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 26 | 
             
            #--label_plan 8 \
         | 
| 27 | 
             
            #--config_file "yaml/conv2d-classifier-8-ch4.yaml"
         | 
|  | |
| 29 |  | 
| 30 | 
             
            # sound ch8
         | 
| 31 |  | 
| 32 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch8-cnn \
         | 
| 33 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 34 | 
             
            #--label_plan 2 \
         | 
| 35 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch8.yaml"
         | 
| 36 | 
             
            #
         | 
| 37 | 
             
            #
         | 
| 38 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch8-cnn \
         | 
| 39 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 40 | 
             
            #--label_plan 3 \
         | 
| 41 | 
             
            #--config_file "yaml/conv2d-classifier-3-ch8.yaml"
         | 
| 42 | 
             
            #
         | 
| 43 | 
             
            #
         | 
| 44 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch8-cnn \
         | 
| 45 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 46 | 
             
            #--label_plan 4 \
         | 
| 47 | 
             
            #--config_file "yaml/conv2d-classifier-4-ch8.yaml"
         | 
| 48 | 
             
            #
         | 
| 49 | 
             
            #
         | 
| 50 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch8-cnn \
         | 
| 51 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 52 | 
             
            #--label_plan 8 \
         | 
| 53 | 
             
            #--config_file "yaml/conv2d-classifier-8-ch8.yaml"
         | 
|  | |
| 55 |  | 
| 56 | 
             
            # sound ch16
         | 
| 57 |  | 
| 58 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch16-cnn \
         | 
| 59 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 60 | 
             
            #--label_plan 2 \
         | 
| 61 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch16.yaml"
         | 
| 62 |  | 
| 63 |  | 
| 64 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch16-cnn \
         | 
| 65 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 66 | 
             
            #--label_plan 3 \
         | 
| 67 | 
             
            #--config_file "yaml/conv2d-classifier-3-ch16.yaml"
         | 
| 68 | 
             
            #
         | 
| 69 | 
             
            #
         | 
| 70 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch16-cnn \
         | 
| 71 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 72 | 
             
            #--label_plan 4 \
         | 
| 73 | 
             
            #--config_file "yaml/conv2d-classifier-4-ch16.yaml"
         | 
| 74 | 
             
            #
         | 
| 75 | 
             
            #
         | 
| 76 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch16-cnn \
         | 
| 77 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 78 | 
             
            #--label_plan 8 \
         | 
| 79 | 
             
            #--config_file "yaml/conv2d-classifier-8-ch16.yaml"
         | 
|  | |
| 81 |  | 
| 82 | 
             
            # sound ch32
         | 
| 83 |  | 
| 84 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-2-ch32-cnn \
         | 
| 85 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 86 | 
             
            #--label_plan 2 \
         | 
| 87 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml"
         | 
| 88 | 
             
            #
         | 
| 89 | 
             
            #
         | 
| 90 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32-cnn \
         | 
| 91 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 92 | 
             
            #--label_plan 3 \
         | 
| 93 | 
             
            #--config_file "yaml/conv2d-classifier-3-ch32.yaml"
         | 
| 94 | 
             
            #
         | 
| 95 | 
             
            #
         | 
| 96 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-4-ch32-cnn \
         | 
| 97 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 98 | 
             
            #--label_plan 4 \
         | 
| 99 | 
             
            #--config_file "yaml/conv2d-classifier-4-ch32.yaml"
         | 
| 100 |  | 
| 101 |  | 
| 102 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-8-ch32-cnn \
         | 
| 103 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 104 | 
             
            #--label_plan 8 \
         | 
| 105 | 
             
            #--config_file "yaml/conv2d-classifier-8-ch32.yaml"
         | 
|  | |
| 107 |  | 
| 108 | 
             
            # pretrained voicemail
         | 
| 109 |  | 
| 110 | 
            +
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch4-cnn \
         | 
| 111 | 
             
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 112 | 
             
            --label_plan 2-voicemail \
         | 
| 113 | 
             
            --config_file "yaml/conv2d-classifier-2-ch4.yaml"
         | 
| 114 |  | 
| 115 | 
            +
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-2-ch32-cnn \
         | 
| 116 | 
             
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 117 | 
             
            --label_plan 2-voicemail \
         | 
| 118 | 
             
            --config_file "yaml/conv2d-classifier-2-ch32.yaml"
         | 
|  | |
| 120 |  | 
| 121 | 
             
            # voicemail ch4
         | 
| 122 |  | 
| 123 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch4-cnn \
         | 
| 124 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
         | 
| 125 | 
             
            #--label_plan 2-voicemail \
         | 
| 126 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 127 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
         | 
| 128 |  | 
| 129 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch4-cnn \
         | 
| 130 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
         | 
| 131 | 
             
            #--label_plan 2-voicemail \
         | 
| 132 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 133 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
         | 
| 134 | 
             
            #
         | 
| 135 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch4-cnn \
         | 
| 136 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
         | 
| 137 | 
             
            #--label_plan 2-voicemail \
         | 
| 138 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 139 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
         | 
| 140 | 
             
            #
         | 
| 141 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch4-cnn \
         | 
| 142 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
         | 
| 143 | 
             
            #--label_plan 2-voicemail \
         | 
| 144 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 145 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
         | 
| 146 | 
             
            #
         | 
| 147 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch4-cnn \
         | 
| 148 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
         | 
| 149 | 
             
            #--label_plan 2-voicemail \
         | 
| 150 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 151 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
         | 
| 152 | 
             
            #
         | 
| 153 | 
            +
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch4-cnn \
         | 
| 154 | 
             
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
         | 
| 155 | 
             
            --label_plan 2-voicemail \
         | 
| 156 | 
             
            --config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 157 | 
            +
            --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
         | 
| 158 |  | 
| 159 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch4-cnn \
         | 
| 160 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
         | 
| 161 | 
             
            #--label_plan 2-voicemail \
         | 
| 162 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 163 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
         | 
| 164 | 
             
            #
         | 
| 165 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch4-cnn \
         | 
| 166 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
         | 
| 167 | 
             
            #--label_plan 2-voicemail \
         | 
| 168 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 169 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
         | 
| 170 | 
             
            #
         | 
| 171 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch4-cnn \
         | 
| 172 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
         | 
| 173 | 
             
            #--label_plan 2-voicemail \
         | 
| 174 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 175 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
         | 
| 176 | 
             
            #
         | 
| 177 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch4-cnn \
         | 
| 178 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
         | 
| 179 | 
             
            #--label_plan 2-voicemail \
         | 
| 180 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 181 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
         | 
| 182 | 
             
            #
         | 
| 183 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch4-cnn \
         | 
| 184 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
         | 
| 185 | 
             
            #--label_plan 2-voicemail \
         | 
| 186 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 187 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
         | 
| 188 | 
             
            #
         | 
| 189 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch4-cnn \
         | 
| 190 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
         | 
| 191 | 
             
            #--label_plan 2-voicemail \
         | 
| 192 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
         | 
| 193 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch4-cnn.zip"
         | 
| 194 |  | 
| 195 |  | 
| 196 | 
             
            # voicemail ch32
         | 
| 197 |  | 
| 198 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-ph-2-ch32-cnn \
         | 
| 199 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-PH/wav_finished/*/*.wav" \
         | 
| 200 | 
             
            #--label_plan 2-voicemail \
         | 
| 201 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 202 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
         | 
| 203 |  | 
| 204 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-sg-2-ch32-cnn \
         | 
| 205 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-SG/wav_finished/*/*.wav" \
         | 
| 206 | 
             
            #--label_plan 2-voicemail \
         | 
| 207 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 208 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
         | 
| 209 | 
             
            #
         | 
| 210 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch32-cnn \
         | 
| 211 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
         | 
| 212 | 
             
            #--label_plan 2-voicemail \
         | 
| 213 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 214 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
         | 
| 215 | 
             
            #
         | 
| 216 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch32-cnn \
         | 
| 217 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
         | 
| 218 | 
             
            #--label_plan 2-voicemail \
         | 
| 219 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 220 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
         | 
| 221 | 
             
            #
         | 
| 222 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-pe-2-ch32-cnn \
         | 
| 223 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-PE/wav_finished/*/*.wav" \
         | 
| 224 | 
             
            #--label_plan 2-voicemail \
         | 
| 225 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 226 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
         | 
| 227 | 
             
            #
         | 
| 228 | 
            +
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch32-cnn \
         | 
| 229 | 
             
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
         | 
| 230 | 
             
            --label_plan 2-voicemail \
         | 
| 231 | 
             
            --config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 232 | 
            +
            --pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
         | 
| 233 |  | 
| 234 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch32-cnn \
         | 
| 235 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
         | 
| 236 | 
             
            #--label_plan 2-voicemail \
         | 
| 237 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 238 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
         | 
| 239 | 
             
            #
         | 
| 240 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ko-kr-2-ch32-cnn \
         | 
| 241 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ko-KR/wav_finished/*/*.wav" \
         | 
| 242 | 
             
            #--label_plan 2-voicemail \
         | 
| 243 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 244 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
         | 
| 245 | 
             
            #
         | 
| 246 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32-cnn \
         | 
| 247 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
         | 
| 248 | 
             
            #--label_plan 2-voicemail \
         | 
| 249 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 250 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
         | 
| 251 | 
             
            #
         | 
| 252 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-pt-br-2-ch32-cnn \
         | 
| 253 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/pt-BR/wav_finished/*/*.wav" \
         | 
| 254 | 
             
            #--label_plan 2-voicemail \
         | 
| 255 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 256 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
         | 
| 257 | 
             
            #
         | 
| 258 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-th-th-2-ch32-cnn \
         | 
| 259 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" \
         | 
| 260 | 
             
            #--label_plan 2-voicemail \
         | 
| 261 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 262 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
         | 
| 263 | 
             
            #
         | 
| 264 | 
            +
            #sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-zh-tw-2-ch32-cnn \
         | 
| 265 | 
             
            #--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/zh-TW/wav_finished/*/*.wav" \
         | 
| 266 | 
             
            #--label_plan 2-voicemail \
         | 
| 267 | 
             
            #--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
         | 
| 268 | 
            +
            #--pretrained_model "/data/tianxing/PycharmProjects/cc_audio_8/trained_models/voicemail-2-ch32-cnn.zip"
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/step_1_prepare_data.py
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/step_2_make_vocabulary.py
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/step_3_train_model.py
    RENAMED
    
    | @@ -50,7 +50,7 @@ def get_args(): | |
| 50 | 
             
                parser.add_argument("--config_file", default="conv2d_classifier.yaml", type=str)
         | 
| 51 | 
             
                parser.add_argument(
         | 
| 52 | 
             
                    "--pretrained_model",
         | 
| 53 | 
            -
                    # default=(project_path / "trained_models/voicemail-en-sg-2-ch4.zip").as_posix(),
         | 
| 54 | 
             
                    default="null",
         | 
| 55 | 
             
                    type=str
         | 
| 56 | 
             
                )
         | 
|  | |
| 50 | 
             
                parser.add_argument("--config_file", default="conv2d_classifier.yaml", type=str)
         | 
| 51 | 
             
                parser.add_argument(
         | 
| 52 | 
             
                    "--pretrained_model",
         | 
| 53 | 
            +
                    # default=(project_path / "trained_models/voicemail-en-sg-2-ch4-cnn.zip").as_posix(),
         | 
| 54 | 
             
                    default="null",
         | 
| 55 | 
             
                    type=str
         | 
| 56 | 
             
                )
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/step_4_evaluation_model.py
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/step_5_export_models.py
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/step_6_infer.py
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/step_7_test_model.py
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/stop.sh
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch16.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch32.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch4.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-2-ch8.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch16.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch32.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch4.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-3-ch8.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch16.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch32.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch4.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-4-ch8.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch16.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch32.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch4.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification → sound_classification_by_cnn}/yaml/conv2d-classifier-8-ch8.yaml
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/requirements.txt
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/run.sh
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_1_prepare_data.py
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_2_make_vocabulary.py
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_3_train_global_model.py
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_4_train_country_model.py
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/step_5_train_union.py
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/{vm_sound_classification8 → sound_classification_by_cnn_union}/stop.sh
    RENAMED
    
    | 
            File without changes
         | 
    	
        examples/sound_classification_by_lstm/run.sh
    ADDED
    
    | @@ -0,0 +1,197 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/usr/bin/env bash
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            : <<'END'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32-lstm \
         | 
| 6 | 
            +
            --filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
         | 
| 7 | 
            +
            E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
         | 
| 8 | 
            +
            --label_plan 4
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32-lstm \
         | 
| 11 | 
            +
            --filename_patterns "E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
         | 
| 12 | 
            +
            E:/Users/tianx/HuggingDatasets/cc_audio_8/data/wav_finished/id-ID/wav_finished/*/*.wav" \
         | 
| 13 | 
            +
            --label_plan 4
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32-lstm \
         | 
| 16 | 
            +
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
         | 
| 17 | 
            +
            --label_plan 3 \
         | 
| 18 | 
            +
            --config_file "yaml/lstm_classifier-3-ch64.yaml"
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32-lstm \
         | 
| 21 | 
            +
            --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
         | 
| 22 | 
            +
            --label_plan 2-voicemail \
         | 
| 23 | 
            +
            --config_file "yaml/lstm_classifier-2-ch64.yaml"
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            END
         | 
| 26 | 
            +
             | 
| 27 | 
            +
             | 
| 28 | 
            +
            # params
         | 
| 29 | 
            +
            system_version="windows";
         | 
| 30 | 
            +
            verbose=true;
         | 
| 31 | 
            +
            stage=0 # start from 0 if you need to start from data preparation
         | 
| 32 | 
            +
            stop_stage=9
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            work_dir="$(pwd)"
         | 
| 35 | 
            +
            file_folder_name=file_folder_name
         | 
| 36 | 
            +
            final_model_name=final_model_name
         | 
| 37 | 
            +
            filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
         | 
| 38 | 
            +
            label_plan=4
         | 
| 39 | 
            +
            config_file="yaml/lstm_classifier-4-ch64.yaml"
         | 
| 40 | 
            +
            pretrained_model=null
         | 
| 41 | 
            +
            nohup_name=nohup.out
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            country=en-US
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            # model params
         | 
| 46 | 
            +
            batch_size=64
         | 
| 47 | 
            +
            max_epochs=200
         | 
| 48 | 
            +
            save_top_k=10
         | 
| 49 | 
            +
            patience=5
         | 
| 50 | 
            +
             | 
| 51 | 
            +
             | 
# parse options
#
# Kaldi-style "--name value" parser. Each "--some-name" is mapped to the shell
# variable "some_name" (dashes become underscores). The variable must already
# be set earlier in this script, otherwise the option is rejected. Parsing
# stops at the first argument that does not start with "--".
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
      # Reject options whose variable has never been set.
      eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;

      # Every option consumes exactly one value. Without this guard a trailing
      # "--name" leaves "shift 2" with nothing to shift.
      if [ "$#" -lt 2 ]; then
        echo "$0: option $1 requires a value" 1>&2
        exit 1;
      fi

      # Read the variable's current value so that Boolean options can be
      # recognised. (The previous code assigned a literal string here because
      # the command substitution was missing its "$", so was_bool was always
      # false and the Boolean check below never fired.)
      eval "old_value=\"\${${name}}\"";
      if [ "${old_value}" = "true" ] || [ "${old_value}" = "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      # NOTE(review): the value is passed through eval, so it must come from a
      # trusted caller.
      eval "${name}=\"$2\"";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [ "$2" != "true" ] && [ "$2" != "false" ]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;

    *) break;
  esac
done
| 80 | 
            +
             | 
| 81 | 
            +
            file_dir="${work_dir}/${file_folder_name}"
         | 
| 82 | 
            +
            final_model_dir="${work_dir}/../../trained_models/${final_model_name}";
         | 
| 83 | 
            +
             | 
| 84 | 
            +
            dataset="${file_dir}/dataset.xlsx"
         | 
| 85 | 
            +
            train_dataset="${file_dir}/train.xlsx"
         | 
| 86 | 
            +
            valid_dataset="${file_dir}/valid.xlsx"
         | 
| 87 | 
            +
            evaluation_file="${file_dir}/evaluation.xlsx"
         | 
| 88 | 
            +
            vocabulary_dir="${file_dir}/vocabulary"
         | 
| 89 | 
            +
             | 
| 90 | 
            +
            $verbose && echo "system_version: ${system_version}"
         | 
| 91 | 
            +
            $verbose && echo "file_folder_name: ${file_folder_name}"
         | 
| 92 | 
            +
             | 
# Bind "python3" to the project's virtualenv interpreter for the selected
# platform. A shell function is used instead of an alias because Bash only
# expands aliases in non-interactive shells when expand_aliases is set (or in
# POSIX mode), so the alias was silently ignored when the script was started
# with "bash run.sh" or "./run.sh". Any other system_version leaves python3
# resolved through PATH, as before.
case "${system_version}" in
  windows)
    python3() { 'D:/Users/tianx/PycharmProjects/virtualenv/cc_audio_8/Scripts/python.exe' "$@"; }
    ;;
  centos|ubuntu)
    #source /data/local/bin/cc_audio_8/bin/activate
    python3() { '/data/local/bin/cc_audio_8/bin/python3' "$@"; }
    ;;
esac
| 99 | 
            +
             | 
| 100 | 
            +
             | 
| 101 | 
            +
            if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
         | 
| 102 | 
            +
              $verbose && echo "stage 0: prepare data"
         | 
| 103 | 
            +
              cd "${work_dir}" || exit 1
         | 
| 104 | 
            +
              python3 step_1_prepare_data.py \
         | 
| 105 | 
            +
              --file_dir "${file_dir}" \
         | 
| 106 | 
            +
              --filename_patterns "${filename_patterns}" \
         | 
| 107 | 
            +
              --train_dataset "${train_dataset}" \
         | 
| 108 | 
            +
              --valid_dataset "${valid_dataset}" \
         | 
| 109 | 
            +
              --label_plan "${label_plan}" \
         | 
| 110 | 
            +
             | 
| 111 | 
            +
            fi
         | 
| 112 | 
            +
             | 
| 113 | 
            +
             | 
| 114 | 
            +
            if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         | 
| 115 | 
            +
              $verbose && echo "stage 1: make vocabulary"
         | 
| 116 | 
            +
              cd "${work_dir}" || exit 1
         | 
| 117 | 
            +
              python3 step_2_make_vocabulary.py \
         | 
| 118 | 
            +
              --vocabulary_dir "${vocabulary_dir}" \
         | 
| 119 | 
            +
              --train_dataset "${train_dataset}" \
         | 
| 120 | 
            +
              --valid_dataset "${valid_dataset}" \
         | 
| 121 | 
            +
             | 
| 122 | 
            +
            fi
         | 
| 123 | 
            +
             | 
| 124 | 
            +
             | 
| 125 | 
            +
            if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
         | 
| 126 | 
            +
              $verbose && echo "stage 2: train model"
         | 
| 127 | 
            +
              cd "${work_dir}" || exit 1
         | 
| 128 | 
            +
              python3 step_3_train_model.py \
         | 
| 129 | 
            +
              --vocabulary_dir "${vocabulary_dir}" \
         | 
| 130 | 
            +
              --train_dataset "${train_dataset}" \
         | 
| 131 | 
            +
              --valid_dataset "${valid_dataset}" \
         | 
| 132 | 
            +
              --serialization_dir "${file_dir}" \
         | 
| 133 | 
            +
              --config_file "${config_file}" \
         | 
| 134 | 
            +
              --pretrained_model "${pretrained_model}" \
         | 
| 135 | 
            +
             | 
| 136 | 
            +
            fi
         | 
| 137 | 
            +
             | 
| 138 | 
            +
             | 
| 139 | 
            +
            if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
         | 
| 140 | 
            +
              $verbose && echo "stage 3: test model"
         | 
| 141 | 
            +
              cd "${work_dir}" || exit 1
         | 
| 142 | 
            +
              python3 step_4_evaluation_model.py \
         | 
| 143 | 
            +
              --dataset "${dataset}" \
         | 
| 144 | 
            +
              --vocabulary_dir "${vocabulary_dir}" \
         | 
| 145 | 
            +
              --model_dir "${file_dir}/best" \
         | 
| 146 | 
            +
              --output_file "${evaluation_file}" \
         | 
| 147 | 
            +
             | 
| 148 | 
            +
            fi
         | 
| 149 | 
            +
             | 
| 150 | 
            +
             | 
| 151 | 
            +
            if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
         | 
| 152 | 
            +
              $verbose && echo "stage 4: export model"
         | 
| 153 | 
            +
              cd "${work_dir}" || exit 1
         | 
| 154 | 
            +
              python3 step_5_export_models.py \
         | 
| 155 | 
            +
              --vocabulary_dir "${vocabulary_dir}" \
         | 
| 156 | 
            +
              --model_dir "${file_dir}/best" \
         | 
| 157 | 
            +
              --serialization_dir "${file_dir}" \
         | 
| 158 | 
            +
             | 
| 159 | 
            +
            fi
         | 
| 160 | 
            +
             | 
| 161 | 
            +
             | 
| 162 | 
            +
            if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
         | 
| 163 | 
            +
              $verbose && echo "stage 5: collect files"
         | 
| 164 | 
            +
              cd "${work_dir}" || exit 1
         | 
| 165 | 
            +
             | 
| 166 | 
            +
              mkdir -p ${final_model_dir}
         | 
| 167 | 
            +
             | 
| 168 | 
            +
              cp "${file_dir}/best"/* "${final_model_dir}"
         | 
| 169 | 
            +
              cp -r "${file_dir}/vocabulary" "${final_model_dir}"
         | 
| 170 | 
            +
             | 
| 171 | 
            +
              cp "${file_dir}/evaluation.xlsx" "${final_model_dir}/evaluation.xlsx"
         | 
| 172 | 
            +
             | 
| 173 | 
            +
              cp "${file_dir}/trace_model.zip" "${final_model_dir}/trace_model.zip"
         | 
| 174 | 
            +
              cp "${file_dir}/trace_quant_model.zip" "${final_model_dir}/trace_quant_model.zip"
         | 
| 175 | 
            +
              cp "${file_dir}/script_model.zip" "${final_model_dir}/script_model.zip"
         | 
| 176 | 
            +
              cp "${file_dir}/script_quant_model.zip" "${final_model_dir}/script_quant_model.zip"
         | 
| 177 | 
            +
             | 
| 178 | 
            +
              cd "${final_model_dir}/.." || exit 1;
         | 
| 179 | 
            +
             | 
| 180 | 
            +
              if [ -e "${final_model_name}.zip" ]; then
         | 
| 181 | 
            +
                rm -rf "${final_model_name}_backup.zip"
         | 
| 182 | 
            +
                mv "${final_model_name}.zip" "${final_model_name}_backup.zip"
         | 
| 183 | 
            +
              fi
         | 
| 184 | 
            +
             | 
| 185 | 
            +
              zip -r "${final_model_name}.zip" "${final_model_name}"
         | 
| 186 | 
            +
              rm -rf "${final_model_name}"
         | 
| 187 | 
            +
             | 
| 188 | 
            +
            fi
         | 
| 189 | 
            +
             | 
| 190 | 
            +
             | 
| 191 | 
            +
            if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
         | 
| 192 | 
            +
              $verbose && echo "stage 6: clear file_dir"
         | 
| 193 | 
            +
              cd "${work_dir}" || exit 1
         | 
| 194 | 
            +
             | 
| 195 | 
            +
              rm -rf "${file_dir}";
         | 
| 196 | 
            +
             | 
| 197 | 
            +
            fi
         | 
    	
        examples/sound_classification_by_lstm/step_1_prepare_data.py
    ADDED
    
    | @@ -0,0 +1,193 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/usr/bin/python3
         | 
| 2 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 3 | 
            +
            import argparse
         | 
| 4 | 
            +
            from glob import glob
         | 
| 5 | 
            +
            import os
         | 
| 6 | 
            +
            from pathlib import Path
         | 
| 7 | 
            +
            import random
         | 
| 8 | 
            +
            import sys
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            pwd = os.path.abspath(os.path.dirname(__file__))
         | 
| 11 | 
            +
            sys.path.append(os.path.join(pwd, "../../"))
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            import pandas as pd
         | 
| 14 | 
            +
            from scipy.io import wavfile
         | 
| 15 | 
            +
            from tqdm import tqdm
         | 
| 16 | 
            +
             | 
| 17 | 
            +
             | 
| 18 | 
            +
            def get_args():
                """Parse the command-line options of the data preparation step.

                Returns:
                    argparse.Namespace carrying ``file_dir``, ``filename_patterns``,
                    ``train_dataset``, ``valid_dataset`` and ``label_plan``.
                """
                arg_parser = argparse.ArgumentParser()

                # (flag, keyword arguments) pairs, registered in declaration order.
                option_specs = (
                    ("--file_dir", {"default": "./", "type": str}),
                    ("--filename_patterns", {"type": str}),
                    ("--train_dataset", {"default": "train.xlsx", "type": str}),
                    ("--valid_dataset", {"default": "valid.xlsx", "type": str}),
                    ("--label_plan", {"default": "4", "type": str}),
                )
                for flag, options in option_specs:
                    arg_parser.add_argument(flag, **options)

                return arg_parser.parse_args()
         | 
| 30 | 
            +
             | 
| 31 | 
            +
             | 
| 32 | 
            +
            # Folder name -> training label, for every supported ``--label_plan``.
            # A folder that is missing from a plan (e.g. "music" in plans
            # "2-voicemail", "3" and "4") is skipped by get_dataset.
            _LABEL_PLANS = {
                "2-voicemail": {
                    "bell": "voicemail",
                    "white_noise": "non_voicemail",
                    "low_white_noise": "non_voicemail",
                    "high_white_noise": "non_voicemail",
                    "mute": "non_voicemail",
                    "noise": "non_voicemail",
                    "noise_mute": "non_voicemail",
                    "voice": "non_voicemail",
                    "voicemail": "voicemail",
                },
                "2": {
                    "bell": "non_voice",
                    "white_noise": "non_voice",
                    "low_white_noise": "non_voice",
                    "high_white_noise": "non_voice",
                    "music": "non_voice",
                    "mute": "non_voice",
                    "noise": "non_voice",
                    "noise_mute": "non_voice",
                    "voice": "voice",
                    "voicemail": "voice",
                },
                "3": {
                    "bell": "voicemail",
                    "white_noise": "mute",
                    "low_white_noise": "mute",
                    "high_white_noise": "mute",
                    "mute": "mute",
                    "noise": "voice_or_noise",
                    "noise_mute": "voice_or_noise",
                    "voice": "voice_or_noise",
                    "voicemail": "voicemail",
                },
                "4": {
                    "bell": "voicemail",
                    "white_noise": "mute",
                    "low_white_noise": "mute",
                    "high_white_noise": "mute",
                    "mute": "mute",
                    "noise": "noise",
                    "noise_mute": "noise",
                    "voice": "voice",
                    "voicemail": "voicemail",
                },
                "8": {
                    "bell": "bell",
                    "white_noise": "white_noise",
                    "low_white_noise": "white_noise",
                    "high_white_noise": "white_noise",
                    "music": "music",
                    "mute": "mute",
                    "noise": "noise",
                    "noise_mute": "noise_mute",
                    "voice": "voice",
                    "voicemail": "voicemail",
                },
            }

            # Column order of dataset.xlsx; also used when no sample matched.
            _DATASET_COLUMNS = [
                "filename", "folder", "category", "labels",
                "random1", "random2", "flag",
            ]


            def get_dataset(args):
                """Scan the wav files and write ``<file_dir>/dataset.xlsx``.

                Every file matched by the whitespace-separated glob patterns in
                ``args.filename_patterns`` is read; files shorter than two seconds or
                located in a folder the selected label plan does not map are skipped.
                Each kept file gets a label and a random TRAIN/TEST flag
                (``random2 < 0.8`` means TRAIN).

                Args:
                    args: namespace with ``filename_patterns``, ``file_dir`` and
                        ``label_plan``.

                Raises:
                    AssertionError: if ``args.label_plan`` is not a known plan.
                """
                # split() without an argument tolerates repeated blanks; a missing
                # option yields no patterns instead of an AttributeError.
                filename_patterns = (args.filename_patterns or "").split()
                print(filename_patterns)

                file_dir = Path(args.file_dir)
                file_dir.mkdir(parents=True, exist_ok=True)

                label_map = _LABEL_PLANS.get(args.label_plan)
                if label_map is None:
                    raise AssertionError(
                        "unknown label_plan: {!r}; expected one of {}".format(
                            args.label_plan, sorted(_LABEL_PLANS)
                        )
                    )

                result = list()
                for filename_pattern in filename_patterns:
                    for filename in tqdm(glob(filename_pattern)):
                        filename = Path(filename)
                        sample_rate, signal = wavfile.read(filename.as_posix())
                        # Skip clips shorter than two seconds.
                        if len(signal) < sample_rate * 2:
                            continue

                        # NOTE(review): the path layout is assumed to be
                        # .../<country>/<?>/<folder>/<file>.wav; a shallower path
                        # raises IndexError here - confirm against the data layout.
                        folder = filename.parts[-2]
                        country = filename.parts[-4]

                        if folder not in label_map:
                            continue

                        random1 = random.random()
                        random2 = random.random()

                        result.append({
                            # Stored as a string: Path objects are not a cell type
                            # every Excel writer engine accepts.
                            "filename": filename.as_posix(),
                            "folder": folder,
                            "category": country,
                            "labels": label_map[folder],
                            "random1": random1,
                            "random2": random2,
                            "flag": "TRAIN" if random2 < 0.8 else "TEST",
                        })

                # Explicit columns keep the frame well-formed when nothing matched.
                df = pd.DataFrame(result, columns=_DATASET_COLUMNS)
                if df.empty:
                    print("no samples matched the filename patterns")
                else:
                    pivot_table = pd.pivot_table(df, index=["labels"], values=["filename"], aggfunc="count")
                    print(pivot_table)

                # random1 only serves to shuffle the row order.
                df = df.sort_values(by=["random1"], ascending=False)
                df.to_excel(
                    file_dir / "dataset.xlsx",
                    index=False,
                )

                return
         | 
| 150 | 
            +
             | 
| 151 | 
            +
             | 
| 152 | 
            +
            def split_dataset(args):
                """Split ``dataset.xlsx`` into the training set and the test set.

                Rows whose ``flag`` column equals ``"TRAIN"`` are written to
                ``args.train_dataset``; all other rows go to ``args.valid_dataset``.

                Args:
                    args: namespace with ``file_dir``, ``train_dataset`` and
                        ``valid_dataset``.
                """
                file_dir = Path(args.file_dir)
                file_dir.mkdir(parents=True, exist_ok=True)

                df = pd.read_excel(file_dir / "dataset.xlsx")

                if df.empty:
                    # Nothing to split; both outputs keep whatever header exists.
                    train, test = df, df
                else:
                    # A boolean mask keeps the column header even when one side
                    # of the split ends up empty.
                    is_train = df["flag"] == "TRAIN"
                    train, test = df[is_train], df[~is_train]

                train.to_excel(
                    args.train_dataset,
                    index=False,
                )
                test.to_excel(
                    args.valid_dataset,
                    index=False,
                )

                return
         | 
| 183 | 
            +
             | 
| 184 | 
            +
             | 
| 185 | 
            +
            def main():
                """Entry point: build ``dataset.xlsx``, then split it into train/valid files."""
                cli_args = get_args()
                # The split step reads the workbook the first step writes.
                for step in (get_dataset, split_dataset):
                    step(cli_args)
         | 
| 190 | 
            +
             | 
| 191 | 
            +
             | 
| 192 | 
            +
            if __name__ == "__main__":
         | 
| 193 | 
            +
                main()
         | 
    	
        examples/sound_classification_by_lstm/step_2_make_vocabulary.py
    ADDED
    
    | @@ -0,0 +1,50 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/usr/bin/python3
         | 
| 2 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 3 | 
            +
            import argparse
         | 
| 4 | 
            +
            import os
         | 
| 5 | 
            +
            import sys
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            pwd = os.path.abspath(os.path.dirname(__file__))
         | 
| 8 | 
            +
            sys.path.append(os.path.join(pwd, "../../"))
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            import pandas as pd
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            from toolbox.torch.utils.data.vocabulary import Vocabulary
         | 
| 13 | 
            +
             | 
| 14 | 
            +
             | 
| 15 | 
            +
            def get_args():
                """Parse the command-line options of the vocabulary step.

                Returns:
                    argparse.Namespace carrying ``vocabulary_dir``, ``train_dataset``
                    and ``valid_dataset``.
                """
                arg_parser = argparse.ArgumentParser()

                # Every option is a string with a default; register them in order.
                defaults = (
                    ("--vocabulary_dir", "vocabulary"),
                    ("--train_dataset", "train.xlsx"),
                    ("--valid_dataset", "valid.xlsx"),
                )
                for flag, default_value in defaults:
                    arg_parser.add_argument(flag, default=default_value, type=str)

                return arg_parser.parse_args()
         | 
| 24 | 
            +
             | 
| 25 | 
            +
             | 
| 26 | 
            +
            def main():
                """Collect the labels of both datasets into a vocabulary and save it.

                The ``labels`` column of the training workbook is added first, then
                that of the validation workbook, all under the ``labels`` namespace.
                The vocabulary is written to ``args.vocabulary_dir``.
                """
                cli_args = get_args()

                train_frame = pd.read_excel(cli_args.train_dataset)
                valid_frame = pd.read_excel(cli_args.valid_dataset)

                vocab = Vocabulary()

                # Training rows first, validation rows second.
                for frame in (train_frame, valid_frame):
                    for _, record in frame.iterrows():
                        vocab.add_token_to_namespace(record["labels"], namespace="labels")

                vocab.save_to_files(cli_args.vocabulary_dir)
         | 
| 47 | 
            +
             | 
| 48 | 
            +
             | 
| 49 | 
            +
            if __name__ == "__main__":
         | 
| 50 | 
            +
                main()
         | 
    	
        examples/sound_classification_by_lstm/step_3_train_model.py
    ADDED
    
    | @@ -0,0 +1,367 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/usr/bin/python3
         | 
| 2 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 3 | 
            +
            import argparse
         | 
| 4 | 
            +
            from collections import defaultdict
         | 
| 5 | 
            +
            import json
         | 
| 6 | 
            +
            import logging
         | 
| 7 | 
            +
            from logging.handlers import TimedRotatingFileHandler
         | 
| 8 | 
            +
            import os
         | 
| 9 | 
            +
            import platform
         | 
| 10 | 
            +
            from pathlib import Path
         | 
| 11 | 
            +
            import random
         | 
| 12 | 
            +
            import sys
         | 
| 13 | 
            +
            import shutil
         | 
| 14 | 
            +
            import tempfile
         | 
| 15 | 
            +
            from typing import List
         | 
| 16 | 
            +
            import zipfile
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            pwd = os.path.abspath(os.path.dirname(__file__))
         | 
| 19 | 
            +
            sys.path.append(os.path.join(pwd, "../../"))
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            import numpy as np
         | 
| 22 | 
            +
            import torch
         | 
| 23 | 
            +
            from torch.utils.data.dataloader import DataLoader
         | 
| 24 | 
            +
            from tqdm import tqdm
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            from toolbox.torch.modules.loss import FocalLoss, HingeLoss, HingeLinear
         | 
| 27 | 
            +
            from toolbox.torch.training.metrics.categorical_accuracy import CategoricalAccuracy
         | 
| 28 | 
            +
            from toolbox.torch.utils.data.vocabulary import Vocabulary
         | 
| 29 | 
            +
            from toolbox.torch.utils.data.dataset.wave_classifier_excel_dataset import WaveClassifierExcelDataset
         | 
| 30 | 
            +
            from toolbox.torchaudio.models.lstm_audio_classifier.modeling_lstm_audio_classifier import LSTMClassifierPretrainedModel
         | 
| 31 | 
            +
            from toolbox.torchaudio.models.lstm_audio_classifier.configuration_lstm_audio_classifier import LSTMClassifierConfig
         | 
| 32 | 
            +
             | 
| 33 | 
            +
             | 
| 34 | 
            +
            def get_args():
                """Parse the command-line options of the training step.

                Returns:
                    argparse.Namespace carrying the dataset paths, the optimisation
                    hyper-parameters, the serialization directory, the seed, the
                    config file and the pretrained model path.
                """
                arg_parser = argparse.ArgumentParser()

                # (flag, default, type) triples, registered in declaration order.
                option_specs = (
                    ("--vocabulary_dir", "vocabulary", str),
                    ("--train_dataset", "train.xlsx", str),
                    ("--valid_dataset", "valid.xlsx", str),
                    ("--max_epochs", 100, int),
                    ("--batch_size", 64, int),
                    ("--learning_rate", 1e-3, float),
                    ("--num_serialized_models_to_keep", 10, int),
                    ("--patience", 5, int),
                    ("--serialization_dir", "serialization_dir", str),
                    ("--seed", 0, int),
                    ("--config_file", "conv2d_classifier.yaml", str),
                    # The default is the literal string "null", not None.
                    ("--pretrained_model", "null", str),
                )
                for flag, default_value, value_type in option_specs:
                    arg_parser.add_argument(flag, default=default_value, type=value_type)

                return arg_parser.parse_args()
         | 
| 60 | 
            +
             | 
| 61 | 
            +
             | 
| 62 | 
            +
            def logging_config(file_dir: str):
                """
                Configure root logging and attach a daily-rotating file handler.

                The root logger is configured through ``logging.basicConfig`` at DEBUG
                level. This module's logger additionally writes records of level INFO
                and above to ``<file_dir>/main.log``, rotated once per day with seven
                backups kept.

                Calling the function again with the same directory reuses the handler
                that is already attached instead of adding a second one, which would
                write every record to the file twice.

                :param file_dir: directory that receives ``main.log``; anything accepted
                    by ``os.path.join`` works (``str`` or ``pathlib.Path``).
                :return: the logger named after this module.
                """
                fmt = "%(asctime)s - %(name)s - %(levelname)s  %(filename)s:%(lineno)d >  %(message)s"

                logging.basicConfig(format=fmt,
                                    datefmt="%m/%d/%Y %H:%M:%S",
                                    level=logging.DEBUG)

                logger = logging.getLogger(__name__)

                # FileHandler stores the absolute path in ``baseFilename``; normalise
                # the same way so the duplicate check below compares like with like.
                log_file = os.path.abspath(os.path.join(file_dir, "main.log"))
                for handler in logger.handlers:
                    if isinstance(handler, TimedRotatingFileHandler) and handler.baseFilename == log_file:
                        return logger

                file_handler = TimedRotatingFileHandler(
                    filename=log_file,
                    encoding="utf-8",
                    when="D",
                    interval=1,
                    backupCount=7
                )
                file_handler.setLevel(logging.INFO)
                file_handler.setFormatter(logging.Formatter(fmt))
                logger.addHandler(file_handler)

                return logger
         | 
| 81 | 
            +
             | 
| 82 | 
            +
             | 
| 83 | 
            +
            class CollateFunction(object):
                """
                Collate dataset samples into fixed-length waveform and label batches.

                Each sample is a dict with a ``"waveform"`` and a ``"label"`` entry.
                Waveforms shorter than ``num_samples`` are zero-padded at the end and
                longer ones are truncated, so all of them can be stacked.

                The waveform is assumed to be one-dimensional (its length is taken from
                the first axis) -- confirm against the dataset implementation.
                """
                def __init__(self, num_samples: int = 16000):
                    """
                    :param num_samples: number of samples every waveform is padded or
                        truncated to. Defaults to 16000, the previously hard-coded value.
                    """
                    self.num_samples = num_samples

                def __call__(self, batch: List[dict]):
                    """
                    :param batch: list of sample dicts with ``"waveform"`` and ``"label"``.
                    :return: ``(waveforms, labels)`` tensors stacked along a new first axis.
                    """
                    waveform_list = list()
                    label_list = list()
                    for sample in batch:
                        # as_tensor is a no-op for tensors and converts numpy arrays or
                        # Python scalars, so torch.stack below always receives tensors.
                        waveform = torch.as_tensor(sample["waveform"])
                        label = torch.as_tensor(sample["label"])

                        length = waveform.shape[0]
                        if length < self.num_samples:
                            # Pad with torch rather than numpy: np.concatenate returns an
                            # ndarray, which torch.stack rejects with a TypeError.
                            padding = waveform.new_zeros(self.num_samples - length)
                            waveform = torch.cat([waveform, padding], dim=-1)
                        elif length > self.num_samples:
                            waveform = waveform[:self.num_samples]

                        waveform_list.append(waveform)
                        label_list.append(label)

                    waveforms = torch.stack(waveform_list)
                    labels = torch.stack(label_list)
                    return waveforms, labels
         | 
| 107 | 
            +
             | 
| 108 | 
            +
             | 
| 109 | 
            +
            # Module-level collator instance; main() passes it to both the training and
            # the validation DataLoader.
            collate_fn = CollateFunction()
         | 
| 110 | 
            +
             | 
| 111 | 
            +
             | 
| 112 | 
            +
            def main():
                """
                Train the LSTM audio classifier described by ``--config_file``.

                The function parses the command line, prepares the serialization
                directory and logging, seeds the random number generators, loads the
                vocabulary and the train/valid Excel datasets, builds the model
                (optionally initialised from the ``model.pt`` inside the
                ``--pretrained_model`` zip archive) and then alternates one training
                pass and one evaluation pass per epoch.

                After every epoch the model and a ``metrics_epoch.json`` file are written
                to ``<serialization_dir>/epoch-<idx>``. The epoch with the highest
                evaluation accuracy so far is copied to ``<serialization_dir>/best``, and
                at most ``--num_serialized_models_to_keep`` epoch directories are kept.
                Training stops early after ``--patience`` consecutive epochs without a
                new best evaluation accuracy.

                :raises AssertionError: if ``num_labels`` in the config does not match
                    the size of the ``labels`` vocabulary namespace.
                """
                args = get_args()

                serialization_dir = Path(args.serialization_dir)
                serialization_dir.mkdir(parents=True, exist_ok=True)

                logger = logging_config(serialization_dir)

                random.seed(args.seed)
                np.random.seed(args.seed)
                torch.manual_seed(args.seed)
                logger.info("set seed: {}".format(args.seed))

                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                n_gpu = torch.cuda.device_count()
                logger.info("GPU available count: {}; device: {}".format(n_gpu, device))

                vocabulary = Vocabulary.from_files(args.vocabulary_dir)

                # datasets
                logger.info("prepare datasets")
                train_dataset = WaveClassifierExcelDataset(
                    vocab=vocabulary,
                    excel_file=args.train_dataset,
                    category=None,
                    category_field="category",
                    label_field="labels",
                    expected_sample_rate=8000,
                    max_wave_value=32768.0,
                )
                valid_dataset = WaveClassifierExcelDataset(
                    vocab=vocabulary,
                    excel_file=args.valid_dataset,
                    category=None,
                    category_field="category",
                    label_field="labels",
                    expected_sample_rate=8000,
                    max_wave_value=32768.0,
                )

                # Multiple worker subprocesses can be used for data loading on Linux,
                # but not on Windows. os.cpu_count() may return None, hence the "or 0".
                num_workers = 0 if platform.system() == "Windows" else (os.cpu_count() or 0) // 2

                train_data_loader = DataLoader(
                    dataset=train_dataset,
                    batch_size=args.batch_size,
                    shuffle=True,
                    num_workers=num_workers,
                    collate_fn=collate_fn,
                    pin_memory=False,
                    # prefetch_factor=64,
                )
                valid_data_loader = DataLoader(
                    dataset=valid_dataset,
                    batch_size=args.batch_size,
                    shuffle=True,
                    num_workers=num_workers,
                    collate_fn=collate_fn,
                    pin_memory=False,
                    # prefetch_factor=64,
                )

                # models
                logger.info(f"prepare models. config_file: {args.config_file}")
                config = LSTMClassifierConfig.from_pretrained(
                    pretrained_model_name_or_path=args.config_file,
                    # num_labels=vocabulary.get_vocab_size(namespace="labels")
                )
                if not config.cls_head_param["num_labels"] == vocabulary.get_vocab_size(namespace="labels"):
                    raise AssertionError("expected num labels: {} instead of {}.".format(
                        vocabulary.get_vocab_size(namespace="labels"),
                        config.cls_head_param["num_labels"],
                    ))
                model = LSTMClassifierPretrainedModel(
                    config=config,
                )

                # The default value of --pretrained_model is the string "null", which
                # normally does not exist on disk, so this branch is skipped by default.
                if args.pretrained_model is not None and os.path.exists(args.pretrained_model):
                    logger.info(f"load pretrained model state dict from: {args.pretrained_model}")
                    pretrained_model = Path(args.pretrained_model)
                    with zipfile.ZipFile(pretrained_model.as_posix(), "r") as f_zip:
                        out_root = Path(tempfile.gettempdir()) / "cc_audio_8"
                        # start from an empty extraction directory
                        if out_root.exists():
                            shutil.rmtree(out_root.as_posix())
                        out_root.mkdir(parents=True, exist_ok=True)
                        f_zip.extractall(path=out_root)

                    # the archive is expected to contain "<archive stem>/model.pt"
                    tgt_path = out_root / pretrained_model.stem
                    model_pt_file = tgt_path / "model.pt"
                    # NOTE: torch.load unpickles the file; only load archives from a
                    # trusted source.
                    with open(model_pt_file, "rb") as f:
                        state_dict = torch.load(f, map_location="cpu")
                    model.load_state_dict(state_dict=state_dict)

                model.to(device)
                model.train()

                # optimizer
                logger.info("prepare optimizer, lr_scheduler, loss_fn, categorical_accuracy")
                param_optimizer = model.parameters()
                optimizer = torch.optim.Adam(
                    param_optimizer,
                    lr=args.learning_rate,
                )
                # The scheduler is stepped once per batch (see the training loop), so
                # the milestones below count optimizer steps, not epochs.
                lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
                    optimizer,
                    milestones=[10000, 20000, 30000, 40000, 50000], gamma=0.5
                )
                focal_loss = FocalLoss(
                    num_classes=vocabulary.get_vocab_size(namespace="labels"),
                    reduction="mean",
                )
                categorical_accuracy = CategoricalAccuracy()

                # training loop
                logger.info("training")

                training_loss = 10000000000
                training_accuracy = 0.
                evaluation_loss = 10000000000
                evaluation_accuracy = 0.

                model_list = list()
                best_idx_epoch = None
                best_accuracy = None
                patience_count = 0

                for idx_epoch in range(args.max_epochs):
                    # ---- training pass ----
                    # Re-enable training mode: the evaluation pass of the previous
                    # epoch switched the model to eval mode.
                    model.train()
                    categorical_accuracy.reset()
                    total_loss = 0.
                    total_examples = 0.
                    progress_bar = tqdm(
                        total=len(train_data_loader),
                        desc="Training; epoch: {}".format(idx_epoch),
                    )
                    for batch in train_data_loader:
                        input_ids, label_ids = batch
                        input_ids = input_ids.to(device)
                        label_ids: torch.LongTensor = label_ids.to(device).long()
                        batch_size = input_ids.size(0)

                        logits = model.forward(input_ids)
                        loss = focal_loss.forward(logits, label_ids.view(-1))
                        categorical_accuracy(logits, label_ids)

                        # The loss is created with reduction="mean", i.e. it is already
                        # averaged over the batch; weight it by the batch size so that
                        # dividing by total_examples gives a per-example mean.
                        total_loss += loss.item() * batch_size
                        total_examples += batch_size

                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        lr_scheduler.step()

                        training_loss = total_loss / total_examples
                        training_loss = round(training_loss, 4)
                        training_accuracy = categorical_accuracy.get_metric()["accuracy"]
                        training_accuracy = round(training_accuracy, 4)

                        progress_bar.update(1)
                        progress_bar.set_postfix({
                            "training_loss": training_loss,
                            "training_accuracy": training_accuracy,
                        })
                    progress_bar.close()

                    # ---- evaluation pass ----
                    # Switch to eval mode so dropout is disabled while measuring the
                    # validation metrics that drive best-model selection and early stop.
                    model.eval()
                    categorical_accuracy.reset()
                    total_loss = 0.
                    total_examples = 0.
                    progress_bar = tqdm(
                        total=len(valid_data_loader),
                        desc="Evaluation; epoch: {}".format(idx_epoch),
                    )
                    for batch in valid_data_loader:
                        input_ids, label_ids = batch
                        input_ids = input_ids.to(device)
                        label_ids: torch.LongTensor = label_ids.to(device).long()
                        batch_size = input_ids.size(0)

                        with torch.no_grad():
                            logits = model.forward(input_ids)
                            loss = focal_loss.forward(logits, label_ids.view(-1))
                            categorical_accuracy(logits, label_ids)

                        # same batch-size weighting as in the training pass
                        total_loss += loss.item() * batch_size
                        total_examples += batch_size

                        evaluation_loss = total_loss / total_examples
                        evaluation_loss = round(evaluation_loss, 4)
                        evaluation_accuracy = categorical_accuracy.get_metric()["accuracy"]
                        evaluation_accuracy = round(evaluation_accuracy, 4)

                        progress_bar.update(1)
                        progress_bar.set_postfix({
                            "evaluation_loss": evaluation_loss,
                            "evaluation_accuracy": evaluation_accuracy,
                        })
                    progress_bar.close()

                    # save path
                    epoch_dir = serialization_dir / "epoch-{}".format(idx_epoch)
                    epoch_dir.mkdir(parents=True, exist_ok=False)

                    # save models
                    model.save_pretrained(epoch_dir.as_posix())
                    model_list.append(epoch_dir)

                    # save metric
                    if best_accuracy is None or evaluation_accuracy > best_accuracy:
                        best_idx_epoch = idx_epoch
                        best_accuracy = evaluation_accuracy

                    metrics = {
                        "idx_epoch": idx_epoch,
                        "best_idx_epoch": best_idx_epoch,
                        "best_accuracy": best_accuracy,
                        "training_loss": training_loss,
                        "training_accuracy": training_accuracy,
                        "evaluation_loss": evaluation_loss,
                        "evaluation_accuracy": evaluation_accuracy,
                        "learning_rate": optimizer.param_groups[0]['lr'],
                    }
                    metrics_filename = epoch_dir / "metrics_epoch.json"
                    with open(metrics_filename, "w", encoding="utf-8") as f:
                        json.dump(metrics, f, indent=4, ensure_ascii=False)

                    # save best
                    best_dir = serialization_dir / "best"
                    if best_idx_epoch == idx_epoch:
                        if best_dir.exists():
                            shutil.rmtree(best_dir)
                        shutil.copytree(epoch_dir, best_dir)

                    # Prune old checkpoints only after the metrics file and the "best"
                    # copy have been written, and only once MORE than the requested
                    # number of epoch directories exist, so exactly that many are kept.
                    if len(model_list) > args.num_serialized_models_to_keep:
                        model_to_delete: Path = model_list.pop(0)
                        shutil.rmtree(model_to_delete.as_posix())

                    # early stop
                    if best_idx_epoch == idx_epoch:
                        patience_count = 0
                    else:
                        patience_count += 1
                    if patience_count >= args.patience:
                        break
                return
         | 
| 364 | 
            +
             | 
| 365 | 
            +
             | 
| 366 | 
            +
            if __name__ == "__main__":
         | 
| 367 | 
            +
                main()
         | 
    	
        examples/sound_classification_by_lstm/yaml/lstm_classifier-4-ch64.yaml
    ADDED
    
    | @@ -0,0 +1,27 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            model_name: "lstm_audio_classifier"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            mel_spectrogram_param:
         | 
| 4 | 
            +
              sample_rate: 8000
         | 
| 5 | 
            +
              n_fft: 512
         | 
| 6 | 
            +
              win_length: 200
         | 
| 7 | 
            +
              hop_length: 80
         | 
| 8 | 
            +
              f_min: 10
         | 
| 9 | 
            +
              f_max: 3800
         | 
| 10 | 
            +
              window_fn: hamming
         | 
| 11 | 
            +
              n_mels: 80
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            lstm_layer_param:
         | 
| 14 | 
            +
              input_size: 80
         | 
| 15 | 
            +
              hidden_size: 64
         | 
| 16 | 
            +
              num_layers: 3
         | 
| 17 | 
            +
              dropout: 0.2
         | 
| 18 | 
            +
              pool_layer: last
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            cls_head_param:
         | 
| 21 | 
            +
              input_dim: 64
         | 
| 22 | 
            +
              num_layers: 1
         | 
| 23 | 
            +
              hidden_dims:
         | 
| 24 | 
            +
                - 32
         | 
| 25 | 
            +
              activations: relu
         | 
| 26 | 
            +
              dropout: 0.1
         | 
| 27 | 
            +
              num_labels: 4
         | 
