lmzjms commited on
Commit
15ac91d
1 Parent(s): a1c3802

Upload 35 files

Browse files
Files changed (35) hide show
  1. audio_detection/__init__.py +0 -0
  2. audio_detection/audio_infer/__init__.py +0 -0
  3. audio_detection/audio_infer/__pycache__/__init__.cpython-38.pyc +0 -0
  4. audio_detection/audio_infer/metadata/black_list/groundtruth_weak_label_evaluation_set.csv +1350 -0
  5. audio_detection/audio_infer/metadata/black_list/groundtruth_weak_label_testing_set.csv +606 -0
  6. audio_detection/audio_infer/metadata/class_labels_indices.csv +528 -0
  7. audio_detection/audio_infer/pytorch/__pycache__/models.cpython-38.pyc +0 -0
  8. audio_detection/audio_infer/pytorch/__pycache__/pytorch_utils.cpython-38.pyc +0 -0
  9. audio_detection/audio_infer/pytorch/evaluate.py +42 -0
  10. audio_detection/audio_infer/pytorch/finetune_template.py +127 -0
  11. audio_detection/audio_infer/pytorch/inference.py +206 -0
  12. audio_detection/audio_infer/pytorch/losses.py +14 -0
  13. audio_detection/audio_infer/pytorch/main.py +378 -0
  14. audio_detection/audio_infer/pytorch/models.py +951 -0
  15. audio_detection/audio_infer/pytorch/pytorch_utils.py +251 -0
  16. audio_detection/audio_infer/results/YDlWd7Wmdi1E.png +0 -0
  17. audio_detection/audio_infer/useful_ckpts/audio_detection.pth +3 -0
  18. audio_detection/audio_infer/utils/__pycache__/config.cpython-38.pyc +0 -0
  19. audio_detection/audio_infer/utils/config.py +94 -0
  20. audio_detection/audio_infer/utils/crash.py +12 -0
  21. audio_detection/audio_infer/utils/create_black_list.py +64 -0
  22. audio_detection/audio_infer/utils/create_indexes.py +126 -0
  23. audio_detection/audio_infer/utils/data_generator.py +421 -0
  24. audio_detection/audio_infer/utils/dataset.py +224 -0
  25. audio_detection/audio_infer/utils/plot_for_paper.py +565 -0
  26. audio_detection/audio_infer/utils/plot_statistics.py +0 -0
  27. audio_detection/audio_infer/utils/utilities.py +172 -0
  28. audio_detection/target_sound_detection/src/__pycache__/models.cpython-38.pyc +0 -0
  29. audio_detection/target_sound_detection/src/__pycache__/utils.cpython-38.pyc +0 -0
  30. audio_detection/target_sound_detection/src/models.py +1288 -0
  31. audio_detection/target_sound_detection/src/utils.py +353 -0
  32. audio_detection/target_sound_detection/useful_ckpts/tsd/ref_mel.pth +3 -0
  33. audio_detection/target_sound_detection/useful_ckpts/tsd/run_config.pth +3 -0
  34. audio_detection/target_sound_detection/useful_ckpts/tsd/run_model_7_loss=-0.0724.pt +3 -0
  35. audio_detection/target_sound_detection/useful_ckpts/tsd/text_emb.pth +3 -0
audio_detection/__init__.py ADDED
File without changes
audio_detection/audio_infer/__init__.py ADDED
File without changes
audio_detection/audio_infer/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (171 Bytes). View file
audio_detection/audio_infer/metadata/black_list/groundtruth_weak_label_evaluation_set.csv ADDED
@@ -0,0 +1,1350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -JMT0mK0Dbg_30.000_40.000.wav 30.000 40.000 Train horn
2
+ 3ACjUf9QpAQ_30.000_40.000.wav 30.000 40.000 Train horn
3
+ 3S2-TODd__k_90.000_100.000.wav 90.000 100.000 Train horn
4
+ 3YJewEC-NWo_30.000_40.000.wav 30.000 40.000 Train horn
5
+ 3jXAh3V2FO8_30.000_40.000.wav 30.000 40.000 Train horn
6
+ 53oq_Otm_XI_30.000_40.000.wav 30.000 40.000 Train horn
7
+ 8IaInXpdd9M_0.000_10.000.wav 0.000 10.000 Train horn
8
+ 8nU1aVscJec_30.000_40.000.wav 30.000 40.000 Train horn
9
+ 9LQEZJPNVpw_30.000_40.000.wav 30.000 40.000 Train horn
10
+ AHom7lBbtoY_30.000_40.000.wav 30.000 40.000 Train horn
11
+ Ag_zT74ZGNc_9.000_19.000.wav 9.000 19.000 Train horn
12
+ BQpa8whzwAE_30.000_40.000.wav 30.000 40.000 Train horn
13
+ CCX_4cW_SAU_0.000_10.000.wav 0.000 10.000 Train horn
14
+ CLIdVCUO_Vw_30.000_40.000.wav 30.000 40.000 Train horn
15
+ D_nXtMgbPNY_30.000_40.000.wav 30.000 40.000 Train horn
16
+ GFQnh84kNwU_30.000_40.000.wav 30.000 40.000 Train horn
17
+ I4qODX0fypE_30.000_40.000.wav 30.000 40.000 Train horn
18
+ IdqEbjujFb8_30.000_40.000.wav 30.000 40.000 Train horn
19
+ L3a132_uApg_50.000_60.000.wav 50.000 60.000 Train horn
20
+ LzcNa3HvD7c_30.000_40.000.wav 30.000 40.000 Train horn
21
+ MCYY8tJsnfY_7.000_17.000.wav 7.000 17.000 Train horn
22
+ MPSf7dJpV5w_30.000_40.000.wav 30.000 40.000 Train horn
23
+ NdCr5IDnkxc_30.000_40.000.wav 30.000 40.000 Train horn
24
+ P54KKbTA_TE_0.000_7.000.wav 0.000 7.000 Train horn
25
+ PJUy17bXlhc_40.000_50.000.wav 40.000 50.000 Train horn
26
+ QrAoRSA13bM_30.000_40.000.wav 30.000 40.000 Train horn
27
+ R_Lpb-51Kl4_30.000_40.000.wav 30.000 40.000 Train horn
28
+ Rq-22Cycrpg_30.000_40.000.wav 30.000 40.000 Train horn
29
+ TBjrN1aMRrM_30.000_40.000.wav 30.000 40.000 Train horn
30
+ XAUtk9lwzU8_30.000_40.000.wav 30.000 40.000 Train horn
31
+ XW8pSKLyr0o_20.000_30.000.wav 20.000 30.000 Train horn
32
+ Y10I9JSvJuQ_30.000_40.000.wav 30.000 40.000 Train horn
33
+ Y_jwEflLthg_190.000_200.000.wav 190.000 200.000 Train horn
34
+ YilfKdY7w6Y_60.000_70.000.wav 60.000 70.000 Train horn
35
+ ZcTI8fQgEZE_240.000_250.000.wav 240.000 250.000 Train horn
36
+ _8MvhMlbwiE_40.000_50.000.wav 40.000 50.000 Train horn
37
+ _dkeW6lqmq4_30.000_40.000.wav 30.000 40.000 Train horn
38
+ aXsUHAKbyLs_30.000_40.000.wav 30.000 40.000 Train horn
39
+ arevYmB0qGg_30.000_40.000.wav 30.000 40.000 Train horn
40
+ d1o334I5X_k_30.000_40.000.wav 30.000 40.000 Train horn
41
+ dSzZWgbJ378_30.000_40.000.wav 30.000 40.000 Train horn
42
+ ePVb5Upev8k_40.000_50.000.wav 40.000 50.000 Train horn
43
+ g4cA-ifQc70_30.000_40.000.wav 30.000 40.000 Train horn
44
+ g9JVq7wfDIo_30.000_40.000.wav 30.000 40.000 Train horn
45
+ gTFCK9TuLOQ_30.000_40.000.wav 30.000 40.000 Train horn
46
+ hYqzr_rIIAw_30.000_40.000.wav 30.000 40.000 Train horn
47
+ iZgzRfa-xPQ_30.000_40.000.wav 30.000 40.000 Train horn
48
+ k8H8rn4NaSM_0.000_10.000.wav 0.000 10.000 Train horn
49
+ lKQ-I_P7TEM_20.000_30.000.wav 20.000 30.000 Train horn
50
+ nfY_zkJceDw_30.000_40.000.wav 30.000 40.000 Train horn
51
+ pW5SI1ZKUpA_30.000_40.000.wav 30.000 40.000 Train horn
52
+ pxmrmtEnROk_30.000_40.000.wav 30.000 40.000 Train horn
53
+ q7zzKHFWGkg_30.000_40.000.wav 30.000 40.000 Train horn
54
+ qu8vVFWKszA_30.000_40.000.wav 30.000 40.000 Train horn
55
+ stdjjG6Y5IU_30.000_40.000.wav 30.000 40.000 Train horn
56
+ tdRMxc4UWRk_30.000_40.000.wav 30.000 40.000 Train horn
57
+ tu-cxDG2mW8_0.000_10.000.wav 0.000 10.000 Train horn
58
+ txXSE7kgrc8_30.000_40.000.wav 30.000 40.000 Train horn
59
+ xabrKa79prM_30.000_40.000.wav 30.000 40.000 Train horn
60
+ yBVxtq9k8Sg_0.000_10.000.wav 0.000 10.000 Train horn
61
+ -WoudI3gGvk_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
62
+ 0_gci63CtFY_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
63
+ 2-h8MRSRvEg_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
64
+ 3NX4HaOVBoo_240.000_250.000.wav 240.000 250.000 Air horn, truck horn
65
+ 9NPKQDaNCRk_0.000_6.000.wav 0.000 6.000 Air horn, truck horn
66
+ 9ct4w4aYWdc_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
67
+ 9l9QXgsJSfo_120.000_130.000.wav 120.000 130.000 Air horn, truck horn
68
+ CN0Bi4MDpA4_20.000_30.000.wav 20.000 30.000 Air horn, truck horn
69
+ CU2MyVM_B48_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
70
+ Cg-DWc9nPfQ_90.000_100.000.wav 90.000 100.000 Air horn, truck horn
71
+ D62L3husEa0_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
72
+ GO2zKyMtBV4_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
73
+ Ge_KWS-0098_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
74
+ Hk7HqLBHWng_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
75
+ IpyingiCwV8_0.000_3.000.wav 0.000 3.000 Air horn, truck horn
76
+ Isuh9pOuH6I_300.000_310.000.wav 300.000 310.000 Air horn, truck horn
77
+ IuTfMfzkr5Y_120.000_130.000.wav 120.000 130.000 Air horn, truck horn
78
+ MFxsgcZZtFs_10.000_20.000.wav 10.000 20.000 Air horn, truck horn
79
+ N3osL4QmOL8_49.000_59.000.wav 49.000 59.000 Air horn, truck horn
80
+ NOZsDTFLm7M_0.000_9.000.wav 0.000 9.000 Air horn, truck horn
81
+ OjVY3oM1jEU_40.000_50.000.wav 40.000 50.000 Air horn, truck horn
82
+ PNaLTW50fxM_60.000_70.000.wav 60.000 70.000 Air horn, truck horn
83
+ TYLZuBBu8ms_0.000_10.000.wav 0.000 10.000 Air horn, truck horn
84
+ UdHR1P_NIbo_110.000_120.000.wav 110.000 120.000 Air horn, truck horn
85
+ YilfKdY7w6Y_60.000_70.000.wav 60.000 70.000 Air horn, truck horn
86
+ Yt4ZWNjvJOY_50.000_60.000.wav 50.000 60.000 Air horn, truck horn
87
+ Z5M3fGT3Xjk_60.000_70.000.wav 60.000 70.000 Air horn, truck horn
88
+ ZauRsP1uH74_12.000_22.000.wav 12.000 22.000 Air horn, truck horn
89
+ a_6CZ2JaEuc_0.000_2.000.wav 0.000 2.000 Air horn, truck horn
90
+ b7m5Kt5U7Vc_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
91
+ bIObkrK06rk_15.000_25.000.wav 15.000 25.000 Air horn, truck horn
92
+ cdrjKqyDrak_420.000_430.000.wav 420.000 430.000 Air horn, truck horn
93
+ ckSYn557ZyE_20.000_30.000.wav 20.000 30.000 Air horn, truck horn
94
+ cs-RPPsg_ks_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
95
+ ctsq33oUBT8_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
96
+ eCFUwyU9ZWA_9.000_19.000.wav 9.000 19.000 Air horn, truck horn
97
+ ePVb5Upev8k_40.000_50.000.wav 40.000 50.000 Air horn, truck horn
98
+ fHaQPHCjyfA_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
99
+ fOVsAMJ3Yms_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
100
+ g4cA-ifQc70_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
101
+ gjlo4evwjlE_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
102
+ i9VjpIbM3iE_410.000_420.000.wav 410.000 420.000 Air horn, truck horn
103
+ ieZVo7W3BQ4_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
104
+ ii87iO6JboA_10.000_20.000.wav 10.000 20.000 Air horn, truck horn
105
+ jko48cNdvFA_80.000_90.000.wav 80.000 90.000 Air horn, truck horn
106
+ kJuvA2zmrnY_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
107
+ kUrb38hMwPs_0.000_10.000.wav 0.000 10.000 Air horn, truck horn
108
+ km_hVyma2vo_0.000_10.000.wav 0.000 10.000 Air horn, truck horn
109
+ m1e9aOwRiDQ_0.000_9.000.wav 0.000 9.000 Air horn, truck horn
110
+ mQJcObz1k_E_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
111
+ pk75WDyNZKc_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
112
+ rhUfN81puDI_0.000_10.000.wav 0.000 10.000 Air horn, truck horn
113
+ suuYwAifIAQ_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
114
+ wDdEZ46B-tM_460.000_470.000.wav 460.000 470.000 Air horn, truck horn
115
+ wHISHmuP58s_80.000_90.000.wav 80.000 90.000 Air horn, truck horn
116
+ xwqIKDz1bT4_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
117
+ y4Ko6VNiqB0_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
118
+ yhcmPrU3QSk_61.000_71.000.wav 61.000 71.000 Air horn, truck horn
119
+ 3FWHjjZGT9U_80.000_90.000.wav 80.000 90.000 Car alarm
120
+ 3YChVhqW42E_130.000_140.000.wav 130.000 140.000 Car alarm
121
+ 3YRkin3bMlQ_170.000_180.000.wav 170.000 180.000 Car alarm
122
+ 4APBvMmKubU_10.000_20.000.wav 10.000 20.000 Car alarm
123
+ 4JDah6Ckr9k_5.000_15.000.wav 5.000 15.000 Car alarm
124
+ 5hL1uGb4sas_30.000_40.000.wav 30.000 40.000 Car alarm
125
+ 969Zfj4IoPk_20.000_30.000.wav 20.000 30.000 Car alarm
126
+ AyfuBDN3Vdw_40.000_50.000.wav 40.000 50.000 Car alarm
127
+ B-ZqhRg3km4_60.000_70.000.wav 60.000 70.000 Car alarm
128
+ BDnwA3AaclE_10.000_20.000.wav 10.000 20.000 Car alarm
129
+ ES-rjFfuxq4_120.000_130.000.wav 120.000 130.000 Car alarm
130
+ EWbZq5ruCpg_0.000_10.000.wav 0.000 10.000 Car alarm
131
+ F50h9HiyC3k_40.000_50.000.wav 40.000 50.000 Car alarm
132
+ F5AP8kQvogM_30.000_40.000.wav 30.000 40.000 Car alarm
133
+ FKJuDOAumSk_20.000_30.000.wav 20.000 30.000 Car alarm
134
+ GmbNjZi4xBw_30.000_40.000.wav 30.000 40.000 Car alarm
135
+ H7lOMlND9dc_30.000_40.000.wav 30.000 40.000 Car alarm
136
+ Hu8lxbHYaqg_40.000_50.000.wav 40.000 50.000 Car alarm
137
+ IziTYkSwq9Q_30.000_40.000.wav 30.000 40.000 Car alarm
138
+ JcO2TTtiplA_30.000_40.000.wav 30.000 40.000 Car alarm
139
+ KKx7dWRg8s8_8.000_18.000.wav 8.000 18.000 Car alarm
140
+ Kf9Kr69mwOA_14.000_24.000.wav 14.000 24.000 Car alarm
141
+ L535vIV3ED4_40.000_50.000.wav 40.000 50.000 Car alarm
142
+ LOjT44tFx1A_0.000_10.000.wav 0.000 10.000 Car alarm
143
+ Mxn2FKuNwiI_20.000_30.000.wav 20.000 30.000 Car alarm
144
+ Nkqx09b-xyI_70.000_80.000.wav 70.000 80.000 Car alarm
145
+ QNKo1W1WRbc_22.000_32.000.wav 22.000 32.000 Car alarm
146
+ R0VxYDfjyAU_60.000_70.000.wav 60.000 70.000 Car alarm
147
+ TJ58vMpSy1w_30.000_40.000.wav 30.000 40.000 Car alarm
148
+ ToU1kRagUjY_0.000_10.000.wav 0.000 10.000 Car alarm
149
+ TrQGIZqrW0s_30.000_40.000.wav 30.000 40.000 Car alarm
150
+ ULFhHR0OLSE_30.000_40.000.wav 30.000 40.000 Car alarm
151
+ ULS3ffQkCW4_30.000_40.000.wav 30.000 40.000 Car alarm
152
+ U_9NuNORYQM_1.000_11.000.wav 1.000 11.000 Car alarm
153
+ UkCEuwYUW8c_110.000_120.000.wav 110.000 120.000 Car alarm
154
+ Wak5QxsS-QU_30.000_40.000.wav 30.000 40.000 Car alarm
155
+ XzE7mp3pVik_0.000_10.000.wav 0.000 10.000 Car alarm
156
+ Y-4dtrP-RNo_7.000_17.000.wav 7.000 17.000 Car alarm
157
+ Zltlj0fDeS4_30.000_40.000.wav 30.000 40.000 Car alarm
158
+ cB1jkzgH2es_150.000_160.000.wav 150.000 160.000 Car alarm
159
+ eIMjkADTWzA_60.000_70.000.wav 60.000 70.000 Car alarm
160
+ eL7s5CoW0UA_0.000_7.000.wav 0.000 7.000 Car alarm
161
+ i9VjpIbM3iE_410.000_420.000.wav 410.000 420.000 Car alarm
162
+ iWl-5LNURFc_30.000_40.000.wav 30.000 40.000 Car alarm
163
+ iX34nDCq9NU_10.000_20.000.wav 10.000 20.000 Car alarm
164
+ ii87iO6JboA_10.000_20.000.wav 10.000 20.000 Car alarm
165
+ l6_h_YHuTbY_30.000_40.000.wav 30.000 40.000 Car alarm
166
+ lhedRVb85Fk_30.000_40.000.wav 30.000 40.000 Car alarm
167
+ monelE7hnwI_20.000_30.000.wav 20.000 30.000 Car alarm
168
+ o2CmtHNUrXg_30.000_40.000.wav 30.000 40.000 Car alarm
169
+ pXX6cK4xtiY_11.000_21.000.wav 11.000 21.000 Car alarm
170
+ stnVta2ip9g_30.000_40.000.wav 30.000 40.000 Car alarm
171
+ uvuVg9Cl0n0_30.000_40.000.wav 30.000 40.000 Car alarm
172
+ vF2zXcbADUk_20.000_30.000.wav 20.000 30.000 Car alarm
173
+ vN7dJyt-nj0_20.000_30.000.wav 20.000 30.000 Car alarm
174
+ w8Md65mE5Vc_30.000_40.000.wav 30.000 40.000 Car alarm
175
+ ySqfMcFk5LM_30.000_40.000.wav 30.000 40.000 Car alarm
176
+ ysNK5RVF3Zw_0.000_10.000.wav 0.000 10.000 Car alarm
177
+ za8KPcQ0dTw_30.000_40.000.wav 30.000 40.000 Car alarm
178
+ -2sE5CH8Wb8_30.000_40.000.wav 30.000 40.000 Reversing beeps
179
+ -fJsZm3YRc0_30.000_40.000.wav 30.000 40.000 Reversing beeps
180
+ -oSzD8P2BtU_30.000_40.000.wav 30.000 40.000 Reversing beeps
181
+ -pzwalZ0ub0_5.000_15.000.wav 5.000 15.000 Reversing beeps
182
+ -t-htrAtNvM_30.000_40.000.wav 30.000 40.000 Reversing beeps
183
+ -zNEcuo28oE_30.000_40.000.wav 30.000 40.000 Reversing beeps
184
+ 077aWlQn6XI_30.000_40.000.wav 30.000 40.000 Reversing beeps
185
+ 0O-gZoirpRA_30.000_40.000.wav 30.000 40.000 Reversing beeps
186
+ 10aF24rMeu0_30.000_40.000.wav 30.000 40.000 Reversing beeps
187
+ 1P5FFxXLSpY_30.000_40.000.wav 30.000 40.000 Reversing beeps
188
+ 1n_s2Gb5R1Q_30.000_40.000.wav 30.000 40.000 Reversing beeps
189
+ 2HZcxlRs-hg_30.000_40.000.wav 30.000 40.000 Reversing beeps
190
+ 2Jpg_KvJWL0_30.000_40.000.wav 30.000 40.000 Reversing beeps
191
+ 2WTk_j_fivY_30.000_40.000.wav 30.000 40.000 Reversing beeps
192
+ 38F6eeIR-s0_30.000_40.000.wav 30.000 40.000 Reversing beeps
193
+ 3xh2kScw64U_30.000_40.000.wav 30.000 40.000 Reversing beeps
194
+ 4MIHbR4QZhE_30.000_40.000.wav 30.000 40.000 Reversing beeps
195
+ 4Tpy1lsfcSM_30.000_40.000.wav 30.000 40.000 Reversing beeps
196
+ 4XMY2IvVSf0_30.000_40.000.wav 30.000 40.000 Reversing beeps
197
+ 4ep09nZl3LA_30.000_40.000.wav 30.000 40.000 Reversing beeps
198
+ 4t1VqRz4w2g_30.000_40.000.wav 30.000 40.000 Reversing beeps
199
+ 4tKvAMmAUMM_30.000_40.000.wav 30.000 40.000 Reversing beeps
200
+ 5-x2pk3YYAs_11.000_21.000.wav 11.000 21.000 Reversing beeps
201
+ 5DW8WjxxCag_30.000_40.000.wav 30.000 40.000 Reversing beeps
202
+ 5DjZHCumLfs_11.000_21.000.wav 11.000 21.000 Reversing beeps
203
+ 5V0xKS-FGMk_30.000_40.000.wav 30.000 40.000 Reversing beeps
204
+ 5fLzQegwHUg_30.000_40.000.wav 30.000 40.000 Reversing beeps
205
+ 6Y8bKS6KLeE_30.000_40.000.wav 30.000 40.000 Reversing beeps
206
+ 6xEHP-C-ZuU_30.000_40.000.wav 30.000 40.000 Reversing beeps
207
+ 6yyToq9cW9A_60.000_70.000.wav 60.000 70.000 Reversing beeps
208
+ 7Gua0-UrKIw_30.000_40.000.wav 30.000 40.000 Reversing beeps
209
+ 7nglQSmcjAk_30.000_40.000.wav 30.000 40.000 Reversing beeps
210
+ 81DteAPIhoE_30.000_40.000.wav 30.000 40.000 Reversing beeps
211
+ 96a4smrM_30_30.000_40.000.wav 30.000 40.000 Reversing beeps
212
+ 9EsgN-WS2qY_30.000_40.000.wav 30.000 40.000 Reversing beeps
213
+ 9OcAwC8y-eQ_30.000_40.000.wav 30.000 40.000 Reversing beeps
214
+ 9Ti98L4PRCo_17.000_27.000.wav 17.000 27.000 Reversing beeps
215
+ 9yhMtJ50sys_30.000_40.000.wav 30.000 40.000 Reversing beeps
216
+ A9KMqwqLboE_30.000_40.000.wav 30.000 40.000 Reversing beeps
217
+ AFwmMFq_xlc_390.000_400.000.wav 390.000 400.000 Reversing beeps
218
+ AvhBRiwWJU4_30.000_40.000.wav 30.000 40.000 Reversing beeps
219
+ CL5vkiMs2c0_10.000_20.000.wav 10.000 20.000 Reversing beeps
220
+ DcU6AzN7imA_210.000_220.000.wav 210.000 220.000 Reversing beeps
221
+ ISBJKY8hwnM_30.000_40.000.wav 30.000 40.000 Reversing beeps
222
+ LA5TekLaIPI_10.000_20.000.wav 10.000 20.000 Reversing beeps
223
+ NqzZbJJl3E4_30.000_40.000.wav 30.000 40.000 Reversing beeps
224
+ PSt0xAYgf4g_0.000_10.000.wav 0.000 10.000 Reversing beeps
225
+ Q1CMSV81_ws_30.000_40.000.wav 30.000 40.000 Reversing beeps
226
+ _gG0KNGD47M_30.000_40.000.wav 30.000 40.000 Reversing beeps
227
+ ckt7YEGcSoY_30.000_40.000.wav 30.000 40.000 Reversing beeps
228
+ eIkUuCRE_0U_30.000_40.000.wav 30.000 40.000 Reversing beeps
229
+ kH6fFjIZkB0_30.000_40.000.wav 30.000 40.000 Reversing beeps
230
+ mCJ0aqIygWE_24.000_34.000.wav 24.000 34.000 Reversing beeps
231
+ nFqf1vflJaI_350.000_360.000.wav 350.000 360.000 Reversing beeps
232
+ nMaSkwx6cHE_30.000_40.000.wav 30.000 40.000 Reversing beeps
233
+ oHKTmTLEy68_11.000_21.000.wav 11.000 21.000 Reversing beeps
234
+ saPU2JNoytU_0.000_10.000.wav 0.000 10.000 Reversing beeps
235
+ tQd0vFueRKs_30.000_40.000.wav 30.000 40.000 Reversing beeps
236
+ vzP6soELj2Q_0.000_10.000.wav 0.000 10.000 Reversing beeps
237
+ 0x82_HySIVU_30.000_40.000.wav 30.000 40.000 Bicycle
238
+ 1IQdvfm9SDY_30.000_40.000.wav 30.000 40.000 Bicycle
239
+ 1_hGvbEiYAs_30.000_40.000.wav 30.000 40.000 Bicycle
240
+ 26CM8IXODG4_2.000_12.000.wav 2.000 12.000 Bicycle
241
+ 2f7Ad-XpbnY_30.000_40.000.wav 30.000 40.000 Bicycle
242
+ 3-a8i_MEUl8_30.000_40.000.wav 30.000 40.000 Bicycle
243
+ 7KiTXYwaD04_7.000_17.000.wav 7.000 17.000 Bicycle
244
+ 7gkjn-LLInI_30.000_40.000.wav 30.000 40.000 Bicycle
245
+ 84flVacRHUI_21.000_31.000.wav 21.000 31.000 Bicycle
246
+ 9VziOIkNXsE_30.000_40.000.wav 30.000 40.000 Bicycle
247
+ ANofTuuN0W0_160.000_170.000.wav 160.000 170.000 Bicycle
248
+ B6n0op0sLPA_30.000_40.000.wav 30.000 40.000 Bicycle
249
+ D4_zTwsCRds_60.000_70.000.wav 60.000 70.000 Bicycle
250
+ DEs_Sp9S1Nw_30.000_40.000.wav 30.000 40.000 Bicycle
251
+ GjsxrMRRdfQ_3.000_13.000.wav 3.000 13.000 Bicycle
252
+ GkpUU3VX4wQ_30.000_40.000.wav 30.000 40.000 Bicycle
253
+ H9HNXYxRmv8_30.000_40.000.wav 30.000 40.000 Bicycle
254
+ HPWRKwrs-rY_370.000_380.000.wav 370.000 380.000 Bicycle
255
+ HrQxbNO5jXU_6.000_16.000.wav 6.000 16.000 Bicycle
256
+ IYaEZkAO0LU_30.000_40.000.wav 30.000 40.000 Bicycle
257
+ Idzfy0XbZRo_7.000_17.000.wav 7.000 17.000 Bicycle
258
+ Iigfz_GeXVs_30.000_40.000.wav 30.000 40.000 Bicycle
259
+ JWCtQ_94YoQ_30.000_40.000.wav 30.000 40.000 Bicycle
260
+ JXmBrD4b4EI_30.000_40.000.wav 30.000 40.000 Bicycle
261
+ LSZPNwZex9s_30.000_40.000.wav 30.000 40.000 Bicycle
262
+ M5kwg1kx4q0_30.000_40.000.wav 30.000 40.000 Bicycle
263
+ NrR1wmCpqAk_12.000_22.000.wav 12.000 22.000 Bicycle
264
+ O1_Rw2dHb1I_2.000_12.000.wav 2.000 12.000 Bicycle
265
+ OEN0TySl1Jw_10.000_20.000.wav 10.000 20.000 Bicycle
266
+ PF7uY9ydMYc_30.000_40.000.wav 30.000 40.000 Bicycle
267
+ SDl0tWf9Q44_30.000_40.000.wav 30.000 40.000 Bicycle
268
+ SkXXjcw9sJI_30.000_40.000.wav 30.000 40.000 Bicycle
269
+ Ssa1m5Mnllw_0.000_9.000.wav 0.000 9.000 Bicycle
270
+ UB-A1oyNyyg_0.000_6.000.wav 0.000 6.000 Bicycle
271
+ UqyvFyQthHo_30.000_40.000.wav 30.000 40.000 Bicycle
272
+ Wg4ik5zZxBc_250.000_260.000.wav 250.000 260.000 Bicycle
273
+ WvquSD2PcCE_30.000_40.000.wav 30.000 40.000 Bicycle
274
+ YIJBuXUi64U_30.000_40.000.wav 30.000 40.000 Bicycle
275
+ aBHdl_TiseI_30.000_40.000.wav 30.000 40.000 Bicycle
276
+ aeHCq6fFkNo_30.000_40.000.wav 30.000 40.000 Bicycle
277
+ amKDjVcs1Vg_30.000_40.000.wav 30.000 40.000 Bicycle
278
+ ehYwty_G2L4_13.000_23.000.wav 13.000 23.000 Bicycle
279
+ jOlVJv7jAHg_30.000_40.000.wav 30.000 40.000 Bicycle
280
+ lGFDQ-ZwUfk_30.000_40.000.wav 30.000 40.000 Bicycle
281
+ lmTHvLGQy3g_50.000_60.000.wav 50.000 60.000 Bicycle
282
+ nNHW3Uxlb-g_30.000_40.000.wav 30.000 40.000 Bicycle
283
+ o98R4ruf8kw_30.000_40.000.wav 30.000 40.000 Bicycle
284
+ oiLHBkHgkAo_0.000_8.000.wav 0.000 8.000 Bicycle
285
+ qL0ESQcaPhM_30.000_40.000.wav 30.000 40.000 Bicycle
286
+ qjz5t9M4YCw_30.000_40.000.wav 30.000 40.000 Bicycle
287
+ qrCWPsqG9vA_30.000_40.000.wav 30.000 40.000 Bicycle
288
+ r06tmeUDgc8_3.000_13.000.wav 3.000 13.000 Bicycle
289
+ sAMjMyCdGOc_30.000_40.000.wav 30.000 40.000 Bicycle
290
+ tKdRlWz-1pg_30.000_40.000.wav 30.000 40.000 Bicycle
291
+ uNpSMpqlkMA_0.000_10.000.wav 0.000 10.000 Bicycle
292
+ vOYj9W7Jsxk_8.000_18.000.wav 8.000 18.000 Bicycle
293
+ xBKrmKdjAIA_0.000_10.000.wav 0.000 10.000 Bicycle
294
+ xfNeZaw4o3U_17.000_27.000.wav 17.000 27.000 Bicycle
295
+ xgiJqbhhU3c_30.000_40.000.wav 30.000 40.000 Bicycle
296
+ 0vg9qxNKXOw_30.000_40.000.wav 30.000 40.000 Skateboard
297
+ 10YXuv9Go0E_140.000_150.000.wav 140.000 150.000 Skateboard
298
+ 3-a8i_MEUl8_30.000_40.000.wav 30.000 40.000 Skateboard
299
+ 6kXUG1Zo6VA_0.000_10.000.wav 0.000 10.000 Skateboard
300
+ 84fDGWoRtsU_210.000_220.000.wav 210.000 220.000 Skateboard
301
+ 8kbHA22EWd0_330.000_340.000.wav 330.000 340.000 Skateboard
302
+ 8m-a_6wLTkU_230.000_240.000.wav 230.000 240.000 Skateboard
303
+ 9QwaP-cvdeU_360.000_370.000.wav 360.000 370.000 Skateboard
304
+ 9ZYj5toEbGA_0.000_10.000.wav 0.000 10.000 Skateboard
305
+ 9gkppwB5CXA_30.000_40.000.wav 30.000 40.000 Skateboard
306
+ 9hlXgXWXYXQ_0.000_6.000.wav 0.000 6.000 Skateboard
307
+ ALxn5-2bVyI_30.000_40.000.wav 30.000 40.000 Skateboard
308
+ ANPjV_rudog_30.000_40.000.wav 30.000 40.000 Skateboard
309
+ ATAL-_Dblvg_0.000_7.000.wav 0.000 7.000 Skateboard
310
+ An-4jPvUT14_60.000_70.000.wav 60.000 70.000 Skateboard
311
+ BGR0QnX4k6w_30.000_40.000.wav 30.000 40.000 Skateboard
312
+ BlhUt8AJJO8_30.000_40.000.wav 30.000 40.000 Skateboard
313
+ CD7INyI79fM_170.000_180.000.wav 170.000 180.000 Skateboard
314
+ CNcxzB9F-Q8_100.000_110.000.wav 100.000 110.000 Skateboard
315
+ DqOGYyFVnKk_200.000_210.000.wav 200.000 210.000 Skateboard
316
+ E0gBwPTHxqE_30.000_40.000.wav 30.000 40.000 Skateboard
317
+ E3XIdP8kxwg_110.000_120.000.wav 110.000 120.000 Skateboard
318
+ FQZnQhiM41U_0.000_6.000.wav 0.000 6.000 Skateboard
319
+ FRwFfq3Tl1g_310.000_320.000.wav 310.000 320.000 Skateboard
320
+ JJo971B_eDg_30.000_40.000.wav 30.000 40.000 Skateboard
321
+ KXkxqxoCylc_30.000_40.000.wav 30.000 40.000 Skateboard
322
+ L4Z7XkS6CtA_30.000_40.000.wav 30.000 40.000 Skateboard
323
+ LjEqr0Z7xm0_0.000_6.000.wav 0.000 6.000 Skateboard
324
+ MAbDEeLF4cQ_30.000_40.000.wav 30.000 40.000 Skateboard
325
+ MUBbiivNYZs_30.000_40.000.wav 30.000 40.000 Skateboard
326
+ Nq8GyBrTI8Y_30.000_40.000.wav 30.000 40.000 Skateboard
327
+ PPq9QZmV7jc_25.000_35.000.wav 25.000 35.000 Skateboard
328
+ PVgL5wFOKMs_30.000_40.000.wav 30.000 40.000 Skateboard
329
+ Tcq_xAdCMr4_30.000_40.000.wav 30.000 40.000 Skateboard
330
+ UtZofZjccBs_290.000_300.000.wav 290.000 300.000 Skateboard
331
+ VZfrDZhI7BU_30.000_40.000.wav 30.000 40.000 Skateboard
332
+ WxChkRrVOIs_0.000_7.000.wav 0.000 7.000 Skateboard
333
+ YV0noe1sZAs_150.000_160.000.wav 150.000 160.000 Skateboard
334
+ YjScrri_F7U_0.000_10.000.wav 0.000 10.000 Skateboard
335
+ YrGQKTbiG1g_30.000_40.000.wav 30.000 40.000 Skateboard
336
+ ZM67kt6G-d4_30.000_40.000.wav 30.000 40.000 Skateboard
337
+ ZaUaqnLdg6k_30.000_40.000.wav 30.000 40.000 Skateboard
338
+ ZhpkRcAEJzc_3.000_13.000.wav 3.000 13.000 Skateboard
339
+ _43OOP6UEw0_30.000_40.000.wav 30.000 40.000 Skateboard
340
+ _6Fyave4jqA_260.000_270.000.wav 260.000 270.000 Skateboard
341
+ aOoZ0bCoaZw_30.000_40.000.wav 30.000 40.000 Skateboard
342
+ gV6y9L24wWg_0.000_10.000.wav 0.000 10.000 Skateboard
343
+ hHb0Eq1I7Fk_0.000_10.000.wav 0.000 10.000 Skateboard
344
+ lGf_L6i6AZI_20.000_30.000.wav 20.000 30.000 Skateboard
345
+ leOH87itNWM_30.000_40.000.wav 30.000 40.000 Skateboard
346
+ mIkW7mWlnXw_30.000_40.000.wav 30.000 40.000 Skateboard
347
+ qadmKrM0ppo_20.000_30.000.wav 20.000 30.000 Skateboard
348
+ rLUIHCc4b9A_0.000_7.000.wav 0.000 7.000 Skateboard
349
+ u3vBJgEVJvk_0.000_10.000.wav 0.000 10.000 Skateboard
350
+ vHKBrtPDSvA_150.000_160.000.wav 150.000 160.000 Skateboard
351
+ wWmydRt0Z-w_21.000_31.000.wav 21.000 31.000 Skateboard
352
+ xeHt-R5ScmI_0.000_10.000.wav 0.000 10.000 Skateboard
353
+ xqGtIVeeXY4_330.000_340.000.wav 330.000 340.000 Skateboard
354
+ y_lfY0uzmr0_30.000_40.000.wav 30.000 40.000 Skateboard
355
+ 02Ak1eIyj3M_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
356
+ 0N0C0Wbe6AI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
357
+ 2-h8MRSRvEg_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
358
+ 4APBvMmKubU_10.000_20.000.wav 10.000 20.000 Ambulance (siren)
359
+ 5RgHBmX2HLw_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
360
+ 6rXgD5JlYxY_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
361
+ 7eeN-fXbso8_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
362
+ 8Aq2DyLbUBA_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
363
+ 8qMHvgA9mGw_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
364
+ 9CRb-PToaAM_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
365
+ AwFuGITwrms_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
366
+ BGp9-Ro5h8Y_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
367
+ CDrpqsGqfPo_10.000_20.000.wav 10.000 20.000 Ambulance (siren)
368
+ Cc7-P0py1Mc_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
369
+ Daqv2F6SEmQ_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
370
+ F9Dbcxr-lAI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
371
+ GORjnSWhZeY_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
372
+ GgV0yYogTPI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
373
+ H9xQQVv3ElI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
374
+ LNQ7fzfdLiY_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
375
+ MEUcv-QM0cQ_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
376
+ QWVub6-0jX4_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
377
+ R8G5Y0HASxY_60.000_70.000.wav 60.000 70.000 Ambulance (siren)
378
+ RVTKY5KR3ME_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
379
+ Sm0pPvXPA9U_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
380
+ VXI3-DI4xNs_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
381
+ W8fIlauyJkk_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
382
+ ZlS4vIWQMmE_0.000_10.000.wav 0.000 10.000 Ambulance (siren)
383
+ ZxlbI2Rj1VY_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
384
+ ZyuX_gMFiss_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
385
+ bA8mt0JI0Ko_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
386
+ bIU0X1v4SF0_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
387
+ cHm1cYBAXMI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
388
+ cR79KnWpiQA_70.000_80.000.wav 70.000 80.000 Ambulance (siren)
389
+ dPcw4R5lczw_500.000_510.000.wav 500.000 510.000 Ambulance (siren)
390
+ epwDz5WBkvc_80.000_90.000.wav 80.000 90.000 Ambulance (siren)
391
+ fHaQPHCjyfA_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
392
+ gw9pYEG2Zb0_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
393
+ iEX8L_oEbsU_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
394
+ iM-U56fTTOQ_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
395
+ iSnWMz4FUAg_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
396
+ kJuvA2zmrnY_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
397
+ kSjvt2Z_pBo_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
398
+ ke35yF1LHs4_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
399
+ lqGtL8sUo_g_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
400
+ mAfPu0meA_Y_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
401
+ mlS9LLiMIG8_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
402
+ oPR7tUEUptk_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
403
+ qsHc2X1toLs_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
404
+ rCQykaL8Hy4_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
405
+ rhUfN81puDI_0.000_10.000.wav 0.000 10.000 Ambulance (siren)
406
+ s0iddDFzL9s_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
407
+ tcKlq7_cOkw_8.000_18.000.wav 8.000 18.000 Ambulance (siren)
408
+ u3yYpMwG4Us_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
409
+ vBXPyBiyJG0_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
410
+ vVqUvv1SSu8_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
411
+ vYKWnuvq2FI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
412
+ ysNK5RVF3Zw_0.000_10.000.wav 0.000 10.000 Ambulance (siren)
413
+ z4B14tAqJ4w_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
414
+ zbiJEml563w_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
415
+ -HxRz4w60-Y_150.000_160.000.wav 150.000 160.000 Fire engine, fire truck (siren)
416
+ -_dElQcyJnA_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
417
+ 0K1mroXg8bs_9.000_19.000.wav 9.000 19.000 Fire engine, fire truck (siren)
418
+ 0SvSNVatkv0_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
419
+ 2-h8MRSRvEg_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
420
+ 31WGUPOYS5g_22.000_32.000.wav 22.000 32.000 Fire engine, fire truck (siren)
421
+ 3h3_IZWhX0g_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
422
+ 4APBvMmKubU_10.000_20.000.wav 10.000 20.000 Fire engine, fire truck (siren)
423
+ 5fjy_2ajEkg_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
424
+ 6rXgD5JlYxY_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
425
+ 8Aq2DyLbUBA_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
426
+ 8DaEd5KbnnA_80.000_90.000.wav 80.000 90.000 Fire engine, fire truck (siren)
427
+ ARIVxBOc0BQ_40.000_50.000.wav 40.000 50.000 Fire engine, fire truck (siren)
428
+ AwFuGITwrms_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
429
+ Bs2KqqI9F_k_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
430
+ Cc7-P0py1Mc_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
431
+ D4M3YT75ZrQ_90.000_100.000.wav 90.000 100.000 Fire engine, fire truck (siren)
432
+ DWXQ_cSUW98_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
433
+ Daqv2F6SEmQ_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
434
+ DpagxUQwXDo_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
435
+ FFSI6Bg2M-Q_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
436
+ GORjnSWhZeY_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
437
+ GbIuxmaiCOk_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
438
+ GgV0yYogTPI_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
439
+ H6c8ZDrdUaM_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
440
+ H9xQQVv3ElI_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
441
+ HQQxGJKg1iM_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
442
+ IiCh2H3JtsE_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
443
+ InrS4Fdndr4_0.000_10.000.wav 0.000 10.000 Fire engine, fire truck (siren)
444
+ JpLA7HY9r3Y_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
445
+ MEUcv-QM0cQ_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
446
+ PCl-q7lCT_U_50.000_60.000.wav 50.000 60.000 Fire engine, fire truck (siren)
447
+ VXI3-DI4xNs_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
448
+ Xggsbzzes3M_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
449
+ YbiiaDBU-HI_10.000_20.000.wav 10.000 20.000 Fire engine, fire truck (siren)
450
+ ZeH6Fc7Y900_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
451
+ ZlS4vIWQMmE_0.000_10.000.wav 0.000 10.000 Fire engine, fire truck (siren)
452
+ bIU0X1v4SF0_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
453
+ cHm1cYBAXMI_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
454
+ fHaQPHCjyfA_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
455
+ iM-U56fTTOQ_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
456
+ k2a30--j37Q_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
457
+ kJuvA2zmrnY_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
458
+ kr8ssbrDDMY_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
459
+ pvYwIdGrS90_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
460
+ qsHc2X1toLs_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
461
+ rCQykaL8Hy4_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
462
+ rhUfN81puDI_0.000_10.000.wav 0.000 10.000 Fire engine, fire truck (siren)
463
+ u08iA12iAmM_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
464
+ u9aHjYGbl5o_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
465
+ uUiZrgUpw2A_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
466
+ vBXPyBiyJG0_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
467
+ vVqUvv1SSu8_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
468
+ vYKWnuvq2FI_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
469
+ wD0P-doqkXo_20.000_30.000.wav 20.000 30.000 Fire engine, fire truck (siren)
470
+ xbr7x2V6mxk_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
471
+ ysNK5RVF3Zw_0.000_10.000.wav 0.000 10.000 Fire engine, fire truck (siren)
472
+ z4B14tAqJ4w_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
473
+ zpzJKMG5iGc_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
474
+ 02Ak1eIyj3M_30.000_40.000.wav 30.000 40.000 Civil defense siren
475
+ 0CJFt950vOk_30.000_40.000.wav 30.000 40.000 Civil defense siren
476
+ 0phl6nlC-n0_10.000_20.000.wav 10.000 20.000 Civil defense siren
477
+ 1jhbNtCWC9w_50.000_60.000.wav 50.000 60.000 Civil defense siren
478
+ 4Ukj2TTJxHM_30.000_40.000.wav 30.000 40.000 Civil defense siren
479
+ 4XAVaSz_P7c_150.000_160.000.wav 150.000 160.000 Civil defense siren
480
+ 69AIBPnJN5E_0.000_10.000.wav 0.000 10.000 Civil defense siren
481
+ 8DaEd5KbnnA_80.000_90.000.wav 80.000 90.000 Civil defense siren
482
+ 8ILgvaJVPCI_30.000_40.000.wav 30.000 40.000 Civil defense siren
483
+ 9MWHXCLAX8I_30.000_40.000.wav 30.000 40.000 Civil defense siren
484
+ A5y-aZc0CiM_30.000_40.000.wav 30.000 40.000 Civil defense siren
485
+ AQCZH4OdNSM_30.000_40.000.wav 30.000 40.000 Civil defense siren
486
+ AVBUh6qeHrQ_30.000_40.000.wav 30.000 40.000 Civil defense siren
487
+ BhQPDafekdw_30.000_40.000.wav 30.000 40.000 Civil defense siren
488
+ CJXNdudcJrs_30.000_40.000.wav 30.000 40.000 Civil defense siren
489
+ CU2MyVM_B48_30.000_40.000.wav 30.000 40.000 Civil defense siren
490
+ DdZw0XDv0JI_30.000_40.000.wav 30.000 40.000 Civil defense siren
491
+ DgWHUawAGnI_30.000_40.000.wav 30.000 40.000 Civil defense siren
492
+ Do9Dffb6vHA_30.000_40.000.wav 30.000 40.000 Civil defense siren
493
+ GO2zKyMtBV4_30.000_40.000.wav 30.000 40.000 Civil defense siren
494
+ GeRgy4of730_30.000_40.000.wav 30.000 40.000 Civil defense siren
495
+ IIypdzgZAaI_30.000_40.000.wav 30.000 40.000 Civil defense siren
496
+ JpLA7HY9r3Y_30.000_40.000.wav 30.000 40.000 Civil defense siren
497
+ JqHJ7015aWM_30.000_40.000.wav 30.000 40.000 Civil defense siren
498
+ K7a1P4RX_5w_30.000_40.000.wav 30.000 40.000 Civil defense siren
499
+ KrTocA-I550_190.000_200.000.wav 190.000 200.000 Civil defense siren
500
+ KumYcZVLOVU_350.000_360.000.wav 350.000 360.000 Civil defense siren
501
+ L60HS_jbZu0_30.000_40.000.wav 30.000 40.000 Civil defense siren
502
+ MZ1Yh6mRC-E_30.000_40.000.wav 30.000 40.000 Civil defense siren
503
+ R8XUrRCFkzs_30.000_40.000.wav 30.000 40.000 Civil defense siren
504
+ SyWbolNFst4_60.000_70.000.wav 60.000 70.000 Civil defense siren
505
+ TYLZuBBu8ms_0.000_10.000.wav 0.000 10.000 Civil defense siren
506
+ Tx6eSkU2lKc_30.000_40.000.wav 30.000 40.000 Civil defense siren
507
+ VcflBZLflSU_130.000_140.000.wav 130.000 140.000 Civil defense siren
508
+ WXsTHg_DiYA_30.000_40.000.wav 30.000 40.000 Civil defense siren
509
+ Wz5ffJxCElQ_10.000_20.000.wav 10.000 20.000 Civil defense siren
510
+ X2MlmcY8UZU_30.000_40.000.wav 30.000 40.000 Civil defense siren
511
+ XYLheTmlEYI_30.000_40.000.wav 30.000 40.000 Civil defense siren
512
+ YyxlD_FwZXM_30.000_40.000.wav 30.000 40.000 Civil defense siren
513
+ adCuLs-4nmI_30.000_40.000.wav 30.000 40.000 Civil defense siren
514
+ cPjtrTq3F-I_30.000_40.000.wav 30.000 40.000 Civil defense siren
515
+ eHDm93tI4Ok_30.000_40.000.wav 30.000 40.000 Civil defense siren
516
+ etppP5Sdo14_30.000_40.000.wav 30.000 40.000 Civil defense siren
517
+ fRKxUc1gQBw_50.000_60.000.wav 50.000 60.000 Civil defense siren
518
+ feIue4LHzfM_30.000_40.000.wav 30.000 40.000 Civil defense siren
519
+ gr-Yen6Sj_Q_0.000_10.000.wav 0.000 10.000 Civil defense siren
520
+ hl3Kqi9Wi_g_30.000_40.000.wav 30.000 40.000 Civil defense siren
521
+ iKca2cbowd4_30.000_40.000.wav 30.000 40.000 Civil defense siren
522
+ kzFyGWdj6MI_30.000_40.000.wav 30.000 40.000 Civil defense siren
523
+ m3LGopSVju4_30.000_40.000.wav 30.000 40.000 Civil defense siren
524
+ ne4IMxs-hMk_30.000_40.000.wav 30.000 40.000 Civil defense siren
525
+ nuu2iNisoQc_6.000_16.000.wav 6.000 16.000 Civil defense siren
526
+ oYeql9xE19k_30.000_40.000.wav 30.000 40.000 Civil defense siren
527
+ rGUrM19BnJ8_110.000_120.000.wav 110.000 120.000 Civil defense siren
528
+ u08iA12iAmM_30.000_40.000.wav 30.000 40.000 Civil defense siren
529
+ uCRAnDBXxgI_30.000_40.000.wav 30.000 40.000 Civil defense siren
530
+ vQG4HZR2KSk_30.000_40.000.wav 30.000 40.000 Civil defense siren
531
+ vjsG5b2yNzc_190.000_200.000.wav 190.000 200.000 Civil defense siren
532
+ yO7guxGY-_k_30.000_40.000.wav 30.000 40.000 Civil defense siren
533
+ -9GUUhB3QV0_30.000_40.000.wav 30.000 40.000 Police car (siren)
534
+ -HxRz4w60-Y_150.000_160.000.wav 150.000 160.000 Police car (siren)
535
+ -UBVqmhbT50_30.000_40.000.wav 30.000 40.000 Police car (siren)
536
+ -_dElQcyJnA_30.000_40.000.wav 30.000 40.000 Police car (siren)
537
+ 0N0C0Wbe6AI_30.000_40.000.wav 30.000 40.000 Police car (siren)
538
+ 0SvSNVatkv0_30.000_40.000.wav 30.000 40.000 Police car (siren)
539
+ 145N68nh4m0_120.000_130.000.wav 120.000 130.000 Police car (siren)
540
+ 2-h8MRSRvEg_30.000_40.000.wav 30.000 40.000 Police car (siren)
541
+ 31WGUPOYS5g_22.000_32.000.wav 22.000 32.000 Police car (siren)
542
+ 5RgHBmX2HLw_30.000_40.000.wav 30.000 40.000 Police car (siren)
543
+ 6rXgD5JlYxY_30.000_40.000.wav 30.000 40.000 Police car (siren)
544
+ 8Aq2DyLbUBA_30.000_40.000.wav 30.000 40.000 Police car (siren)
545
+ 8DaEd5KbnnA_80.000_90.000.wav 80.000 90.000 Police car (siren)
546
+ 8E7okHnCcTA_30.000_40.000.wav 30.000 40.000 Police car (siren)
547
+ 9CRb-PToaAM_30.000_40.000.wav 30.000 40.000 Police car (siren)
548
+ 9OFUd38sBNM_0.000_8.000.wav 0.000 8.000 Police car (siren)
549
+ AQCZH4OdNSM_30.000_40.000.wav 30.000 40.000 Police car (siren)
550
+ AwFuGITwrms_30.000_40.000.wav 30.000 40.000 Police car (siren)
551
+ CDrpqsGqfPo_10.000_20.000.wav 10.000 20.000 Police car (siren)
552
+ DK_6C29B2zs_14.000_24.000.wav 14.000 24.000 Police car (siren)
553
+ GORjnSWhZeY_30.000_40.000.wav 30.000 40.000 Police car (siren)
554
+ GgV0yYogTPI_30.000_40.000.wav 30.000 40.000 Police car (siren)
555
+ H6c8ZDrdUaM_30.000_40.000.wav 30.000 40.000 Police car (siren)
556
+ H7lOMlND9dc_30.000_40.000.wav 30.000 40.000 Police car (siren)
557
+ H9xQQVv3ElI_30.000_40.000.wav 30.000 40.000 Police car (siren)
558
+ IiCh2H3JtsE_30.000_40.000.wav 30.000 40.000 Police car (siren)
559
+ InrS4Fdndr4_0.000_10.000.wav 0.000 10.000 Police car (siren)
560
+ JgDuU9kpHpM_30.000_40.000.wav 30.000 40.000 Police car (siren)
561
+ JpLA7HY9r3Y_30.000_40.000.wav 30.000 40.000 Police car (siren)
562
+ LNQ7fzfdLiY_30.000_40.000.wav 30.000 40.000 Police car (siren)
563
+ PCl-q7lCT_U_50.000_60.000.wav 50.000 60.000 Police car (siren)
564
+ QWVub6-0jX4_30.000_40.000.wav 30.000 40.000 Police car (siren)
565
+ Wak5QxsS-QU_30.000_40.000.wav 30.000 40.000 Police car (siren)
566
+ YbiiaDBU-HI_10.000_20.000.wav 10.000 20.000 Police car (siren)
567
+ Z34SD-OEpJI_10.000_20.000.wav 10.000 20.000 Police car (siren)
568
+ ZeH6Fc7Y900_30.000_40.000.wav 30.000 40.000 Police car (siren)
569
+ ZlS4vIWQMmE_0.000_10.000.wav 0.000 10.000 Police car (siren)
570
+ ZyuX_gMFiss_30.000_40.000.wav 30.000 40.000 Police car (siren)
571
+ bIU0X1v4SF0_30.000_40.000.wav 30.000 40.000 Police car (siren)
572
+ eIMjkADTWzA_60.000_70.000.wav 60.000 70.000 Police car (siren)
573
+ epwDz5WBkvc_80.000_90.000.wav 80.000 90.000 Police car (siren)
574
+ fHaQPHCjyfA_30.000_40.000.wav 30.000 40.000 Police car (siren)
575
+ fNcrlqPrAqM_30.000_40.000.wav 30.000 40.000 Police car (siren)
576
+ g_DBLppDZAs_30.000_40.000.wav 30.000 40.000 Police car (siren)
577
+ gw9pYEG2Zb0_20.000_30.000.wav 20.000 30.000 Police car (siren)
578
+ iEX8L_oEbsU_30.000_40.000.wav 30.000 40.000 Police car (siren)
579
+ iM-U56fTTOQ_30.000_40.000.wav 30.000 40.000 Police car (siren)
580
+ kJuvA2zmrnY_30.000_40.000.wav 30.000 40.000 Police car (siren)
581
+ kSjvt2Z_pBo_30.000_40.000.wav 30.000 40.000 Police car (siren)
582
+ lqGtL8sUo_g_30.000_40.000.wav 30.000 40.000 Police car (siren)
583
+ mAfPu0meA_Y_20.000_30.000.wav 20.000 30.000 Police car (siren)
584
+ mlS9LLiMIG8_30.000_40.000.wav 30.000 40.000 Police car (siren)
585
+ pzup58Eyhuo_30.000_40.000.wav 30.000 40.000 Police car (siren)
586
+ rCQykaL8Hy4_30.000_40.000.wav 30.000 40.000 Police car (siren)
587
+ rhUfN81puDI_0.000_10.000.wav 0.000 10.000 Police car (siren)
588
+ u08iA12iAmM_30.000_40.000.wav 30.000 40.000 Police car (siren)
589
+ u3yYpMwG4Us_30.000_40.000.wav 30.000 40.000 Police car (siren)
590
+ u9aHjYGbl5o_30.000_40.000.wav 30.000 40.000 Police car (siren)
591
+ uUiZrgUpw2A_30.000_40.000.wav 30.000 40.000 Police car (siren)
592
+ vYKWnuvq2FI_30.000_40.000.wav 30.000 40.000 Police car (siren)
593
+ xbr7x2V6mxk_30.000_40.000.wav 30.000 40.000 Police car (siren)
594
+ z4B14tAqJ4w_30.000_40.000.wav 30.000 40.000 Police car (siren)
595
+ -FKrYTj_eCU_0.000_10.000.wav 0.000 10.000 Screaming
596
+ 0G50t4FlbIA_60.000_70.000.wav 60.000 70.000 Screaming
597
+ 1LTxZ2aNytc_30.000_40.000.wav 30.000 40.000 Screaming
598
+ 2FEhG1UXb_E_370.000_380.000.wav 370.000 380.000 Screaming
599
+ 45vBbOhzS6g_50.000_60.000.wav 50.000 60.000 Screaming
600
+ 4PYTtp78Ig0_60.000_70.000.wav 60.000 70.000 Screaming
601
+ 5QNq0IEPICQ_30.000_40.000.wav 30.000 40.000 Screaming
602
+ 5YcIJuYQECc_0.000_6.000.wav 0.000 6.000 Screaming
603
+ 5kQF4r03yRI_0.000_6.000.wav 0.000 6.000 Screaming
604
+ 7ARVgI_wx5Y_30.000_40.000.wav 30.000 40.000 Screaming
605
+ AIFvFuZPr68_30.000_40.000.wav 30.000 40.000 Screaming
606
+ Aw43FUCkIb8_20.000_30.000.wav 20.000 30.000 Screaming
607
+ AxM2BofYfPY_30.000_40.000.wav 30.000 40.000 Screaming
608
+ BFqHyCoypfM_16.000_26.000.wav 16.000 26.000 Screaming
609
+ Bk_xS_fKCpk_30.000_40.000.wav 30.000 40.000 Screaming
610
+ C4YMjmJ7tt4_90.000_100.000.wav 90.000 100.000 Screaming
611
+ CMWoAvgD0A0_9.000_19.000.wav 9.000 19.000 Screaming
612
+ DZfYFhywhRs_30.000_40.000.wav 30.000 40.000 Screaming
613
+ ElJFYwRtrH4_30.000_40.000.wav 30.000 40.000 Screaming
614
+ FcUVtXJMkJs_30.000_40.000.wav 30.000 40.000 Screaming
615
+ G--718JDmAQ_0.000_10.000.wav 0.000 10.000 Screaming
616
+ GPJ1uQwmNHk_30.000_40.000.wav 30.000 40.000 Screaming
617
+ H3vSRzkG82U_30.000_40.000.wav 30.000 40.000 Screaming
618
+ HS28EUWt8dE_110.000_120.000.wav 110.000 120.000 Screaming
619
+ KkGTB8ESMCM_0.000_10.000.wav 0.000 10.000 Screaming
620
+ MQ0YasvMcuQ_1.000_11.000.wav 1.000 11.000 Screaming
621
+ Msl9dI5yweA_90.000_100.000.wav 90.000 100.000 Screaming
622
+ Ntn6YvZM3kA_0.000_10.000.wav 0.000 10.000 Screaming
623
+ NwTHlpXdk4M_30.000_40.000.wav 30.000 40.000 Screaming
624
+ OHjfSfqa804_0.000_10.000.wav 0.000 10.000 Screaming
625
+ OzWJuqG2F3Y_30.000_40.000.wav 30.000 40.000 Screaming
626
+ QDW_uCMnMMU_0.000_8.000.wav 0.000 8.000 Screaming
627
+ SxI3Lnzzmkw_110.000_120.000.wav 110.000 120.000 Screaming
628
+ TVvbfuGu9eM_70.000_80.000.wav 70.000 80.000 Screaming
629
+ YCk9F0Uq3BE_70.000_80.000.wav 70.000 80.000 Screaming
630
+ Z54pSnNw2iM_30.000_40.000.wav 30.000 40.000 Screaming
631
+ a59ivTlYoNk_310.000_320.000.wav 310.000 320.000 Screaming
632
+ auC_LgwFF8g_30.000_40.000.wav 30.000 40.000 Screaming
633
+ bi8R9JbF2cc_80.000_90.000.wav 80.000 90.000 Screaming
634
+ cdbYsoEasio_70.000_80.000.wav 70.000 80.000 Screaming
635
+ dfsvT5xImNg_80.000_90.000.wav 80.000 90.000 Screaming
636
+ e2AaF6siR1A_540.000_550.000.wav 540.000 550.000 Screaming
637
+ gB1ytjgpcW4_190.000_200.000.wav 190.000 200.000 Screaming
638
+ gE-0JxMtUh0_20.000_30.000.wav 20.000 30.000 Screaming
639
+ hWiGgsuGnzs_100.000_110.000.wav 100.000 110.000 Screaming
640
+ l-iIfi3SNpw_120.000_130.000.wav 120.000 130.000 Screaming
641
+ mT-f0lGk-JM_30.000_40.000.wav 30.000 40.000 Screaming
642
+ nApE_Biu13k_10.000_20.000.wav 10.000 20.000 Screaming
643
+ nRMmafPUAEU_80.000_90.000.wav 80.000 90.000 Screaming
644
+ nYAbLuyqPis_30.000_40.000.wav 30.000 40.000 Screaming
645
+ nlYlNF30bVg_30.000_40.000.wav 30.000 40.000 Screaming
646
+ sUp-UXzgmrA_0.000_10.000.wav 0.000 10.000 Screaming
647
+ syIwNMo2TUA_0.000_7.000.wav 0.000 7.000 Screaming
648
+ uTu0a1wd9-M_21.000_31.000.wav 21.000 31.000 Screaming
649
+ xVG7dfH5DL0_320.000_330.000.wav 320.000 330.000 Screaming
650
+ xvAQ44hx3_k_220.000_230.000.wav 220.000 230.000 Screaming
651
+ yNTkb2zgA_M_70.000_80.000.wav 70.000 80.000 Screaming
652
+ zCdOEvduBTo_30.000_40.000.wav 30.000 40.000 Screaming
653
+ zMICvbCJ6zc_550.000_560.000.wav 550.000 560.000 Screaming
654
+ -0RWZT-miFs_420.000_430.000.wav 420.000 430.000 Car
655
+ -1pRmoJIGQc_11.000_21.000.wav 11.000 21.000 Car
656
+ -7eDqv-6AKQ_30.000_40.000.wav 30.000 40.000 Car
657
+ -CZ1LIc8aos_20.000_30.000.wav 20.000 30.000 Car
658
+ -HWygXWSNRA_30.000_40.000.wav 30.000 40.000 Car
659
+ -PVEno65928_30.000_40.000.wav 30.000 40.000 Car
660
+ -WgJ-M292Yc_30.000_40.000.wav 30.000 40.000 Car
661
+ 0O-gZoirpRA_30.000_40.000.wav 30.000 40.000 Car
662
+ 0QwxnzHf_0E_30.000_40.000.wav 30.000 40.000 Car
663
+ 0bg1nzEVdgY_0.000_10.000.wav 0.000 10.000 Car
664
+ 0lpPdWvg7Eo_0.000_10.000.wav 0.000 10.000 Car
665
+ 11Pn3yJifSQ_4.000_14.000.wav 4.000 14.000 Car
666
+ 1BgqrhbyRFw_30.000_40.000.wav 30.000 40.000 Car
667
+ 1F9zCsJyw6k_430.000_440.000.wav 430.000 440.000 Car
668
+ 1HayoASR-54_80.000_90.000.wav 80.000 90.000 Car
669
+ 1P5FFxXLSpY_30.000_40.000.wav 30.000 40.000 Car
670
+ 1hIg-Lsvc7Q_30.000_40.000.wav 30.000 40.000 Car
671
+ 27m49pmJ8Og_370.000_380.000.wav 370.000 380.000 Car
672
+ 2E_N8lnoVKE_30.000_40.000.wav 30.000 40.000 Car
673
+ 2Fdau5KTEls_30.000_40.000.wav 30.000 40.000 Car
674
+ 2STASUlGAjs_30.000_40.000.wav 30.000 40.000 Car
675
+ 2fi0m8ei_B4_30.000_40.000.wav 30.000 40.000 Car
676
+ 2uMXfAIMeN0_180.000_190.000.wav 180.000 190.000 Car
677
+ 32V2zsK7GME_110.000_120.000.wav 110.000 120.000 Car
678
+ 3YChVhqW42E_130.000_140.000.wav 130.000 140.000 Car
679
+ 3_OLj6XChvM_30.000_40.000.wav 30.000 40.000 Car
680
+ 3hLxPQpmfQo_30.000_40.000.wav 30.000 40.000 Car
681
+ 3mDPQ_CPopw_30.000_40.000.wav 30.000 40.000 Car
682
+ 3mor5mPSYoU_7.000_17.000.wav 7.000 17.000 Car
683
+ 3xh2kScw64U_30.000_40.000.wav 30.000 40.000 Car
684
+ 40s88hEcn5I_170.000_180.000.wav 170.000 180.000 Car
685
+ 42P93B_GzGA_30.000_40.000.wav 30.000 40.000 Car
686
+ 4KZWpXlcpM4_60.000_70.000.wav 60.000 70.000 Car
687
+ 4TshFWSsrn8_290.000_300.000.wav 290.000 300.000 Car
688
+ 4WRgvRI06zc_30.000_40.000.wav 30.000 40.000 Car
689
+ 4aJfQpHt9lY_160.000_170.000.wav 160.000 170.000 Car
690
+ 4hd2CLrzCZs_30.000_40.000.wav 30.000 40.000 Car
691
+ 4zCHl7pRsNY_30.000_40.000.wav 30.000 40.000 Car
692
+ 5RgHBmX2HLw_30.000_40.000.wav 30.000 40.000 Car
693
+ 5oirFKi6Sfo_190.000_200.000.wav 190.000 200.000 Car
694
+ 5vmxFp1r1ZM_30.000_40.000.wav 30.000 40.000 Car
695
+ 5z1rE_l-0Ow_0.000_8.000.wav 0.000 8.000 Car
696
+ 620GoTv5Ic8_30.000_40.000.wav 30.000 40.000 Car
697
+ 6BitLl5Bnxw_30.000_40.000.wav 30.000 40.000 Car
698
+ 6FVA4hqp1Ro_30.000_40.000.wav 30.000 40.000 Car
699
+ 6U942AYlcXA_30.000_40.000.wav 30.000 40.000 Car
700
+ 6b2ZMMrLTz8_5.000_15.000.wav 5.000 15.000 Car
701
+ 6ibh38autyA_30.000_40.000.wav 30.000 40.000 Car
702
+ 6kuESYFcEqw_30.000_40.000.wav 30.000 40.000 Car
703
+ 73cuZZq-J3w_20.000_30.000.wav 20.000 30.000 Car
704
+ 764IcMEMVUk_90.000_100.000.wav 90.000 100.000 Car
705
+ 7NH1WJlSiYI_30.000_40.000.wav 30.000 40.000 Car
706
+ 7lJu9wEsErY_220.000_230.000.wav 220.000 230.000 Car
707
+ 8CqqK9CzuXM_30.000_40.000.wav 30.000 40.000 Car
708
+ 8SYLYWR47EE_30.000_40.000.wav 30.000 40.000 Car
709
+ 8Wk-ZmlsUqY_28.000_38.000.wav 28.000 38.000 Car
710
+ 8q8JrJNAa-Q_30.000_40.000.wav 30.000 40.000 Car
711
+ 8rMlNbKlp_s_0.000_10.000.wav 0.000 10.000 Car
712
+ 8sGJFPr2Nmc_30.000_40.000.wav 30.000 40.000 Car
713
+ 8yRROnG0-lA_30.000_40.000.wav 30.000 40.000 Car
714
+ 9Ti98L4PRCo_17.000_27.000.wav 17.000 27.000 Car
715
+ 9fzAWj5YJ9c_30.000_40.000.wav 30.000 40.000 Car
716
+ 9rq8h4oMJ98_30.000_40.000.wav 30.000 40.000 Car
717
+ 9ye2Fn62xDc_60.000_70.000.wav 60.000 70.000 Car
718
+ ACGuC6SH4V4_150.000_160.000.wav 150.000 160.000 Car
719
+ AFz5TIs_Gug_30.000_40.000.wav 30.000 40.000 Car
720
+ AedlWfHafgw_21.000_31.000.wav 21.000 31.000 Car
721
+ AlsDSDTiaWI_30.000_40.000.wav 30.000 40.000 Car
722
+ B3SkK0wuOhY_130.000_140.000.wav 130.000 140.000 Car
723
+ B9n4a5ciI48_16.000_26.000.wav 16.000 26.000 Car
724
+ BAekfGvUtFM_30.000_40.000.wav 30.000 40.000 Car
725
+ BNLOvQbrPdc_290.000_300.000.wav 290.000 300.000 Car
726
+ BS1fqEDAvh0_330.000_340.000.wav 330.000 340.000 Car
727
+ Bqx_SZgCzZw_10.000_20.000.wav 10.000 20.000 Car
728
+ CZB6WXDuM1g_30.000_40.000.wav 30.000 40.000 Car
729
+ C_pnsyNXphA_30.000_40.000.wav 30.000 40.000 Car
730
+ Ck5ZjBf1nLM_30.000_40.000.wav 30.000 40.000 Car
731
+ CqNyeZeHb8Y_30.000_40.000.wav 30.000 40.000 Car
732
+ Cs1d7Ibk8CA_220.000_230.000.wav 220.000 230.000 Car
733
+ CuS-ok0xG9g_0.000_10.000.wav 0.000 10.000 Car
734
+ CuaBHNKycvI_30.000_40.000.wav 30.000 40.000 Car
735
+ Cwur_jvxMzY_360.000_370.000.wav 360.000 370.000 Car
736
+ DEGSyVygE98_110.000_120.000.wav 110.000 120.000 Car
737
+ DLxTYAUifjU_30.000_40.000.wav 30.000 40.000 Car
738
+ DkKpnvJk9u0_30.000_40.000.wav 30.000 40.000 Car
739
+ DkVfro9iq80_30.000_40.000.wav 30.000 40.000 Car
740
+ Dw1q9rBv7oU_30.000_40.000.wav 30.000 40.000 Car
741
+ E8NgxTz1d90_30.000_40.000.wav 30.000 40.000 Car
742
+ ExqedxdXuBc_70.000_80.000.wav 70.000 80.000 Car
743
+ FCxEMSNSEuI_160.000_170.000.wav 160.000 170.000 Car
744
+ FEoMTMxzn3U_30.000_40.000.wav 30.000 40.000 Car
745
+ FFSWmryaZ60_30.000_40.000.wav 30.000 40.000 Car
746
+ FYk2paHPSdg_30.000_40.000.wav 30.000 40.000 Car
747
+ Fo_FDiZhzDo_30.000_40.000.wav 30.000 40.000 Car
748
+ GteozUDpJRc_30.000_40.000.wav 30.000 40.000 Car
749
+ GwBS2NzjAvA_30.000_40.000.wav 30.000 40.000 Car
750
+ H8d1mZOqb1c_110.000_120.000.wav 110.000 120.000 Car
751
+ HFF_PpqLQ9w_30.000_40.000.wav 30.000 40.000 Car
752
+ HHlb-h2Pc7o_30.000_40.000.wav 30.000 40.000 Car
753
+ Hu8lxbHYaqg_40.000_50.000.wav 40.000 50.000 Car
754
+ I-HlrcP6Qg4_30.000_40.000.wav 30.000 40.000 Car
755
+ I7vs2H-Htt8_480.000_490.000.wav 480.000 490.000 Car
756
+ IblhEF_MiH8_400.000_410.000.wav 400.000 410.000 Car
757
+ JgXnbgS_XBk_480.000_490.000.wav 480.000 490.000 Car
758
+ Ju7Kg_H2iZQ_30.000_40.000.wav 30.000 40.000 Car
759
+ KiCB6pP6EEo_100.000_110.000.wav 100.000 110.000 Car
760
+ Kwpn3utYEHM_30.000_40.000.wav 30.000 40.000 Car
761
+ Ky9Kw-0XwAs_30.000_40.000.wav 30.000 40.000 Car
762
+ KzKDk-UgS54_30.000_40.000.wav 30.000 40.000 Car
763
+ L1qC8DicAZE_70.000_80.000.wav 70.000 80.000 Car
764
+ L4N0LOYZrFo_30.000_40.000.wav 30.000 40.000 Car
765
+ L535vIV3ED4_40.000_50.000.wav 40.000 50.000 Car
766
+ L9YtOeck3A0_0.000_10.000.wav 0.000 10.000 Car
767
+ LEtkHiZZugk_30.000_40.000.wav 30.000 40.000 Car
768
+ LLkNFGrrgUo_30.000_40.000.wav 30.000 40.000 Car
769
+ LhRNnXaSsCk_30.000_40.000.wav 30.000 40.000 Car
770
+ M7NvD1WJQ7o_70.000_80.000.wav 70.000 80.000 Car
771
+ M8BFtmQRHq4_200.000_210.000.wav 200.000 210.000 Car
772
+ Mxn2FKuNwiI_20.000_30.000.wav 20.000 30.000 Car
773
+ NMqSBlEq14Q_30.000_40.000.wav 30.000 40.000 Car
774
+ NoPbk9fy6uw_10.000_20.000.wav 10.000 20.000 Car
775
+ O36torHptH4_30.000_40.000.wav 30.000 40.000 Car
776
+ OBwh-KGukE8_30.000_40.000.wav 30.000 40.000 Car
777
+ Oa2Os8eOUjs_30.000_40.000.wav 30.000 40.000 Car
778
+ PNaLTW50fxM_60.000_70.000.wav 60.000 70.000 Car
779
+ PfXdcsW8dJI_540.000_550.000.wav 540.000 550.000 Car
780
+ QAWuHvVCI6g_30.000_40.000.wav 30.000 40.000 Car
781
+ QBMDnMRwQCc_70.000_80.000.wav 70.000 80.000 Car
782
+ QzrS-S7OerE_370.000_380.000.wav 370.000 380.000 Car
783
+ R0BtkTm_CPI_30.000_40.000.wav 30.000 40.000 Car
784
+ SEHxfje9Eio_30.000_40.000.wav 30.000 40.000 Car
785
+ Sb3V17F8xU8_360.000_370.000.wav 360.000 370.000 Car
786
+ SkbFczIabRY_30.000_40.000.wav 30.000 40.000 Car
787
+ SqWkV-UQ6CI_30.000_40.000.wav 30.000 40.000 Car
788
+ TWDytzefXXc_10.000_20.000.wav 10.000 20.000 Car
789
+ Tv67JhZDAYs_30.000_40.000.wav 30.000 40.000 Car
790
+ VTwVF3xRSWg_12.000_22.000.wav 12.000 22.000 Car
791
+ VulCKZgWspc_570.000_580.000.wav 570.000 580.000 Car
792
+ Vx6mttDHWfo_30.000_40.000.wav 30.000 40.000 Car
793
+ W11cJ9HZNaY_30.000_40.000.wav 30.000 40.000 Car
794
+ WLXQgcx8qTI_30.000_40.000.wav 30.000 40.000 Car
795
+ WMbdMQ7rdFs_30.000_40.000.wav 30.000 40.000 Car
796
+ WZoQD6cInx8_360.000_370.000.wav 360.000 370.000 Car
797
+ WffmaOr2p8I_30.000_40.000.wav 30.000 40.000 Car
798
+ WoynilrteLU_30.000_40.000.wav 30.000 40.000 Car
799
+ WxrKq0aI0iM_130.000_140.000.wav 130.000 140.000 Car
800
+ X60eVxecY3I_30.000_40.000.wav 30.000 40.000 Car
801
+ X8fEzx-fA0U_80.000_90.000.wav 80.000 90.000 Car
802
+ XVxlZqwWcBI_10.000_20.000.wav 10.000 20.000 Car
803
+ Xnd8ERrynEo_120.000_130.000.wav 120.000 130.000 Car
804
+ XqXLI7bDb-I_0.000_7.000.wav 0.000 7.000 Car
805
+ XyCjByHuDIk_260.000_270.000.wav 260.000 270.000 Car
806
+ XzE7mp3pVik_0.000_10.000.wav 0.000 10.000 Car
807
+ Y5e8BW513ww_20.000_30.000.wav 20.000 30.000 Car
808
+ YJdBwuIn4Ec_30.000_40.000.wav 30.000 40.000 Car
809
+ YTFJUFWcRns_30.000_40.000.wav 30.000 40.000 Car
810
+ YY9aConw2QE_0.000_10.000.wav 0.000 10.000 Car
811
+ Yc_WuISxfLI_30.000_40.000.wav 30.000 40.000 Car
812
+ Ys_rO2Ieg1U_30.000_40.000.wav 30.000 40.000 Car
813
+ Z34SD-OEpJI_10.000_20.000.wav 10.000 20.000 Car
814
+ Z8cigemT5_g_210.000_220.000.wav 210.000 220.000 Car
815
+ ZJW7ymsioQc_16.000_26.000.wav 16.000 26.000 Car
816
+ ZY6A9ZDkudg_130.000_140.000.wav 130.000 140.000 Car
817
+ _Mw9lKigni4_30.000_40.000.wav 30.000 40.000 Car
818
+ _ZiJA6phEq8_30.000_40.000.wav 30.000 40.000 Car
819
+ _yU0-fmspFY_210.000_220.000.wav 210.000 220.000 Car
820
+ a5vTn5286-A_80.000_90.000.wav 80.000 90.000 Car
821
+ aCX6vJhHO2c_30.000_40.000.wav 30.000 40.000 Car
822
+ aHEAK0iWqKk_180.000_190.000.wav 180.000 190.000 Car
823
+ aOVPHKqKjyQ_90.000_100.000.wav 90.000 100.000 Car
824
+ aUq4glO5ryE_30.000_40.000.wav 30.000 40.000 Car
825
+ aW3DY8XDrmw_22.000_32.000.wav 22.000 32.000 Car
826
+ aa4uhPvKviY_30.000_40.000.wav 30.000 40.000 Car
827
+ akgqVmFFDiY_30.000_40.000.wav 30.000 40.000 Car
828
+ buOEFwXhoe0_310.000_320.000.wav 310.000 320.000 Car
829
+ cHCIoXF7moA_30.000_40.000.wav 30.000 40.000 Car
830
+ cW859JAzVZ0_30.000_40.000.wav 30.000 40.000 Car
831
+ cbYZQRz09bc_390.000_400.000.wav 390.000 400.000 Car
832
+ d-do1XZ8f_E_30.000_40.000.wav 30.000 40.000 Car
833
+ d3gMwtMK6Gs_30.000_40.000.wav 30.000 40.000 Car
834
+ d6AioJ8CkTc_30.000_40.000.wav 30.000 40.000 Car
835
+ dAud19zNZyw_190.000_200.000.wav 190.000 200.000 Car
836
+ dC1TVxwiitc_30.000_40.000.wav 30.000 40.000 Car
837
+ dFqOBLxhEl8_20.000_30.000.wav 20.000 30.000 Car
838
+ dSfcznv4KLo_30.000_40.000.wav 30.000 40.000 Car
839
+ dThSTe35jb0_50.000_60.000.wav 50.000 60.000 Car
840
+ dfwr8wgZU8M_40.000_50.000.wav 40.000 50.000 Car
841
+ dmJH84FnQa8_30.000_40.000.wav 30.000 40.000 Car
842
+ e9xPBfEJni8_230.000_240.000.wav 230.000 240.000 Car
843
+ eAl9WwRaWUE_30.000_40.000.wav 30.000 40.000 Car
844
+ eAt6si6k65c_30.000_40.000.wav 30.000 40.000 Car
845
+ eHiqCLHmoxI_0.000_8.000.wav 0.000 8.000 Car
846
+ eV5JX81GzqA_150.000_160.000.wav 150.000 160.000 Car
847
+ er1vQ-nse_g_30.000_40.000.wav 30.000 40.000 Car
848
+ eyFPHlybqDg_30.000_40.000.wav 30.000 40.000 Car
849
+ f70nsY7ThBA_220.000_230.000.wav 220.000 230.000 Car
850
+ fJLCT3xDGxA_30.000_40.000.wav 30.000 40.000 Car
851
+ fZMPDCNyQxE_30.000_40.000.wav 30.000 40.000 Car
852
+ f__6chtFRM0_30.000_40.000.wav 30.000 40.000 Car
853
+ fdDTuo_COG8_90.000_100.000.wav 90.000 100.000 Car
854
+ gFJjYWXeBn0_30.000_40.000.wav 30.000 40.000 Car
855
+ g_DBLppDZAs_30.000_40.000.wav 30.000 40.000 Car
856
+ gaFQgJLQHtU_90.000_100.000.wav 90.000 100.000 Car
857
+ gc6VlixMHXE_30.000_40.000.wav 30.000 40.000 Car
858
+ hN1ykzC8kZM_30.000_40.000.wav 30.000 40.000 Car
859
+ hQ_yyPI46FI_11.000_21.000.wav 11.000 21.000 Car
860
+ haiMRJEH-Aw_0.000_9.000.wav 0.000 9.000 Car
861
+ hsC_sT0A4XM_30.000_40.000.wav 30.000 40.000 Car
862
+ ihQDd1CqFBw_70.000_80.000.wav 70.000 80.000 Car
863
+ ii87iO6JboA_10.000_20.000.wav 10.000 20.000 Car
864
+ j2R1zurR39E_30.000_40.000.wav 30.000 40.000 Car
865
+ j42ETHcp044_0.000_10.000.wav 0.000 10.000 Car
866
+ j7OEpDiK3IA_30.000_40.000.wav 30.000 40.000 Car
867
+ jCeUZwd8b2w_0.000_10.000.wav 0.000 10.000 Car
868
+ jZxusrD28rM_30.000_40.000.wav 30.000 40.000 Car
869
+ kdDgTDfo9HY_100.000_110.000.wav 100.000 110.000 Car
870
+ l6_h_YHuTbY_30.000_40.000.wav 30.000 40.000 Car
871
+ lRrv5m9Xu4k_30.000_40.000.wav 30.000 40.000 Car
872
+ lb1awXgoyQE_0.000_10.000.wav 0.000 10.000 Car
873
+ llZBUsAwRWc_30.000_40.000.wav 30.000 40.000 Car
874
+ lu5teS1j1RQ_0.000_10.000.wav 0.000 10.000 Car
875
+ mCmjh_EJtb4_30.000_40.000.wav 30.000 40.000 Car
876
+ nFqf1vflJaI_350.000_360.000.wav 350.000 360.000 Car
877
+ njodYtK0Hqg_30.000_40.000.wav 30.000 40.000 Car
878
+ noymXcxyxis_30.000_40.000.wav 30.000 40.000 Car
879
+ o2CmtHNUrXg_30.000_40.000.wav 30.000 40.000 Car
880
+ oPJVdi0cqNE_30.000_40.000.wav 30.000 40.000 Car
881
+ oxJYMzEmtk4_10.000_20.000.wav 10.000 20.000 Car
882
+ pPnLErF3GOY_30.000_40.000.wav 30.000 40.000 Car
883
+ pXX6cK4xtiY_11.000_21.000.wav 11.000 21.000 Car
884
+ qC5M7BAsKOA_0.000_10.000.wav 0.000 10.000 Car
885
+ qg4WxBm8h_w_510.000_520.000.wav 510.000 520.000 Car
886
+ qxLdv8u_Ujw_0.000_5.000.wav 0.000 5.000 Car
887
+ rgeu0Gtf3Es_40.000_50.000.wav 40.000 50.000 Car
888
+ s3-i5eUpe6c_30.000_40.000.wav 30.000 40.000 Car
889
+ s5s3aR8Z7I8_350.000_360.000.wav 350.000 360.000 Car
890
+ syCQldBsAtg_30.000_40.000.wav 30.000 40.000 Car
891
+ tAfucDIyRiM_30.000_40.000.wav 30.000 40.000 Car
892
+ teoER4j9H14_290.000_300.000.wav 290.000 300.000 Car
893
+ uFSkczD2i14_30.000_40.000.wav 30.000 40.000 Car
894
+ uUyB4q7jgn4_30.000_40.000.wav 30.000 40.000 Car
895
+ uYqlVTlSgbM_40.000_50.000.wav 40.000 50.000 Car
896
+ v8Kry1CbTkM_310.000_320.000.wav 310.000 320.000 Car
897
+ vF2zXcbADUk_20.000_30.000.wav 20.000 30.000 Car
898
+ vHlqKDR7ggA_30.000_40.000.wav 30.000 40.000 Car
899
+ vPDXFKcdaS4_0.000_10.000.wav 0.000 10.000 Car
900
+ vW1nk4o9u5g_30.000_40.000.wav 30.000 40.000 Car
901
+ vdFYBSlmsXw_30.000_40.000.wav 30.000 40.000 Car
902
+ vtE1J8HsCUs_30.000_40.000.wav 30.000 40.000 Car
903
+ w0vy1YvNcOg_30.000_40.000.wav 30.000 40.000 Car
904
+ wDKrcZ7xLY8_80.000_90.000.wav 80.000 90.000 Car
905
+ wM-sBzIDzok_30.000_40.000.wav 30.000 40.000 Car
906
+ wUY4eWJt17w_30.000_40.000.wav 30.000 40.000 Car
907
+ we66pU0MN1M_30.000_40.000.wav 30.000 40.000 Car
908
+ wjfMWiYLDWA_30.000_40.000.wav 30.000 40.000 Car
909
+ wu3-_VKULZU_30.000_40.000.wav 30.000 40.000 Car
910
+ wwNIm8bgzKc_30.000_40.000.wav 30.000 40.000 Car
911
+ xqH9TpH6Xy0_0.000_10.000.wav 0.000 10.000 Car
912
+ xsT5ZJUnBg0_160.000_170.000.wav 160.000 170.000 Car
913
+ y9DFJEsiTLk_110.000_120.000.wav 110.000 120.000 Car
914
+ yESwp_fg0Po_70.000_80.000.wav 70.000 80.000 Car
915
+ yQg3eMb0QKU_30.000_40.000.wav 30.000 40.000 Car
916
+ yQjnNR7fXKo_50.000_60.000.wav 50.000 60.000 Car
917
+ zCuKYr_oMlE_60.000_70.000.wav 60.000 70.000 Car
918
+ zz35Va7tYmA_30.000_40.000.wav 30.000 40.000 Car
919
+ -CZ1LIc8aos_20.000_30.000.wav 20.000 30.000 Car passing by
920
+ -WgJ-M292Yc_30.000_40.000.wav 30.000 40.000 Car passing by
921
+ -iAAxJkoqcM_0.000_6.000.wav 0.000 6.000 Car passing by
922
+ 0mQcGLpc8to_30.000_40.000.wav 30.000 40.000 Car passing by
923
+ 1HtGgZnlKjU_30.000_40.000.wav 30.000 40.000 Car passing by
924
+ 2IsAlhq0XFc_30.000_40.000.wav 30.000 40.000 Car passing by
925
+ 2UvEmetE__I_30.000_40.000.wav 30.000 40.000 Car passing by
926
+ 2oHGIzH_XzA_30.000_40.000.wav 30.000 40.000 Car passing by
927
+ 3mor5mPSYoU_7.000_17.000.wav 7.000 17.000 Car passing by
928
+ 8SYLYWR47EE_30.000_40.000.wav 30.000 40.000 Car passing by
929
+ 8rzhhvS0tGc_30.000_40.000.wav 30.000 40.000 Car passing by
930
+ 8v377AXrgac_30.000_40.000.wav 30.000 40.000 Car passing by
931
+ 9lMtTDKyDEk_30.000_40.000.wav 30.000 40.000 Car passing by
932
+ BWoL8oKoTFI_30.000_40.000.wav 30.000 40.000 Car passing by
933
+ BsvD806qNM8_10.000_20.000.wav 10.000 20.000 Car passing by
934
+ C3LLtToB2zA_30.000_40.000.wav 30.000 40.000 Car passing by
935
+ Dk6b9dVD0i8_6.000_16.000.wav 6.000 16.000 Car passing by
936
+ Dw1q9rBv7oU_30.000_40.000.wav 30.000 40.000 Car passing by
937
+ EqFuY_U0Yz0_30.000_40.000.wav 30.000 40.000 Car passing by
938
+ FjpOboRcrNc_10.000_20.000.wav 10.000 20.000 Car passing by
939
+ FjyZV8zIJ0k_30.000_40.000.wav 30.000 40.000 Car passing by
940
+ Fn7eSPVvgCQ_30.000_40.000.wav 30.000 40.000 Car passing by
941
+ G6A-sT2DOjY_30.000_40.000.wav 30.000 40.000 Car passing by
942
+ GBXRuYIvhfM_30.000_40.000.wav 30.000 40.000 Car passing by
943
+ HDEPd5MIaow_30.000_40.000.wav 30.000 40.000 Car passing by
944
+ HQQxGJKg1iM_30.000_40.000.wav 30.000 40.000 Car passing by
945
+ If-V0XO-mpo_30.000_40.000.wav 30.000 40.000 Car passing by
946
+ JtuNiusRRLk_30.000_40.000.wav 30.000 40.000 Car passing by
947
+ M8BFtmQRHq4_200.000_210.000.wav 200.000 210.000 Car passing by
948
+ NKPAwhwZmqs_30.000_40.000.wav 30.000 40.000 Car passing by
949
+ Oa2Os8eOUjs_30.000_40.000.wav 30.000 40.000 Car passing by
950
+ QcLfJE-YfJY_30.000_40.000.wav 30.000 40.000 Car passing by
951
+ SkbFczIabRY_30.000_40.000.wav 30.000 40.000 Car passing by
952
+ VAiH1LX8guk_17.000_27.000.wav 17.000 27.000 Car passing by
953
+ Yc_WuISxfLI_30.000_40.000.wav 30.000 40.000 Car passing by
954
+ Yd10enP9ykM_30.000_40.000.wav 30.000 40.000 Car passing by
955
+ _HGGCwtyNxM_30.000_40.000.wav 30.000 40.000 Car passing by
956
+ a2U10_mi5as_30.000_40.000.wav 30.000 40.000 Car passing by
957
+ aB6FDPKAPus_30.000_40.000.wav 30.000 40.000 Car passing by
958
+ bDFQWubN4x4_30.000_40.000.wav 30.000 40.000 Car passing by
959
+ cW859JAzVZ0_30.000_40.000.wav 30.000 40.000 Car passing by
960
+ dDTvjXXFkDg_30.000_40.000.wav 30.000 40.000 Car passing by
961
+ dfwr8wgZU8M_40.000_50.000.wav 40.000 50.000 Car passing by
962
+ fJLCT3xDGxA_30.000_40.000.wav 30.000 40.000 Car passing by
963
+ gc6VlixMHXE_30.000_40.000.wav 30.000 40.000 Car passing by
964
+ gd_KjDM4fi8_0.000_10.000.wav 0.000 10.000 Car passing by
965
+ j7OEpDiK3IA_30.000_40.000.wav 30.000 40.000 Car passing by
966
+ jZxusrD28rM_30.000_40.000.wav 30.000 40.000 Car passing by
967
+ llZBUsAwRWc_30.000_40.000.wav 30.000 40.000 Car passing by
968
+ m_dCO5bBCic_26.000_36.000.wav 26.000 36.000 Car passing by
969
+ qDQX7Xi3GsQ_30.000_40.000.wav 30.000 40.000 Car passing by
970
+ qxLdv8u_Ujw_0.000_5.000.wav 0.000 5.000 Car passing by
971
+ reP-OOWiLWU_30.000_40.000.wav 30.000 40.000 Car passing by
972
+ s4jG5ZJYCvQ_30.000_40.000.wav 30.000 40.000 Car passing by
973
+ s5s3aR8Z7I8_350.000_360.000.wav 350.000 360.000 Car passing by
974
+ uUyB4q7jgn4_30.000_40.000.wav 30.000 40.000 Car passing by
975
+ vPDXFKcdaS4_0.000_10.000.wav 0.000 10.000 Car passing by
976
+ wD4QouhX8zo_30.000_40.000.wav 30.000 40.000 Car passing by
977
+ xqH9TpH6Xy0_0.000_10.000.wav 0.000 10.000 Car passing by
978
+ zd67ihUZ1u4_25.000_35.000.wav 25.000 35.000 Car passing by
979
+ -3z5mFRgbxc_30.000_40.000.wav 30.000 40.000 Bus
980
+ 0N9EN0BEjP0_430.000_440.000.wav 430.000 440.000 Bus
981
+ 0lPcHRhXlWk_30.000_40.000.wav 30.000 40.000 Bus
982
+ 1E1evA4T_Tk_30.000_40.000.wav 30.000 40.000 Bus
983
+ 1hIg-Lsvc7Q_30.000_40.000.wav 30.000 40.000 Bus
984
+ 6-yQsEH2WYA_30.000_40.000.wav 30.000 40.000 Bus
985
+ 6Y8wSI1l-Lw_30.000_40.000.wav 30.000 40.000 Bus
986
+ 7T04388Ijk8_30.000_40.000.wav 30.000 40.000 Bus
987
+ 8E7okHnCcTA_30.000_40.000.wav 30.000 40.000 Bus
988
+ 8oEdgb8iXYA_1.000_11.000.wav 1.000 11.000 Bus
989
+ AdpNSGX2_Pk_10.000_20.000.wav 10.000 20.000 Bus
990
+ AwJ8orGuOXg_2.000_12.000.wav 2.000 12.000 Bus
991
+ BS1fqEDAvh0_330.000_340.000.wav 330.000 340.000 Bus
992
+ CoFbRc1OxFU_9.000_19.000.wav 9.000 19.000 Bus
993
+ DRqKOlP8BmU_110.000_120.000.wav 110.000 120.000 Bus
994
+ DYcXvyBFc5w_30.000_40.000.wav 30.000 40.000 Bus
995
+ DYdalOQnx1Y_30.000_40.000.wav 30.000 40.000 Bus
996
+ DkwFXd5nYLE_40.000_50.000.wav 40.000 50.000 Bus
997
+ FBMR3pW9H9o_30.000_40.000.wav 30.000 40.000 Bus
998
+ FEGa4e6RAlw_30.000_40.000.wav 30.000 40.000 Bus
999
+ Ge_KWS-0098_30.000_40.000.wav 30.000 40.000 Bus
1000
+ HxMoMMrA6Eo_30.000_40.000.wav 30.000 40.000 Bus
1001
+ I7esm6vqqZ4_30.000_40.000.wav 30.000 40.000 Bus
1002
+ JLj11umr1CE_0.000_10.000.wav 0.000 10.000 Bus
1003
+ JwAhcHHF2qg_30.000_40.000.wav 30.000 40.000 Bus
1004
+ LhRNnXaSsCk_30.000_40.000.wav 30.000 40.000 Bus
1005
+ LzZ_nxuZ8Co_30.000_40.000.wav 30.000 40.000 Bus
1006
+ LzcNa3HvD7c_30.000_40.000.wav 30.000 40.000 Bus
1007
+ Nyi9_-u6-w0_30.000_40.000.wav 30.000 40.000 Bus
1008
+ O_SKumO328I_30.000_40.000.wav 30.000 40.000 Bus
1009
+ Owg_XU9XmRM_30.000_40.000.wav 30.000 40.000 Bus
1010
+ P94rcZSuTT8_30.000_40.000.wav 30.000 40.000 Bus
1011
+ PP741kd2vRM_30.000_40.000.wav 30.000 40.000 Bus
1012
+ Qna9qrV8_go_30.000_40.000.wav 30.000 40.000 Bus
1013
+ Qt7FJkuqWPE_30.000_40.000.wav 30.000 40.000 Bus
1014
+ UcQ7cVukaxY_21.000_31.000.wav 21.000 31.000 Bus
1015
+ W8fIlauyJkk_30.000_40.000.wav 30.000 40.000 Bus
1016
+ WDn851XbWTk_30.000_40.000.wav 30.000 40.000 Bus
1017
+ WvquSD2PcCE_30.000_40.000.wav 30.000 40.000 Bus
1018
+ a9B_HA3y8WQ_30.000_40.000.wav 30.000 40.000 Bus
1019
+ cEEoKQ38fHY_30.000_40.000.wav 30.000 40.000 Bus
1020
+ er1vQ-nse_g_30.000_40.000.wav 30.000 40.000 Bus
1021
+ fLvM4bbpg6w_0.000_10.000.wav 0.000 10.000 Bus
1022
+ fOVsAMJ3Yms_30.000_40.000.wav 30.000 40.000 Bus
1023
+ gxVhAVNjSU0_30.000_40.000.wav 30.000 40.000 Bus
1024
+ jaSK_t8QP1E_30.000_40.000.wav 30.000 40.000 Bus
1025
+ ji_YCMygNHQ_8.000_18.000.wav 8.000 18.000 Bus
1026
+ kNKfoDp0uUw_30.000_40.000.wav 30.000 40.000 Bus
1027
+ kdDgTDfo9HY_100.000_110.000.wav 100.000 110.000 Bus
1028
+ lHP0q2sQzPQ_30.000_40.000.wav 30.000 40.000 Bus
1029
+ mGG8rop4Jig_30.000_40.000.wav 30.000 40.000 Bus
1030
+ oHKTmTLEy68_11.000_21.000.wav 11.000 21.000 Bus
1031
+ tAfucDIyRiM_30.000_40.000.wav 30.000 40.000 Bus
1032
+ tQd0vFueRKs_30.000_40.000.wav 30.000 40.000 Bus
1033
+ ucICmff0K-Q_30.000_40.000.wav 30.000 40.000 Bus
1034
+ x-2Abohj8VY_30.000_40.000.wav 30.000 40.000 Bus
1035
+ xFr2xX6PulQ_70.000_80.000.wav 70.000 80.000 Bus
1036
+ yfSBqp5IZSM_10.000_20.000.wav 10.000 20.000 Bus
1037
+ -2sE5CH8Wb8_30.000_40.000.wav 30.000 40.000 Truck
1038
+ -BY64_p-vtM_30.000_40.000.wav 30.000 40.000 Truck
1039
+ -fJsZm3YRc0_30.000_40.000.wav 30.000 40.000 Truck
1040
+ -t-htrAtNvM_30.000_40.000.wav 30.000 40.000 Truck
1041
+ -zNEcuo28oE_30.000_40.000.wav 30.000 40.000 Truck
1042
+ 01WuUBxFBp4_30.000_40.000.wav 30.000 40.000 Truck
1043
+ 077aWlQn6XI_30.000_40.000.wav 30.000 40.000 Truck
1044
+ 0Ga7T-2e490_17.000_27.000.wav 17.000 27.000 Truck
1045
+ 0N9EN0BEjP0_430.000_440.000.wav 430.000 440.000 Truck
1046
+ 10aF24rMeu0_30.000_40.000.wav 30.000 40.000 Truck
1047
+ 2HZcxlRs-hg_30.000_40.000.wav 30.000 40.000 Truck
1048
+ 2Jpg_KvJWL0_30.000_40.000.wav 30.000 40.000 Truck
1049
+ 2Tmi7EqpGZQ_0.000_10.000.wav 0.000 10.000 Truck
1050
+ 4DlKNmVcoek_20.000_30.000.wav 20.000 30.000 Truck
1051
+ 4MRzQbAIyV4_90.000_100.000.wav 90.000 100.000 Truck
1052
+ 4Tpy1lsfcSM_30.000_40.000.wav 30.000 40.000 Truck
1053
+ 4ep09nZl3LA_30.000_40.000.wav 30.000 40.000 Truck
1054
+ 5DW8WjxxCag_30.000_40.000.wav 30.000 40.000 Truck
1055
+ 5DjZHCumLfs_11.000_21.000.wav 11.000 21.000 Truck
1056
+ 5QP1Tc3XbDc_30.000_40.000.wav 30.000 40.000 Truck
1057
+ 5V0xKS-FGMk_30.000_40.000.wav 30.000 40.000 Truck
1058
+ 5fLzQegwHUg_30.000_40.000.wav 30.000 40.000 Truck
1059
+ 6HL_DKWK-WA_10.000_20.000.wav 10.000 20.000 Truck
1060
+ 6VQGk8IrV-4_30.000_40.000.wav 30.000 40.000 Truck
1061
+ 6Y8bKS6KLeE_30.000_40.000.wav 30.000 40.000 Truck
1062
+ 6xEHP-C-ZuU_30.000_40.000.wav 30.000 40.000 Truck
1063
+ 6yyToq9cW9A_60.000_70.000.wav 60.000 70.000 Truck
1064
+ 7Gua0-UrKIw_30.000_40.000.wav 30.000 40.000 Truck
1065
+ 7nglQSmcjAk_30.000_40.000.wav 30.000 40.000 Truck
1066
+ 81DteAPIhoE_30.000_40.000.wav 30.000 40.000 Truck
1067
+ 84E9i9_ELBs_30.000_40.000.wav 30.000 40.000 Truck
1068
+ 8jblPMBafKE_30.000_40.000.wav 30.000 40.000 Truck
1069
+ 8k17D6qiuqI_30.000_40.000.wav 30.000 40.000 Truck
1070
+ 9EsgN-WS2qY_30.000_40.000.wav 30.000 40.000 Truck
1071
+ 9LJnjmcRcb8_280.000_290.000.wav 280.000 290.000 Truck
1072
+ 9yhMtJ50sys_30.000_40.000.wav 30.000 40.000 Truck
1073
+ A9KMqwqLboE_30.000_40.000.wav 30.000 40.000 Truck
1074
+ ARIVxBOc0BQ_40.000_50.000.wav 40.000 50.000 Truck
1075
+ AwFuGITwrms_30.000_40.000.wav 30.000 40.000 Truck
1076
+ BQVXzH6YK8g_30.000_40.000.wav 30.000 40.000 Truck
1077
+ CnYWJp2bknU_50.000_60.000.wav 50.000 60.000 Truck
1078
+ DRqKOlP8BmU_110.000_120.000.wav 110.000 120.000 Truck
1079
+ DXlTakKvLzg_30.000_40.000.wav 30.000 40.000 Truck
1080
+ DkVfro9iq80_30.000_40.000.wav 30.000 40.000 Truck
1081
+ Dmy4EjohxxU_60.000_70.000.wav 60.000 70.000 Truck
1082
+ DvMFQ64YwcI_30.000_40.000.wav 30.000 40.000 Truck
1083
+ FEoMTMxzn3U_30.000_40.000.wav 30.000 40.000 Truck
1084
+ GTk_6JDmtCY_230.000_240.000.wav 230.000 240.000 Truck
1085
+ HDEPd5MIaow_30.000_40.000.wav 30.000 40.000 Truck
1086
+ HQkLVac7z9Q_70.000_80.000.wav 70.000 80.000 Truck
1087
+ I4VDcVTE4YA_30.000_40.000.wav 30.000 40.000 Truck
1088
+ IxlvxvG8zOE_110.000_120.000.wav 110.000 120.000 Truck
1089
+ JLzD44Im1Ec_30.000_40.000.wav 30.000 40.000 Truck
1090
+ K4Hcb00hTTY_30.000_40.000.wav 30.000 40.000 Truck
1091
+ L2M3xanqQP8_30.000_40.000.wav 30.000 40.000 Truck
1092
+ LA5TekLaIPI_10.000_20.000.wav 10.000 20.000 Truck
1093
+ LhRNnXaSsCk_30.000_40.000.wav 30.000 40.000 Truck
1094
+ MWTTe0M9vi4_30.000_40.000.wav 30.000 40.000 Truck
1095
+ Nkqx09b-xyI_70.000_80.000.wav 70.000 80.000 Truck
1096
+ NqzZbJJl3E4_30.000_40.000.wav 30.000 40.000 Truck
1097
+ OPd0cz1hRqc_30.000_40.000.wav 30.000 40.000 Truck
1098
+ PCl-q7lCT_U_50.000_60.000.wav 50.000 60.000 Truck
1099
+ PNaLTW50fxM_60.000_70.000.wav 60.000 70.000 Truck
1100
+ PO1eaJ7tQOg_180.000_190.000.wav 180.000 190.000 Truck
1101
+ PSt0xAYgf4g_0.000_10.000.wav 0.000 10.000 Truck
1102
+ Pef6g19i5iI_30.000_40.000.wav 30.000 40.000 Truck
1103
+ Q1CMSV81_ws_30.000_40.000.wav 30.000 40.000 Truck
1104
+ SiBIYAiIajM_30.000_40.000.wav 30.000 40.000 Truck
1105
+ T6oYCFRafPs_30.000_40.000.wav 30.000 40.000 Truck
1106
+ WdubBeFntYQ_460.000_470.000.wav 460.000 470.000 Truck
1107
+ _ZiJA6phEq8_30.000_40.000.wav 30.000 40.000 Truck
1108
+ _jfv_ziZWII_60.000_70.000.wav 60.000 70.000 Truck
1109
+ acvV6yYNc7Y_30.000_40.000.wav 30.000 40.000 Truck
1110
+ bQSaQ0iX_vk_30.000_40.000.wav 30.000 40.000 Truck
1111
+ bhxN5w03yS0_30.000_40.000.wav 30.000 40.000 Truck
1112
+ ckt7YEGcSoY_30.000_40.000.wav 30.000 40.000 Truck
1113
+ eIkUuCRE_0U_30.000_40.000.wav 30.000 40.000 Truck
1114
+ gxVhAVNjSU0_30.000_40.000.wav 30.000 40.000 Truck
1115
+ hDVNQOJCvOk_30.000_40.000.wav 30.000 40.000 Truck
1116
+ ieZVo7W3BQ4_30.000_40.000.wav 30.000 40.000 Truck
1117
+ ikmE_kRvDAc_30.000_40.000.wav 30.000 40.000 Truck
1118
+ jwZTKNsbf58_70.000_80.000.wav 70.000 80.000 Truck
1119
+ kH6fFjIZkB0_30.000_40.000.wav 30.000 40.000 Truck
1120
+ kr8ssbrDDMY_30.000_40.000.wav 30.000 40.000 Truck
1121
+ lp66EaEOOoU_30.000_40.000.wav 30.000 40.000 Truck
1122
+ n4o1r8Ai66o_30.000_40.000.wav 30.000 40.000 Truck
1123
+ nDtrUUc2J2U_0.000_10.000.wav 0.000 10.000 Truck
1124
+ nMaSkwx6cHE_30.000_40.000.wav 30.000 40.000 Truck
1125
+ p70IcMwsW9M_30.000_40.000.wav 30.000 40.000 Truck
1126
+ pJ1fore8JbQ_30.000_40.000.wav 30.000 40.000 Truck
1127
+ pt-J_L-OFI8_0.000_10.000.wav 0.000 10.000 Truck
1128
+ rdanJP7Usrg_30.000_40.000.wav 30.000 40.000 Truck
1129
+ srTX18ikXkE_10.000_20.000.wav 10.000 20.000 Truck
1130
+ tuplsUUDXKw_30.000_40.000.wav 30.000 40.000 Truck
1131
+ x6vuWsdeS3s_30.000_40.000.wav 30.000 40.000 Truck
1132
+ xMClk12ouB8_30.000_40.000.wav 30.000 40.000 Truck
1133
+ ycqDMKTrvLY_30.000_40.000.wav 30.000 40.000 Truck
1134
+ yk5LqHTtHLo_30.000_40.000.wav 30.000 40.000 Truck
1135
+ yrscqyUOIlI_30.000_40.000.wav 30.000 40.000 Truck
1136
+ zM3chsL-B7U_30.000_40.000.wav 30.000 40.000 Truck
1137
+ 06si40RVDco_30.000_40.000.wav 30.000 40.000 Motorcycle
1138
+ 0DzsPL-xElE_20.000_30.000.wav 20.000 30.000 Motorcycle
1139
+ 145N68nh4m0_120.000_130.000.wav 120.000 130.000 Motorcycle
1140
+ 16vw4K9qJnY_30.000_40.000.wav 30.000 40.000 Motorcycle
1141
+ 21QlKF17ipc_30.000_40.000.wav 30.000 40.000 Motorcycle
1142
+ 3LulQoOXNB0_30.000_40.000.wav 30.000 40.000 Motorcycle
1143
+ 45JHcLU57B8_20.000_30.000.wav 20.000 30.000 Motorcycle
1144
+ 4NZkW-XaIa4_30.000_40.000.wav 30.000 40.000 Motorcycle
1145
+ 506I6LfdDuk_50.000_60.000.wav 50.000 60.000 Motorcycle
1146
+ 6MCy1lh4qaw_20.000_30.000.wav 20.000 30.000 Motorcycle
1147
+ 6R8cO4ARzkY_30.000_40.000.wav 30.000 40.000 Motorcycle
1148
+ 6taAP7SFewI_30.000_40.000.wav 30.000 40.000 Motorcycle
1149
+ 7g6aZTBe2xE_30.000_40.000.wav 30.000 40.000 Motorcycle
1150
+ 9HcahqYUVoc_90.000_100.000.wav 90.000 100.000 Motorcycle
1151
+ 9N1iw5Vdim8_20.000_30.000.wav 20.000 30.000 Motorcycle
1152
+ ANWU9Hiy_5k_40.000_50.000.wav 40.000 50.000 Motorcycle
1153
+ BTNz6NftP34_30.000_40.000.wav 30.000 40.000 Motorcycle
1154
+ BxnLAGsByCI_10.000_20.000.wav 10.000 20.000 Motorcycle
1155
+ CZgx_6XaEkg_30.000_40.000.wav 30.000 40.000 Motorcycle
1156
+ D3BJuOwltoI_10.000_20.000.wav 10.000 20.000 Motorcycle
1157
+ FgN9v1jYqjA_30.000_40.000.wav 30.000 40.000 Motorcycle
1158
+ HQ8eR2lvjSE_30.000_40.000.wav 30.000 40.000 Motorcycle
1159
+ Mb-GyQEKoEc_30.000_40.000.wav 30.000 40.000 Motorcycle
1160
+ Pair_NsHdTc_30.000_40.000.wav 30.000 40.000 Motorcycle
1161
+ UFIBEBkm7ao_30.000_40.000.wav 30.000 40.000 Motorcycle
1162
+ UWz5OIijWM4_30.000_40.000.wav 30.000 40.000 Motorcycle
1163
+ WLX3Db60418_20.000_30.000.wav 20.000 30.000 Motorcycle
1164
+ X5Xs8Y1cJK0_30.000_40.000.wav 30.000 40.000 Motorcycle
1165
+ ZGf0vrZStwI_30.000_40.000.wav 30.000 40.000 Motorcycle
1166
+ ZfkO1HlI0zM_30.000_40.000.wav 30.000 40.000 Motorcycle
1167
+ bhtB2Zgh9Q8_110.000_120.000.wav 110.000 120.000 Motorcycle
1168
+ d-m8eXCpeDg_30.000_40.000.wav 30.000 40.000 Motorcycle
1169
+ d21IwtH2oHI_30.000_40.000.wav 30.000 40.000 Motorcycle
1170
+ dhaKGPCgtfw_30.000_40.000.wav 30.000 40.000 Motorcycle
1171
+ ee-0JGvEIng_30.000_40.000.wav 30.000 40.000 Motorcycle
1172
+ epGDNMrsQb8_40.000_50.000.wav 40.000 50.000 Motorcycle
1173
+ ezUkPETm6cs_30.000_40.000.wav 30.000 40.000 Motorcycle
1174
+ f724u5z_UDw_30.000_40.000.wav 30.000 40.000 Motorcycle
1175
+ gGmWm1i6pVo_30.000_40.000.wav 30.000 40.000 Motorcycle
1176
+ i9VjpIbM3iE_410.000_420.000.wav 410.000 420.000 Motorcycle
1177
+ iMp8nODaotA_580.000_590.000.wav 580.000 590.000 Motorcycle
1178
+ lVW2CqsHJ4Y_30.000_40.000.wav 30.000 40.000 Motorcycle
1179
+ lj7hzmz19-M_30.000_40.000.wav 30.000 40.000 Motorcycle
1180
+ mX45CiTjf8I_30.000_40.000.wav 30.000 40.000 Motorcycle
1181
+ mbLiZ_jpgeY_20.000_30.000.wav 20.000 30.000 Motorcycle
1182
+ owZDBEq6WdU_30.000_40.000.wav 30.000 40.000 Motorcycle
1183
+ pNMBIqvbyB4_30.000_40.000.wav 30.000 40.000 Motorcycle
1184
+ po-tnKZAzdg_40.000_50.000.wav 40.000 50.000 Motorcycle
1185
+ qAQuljp-atA_30.000_40.000.wav 30.000 40.000 Motorcycle
1186
+ r0Oll28wmXs_30.000_40.000.wav 30.000 40.000 Motorcycle
1187
+ sAMjMyCdGOc_30.000_40.000.wav 30.000 40.000 Motorcycle
1188
+ vHlqKDR7ggA_30.000_40.000.wav 30.000 40.000 Motorcycle
1189
+ wPfv8ifzzyg_30.000_40.000.wav 30.000 40.000 Motorcycle
1190
+ wyhurCZbKQU_30.000_40.000.wav 30.000 40.000 Motorcycle
1191
+ xQTPEQDb0Gg_30.000_40.000.wav 30.000 40.000 Motorcycle
1192
+ xTPmoYwgKf4_30.000_40.000.wav 30.000 40.000 Motorcycle
1193
+ xXGIKM4daMU_30.000_40.000.wav 30.000 40.000 Motorcycle
1194
+ xZ8hQliZqhg_160.000_170.000.wav 160.000 170.000 Motorcycle
1195
+ xuMBy2NoROI_30.000_40.000.wav 30.000 40.000 Motorcycle
1196
+ z_8yGVO1qws_30.000_40.000.wav 30.000 40.000 Motorcycle
1197
+ -BaVEk1zS2g_50.000_60.000.wav 50.000 60.000 Train
1198
+ -Q4fBQ4egrs_0.000_10.000.wav 0.000 10.000 Train
1199
+ -QxSFr1cYuQ_20.000_30.000.wav 20.000 30.000 Train
1200
+ -ZdReI9dL6M_530.000_540.000.wav 530.000 540.000 Train
1201
+ 0YIyGEM0yG0_550.000_560.000.wav 550.000 560.000 Train
1202
+ 1Mk2MJDhLJQ_20.000_30.000.wav 20.000 30.000 Train
1203
+ 2nejPPEWqJ8_320.000_330.000.wav 320.000 330.000 Train
1204
+ 3ACjUf9QpAQ_30.000_40.000.wav 30.000 40.000 Train
1205
+ 3RfrTU1p5SA_500.000_510.000.wav 500.000 510.000 Train
1206
+ 3YJewEC-NWo_30.000_40.000.wav 30.000 40.000 Train
1207
+ 3ZZDuYU2HM4_150.000_160.000.wav 150.000 160.000 Train
1208
+ 3fPX1LaGwJo_60.000_70.000.wav 60.000 70.000 Train
1209
+ 4_gyCWuPxRg_170.000_180.000.wav 170.000 180.000 Train
1210
+ 4l4vGrMD4Tw_550.000_560.000.wav 550.000 560.000 Train
1211
+ 4oT0bxldS80_30.000_40.000.wav 30.000 40.000 Train
1212
+ 4t7Mi3pnSA4_210.000_220.000.wav 210.000 220.000 Train
1213
+ 53oq_Otm_XI_30.000_40.000.wav 30.000 40.000 Train
1214
+ 6OgSNQOTw2U_30.000_40.000.wav 30.000 40.000 Train
1215
+ 6_TGlFO0DCk_10.000_20.000.wav 10.000 20.000 Train
1216
+ 7KdSGBzXvz8_420.000_430.000.wav 420.000 430.000 Train
1217
+ 7W_kcu0CJqI_310.000_320.000.wav 310.000 320.000 Train
1218
+ 8IaInXpdd9M_0.000_10.000.wav 0.000 10.000 Train
1219
+ 8nU1aVscJec_30.000_40.000.wav 30.000 40.000 Train
1220
+ 9LQEZJPNVpw_30.000_40.000.wav 30.000 40.000 Train
1221
+ 9NT6gEiqpWA_30.000_40.000.wav 30.000 40.000 Train
1222
+ AFhll08KM98_30.000_40.000.wav 30.000 40.000 Train
1223
+ AHom7lBbtoY_30.000_40.000.wav 30.000 40.000 Train
1224
+ AK0kZUDk294_2.000_12.000.wav 2.000 12.000 Train
1225
+ AKPC4rEGoyI_30.000_40.000.wav 30.000 40.000 Train
1226
+ APsvUzw7bWA_60.000_70.000.wav 60.000 70.000 Train
1227
+ AshwkKUV07s_23.000_33.000.wav 23.000 33.000 Train
1228
+ BI2Tol64na0_30.000_40.000.wav 30.000 40.000 Train
1229
+ BmS2NiuT2c0_160.000_170.000.wav 160.000 170.000 Train
1230
+ CCX_4cW_SAU_0.000_10.000.wav 0.000 10.000 Train
1231
+ D_nXtMgbPNY_30.000_40.000.wav 30.000 40.000 Train
1232
+ F-JFxERdA2w_30.000_40.000.wav 30.000 40.000 Train
1233
+ FoIBRxw0tyE_30.000_40.000.wav 30.000 40.000 Train
1234
+ G958vjLYBcI_110.000_120.000.wav 110.000 120.000 Train
1235
+ GFQnh84kNwU_30.000_40.000.wav 30.000 40.000 Train
1236
+ GKc8PCTen8Q_310.000_320.000.wav 310.000 320.000 Train
1237
+ I4qODX0fypE_30.000_40.000.wav 30.000 40.000 Train
1238
+ IIIxN_ziy_I_60.000_70.000.wav 60.000 70.000 Train
1239
+ IdqEbjujFb8_30.000_40.000.wav 30.000 40.000 Train
1240
+ K-i81KrH8BQ_30.000_40.000.wav 30.000 40.000 Train
1241
+ K9pSRLw6FNc_40.000_50.000.wav 40.000 50.000 Train
1242
+ KPyYUly5xCc_90.000_100.000.wav 90.000 100.000 Train
1243
+ L3a132_uApg_50.000_60.000.wav 50.000 60.000 Train
1244
+ LK4b2eJpy24_30.000_40.000.wav 30.000 40.000 Train
1245
+ LzcNa3HvD7c_30.000_40.000.wav 30.000 40.000 Train
1246
+ MCYY8tJsnfY_7.000_17.000.wav 7.000 17.000 Train
1247
+ MDF2vsjm8jU_10.000_20.000.wav 10.000 20.000 Train
1248
+ MMfiWJVftMA_60.000_70.000.wav 60.000 70.000 Train
1249
+ MYzVHespZ-E_30.000_40.000.wav 30.000 40.000 Train
1250
+ Mbe4rlNiM84_0.000_7.000.wav 0.000 7.000 Train
1251
+ MczH_PWBNeI_360.000_370.000.wav 360.000 370.000 Train
1252
+ Mfkif49LLc4_30.000_40.000.wav 30.000 40.000 Train
1253
+ MwSbYICrYj8_290.000_300.000.wav 290.000 300.000 Train
1254
+ PJUy17bXlhc_40.000_50.000.wav 40.000 50.000 Train
1255
+ QDTbchu0LrU_30.000_40.000.wav 30.000 40.000 Train
1256
+ QZJ5WAYIUh8_70.000_80.000.wav 70.000 80.000 Train
1257
+ QrAoRSA13bM_30.000_40.000.wav 30.000 40.000 Train
1258
+ RN-_agT8_Cg_0.000_10.000.wav 0.000 10.000 Train
1259
+ R_Lpb-51Kl4_30.000_40.000.wav 30.000 40.000 Train
1260
+ Rhvy7V4F95Q_40.000_50.000.wav 40.000 50.000 Train
1261
+ Rq-22Cycrpg_30.000_40.000.wav 30.000 40.000 Train
1262
+ RrlgSfQrqQc_20.000_30.000.wav 20.000 30.000 Train
1263
+ RwBKGPEg6uA_340.000_350.000.wav 340.000 350.000 Train
1264
+ T73runykdnE_25.000_35.000.wav 25.000 35.000 Train
1265
+ T8M6W4yOzI4_30.000_40.000.wav 30.000 40.000 Train
1266
+ Tmm4H6alHCE_30.000_40.000.wav 30.000 40.000 Train
1267
+ TyTORMEourg_270.000_280.000.wav 270.000 280.000 Train
1268
+ UQx0EMXtLZA_60.000_70.000.wav 60.000 70.000 Train
1269
+ UZx7OAgRMRY_90.000_100.000.wav 90.000 100.000 Train
1270
+ UerX5Bv2hcs_70.000_80.000.wav 70.000 80.000 Train
1271
+ UxSUGCvpskM_340.000_350.000.wav 340.000 350.000 Train
1272
+ V2hln47cP78_130.000_140.000.wav 130.000 140.000 Train
1273
+ VIe_Qkg5RJI_130.000_140.000.wav 130.000 140.000 Train
1274
+ WDn851XbWTk_30.000_40.000.wav 30.000 40.000 Train
1275
+ WFdpQCtpBB4_30.000_40.000.wav 30.000 40.000 Train
1276
+ XAUtk9lwzU8_30.000_40.000.wav 30.000 40.000 Train
1277
+ XDTlBb3aYqo_30.000_40.000.wav 30.000 40.000 Train
1278
+ XKvLkIM8dck_40.000_50.000.wav 40.000 50.000 Train
1279
+ XQbeLJYzY9k_90.000_100.000.wav 90.000 100.000 Train
1280
+ XW8pSKLyr0o_20.000_30.000.wav 20.000 30.000 Train
1281
+ XeYiNanFS_M_120.000_130.000.wav 120.000 130.000 Train
1282
+ Y10I9JSvJuQ_30.000_40.000.wav 30.000 40.000 Train
1283
+ YDGf-razgyU_250.000_260.000.wav 250.000 260.000 Train
1284
+ YFD1Qrlskrg_60.000_70.000.wav 60.000 70.000 Train
1285
+ Y_jwEflLthg_190.000_200.000.wav 190.000 200.000 Train
1286
+ Y_ynIwm3qm0_370.000_380.000.wav 370.000 380.000 Train
1287
+ Zy0goYEHPHU_30.000_40.000.wav 30.000 40.000 Train
1288
+ _dkeW6lqmq4_30.000_40.000.wav 30.000 40.000 Train
1289
+ aNO2KEXBCOk_30.000_40.000.wav 30.000 40.000 Train
1290
+ aXsUHAKbyLs_30.000_40.000.wav 30.000 40.000 Train
1291
+ ahct5yzUtdE_20.000_30.000.wav 20.000 30.000 Train
1292
+ arevYmB0qGg_30.000_40.000.wav 30.000 40.000 Train
1293
+ bCGtzspNbNo_30.000_40.000.wav 30.000 40.000 Train
1294
+ bI6wPI9kAm8_70.000_80.000.wav 70.000 80.000 Train
1295
+ bpdCMWWiB_0_30.000_40.000.wav 30.000 40.000 Train
1296
+ cdrjKqyDrak_420.000_430.000.wav 420.000 430.000 Train
1297
+ d1o334I5X_k_30.000_40.000.wav 30.000 40.000 Train
1298
+ dSzZWgbJ378_30.000_40.000.wav 30.000 40.000 Train
1299
+ eRclX9l0F_c_150.000_160.000.wav 150.000 160.000 Train
1300
+ fOVsAMJ3Yms_30.000_40.000.wav 30.000 40.000 Train
1301
+ fWVfi9pAh_4_10.000_20.000.wav 10.000 20.000 Train
1302
+ fztkF47lVQg_0.000_10.000.wav 0.000 10.000 Train
1303
+ g0ICxHjC9Uc_30.000_40.000.wav 30.000 40.000 Train
1304
+ g2scd3YVgwQ_30.000_40.000.wav 30.000 40.000 Train
1305
+ g4cA-ifQc70_30.000_40.000.wav 30.000 40.000 Train
1306
+ g9JVq7wfDIo_30.000_40.000.wav 30.000 40.000 Train
1307
+ gKMpowHeyKc_30.000_40.000.wav 30.000 40.000 Train
1308
+ gTFCK9TuLOQ_30.000_40.000.wav 30.000 40.000 Train
1309
+ gU0mD2fSh4c_500.000_510.000.wav 500.000 510.000 Train
1310
+ gkH_Zxasn8o_40.000_50.000.wav 40.000 50.000 Train
1311
+ gvnM4kK4r70_10.000_20.000.wav 10.000 20.000 Train
1312
+ hH_M56EnnDk_30.000_40.000.wav 30.000 40.000 Train
1313
+ hVvtTC9AmNs_30.000_40.000.wav 30.000 40.000 Train
1314
+ hYqzr_rIIAw_30.000_40.000.wav 30.000 40.000 Train
1315
+ hdYQzH2E-e4_310.000_320.000.wav 310.000 320.000 Train
1316
+ iZgzRfa-xPQ_30.000_40.000.wav 30.000 40.000 Train
1317
+ j9Z63H5hvrQ_0.000_10.000.wav 0.000 10.000 Train
1318
+ jbW2ew8VMfU_50.000_60.000.wav 50.000 60.000 Train
1319
+ jlz7r-NSUuA_50.000_60.000.wav 50.000 60.000 Train
1320
+ k0vRZm7ZnQk_280.000_290.000.wav 280.000 290.000 Train
1321
+ k8H8rn4NaSM_0.000_10.000.wav 0.000 10.000 Train
1322
+ kbfkq3TuAe0_470.000_480.000.wav 470.000 480.000 Train
1323
+ lf1Sblrda3A_560.000_570.000.wav 560.000 570.000 Train
1324
+ m4DS9-5Gkds_30.000_40.000.wav 30.000 40.000 Train
1325
+ m5HeCy87QYY_380.000_390.000.wav 380.000 390.000 Train
1326
+ nKM4MUAsVzg_100.000_110.000.wav 100.000 110.000 Train
1327
+ nY1gcEMzsWI_10.000_20.000.wav 10.000 20.000 Train
1328
+ nfY_zkJceDw_30.000_40.000.wav 30.000 40.000 Train
1329
+ oogrnx-_LBA_60.000_70.000.wav 60.000 70.000 Train
1330
+ pW5SI1ZKUpA_30.000_40.000.wav 30.000 40.000 Train
1331
+ pbOZLMrJy0A_0.000_10.000.wav 0.000 10.000 Train
1332
+ pxmrmtEnROk_30.000_40.000.wav 30.000 40.000 Train
1333
+ q7zzKHFWGkg_30.000_40.000.wav 30.000 40.000 Train
1334
+ qu8vVFWKszA_30.000_40.000.wav 30.000 40.000 Train
1335
+ r6mHSfFkY_8_30.000_40.000.wav 30.000 40.000 Train
1336
+ rNNPQ9DD4no_30.000_40.000.wav 30.000 40.000 Train
1337
+ rSrBDAgLUoI_460.000_470.000.wav 460.000 470.000 Train
1338
+ stdjjG6Y5IU_30.000_40.000.wav 30.000 40.000 Train
1339
+ t_lFhyZaZR0_150.000_160.000.wav 150.000 160.000 Train
1340
+ txXSE7kgrc8_30.000_40.000.wav 30.000 40.000 Train
1341
+ uZfsEDo3elY_20.000_30.000.wav 20.000 30.000 Train
1342
+ umcnfA9veOw_160.000_170.000.wav 160.000 170.000 Train
1343
+ uysTr0SfhLI_10.000_20.000.wav 10.000 20.000 Train
1344
+ wM9wNgY8d4g_150.000_160.000.wav 150.000 160.000 Train
1345
+ xabrKa79prM_30.000_40.000.wav 30.000 40.000 Train
1346
+ xshKOSEF_6o_0.000_10.000.wav 0.000 10.000 Train
1347
+ yBVxtq9k8Sg_0.000_10.000.wav 0.000 10.000 Train
1348
+ yH1r2Bblluw_240.000_250.000.wav 240.000 250.000 Train
1349
+ yywGJu6jp8U_30.000_40.000.wav 30.000 40.000 Train
1350
+ z5uKFGeTtNg_30.000_40.000.wav 30.000 40.000 Train
audio_detection/audio_infer/metadata/black_list/groundtruth_weak_label_testing_set.csv ADDED
@@ -0,0 +1,606 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -5QrBL6MzLg_60.000_70.000.wav 60.000 70.000 Train horn
2
+ -E0shPRxAbo_30.000_40.000.wav 30.000 40.000 Train horn
3
+ -GCwoyCnYsY_0.000_10.000.wav 0.000 10.000 Train horn
4
+ -Gbohom8C4Q_30.000_40.000.wav 30.000 40.000 Train horn
5
+ -Qfk_Q2ctBs_30.000_40.000.wav 30.000 40.000 Train horn
6
+ -Wd1pV7UjWg_60.000_70.000.wav 60.000 70.000 Train horn
7
+ -Zq22n4OewA_30.000_40.000.wav 30.000 40.000 Train horn
8
+ -jj2tyuf6-A_80.000_90.000.wav 80.000 90.000 Train horn
9
+ -nGBPqlRNg4_30.000_40.000.wav 30.000 40.000 Train horn
10
+ -u9BxBNcrw4_30.000_40.000.wav 30.000 40.000 Train horn
11
+ -zqW9xCZd80_260.000_270.000.wav 260.000 270.000 Train horn
12
+ 02w3vd_GgF0_390.000_400.000.wav 390.000 400.000 Train horn
13
+ 0HqeYIREv8M_30.000_40.000.wav 30.000 40.000 Train horn
14
+ 0IpYF91Fdt0_80.000_90.000.wav 80.000 90.000 Train horn
15
+ 0NaZejdABG0_90.000_100.000.wav 90.000 100.000 Train horn
16
+ 0RurXUfKyow_4.000_14.000.wav 4.000 14.000 Train horn
17
+ 0_HnD-rW3lI_170.000_180.000.wav 170.000 180.000 Train horn
18
+ 10i60V1RZkQ_210.000_220.000.wav 210.000 220.000 Train horn
19
+ 1FJY5X1iY9I_170.000_180.000.wav 170.000 180.000 Train horn
20
+ 1S5WKCcf-wU_40.000_50.000.wav 40.000 50.000 Train horn
21
+ 1U0Ty6CW6AM_40.000_50.000.wav 40.000 50.000 Train horn
22
+ 1hQLr88iCvg_30.000_40.000.wav 30.000 40.000 Train horn
23
+ 1iUXERALOOs_190.000_200.000.wav 190.000 200.000 Train horn
24
+ 1iWFlLpixKU_5.000_15.000.wav 5.000 15.000 Train horn
25
+ 1oJAVJPX0YY_20.000_30.000.wav 20.000 30.000 Train horn
26
+ 26dNsDuIt9Q_340.000_350.000.wav 340.000 350.000 Train horn
27
+ 2BMHsKLcb7E_90.000_100.000.wav 90.000 100.000 Train horn
28
+ 2RpOd9MJjyQ_10.000_20.000.wav 10.000 20.000 Train horn
29
+ 2U4wSdl10to_200.000_210.000.wav 200.000 210.000 Train horn
30
+ 2aBV6AZt5nk_570.000_580.000.wav 570.000 580.000 Train horn
31
+ -8baTnilyjs_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
32
+ -Gbohom8C4Q_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
33
+ -jG26jT3fP8_230.000_240.000.wav 230.000 240.000 Air horn, truck horn
34
+ -jj2tyuf6-A_80.000_90.000.wav 80.000 90.000 Air horn, truck horn
35
+ -v7cUxke-f4_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
36
+ -yeWlsEpcpA_15.000_25.000.wav 15.000 25.000 Air horn, truck horn
37
+ 04KOunVOkSA_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
38
+ 08y2LHhxmsM_400.000_410.000.wav 400.000 410.000 Air horn, truck horn
39
+ 0G73yqtBwgE_11.000_21.000.wav 11.000 21.000 Air horn, truck horn
40
+ 0UPY7ws-VFs_10.000_20.000.wav 10.000 20.000 Air horn, truck horn
41
+ 0euD32aKYUs_10.000_20.000.wav 10.000 20.000 Air horn, truck horn
42
+ 1T1i2rny8RU_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
43
+ 1iRgwn7p0DA_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
44
+ 1myTsHAIvYc_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
45
+ 1z0XoG6GEv4_420.000_430.000.wav 420.000 430.000 Air horn, truck horn
46
+ 26dNsDuIt9Q_340.000_350.000.wav 340.000 350.000 Air horn, truck horn
47
+ 2KmSuPb9gwA_24.000_34.000.wav 24.000 34.000 Air horn, truck horn
48
+ 2Vy5NCEkg2I_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
49
+ 2ZciT0XrifM_0.000_8.000.wav 0.000 8.000 Air horn, truck horn
50
+ 2jOzX06bzuA_16.000_26.000.wav 16.000 26.000 Air horn, truck horn
51
+ 35EOmSMTQ6I_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
52
+ 3YaLkgUMhAA_110.000_120.000.wav 110.000 120.000 Air horn, truck horn
53
+ 3ntFslTK6hM_90.000_100.000.wav 90.000 100.000 Air horn, truck horn
54
+ 3rGOv4evODE_20.000_30.000.wav 20.000 30.000 Air horn, truck horn
55
+ 42U7xIucU68_20.000_30.000.wav 20.000 30.000 Air horn, truck horn
56
+ 46r7mO2k6zY_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
57
+ 4EBnb2DN3Yg_13.000_23.000.wav 13.000 23.000 Air horn, truck horn
58
+ 4NTjS5pFfSc_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
59
+ 4bvfOnX7BIE_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
60
+ 4l78f9VZ9uE_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
61
+ -ajCLjpfGKI_83.000_93.000.wav 83.000 93.000 Car alarm
62
+ -hLSc9aPOms_13.000_23.000.wav 13.000 23.000 Car alarm
63
+ -rgDWfvxxqw_30.000_40.000.wav 30.000 40.000 Car alarm
64
+ 0C3kqtF76t8_50.000_60.000.wav 50.000 60.000 Car alarm
65
+ 0Hz4R_m0hmI_80.000_90.000.wav 80.000 90.000 Car alarm
66
+ 0ZPafgZftWk_80.000_90.000.wav 80.000 90.000 Car alarm
67
+ 0npLQ4LzD0c_40.000_50.000.wav 40.000 50.000 Car alarm
68
+ 17VuPl9Wxvs_20.000_30.000.wav 20.000 30.000 Car alarm
69
+ 3HxQ83IMyw4_70.000_80.000.wav 70.000 80.000 Car alarm
70
+ 3z05luLEc_Q_0.000_10.000.wav 0.000 10.000 Car alarm
71
+ 4A1Ar1TIXIY_30.000_40.000.wav 30.000 40.000 Car alarm
72
+ 4Kpklmj-ze0_53.000_63.000.wav 53.000 63.000 Car alarm
73
+ 4h01lBkTVQY_18.000_28.000.wav 18.000 28.000 Car alarm
74
+ 5-SzZotiaBU_30.000_40.000.wav 30.000 40.000 Car alarm
75
+ 54PbkldEp9M_30.000_40.000.wav 30.000 40.000 Car alarm
76
+ 5P6YYsMaIH4_30.000_40.000.wav 30.000 40.000 Car alarm
77
+ 5tzTahLHylw_70.000_80.000.wav 70.000 80.000 Car alarm
78
+ 7DC3HtNi4fU_160.000_170.000.wav 160.000 170.000 Car alarm
79
+ 7NJ5TbNEIvA_250.000_260.000.wav 250.000 260.000 Car alarm
80
+ 7NZ0kMj2HSI_54.000_64.000.wav 54.000 64.000 Car alarm
81
+ 7RQpt1_1ZzU_30.000_40.000.wav 30.000 40.000 Car alarm
82
+ 7ee54nr6jG8_30.000_40.000.wav 30.000 40.000 Car alarm
83
+ 8OajsyPSNt8_40.000_50.000.wav 40.000 50.000 Car alarm
84
+ 9fCibkUT_gQ_30.000_40.000.wav 30.000 40.000 Car alarm
85
+ 9fzeD7CeI7Y_110.000_120.000.wav 110.000 120.000 Car alarm
86
+ 9jYv9WuyknA_130.000_140.000.wav 130.000 140.000 Car alarm
87
+ A-GNszKtjJc_93.000_103.000.wav 93.000 103.000 Car alarm
88
+ A437a4Y_xag_230.000_240.000.wav 230.000 240.000 Car alarm
89
+ APMPW2YI-Zk_20.000_30.000.wav 20.000 30.000 Car alarm
90
+ AR-KmtlXg4Y_70.000_80.000.wav 70.000 80.000 Car alarm
91
+ -60XojQWWoc_30.000_40.000.wav 30.000 40.000 Reversing beeps
92
+ -6d-zxMvC5E_30.000_40.000.wav 30.000 40.000 Reversing beeps
93
+ -6qSMlbJJ58_30.000_40.000.wav 30.000 40.000 Reversing beeps
94
+ -8OITuFZha8_30.000_40.000.wav 30.000 40.000 Reversing beeps
95
+ -8n2NqDFRko_30.000_40.000.wav 30.000 40.000 Reversing beeps
96
+ -AIrHVeCgtM_30.000_40.000.wav 30.000 40.000 Reversing beeps
97
+ -AVzYvKHwPg_30.000_40.000.wav 30.000 40.000 Reversing beeps
98
+ -AXDeY-N2_M_30.000_40.000.wav 30.000 40.000 Reversing beeps
99
+ -B1uzsLG0Dk_30.000_40.000.wav 30.000 40.000 Reversing beeps
100
+ -BM_EAszxBg_30.000_40.000.wav 30.000 40.000 Reversing beeps
101
+ -Em3OpyaefM_30.000_40.000.wav 30.000 40.000 Reversing beeps
102
+ -FWkB2IDMhc_30.000_40.000.wav 30.000 40.000 Reversing beeps
103
+ -SP7KWmTRUU_30.000_40.000.wav 30.000 40.000 Reversing beeps
104
+ -h4or05bj_I_30.000_40.000.wav 30.000 40.000 Reversing beeps
105
+ -oV6dQu5tZo_30.000_40.000.wav 30.000 40.000 Reversing beeps
106
+ -r8mfjRiHrU_30.000_40.000.wav 30.000 40.000 Reversing beeps
107
+ -s9kwrRilOY_30.000_40.000.wav 30.000 40.000 Reversing beeps
108
+ -uMiGr6xvRA_30.000_40.000.wav 30.000 40.000 Reversing beeps
109
+ -x70B12Mb-8_30.000_40.000.wav 30.000 40.000 Reversing beeps
110
+ -xYsfYZOI-Y_30.000_40.000.wav 30.000 40.000 Reversing beeps
111
+ -zxrdL6MlKI_30.000_40.000.wav 30.000 40.000 Reversing beeps
112
+ 03xMfqt4fZI_24.000_34.000.wav 24.000 34.000 Reversing beeps
113
+ 0E4AqW9dmdk_30.000_40.000.wav 30.000 40.000 Reversing beeps
114
+ 0FQo-2xRJ0E_30.000_40.000.wav 30.000 40.000 Reversing beeps
115
+ 0HmiH-wKLB4_30.000_40.000.wav 30.000 40.000 Reversing beeps
116
+ 0KskqFt3DoY_15.000_25.000.wav 15.000 25.000 Reversing beeps
117
+ 0OiPtV9sd_w_30.000_40.000.wav 30.000 40.000 Reversing beeps
118
+ 0P-YGHC5cBU_30.000_40.000.wav 30.000 40.000 Reversing beeps
119
+ 0QKet-tdquc_30.000_40.000.wav 30.000 40.000 Reversing beeps
120
+ 0VnoYVqd-yo_30.000_40.000.wav 30.000 40.000 Reversing beeps
121
+ -5px8DVPl8A_28.000_38.000.wav 28.000 38.000 Bicycle
122
+ -D08wyQwDPQ_10.000_20.000.wav 10.000 20.000 Bicycle
123
+ -F1_Gh78vJ0_30.000_40.000.wav 30.000 40.000 Bicycle
124
+ -FZQIkX44Pk_10.000_20.000.wav 10.000 20.000 Bicycle
125
+ -FsvS99nWTc_30.000_40.000.wav 30.000 40.000 Bicycle
126
+ -Holdef_BZ0_30.000_40.000.wav 30.000 40.000 Bicycle
127
+ -Inn26beF70_30.000_40.000.wav 30.000 40.000 Bicycle
128
+ -Jq9HNSs_ns_14.000_24.000.wav 14.000 24.000 Bicycle
129
+ -KlN_AXMM0Q_30.000_40.000.wav 30.000 40.000 Bicycle
130
+ -NCcqKWiGus_30.000_40.000.wav 30.000 40.000 Bicycle
131
+ -NNC_TqWfGw_30.000_40.000.wav 30.000 40.000 Bicycle
132
+ -OGFiXvmldM_30.000_40.000.wav 30.000 40.000 Bicycle
133
+ -RFpDUZhN-g_13.000_23.000.wav 13.000 23.000 Bicycle
134
+ -XUfeRTw3b4_0.000_6.000.wav 0.000 6.000 Bicycle
135
+ -XoATxJ-Qcg_30.000_40.000.wav 30.000 40.000 Bicycle
136
+ -bFNxvFwDts_470.000_480.000.wav 470.000 480.000 Bicycle
137
+ -e5PokL6Cyo_30.000_40.000.wav 30.000 40.000 Bicycle
138
+ -fNyOf9zIU0_30.000_40.000.wav 30.000 40.000 Bicycle
139
+ -fhpkRyZL90_30.000_40.000.wav 30.000 40.000 Bicycle
140
+ -fo3m0hiZbg_30.000_40.000.wav 30.000 40.000 Bicycle
141
+ -ikJkNwcmkA_27.000_37.000.wav 27.000 37.000 Bicycle
142
+ -k2nMcxAjWE_30.000_40.000.wav 30.000 40.000 Bicycle
143
+ -k80ibA-fyw_30.000_40.000.wav 30.000 40.000 Bicycle
144
+ -lBcEVa_NKw_30.000_40.000.wav 30.000 40.000 Bicycle
145
+ -mQyAYU_Bd4_50.000_60.000.wav 50.000 60.000 Bicycle
146
+ -ngrinYHF4c_30.000_40.000.wav 30.000 40.000 Bicycle
147
+ -nqm_RJ2xj8_40.000_50.000.wav 40.000 50.000 Bicycle
148
+ -oAw5iTeT1g_40.000_50.000.wav 40.000 50.000 Bicycle
149
+ -p2EMzpTE38_4.000_14.000.wav 4.000 14.000 Bicycle
150
+ -qmfWP_yzn4_30.000_40.000.wav 30.000 40.000 Bicycle
151
+ -0DIFwkUpjQ_50.000_60.000.wav 50.000 60.000 Skateboard
152
+ -53qltVyjpc_180.000_190.000.wav 180.000 190.000 Skateboard
153
+ -5y4jb9eUWs_110.000_120.000.wav 110.000 120.000 Skateboard
154
+ -81kolkG8M0_0.000_8.000.wav 0.000 8.000 Skateboard
155
+ -9dwTSq6JZg_70.000_80.000.wav 70.000 80.000 Skateboard
156
+ -9oKZsjjf_0_20.000_30.000.wav 20.000 30.000 Skateboard
157
+ -AFGfu5zOzQ_30.000_40.000.wav 30.000 40.000 Skateboard
158
+ -DHGwygUsQc_30.000_40.000.wav 30.000 40.000 Skateboard
159
+ -DkuTmIs7_Q_30.000_40.000.wav 30.000 40.000 Skateboard
160
+ -E1E17R7UBA_260.000_270.000.wav 260.000 270.000 Skateboard
161
+ -E1aIXhB4YU_30.000_40.000.wav 30.000 40.000 Skateboard
162
+ -McJLXNN3-o_50.000_60.000.wav 50.000 60.000 Skateboard
163
+ -N7nQ4CXGsY_170.000_180.000.wav 170.000 180.000 Skateboard
164
+ -O5vrHFRzcY_30.000_40.000.wav 30.000 40.000 Skateboard
165
+ -Plh9jAN_Eo_0.000_2.000.wav 0.000 2.000 Skateboard
166
+ -Qd_dXTbgK0_30.000_40.000.wav 30.000 40.000 Skateboard
167
+ -aVZ-H92M_s_0.000_4.000.wav 0.000 4.000 Skateboard
168
+ -cd-Zn8qFxU_90.000_100.000.wav 90.000 100.000 Skateboard
169
+ -esP4loyvjM_60.000_70.000.wav 60.000 70.000 Skateboard
170
+ -iB3a71aPew_30.000_40.000.wav 30.000 40.000 Skateboard
171
+ -lZapwtvwlg_0.000_10.000.wav 0.000 10.000 Skateboard
172
+ -mxMaMJCXL8_180.000_190.000.wav 180.000 190.000 Skateboard
173
+ -nYGTw9Sypg_20.000_30.000.wav 20.000 30.000 Skateboard
174
+ -oS19KshdlM_30.000_40.000.wav 30.000 40.000 Skateboard
175
+ -s6uxc77NWo_40.000_50.000.wav 40.000 50.000 Skateboard
176
+ -sCrXS2kJlA_30.000_40.000.wav 30.000 40.000 Skateboard
177
+ -saCvPTdQ7s_30.000_40.000.wav 30.000 40.000 Skateboard
178
+ -sb-knLiDic_20.000_30.000.wav 20.000 30.000 Skateboard
179
+ -tSwRvqaKWg_90.000_100.000.wav 90.000 100.000 Skateboard
180
+ -x_jV34hVq4_30.000_40.000.wav 30.000 40.000 Skateboard
181
+ --ljM2Kojag_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
182
+ -4F1TX-T6T4_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
183
+ -7HVWUwyMig_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
184
+ -9pUUT-6o8U_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
185
+ -Ei2LE71Dfg_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
186
+ -LGTb-xyjzA_11.000_21.000.wav 11.000 21.000 Ambulance (siren)
187
+ -Y1qiiugnk8_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
188
+ -YsrLG2K1TE_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
189
+ -ZeMV790MXE_10.000_20.000.wav 10.000 20.000 Ambulance (siren)
190
+ -d-T8Y9-TOg_17.000_27.000.wav 17.000 27.000 Ambulance (siren)
191
+ -dcrL5JLmvo_11.000_21.000.wav 11.000 21.000 Ambulance (siren)
192
+ -fCSO8SVWZU_6.000_16.000.wav 6.000 16.000 Ambulance (siren)
193
+ -fGFQTGd2nA_10.000_20.000.wav 10.000 20.000 Ambulance (siren)
194
+ -hA1yMrEXz0_10.000_20.000.wav 10.000 20.000 Ambulance (siren)
195
+ -jnQgpHubNI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
196
+ -k6p9n9y22Q_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
197
+ -kr4SUjnm88_29.000_39.000.wav 29.000 39.000 Ambulance (siren)
198
+ -lyPnABQhCI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
199
+ -od8LQAVgno_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
200
+ -pVEgzu95Nc_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
201
+ -w-9yF465IY_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
202
+ -woquFRnQk8_16.000_26.000.wav 16.000 26.000 Ambulance (siren)
203
+ -xz75wUCln8_50.000_60.000.wav 50.000 60.000 Ambulance (siren)
204
+ -yGElLHdkEI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
205
+ -yPSgCn9AWo_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
206
+ -z8jsgl3iHE_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
207
+ 00H_s-krtg8_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
208
+ 02u3P99INjs_8.000_18.000.wav 8.000 18.000 Ambulance (siren)
209
+ 06RreMb5qbE_0.000_10.000.wav 0.000 10.000 Ambulance (siren)
210
+ 0EPK7Pv_lbE_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
211
+ -0Eem_FuIto_15.000_25.000.wav 15.000 25.000 Fire engine, fire truck (siren)
212
+ -2sT5oBBWWY_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
213
+ -45cKZA7Jww_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
214
+ -4B435WQvag_20.000_30.000.wav 20.000 30.000 Fire engine, fire truck (siren)
215
+ -6qhtwdfGOA_23.000_33.000.wav 23.000 33.000 Fire engine, fire truck (siren)
216
+ -8uyNBFbdFc_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
217
+ -Jsu4dbuO4A_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
218
+ -KsPTvgJJVE_350.000_360.000.wav 350.000 360.000 Fire engine, fire truck (siren)
219
+ -PRrNx6_MD0_16.000_26.000.wav 16.000 26.000 Fire engine, fire truck (siren)
220
+ -QBo1W2w8II_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
221
+ -QX-ddNtUvE_24.000_34.000.wav 24.000 34.000 Fire engine, fire truck (siren)
222
+ -RlUu1el2G4_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
223
+ -SkO97C81Ms_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
224
+ -T8QHPXfIC4_13.000_23.000.wav 13.000 23.000 Fire engine, fire truck (siren)
225
+ -USiTjZoh88_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
226
+ -X0vNLwH1C0_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
227
+ -Z3ByS_RCwI_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
228
+ -ZtZOcg3s7M_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
229
+ -cOjJ0Nvtlw_23.000_33.000.wav 23.000 33.000 Fire engine, fire truck (siren)
230
+ -cbYvBBXE6A_12.000_22.000.wav 12.000 22.000 Fire engine, fire truck (siren)
231
+ -eYUCWGQ_wU_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
232
+ -hA1yMrEXz0_10.000_20.000.wav 10.000 20.000 Fire engine, fire truck (siren)
233
+ -hplTh4SGvs_90.000_100.000.wav 90.000 100.000 Fire engine, fire truck (siren)
234
+ -nPhg6Eu4b4_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
235
+ -oCvKmNbhl0_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
236
+ -oEGuMg8hT4_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
237
+ -pvaJ4DwtRg_3.000_13.000.wav 3.000 13.000 Fire engine, fire truck (siren)
238
+ -qKRKDTbt4c_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
239
+ -sJn3uUxpH8_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
240
+ -sfn1NDHWJI_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
241
+ -09rxiqNNEs_30.000_40.000.wav 30.000 40.000 Civil defense siren
242
+ -3qh-WFUV2U_30.000_40.000.wav 30.000 40.000 Civil defense siren
243
+ -4JG_Ag99hY_30.000_40.000.wav 30.000 40.000 Civil defense siren
244
+ -60NmEaP0is_0.000_10.000.wav 0.000 10.000 Civil defense siren
245
+ -6cTEqIcics_30.000_40.000.wav 30.000 40.000 Civil defense siren
246
+ -6iVBmb5PZU_40.000_50.000.wav 40.000 50.000 Civil defense siren
247
+ -6qp8NjWffE_30.000_40.000.wav 30.000 40.000 Civil defense siren
248
+ -75iY1j3MeY_30.000_40.000.wav 30.000 40.000 Civil defense siren
249
+ -E3Yju3lrRo_30.000_40.000.wav 30.000 40.000 Civil defense siren
250
+ -FHSBdx5A3g_40.000_50.000.wav 40.000 50.000 Civil defense siren
251
+ -JhSzxTdcwY_30.000_40.000.wav 30.000 40.000 Civil defense siren
252
+ -OtNDK_Hxp8_30.000_40.000.wav 30.000 40.000 Civil defense siren
253
+ -S3_I0RiG3g_30.000_40.000.wav 30.000 40.000 Civil defense siren
254
+ -YMXgDKKAwU_30.000_40.000.wav 30.000 40.000 Civil defense siren
255
+ -c7XoYM-SSY_30.000_40.000.wav 30.000 40.000 Civil defense siren
256
+ -j8EeIX9ynk_30.000_40.000.wav 30.000 40.000 Civil defense siren
257
+ -t478yabOQw_30.000_40.000.wav 30.000 40.000 Civil defense siren
258
+ -uIyMR9luvg_30.000_40.000.wav 30.000 40.000 Civil defense siren
259
+ -wgP6ua-t4k_40.000_50.000.wav 40.000 50.000 Civil defense siren
260
+ -zGAb18JxmI_30.000_40.000.wav 30.000 40.000 Civil defense siren
261
+ 03NLMEMi8-I_30.000_40.000.wav 30.000 40.000 Civil defense siren
262
+ 0552YhBdeXo_30.000_40.000.wav 30.000 40.000 Civil defense siren
263
+ 06TM6z3NvuY_30.000_40.000.wav 30.000 40.000 Civil defense siren
264
+ 0CUi0oGUzjU_30.000_40.000.wav 30.000 40.000 Civil defense siren
265
+ 0GpUFFJNFH8_30.000_40.000.wav 30.000 40.000 Civil defense siren
266
+ 0H_WUo2srs0_30.000_40.000.wav 30.000 40.000 Civil defense siren
267
+ 0HvYkBXQ44A_30.000_40.000.wav 30.000 40.000 Civil defense siren
268
+ 0I6Mlp27_gM_30.000_40.000.wav 30.000 40.000 Civil defense siren
269
+ 0JKcTVpby0I_30.000_40.000.wav 30.000 40.000 Civil defense siren
270
+ 0PhU-PIsUMw_40.000_50.000.wav 40.000 50.000 Civil defense siren
271
+ -122tCXtFhU_30.000_40.000.wav 30.000 40.000 Police car (siren)
272
+ -1U98XBTyB4_30.000_40.000.wav 30.000 40.000 Police car (siren)
273
+ -2GlU3e0nTU_170.000_180.000.wav 170.000 180.000 Police car (siren)
274
+ -6WqJCSmkCw_70.000_80.000.wav 70.000 80.000 Police car (siren)
275
+ -AF7wp3ezww_140.000_150.000.wav 140.000 150.000 Police car (siren)
276
+ -AFASmp1fpk_6.000_16.000.wav 6.000 16.000 Police car (siren)
277
+ -F2lk9A8B8M_30.000_40.000.wav 30.000 40.000 Police car (siren)
278
+ -GPv09qi9A8_120.000_130.000.wav 120.000 130.000 Police car (siren)
279
+ -Hi-WpRGUpc_9.000_19.000.wav 9.000 19.000 Police car (siren)
280
+ -KsPTvgJJVE_350.000_360.000.wav 350.000 360.000 Police car (siren)
281
+ -MfBpxtGQmE_20.000_30.000.wav 20.000 30.000 Police car (siren)
282
+ -Pg4vVPs4bE_30.000_40.000.wav 30.000 40.000 Police car (siren)
283
+ -UCf_-3yzWU_290.000_300.000.wav 290.000 300.000 Police car (siren)
284
+ -VULyMtKazE_0.000_7.000.wav 0.000 7.000 Police car (siren)
285
+ -XRiLbb3Syo_2.000_12.000.wav 2.000 12.000 Police car (siren)
286
+ -XrpzGb6xCU_190.000_200.000.wav 190.000 200.000 Police car (siren)
287
+ -YsrLG2K1TE_30.000_40.000.wav 30.000 40.000 Police car (siren)
288
+ -ZtZOcg3s7M_30.000_40.000.wav 30.000 40.000 Police car (siren)
289
+ -_8fdnv6Crg_30.000_40.000.wav 30.000 40.000 Police car (siren)
290
+ -az6BooRLxw_40.000_50.000.wav 40.000 50.000 Police car (siren)
291
+ -bs3c27rEtc_30.000_40.000.wav 30.000 40.000 Police car (siren)
292
+ -dBTGdL4RFs_30.000_40.000.wav 30.000 40.000 Police car (siren)
293
+ -gKNRXbpAKs_30.000_40.000.wav 30.000 40.000 Police car (siren)
294
+ -hA1yMrEXz0_10.000_20.000.wav 10.000 20.000 Police car (siren)
295
+ -haSUR_IUto_30.000_40.000.wav 30.000 40.000 Police car (siren)
296
+ -l-DEfDAvNA_30.000_40.000.wav 30.000 40.000 Police car (siren)
297
+ -lWs7_49gss_30.000_40.000.wav 30.000 40.000 Police car (siren)
298
+ -lhnhB4rbGw_3.000_13.000.wav 3.000 13.000 Police car (siren)
299
+ -rkJeBBmiTQ_60.000_70.000.wav 60.000 70.000 Police car (siren)
300
+ -rs7FPxzc6w_8.000_18.000.wav 8.000 18.000 Police car (siren)
301
+ -20uudT97E0_30.000_40.000.wav 30.000 40.000 Screaming
302
+ -3bGlOhRkAo_140.000_150.000.wav 140.000 150.000 Screaming
303
+ -4pUrlMafww_1.000_11.000.wav 1.000 11.000 Screaming
304
+ -7R0ybQQAHg_60.000_70.000.wav 60.000 70.000 Screaming
305
+ -7gojlG6bE4_30.000_40.000.wav 30.000 40.000 Screaming
306
+ -GI5PbO6j50_30.000_40.000.wav 30.000 40.000 Screaming
307
+ -MuIRudOtxw_30.000_40.000.wav 30.000 40.000 Screaming
308
+ -WfQBr42ymw_30.000_40.000.wav 30.000 40.000 Screaming
309
+ -YOjIgYspsY_30.000_40.000.wav 30.000 40.000 Screaming
310
+ -g_AcRVFfXU_30.000_40.000.wav 30.000 40.000 Screaming
311
+ -gb5uvwsRpI_30.000_40.000.wav 30.000 40.000 Screaming
312
+ -iAwqlQ3TEk_0.000_3.000.wav 0.000 3.000 Screaming
313
+ -nJoxcmxz5g_30.000_40.000.wav 30.000 40.000 Screaming
314
+ -pwgypWE-J8_30.000_40.000.wav 30.000 40.000 Screaming
315
+ -pzasCR0kpc_30.000_40.000.wav 30.000 40.000 Screaming
316
+ -sUgHKZQKYc_30.000_40.000.wav 30.000 40.000 Screaming
317
+ -uazzQEmQ7c_0.000_10.000.wav 0.000 10.000 Screaming
318
+ -vHJU1wDRsY_30.000_40.000.wav 30.000 40.000 Screaming
319
+ 0-RnTXpp8Q0_30.000_40.000.wav 30.000 40.000 Screaming
320
+ 09YQukdYVI4_30.000_40.000.wav 30.000 40.000 Screaming
321
+ 0Ees8KFCUXM_30.000_40.000.wav 30.000 40.000 Screaming
322
+ 0EymGuYWkFk_30.000_40.000.wav 30.000 40.000 Screaming
323
+ 0Nw1OyTsaAo_30.000_40.000.wav 30.000 40.000 Screaming
324
+ 0YnOMAls83g_30.000_40.000.wav 30.000 40.000 Screaming
325
+ 0_gyUQkLCY8_30.000_40.000.wav 30.000 40.000 Screaming
326
+ 0_hnDV2SHBI_7.000_17.000.wav 7.000 17.000 Screaming
327
+ 0cqEaAkbrbI_80.000_90.000.wav 80.000 90.000 Screaming
328
+ 0hC044mDsWA_30.000_40.000.wav 30.000 40.000 Screaming
329
+ 0kQANiakiH0_30.000_40.000.wav 30.000 40.000 Screaming
330
+ 0rVBXpbgO8s_30.000_40.000.wav 30.000 40.000 Screaming
331
+ ---lTs1dxhU_30.000_40.000.wav 30.000 40.000 Car
332
+ --330hg-Ocw_30.000_40.000.wav 30.000 40.000 Car
333
+ --8puiAGLhs_30.000_40.000.wav 30.000 40.000 Car
334
+ --9VR_F7CtY_30.000_40.000.wav 30.000 40.000 Car
335
+ --F70LWypIg_30.000_40.000.wav 30.000 40.000 Car
336
+ --P4wuph3Mc_0.000_8.000.wav 0.000 8.000 Car
337
+ --QvRbvnbUE_30.000_40.000.wav 30.000 40.000 Car
338
+ --SeOZy3Yik_30.000_40.000.wav 30.000 40.000 Car
339
+ --Zz7BgxSUg_30.000_40.000.wav 30.000 40.000 Car
340
+ --e0Vu_ruTc_30.000_40.000.wav 30.000 40.000 Car
341
+ --iFD6IyQW8_30.000_40.000.wav 30.000 40.000 Car
342
+ --jGnLqFsQ4_24.000_34.000.wav 24.000 34.000 Car
343
+ --jc0NAxK8M_30.000_40.000.wav 30.000 40.000 Car
344
+ --v1WjOJv-w_150.000_160.000.wav 150.000 160.000 Car
345
+ --xDffQ9Mwo_30.000_40.000.wav 30.000 40.000 Car
346
+ --yaQA8d1dI_6.000_16.000.wav 6.000 16.000 Car
347
+ --zLzL0sq3M_30.000_40.000.wav 30.000 40.000 Car
348
+ -0-jXXldDOU_10.000_20.000.wav 10.000 20.000 Car
349
+ -03ld83JliM_29.000_39.000.wav 29.000 39.000 Car
350
+ -0B-egfXU7E_30.000_40.000.wav 30.000 40.000 Car
351
+ -0Bkyt8iZ1I_8.000_18.000.wav 8.000 18.000 Car
352
+ -0CIk-OOp7Y_30.000_40.000.wav 30.000 40.000 Car
353
+ -0CRb8H4hzY_4.000_14.000.wav 4.000 14.000 Car
354
+ -0CY5NWBHyY_20.000_30.000.wav 20.000 30.000 Car
355
+ -0HsrVfb5vc_20.000_30.000.wav 20.000 30.000 Car
356
+ -0I89-H0AFo_26.000_36.000.wav 26.000 36.000 Car
357
+ -0P6VDQ1YDs_80.000_90.000.wav 80.000 90.000 Car
358
+ -0PrEsytvc0_30.000_40.000.wav 30.000 40.000 Car
359
+ -0RqnaXZu_E_30.000_40.000.wav 30.000 40.000 Car
360
+ -0Yynyhm1AY_14.000_24.000.wav 14.000 24.000 Car
361
+ ---lTs1dxhU_30.000_40.000.wav 30.000 40.000 Car passing by
362
+ --P4wuph3Mc_0.000_8.000.wav 0.000 8.000 Car passing by
363
+ --xDffQ9Mwo_30.000_40.000.wav 30.000 40.000 Car passing by
364
+ --zLzL0sq3M_30.000_40.000.wav 30.000 40.000 Car passing by
365
+ --zbPxnl27o_20.000_30.000.wav 20.000 30.000 Car passing by
366
+ -0CRb8H4hzY_4.000_14.000.wav 4.000 14.000 Car passing by
367
+ -0MnD7jBvkE_0.000_4.000.wav 0.000 4.000 Car passing by
368
+ -0U3c4PN8sc_30.000_40.000.wav 30.000 40.000 Car passing by
369
+ -0Yynyhm1AY_14.000_24.000.wav 14.000 24.000 Car passing by
370
+ -10fWp7Pqs4_30.000_40.000.wav 30.000 40.000 Car passing by
371
+ -14BFlDzjS4_6.000_16.000.wav 6.000 16.000 Car passing by
372
+ -15nPYi2v1g_30.000_40.000.wav 30.000 40.000 Car passing by
373
+ -19pq3HJoBM_30.000_40.000.wav 30.000 40.000 Car passing by
374
+ -1BrkFLHD74_19.000_29.000.wav 19.000 29.000 Car passing by
375
+ -1HlfoHZCEE_6.000_16.000.wav 6.000 16.000 Car passing by
376
+ -1McjOPUzbo_30.000_40.000.wav 30.000 40.000 Car passing by
377
+ -1sGSNmgiPs_4.000_14.000.wav 4.000 14.000 Car passing by
378
+ -2-luek6dI8_30.000_40.000.wav 30.000 40.000 Car passing by
379
+ -21-RfxQscI_30.000_40.000.wav 30.000 40.000 Car passing by
380
+ -25LkbSjEos_30.000_40.000.wav 30.000 40.000 Car passing by
381
+ -2LJWaL2PuA_30.000_40.000.wav 30.000 40.000 Car passing by
382
+ -2ZbvsBSZmY_2.000_12.000.wav 2.000 12.000 Car passing by
383
+ -2cz2qQDmr4_30.000_40.000.wav 30.000 40.000 Car passing by
384
+ -31KUAOSg5U_5.000_15.000.wav 5.000 15.000 Car passing by
385
+ -35qBdzN9ck_30.000_40.000.wav 30.000 40.000 Car passing by
386
+ -3929cmVE20_30.000_40.000.wav 30.000 40.000 Car passing by
387
+ -3M-k4nIYIM_30.000_40.000.wav 30.000 40.000 Car passing by
388
+ -3MNphBfq_0_30.000_40.000.wav 30.000 40.000 Car passing by
389
+ -3_RSVYKkkk_30.000_40.000.wav 30.000 40.000 Car passing by
390
+ -3exNVlj92w_30.000_40.000.wav 30.000 40.000 Car passing by
391
+ --0w1YA1Hm4_30.000_40.000.wav 30.000 40.000 Bus
392
+ -0_vEaaXndY_11.000_21.000.wav 11.000 21.000 Bus
393
+ -5GcZwBvBdI_30.000_40.000.wav 30.000 40.000 Bus
394
+ -5digoPWn6U_8.000_18.000.wav 8.000 18.000 Bus
395
+ -79l4w4DsYM_30.000_40.000.wav 30.000 40.000 Bus
396
+ -7B4pbkIEas_30.000_40.000.wav 30.000 40.000 Bus
397
+ -8YTu7ZGA2w_30.000_40.000.wav 30.000 40.000 Bus
398
+ -93IM29_8rs_14.000_24.000.wav 14.000 24.000 Bus
399
+ -9GhPxGkpio_26.000_36.000.wav 26.000 36.000 Bus
400
+ -9J9xs7LM9Y_25.000_35.000.wav 25.000 35.000 Bus
401
+ -AY_lZLYJR8_8.000_18.000.wav 8.000 18.000 Bus
402
+ -AdQBgtN_4E_30.000_40.000.wav 30.000 40.000 Bus
403
+ -BxfsWlPUPY_30.000_40.000.wav 30.000 40.000 Bus
404
+ -CgCr8Eknm0_14.000_24.000.wav 14.000 24.000 Bus
405
+ -CnsvTDIXdE_20.000_30.000.wav 20.000 30.000 Bus
406
+ -CpMlnGhxEU_0.000_9.000.wav 0.000 9.000 Bus
407
+ -DP_cv0x_Ng_30.000_40.000.wav 30.000 40.000 Bus
408
+ -FEXRjcryZE_30.000_40.000.wav 30.000 40.000 Bus
409
+ -Fp2-w-iLiE_20.000_30.000.wav 20.000 30.000 Bus
410
+ -GLk6G9U09A_30.000_40.000.wav 30.000 40.000 Bus
411
+ -Ga9sSkpngg_30.000_40.000.wav 30.000 40.000 Bus
412
+ -H8V23dZoLo_0.000_10.000.wav 0.000 10.000 Bus
413
+ -HeQfwKbFzg_30.000_40.000.wav 30.000 40.000 Bus
414
+ -HzzEuFBiDU_30.000_40.000.wav 30.000 40.000 Bus
415
+ -I4INTpMKT4_30.000_40.000.wav 30.000 40.000 Bus
416
+ -II-7qJxKPc_21.000_31.000.wav 21.000 31.000 Bus
417
+ -LnpzyfTkF8_30.000_40.000.wav 30.000 40.000 Bus
418
+ -OgRshQfsi8_30.000_40.000.wav 30.000 40.000 Bus
419
+ -P53lJ1ViWk_30.000_40.000.wav 30.000 40.000 Bus
420
+ -PvNUvEov4Q_30.000_40.000.wav 30.000 40.000 Bus
421
+ --12UOziMF0_30.000_40.000.wav 30.000 40.000 Truck
422
+ --73E04RpiQ_0.000_9.000.wav 0.000 9.000 Truck
423
+ --J947HxQVM_0.000_9.000.wav 0.000 9.000 Truck
424
+ --bD1DVKlzQ_30.000_40.000.wav 30.000 40.000 Truck
425
+ --ivFZu-hlc_30.000_40.000.wav 30.000 40.000 Truck
426
+ --wuU7kzB5o_30.000_40.000.wav 30.000 40.000 Truck
427
+ -0B_CYyG5Dg_30.000_40.000.wav 30.000 40.000 Truck
428
+ -0JqTq_4jaE_40.000_50.000.wav 40.000 50.000 Truck
429
+ -0MrEZKJ5MQ_30.000_40.000.wav 30.000 40.000 Truck
430
+ -0awng26xQ8_30.000_40.000.wav 30.000 40.000 Truck
431
+ -0dq1Vg9rd8_30.000_40.000.wav 30.000 40.000 Truck
432
+ -0wkq7CUYME_310.000_320.000.wav 310.000 320.000 Truck
433
+ -14RXdkqYuI_30.000_40.000.wav 30.000 40.000 Truck
434
+ -1B3CzpiW1M_30.000_40.000.wav 30.000 40.000 Truck
435
+ -1Q21cZhHDE_30.000_40.000.wav 30.000 40.000 Truck
436
+ -1ZXXnBXJ6c_8.000_18.000.wav 8.000 18.000 Truck
437
+ -1s0DWApvT8_30.000_40.000.wav 30.000 40.000 Truck
438
+ -1s84_2Vn4g_30.000_40.000.wav 30.000 40.000 Truck
439
+ -26ansJluVo_30.000_40.000.wav 30.000 40.000 Truck
440
+ -2EscdO0l-A_30.000_40.000.wav 30.000 40.000 Truck
441
+ -2GlU3e0nTU_170.000_180.000.wav 170.000 180.000 Truck
442
+ -2NBZUCcvm0_30.000_40.000.wav 30.000 40.000 Truck
443
+ -2sT5oBBWWY_30.000_40.000.wav 30.000 40.000 Truck
444
+ -2vmprMUw10_30.000_40.000.wav 30.000 40.000 Truck
445
+ -2x4TB8VWvE_18.000_28.000.wav 18.000 28.000 Truck
446
+ -39q4y0tt-g_30.000_40.000.wav 30.000 40.000 Truck
447
+ -3N5rjPrNCc_190.000_200.000.wav 190.000 200.000 Truck
448
+ -3NcUIyJtFY_30.000_40.000.wav 30.000 40.000 Truck
449
+ -3PplV0ErOk_30.000_40.000.wav 30.000 40.000 Truck
450
+ -3gSkrDKNSA_27.000_37.000.wav 27.000 37.000 Truck
451
+ --p-rk_HBuU_30.000_40.000.wav 30.000 40.000 Motorcycle
452
+ -1WK72M4xeg_220.000_230.000.wav 220.000 230.000 Motorcycle
453
+ -1XfuJcdvfg_30.000_40.000.wav 30.000 40.000 Motorcycle
454
+ -3XWBAmjmaQ_11.000_21.000.wav 11.000 21.000 Motorcycle
455
+ -4-87UgJcUw_70.000_80.000.wav 70.000 80.000 Motorcycle
456
+ -4D3Gkyisyc_30.000_40.000.wav 30.000 40.000 Motorcycle
457
+ -5k5GyHd2So_4.000_14.000.wav 4.000 14.000 Motorcycle
458
+ -6A2L1U9b5Y_54.000_64.000.wav 54.000 64.000 Motorcycle
459
+ -6Yfati1N10_80.000_90.000.wav 80.000 90.000 Motorcycle
460
+ -7_o_GhpZpM_12.000_22.000.wav 12.000 22.000 Motorcycle
461
+ -7rZwMK6uSs_70.000_80.000.wav 70.000 80.000 Motorcycle
462
+ -85f5DKKfSo_30.000_40.000.wav 30.000 40.000 Motorcycle
463
+ -9Smdrt5zwk_40.000_50.000.wav 40.000 50.000 Motorcycle
464
+ -9gZLVDKpnE_30.000_40.000.wav 30.000 40.000 Motorcycle
465
+ -BGebo8V4XY_30.000_40.000.wav 30.000 40.000 Motorcycle
466
+ -DdiduB5B_w_190.000_200.000.wav 190.000 200.000 Motorcycle
467
+ -HIPq7T3eFI_11.000_21.000.wav 11.000 21.000 Motorcycle
468
+ -H_3oEkKe0M_50.000_60.000.wav 50.000 60.000 Motorcycle
469
+ -HmuMoykRqA_500.000_510.000.wav 500.000 510.000 Motorcycle
470
+ -IMRE_psvtI_30.000_40.000.wav 30.000 40.000 Motorcycle
471
+ -Ie4LSPDEF4_6.000_16.000.wav 6.000 16.000 Motorcycle
472
+ -J0F29UCZiA_70.000_80.000.wav 70.000 80.000 Motorcycle
473
+ -KFCJ7ydu2E_0.000_10.000.wav 0.000 10.000 Motorcycle
474
+ -KmDAgYb0Uo_100.000_110.000.wav 100.000 110.000 Motorcycle
475
+ -P7iW3WzNfc_400.000_410.000.wav 400.000 410.000 Motorcycle
476
+ -QMAKXzIGx4_10.000_20.000.wav 10.000 20.000 Motorcycle
477
+ -S-5z2vYtxw_10.000_20.000.wav 10.000 20.000 Motorcycle
478
+ -SlL0NZh51w_30.000_40.000.wav 30.000 40.000 Motorcycle
479
+ -US2mpJxbj4_30.000_40.000.wav 30.000 40.000 Motorcycle
480
+ -VO-C9C0uqY_1.000_11.000.wav 1.000 11.000 Motorcycle
481
+ --H_-CEB2wA_30.000_40.000.wav 30.000 40.000 Train
482
+ -1VsFy0eVJs_30.000_40.000.wav 30.000 40.000 Train
483
+ -1X7kpLnOpM_60.000_70.000.wav 60.000 70.000 Train
484
+ -3FIglJti0s_30.000_40.000.wav 30.000 40.000 Train
485
+ -5QrBL6MzLg_60.000_70.000.wav 60.000 70.000 Train
486
+ -6KOEEiAf9s_19.000_29.000.wav 19.000 29.000 Train
487
+ -97l_c6PToE_30.000_40.000.wav 30.000 40.000 Train
488
+ -9S5Z-uciLo_70.000_80.000.wav 70.000 80.000 Train
489
+ -CkgGfKepO4_140.000_150.000.wav 140.000 150.000 Train
490
+ -E0shPRxAbo_30.000_40.000.wav 30.000 40.000 Train
491
+ -Gbohom8C4Q_30.000_40.000.wav 30.000 40.000 Train
492
+ -JpQivta6MQ_20.000_30.000.wav 20.000 30.000 Train
493
+ -K9oTZj3mVQ_30.000_40.000.wav 30.000 40.000 Train
494
+ -KjE40DlSdU_0.000_10.000.wav 0.000 10.000 Train
495
+ -NrFtZ_xxFU_30.000_40.000.wav 30.000 40.000 Train
496
+ -PYRamK58Ss_0.000_10.000.wav 0.000 10.000 Train
497
+ -P_XDJt4p_s_30.000_40.000.wav 30.000 40.000 Train
498
+ -Pjylzex7oc_350.000_360.000.wav 350.000 360.000 Train
499
+ -QHuZGmIy_I_30.000_40.000.wav 30.000 40.000 Train
500
+ -Qfk_Q2ctBs_30.000_40.000.wav 30.000 40.000 Train
501
+ -RXKRoRPWXg_30.000_40.000.wav 30.000 40.000 Train
502
+ -VH414svzI0_30.000_40.000.wav 30.000 40.000 Train
503
+ -WFdYxE-PYI_30.000_40.000.wav 30.000 40.000 Train
504
+ -Wd1pV7UjWg_60.000_70.000.wav 60.000 70.000 Train
505
+ -XcC-UlbcRA_30.000_40.000.wav 30.000 40.000 Train
506
+ -Y2cD8xvCHI_30.000_40.000.wav 30.000 40.000 Train
507
+ -ZKZkMHe3cY_70.000_80.000.wav 70.000 80.000 Train
508
+ -Zq22n4OewA_30.000_40.000.wav 30.000 40.000 Train
509
+ -aZ7XC4LG2A_30.000_40.000.wav 30.000 40.000 Train
510
+ -abVemAm9HM_430.000_440.000.wav 430.000 440.000 Train
511
+ 1T1i2rny8RU_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
512
+ 7DC3HtNi4fU_160.000_170.000.wav 160.000 170.000 Ambulance (siren)
513
+ -z8jsgl3iHE_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
514
+ 00H_s-krtg8_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
515
+ 0I6Mlp27_gM_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
516
+ 3YaLkgUMhAA_110.000_120.000.wav 110.000 120.000 Fire engine, fire truck (siren)
517
+ 4l78f9VZ9uE_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
518
+ 35EOmSMTQ6I_30.000_40.000.wav 30.000 40.000 Civil defense siren
519
+ 06RreMb5qbE_0.000_10.000.wav 0.000 10.000 Police car (siren)
520
+ 0EPK7Pv_lbE_30.000_40.000.wav 30.000 40.000 Police car (siren)
521
+ 0I6Mlp27_gM_30.000_40.000.wav 30.000 40.000 Police car (siren)
522
+ 17VuPl9Wxvs_20.000_30.000.wav 20.000 30.000 Police car (siren)
523
+ 4A1Ar1TIXIY_30.000_40.000.wav 30.000 40.000 Police car (siren)
524
+ -10fWp7Pqs4_30.000_40.000.wav 30.000 40.000 Car
525
+ -122tCXtFhU_30.000_40.000.wav 30.000 40.000 Car
526
+ -14BFlDzjS4_6.000_16.000.wav 6.000 16.000 Car
527
+ -1BrkFLHD74_19.000_29.000.wav 19.000 29.000 Car
528
+ -1HlfoHZCEE_6.000_16.000.wav 6.000 16.000 Car
529
+ -1McjOPUzbo_30.000_40.000.wav 30.000 40.000 Car
530
+ -1sGSNmgiPs_4.000_14.000.wav 4.000 14.000 Car
531
+ -25LkbSjEos_30.000_40.000.wav 30.000 40.000 Car
532
+ -2GlU3e0nTU_170.000_180.000.wav 170.000 180.000 Car
533
+ -2LJWaL2PuA_30.000_40.000.wav 30.000 40.000 Car
534
+ -2ZbvsBSZmY_2.000_12.000.wav 2.000 12.000 Car
535
+ -2cz2qQDmr4_30.000_40.000.wav 30.000 40.000 Car
536
+ -31KUAOSg5U_5.000_15.000.wav 5.000 15.000 Car
537
+ -35qBdzN9ck_30.000_40.000.wav 30.000 40.000 Car
538
+ -3929cmVE20_30.000_40.000.wav 30.000 40.000 Car
539
+ -3M-k4nIYIM_30.000_40.000.wav 30.000 40.000 Car
540
+ -3MNphBfq_0_30.000_40.000.wav 30.000 40.000 Car
541
+ -3_RSVYKkkk_30.000_40.000.wav 30.000 40.000 Car
542
+ -AF7wp3ezww_140.000_150.000.wav 140.000 150.000 Car
543
+ -Pg4vVPs4bE_30.000_40.000.wav 30.000 40.000 Car
544
+ -VULyMtKazE_0.000_7.000.wav 0.000 7.000 Car
545
+ -cbYvBBXE6A_12.000_22.000.wav 12.000 22.000 Car
546
+ 06RreMb5qbE_0.000_10.000.wav 0.000 10.000 Car
547
+ 0E4AqW9dmdk_30.000_40.000.wav 30.000 40.000 Car
548
+ 0Hz4R_m0hmI_80.000_90.000.wav 80.000 90.000 Car
549
+ 4Kpklmj-ze0_53.000_63.000.wav 53.000 63.000 Car
550
+ 5tzTahLHylw_70.000_80.000.wav 70.000 80.000 Car
551
+ 7NJ5TbNEIvA_250.000_260.000.wav 250.000 260.000 Car
552
+ 9fCibkUT_gQ_30.000_40.000.wav 30.000 40.000 Car
553
+ 9jYv9WuyknA_130.000_140.000.wav 130.000 140.000 Car
554
+ -l-DEfDAvNA_30.000_40.000.wav 30.000 40.000 Car passing by
555
+ 9fCibkUT_gQ_30.000_40.000.wav 30.000 40.000 Car passing by
556
+ -jj2tyuf6-A_80.000_90.000.wav 80.000 90.000 Bus
557
+ -45cKZA7Jww_30.000_40.000.wav 30.000 40.000 Truck
558
+ -4B435WQvag_20.000_30.000.wav 20.000 30.000 Truck
559
+ -60XojQWWoc_30.000_40.000.wav 30.000 40.000 Truck
560
+ -6qhtwdfGOA_23.000_33.000.wav 23.000 33.000 Truck
561
+ -8OITuFZha8_30.000_40.000.wav 30.000 40.000 Truck
562
+ -8n2NqDFRko_30.000_40.000.wav 30.000 40.000 Truck
563
+ -AIrHVeCgtM_30.000_40.000.wav 30.000 40.000 Truck
564
+ -AVzYvKHwPg_30.000_40.000.wav 30.000 40.000 Truck
565
+ -BM_EAszxBg_30.000_40.000.wav 30.000 40.000 Truck
566
+ -Ei2LE71Dfg_20.000_30.000.wav 20.000 30.000 Truck
567
+ -FWkB2IDMhc_30.000_40.000.wav 30.000 40.000 Truck
568
+ -Jsu4dbuO4A_30.000_40.000.wav 30.000 40.000 Truck
569
+ -PRrNx6_MD0_16.000_26.000.wav 16.000 26.000 Truck
570
+ -X0vNLwH1C0_30.000_40.000.wav 30.000 40.000 Truck
571
+ -cbYvBBXE6A_12.000_22.000.wav 12.000 22.000 Truck
572
+ -oCvKmNbhl0_30.000_40.000.wav 30.000 40.000 Truck
573
+ -oV6dQu5tZo_30.000_40.000.wav 30.000 40.000 Truck
574
+ -qKRKDTbt4c_30.000_40.000.wav 30.000 40.000 Truck
575
+ -r8mfjRiHrU_30.000_40.000.wav 30.000 40.000 Truck
576
+ -s9kwrRilOY_30.000_40.000.wav 30.000 40.000 Truck
577
+ -uMiGr6xvRA_30.000_40.000.wav 30.000 40.000 Truck
578
+ -x70B12Mb-8_30.000_40.000.wav 30.000 40.000 Truck
579
+ -xYsfYZOI-Y_30.000_40.000.wav 30.000 40.000 Truck
580
+ -zxrdL6MlKI_30.000_40.000.wav 30.000 40.000 Truck
581
+ 0C3kqtF76t8_50.000_60.000.wav 50.000 60.000 Truck
582
+ 0HmiH-wKLB4_30.000_40.000.wav 30.000 40.000 Truck
583
+ 0KskqFt3DoY_15.000_25.000.wav 15.000 25.000 Truck
584
+ 0OiPtV9sd_w_30.000_40.000.wav 30.000 40.000 Truck
585
+ 0VnoYVqd-yo_30.000_40.000.wav 30.000 40.000 Truck
586
+ 3YaLkgUMhAA_110.000_120.000.wav 110.000 120.000 Truck
587
+ -nGBPqlRNg4_30.000_40.000.wav 30.000 40.000 Train
588
+ 02w3vd_GgF0_390.000_400.000.wav 390.000 400.000 Train
589
+ 0HqeYIREv8M_30.000_40.000.wav 30.000 40.000 Train
590
+ 0IpYF91Fdt0_80.000_90.000.wav 80.000 90.000 Train
591
+ 0NaZejdABG0_90.000_100.000.wav 90.000 100.000 Train
592
+ 0RurXUfKyow_4.000_14.000.wav 4.000 14.000 Train
593
+ 0_HnD-rW3lI_170.000_180.000.wav 170.000 180.000 Train
594
+ 10i60V1RZkQ_210.000_220.000.wav 210.000 220.000 Train
595
+ 1FJY5X1iY9I_170.000_180.000.wav 170.000 180.000 Train
596
+ 1U0Ty6CW6AM_40.000_50.000.wav 40.000 50.000 Train
597
+ 1hQLr88iCvg_30.000_40.000.wav 30.000 40.000 Train
598
+ 1iUXERALOOs_190.000_200.000.wav 190.000 200.000 Train
599
+ 1iWFlLpixKU_5.000_15.000.wav 5.000 15.000 Train
600
+ 1oJAVJPX0YY_20.000_30.000.wav 20.000 30.000 Train
601
+ 26dNsDuIt9Q_340.000_350.000.wav 340.000 350.000 Train
602
+ 2BMHsKLcb7E_90.000_100.000.wav 90.000 100.000 Train
603
+ 2RpOd9MJjyQ_10.000_20.000.wav 10.000 20.000 Train
604
+ 2U4wSdl10to_200.000_210.000.wav 200.000 210.000 Train
605
+ 2aBV6AZt5nk_570.000_580.000.wav 570.000 580.000 Train
606
+ 3ntFslTK6hM_90.000_100.000.wav 90.000 100.000 Train
audio_detection/audio_infer/metadata/class_labels_indices.csv ADDED
@@ -0,0 +1,528 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ index,mid,display_name
2
+ 0,/m/09x0r,"Speech"
3
+ 1,/m/05zppz,"Male speech, man speaking"
4
+ 2,/m/02zsn,"Female speech, woman speaking"
5
+ 3,/m/0ytgt,"Child speech, kid speaking"
6
+ 4,/m/01h8n0,"Conversation"
7
+ 5,/m/02qldy,"Narration, monologue"
8
+ 6,/m/0261r1,"Babbling"
9
+ 7,/m/0brhx,"Speech synthesizer"
10
+ 8,/m/07p6fty,"Shout"
11
+ 9,/m/07q4ntr,"Bellow"
12
+ 10,/m/07rwj3x,"Whoop"
13
+ 11,/m/07sr1lc,"Yell"
14
+ 12,/m/04gy_2,"Battle cry"
15
+ 13,/t/dd00135,"Children shouting"
16
+ 14,/m/03qc9zr,"Screaming"
17
+ 15,/m/02rtxlg,"Whispering"
18
+ 16,/m/01j3sz,"Laughter"
19
+ 17,/t/dd00001,"Baby laughter"
20
+ 18,/m/07r660_,"Giggle"
21
+ 19,/m/07s04w4,"Snicker"
22
+ 20,/m/07sq110,"Belly laugh"
23
+ 21,/m/07rgt08,"Chuckle, chortle"
24
+ 22,/m/0463cq4,"Crying, sobbing"
25
+ 23,/t/dd00002,"Baby cry, infant cry"
26
+ 24,/m/07qz6j3,"Whimper"
27
+ 25,/m/07qw_06,"Wail, moan"
28
+ 26,/m/07plz5l,"Sigh"
29
+ 27,/m/015lz1,"Singing"
30
+ 28,/m/0l14jd,"Choir"
31
+ 29,/m/01swy6,"Yodeling"
32
+ 30,/m/02bk07,"Chant"
33
+ 31,/m/01c194,"Mantra"
34
+ 32,/t/dd00003,"Male singing"
35
+ 33,/t/dd00004,"Female singing"
36
+ 34,/t/dd00005,"Child singing"
37
+ 35,/t/dd00006,"Synthetic singing"
38
+ 36,/m/06bxc,"Rapping"
39
+ 37,/m/02fxyj,"Humming"
40
+ 38,/m/07s2xch,"Groan"
41
+ 39,/m/07r4k75,"Grunt"
42
+ 40,/m/01w250,"Whistling"
43
+ 41,/m/0lyf6,"Breathing"
44
+ 42,/m/07mzm6,"Wheeze"
45
+ 43,/m/01d3sd,"Snoring"
46
+ 44,/m/07s0dtb,"Gasp"
47
+ 45,/m/07pyy8b,"Pant"
48
+ 46,/m/07q0yl5,"Snort"
49
+ 47,/m/01b_21,"Cough"
50
+ 48,/m/0dl9sf8,"Throat clearing"
51
+ 49,/m/01hsr_,"Sneeze"
52
+ 50,/m/07ppn3j,"Sniff"
53
+ 51,/m/06h7j,"Run"
54
+ 52,/m/07qv_x_,"Shuffle"
55
+ 53,/m/07pbtc8,"Walk, footsteps"
56
+ 54,/m/03cczk,"Chewing, mastication"
57
+ 55,/m/07pdhp0,"Biting"
58
+ 56,/m/0939n_,"Gargling"
59
+ 57,/m/01g90h,"Stomach rumble"
60
+ 58,/m/03q5_w,"Burping, eructation"
61
+ 59,/m/02p3nc,"Hiccup"
62
+ 60,/m/02_nn,"Fart"
63
+ 61,/m/0k65p,"Hands"
64
+ 62,/m/025_jnm,"Finger snapping"
65
+ 63,/m/0l15bq,"Clapping"
66
+ 64,/m/01jg02,"Heart sounds, heartbeat"
67
+ 65,/m/01jg1z,"Heart murmur"
68
+ 66,/m/053hz1,"Cheering"
69
+ 67,/m/028ght,"Applause"
70
+ 68,/m/07rkbfh,"Chatter"
71
+ 69,/m/03qtwd,"Crowd"
72
+ 70,/m/07qfr4h,"Hubbub, speech noise, speech babble"
73
+ 71,/t/dd00013,"Children playing"
74
+ 72,/m/0jbk,"Animal"
75
+ 73,/m/068hy,"Domestic animals, pets"
76
+ 74,/m/0bt9lr,"Dog"
77
+ 75,/m/05tny_,"Bark"
78
+ 76,/m/07r_k2n,"Yip"
79
+ 77,/m/07qf0zm,"Howl"
80
+ 78,/m/07rc7d9,"Bow-wow"
81
+ 79,/m/0ghcn6,"Growling"
82
+ 80,/t/dd00136,"Whimper (dog)"
83
+ 81,/m/01yrx,"Cat"
84
+ 82,/m/02yds9,"Purr"
85
+ 83,/m/07qrkrw,"Meow"
86
+ 84,/m/07rjwbb,"Hiss"
87
+ 85,/m/07r81j2,"Caterwaul"
88
+ 86,/m/0ch8v,"Livestock, farm animals, working animals"
89
+ 87,/m/03k3r,"Horse"
90
+ 88,/m/07rv9rh,"Clip-clop"
91
+ 89,/m/07q5rw0,"Neigh, whinny"
92
+ 90,/m/01xq0k1,"Cattle, bovinae"
93
+ 91,/m/07rpkh9,"Moo"
94
+ 92,/m/0239kh,"Cowbell"
95
+ 93,/m/068zj,"Pig"
96
+ 94,/t/dd00018,"Oink"
97
+ 95,/m/03fwl,"Goat"
98
+ 96,/m/07q0h5t,"Bleat"
99
+ 97,/m/07bgp,"Sheep"
100
+ 98,/m/025rv6n,"Fowl"
101
+ 99,/m/09b5t,"Chicken, rooster"
102
+ 100,/m/07st89h,"Cluck"
103
+ 101,/m/07qn5dc,"Crowing, cock-a-doodle-doo"
104
+ 102,/m/01rd7k,"Turkey"
105
+ 103,/m/07svc2k,"Gobble"
106
+ 104,/m/09ddx,"Duck"
107
+ 105,/m/07qdb04,"Quack"
108
+ 106,/m/0dbvp,"Goose"
109
+ 107,/m/07qwf61,"Honk"
110
+ 108,/m/01280g,"Wild animals"
111
+ 109,/m/0cdnk,"Roaring cats (lions, tigers)"
112
+ 110,/m/04cvmfc,"Roar"
113
+ 111,/m/015p6,"Bird"
114
+ 112,/m/020bb7,"Bird vocalization, bird call, bird song"
115
+ 113,/m/07pggtn,"Chirp, tweet"
116
+ 114,/m/07sx8x_,"Squawk"
117
+ 115,/m/0h0rv,"Pigeon, dove"
118
+ 116,/m/07r_25d,"Coo"
119
+ 117,/m/04s8yn,"Crow"
120
+ 118,/m/07r5c2p,"Caw"
121
+ 119,/m/09d5_,"Owl"
122
+ 120,/m/07r_80w,"Hoot"
123
+ 121,/m/05_wcq,"Bird flight, flapping wings"
124
+ 122,/m/01z5f,"Canidae, dogs, wolves"
125
+ 123,/m/06hps,"Rodents, rats, mice"
126
+ 124,/m/04rmv,"Mouse"
127
+ 125,/m/07r4gkf,"Patter"
128
+ 126,/m/03vt0,"Insect"
129
+ 127,/m/09xqv,"Cricket"
130
+ 128,/m/09f96,"Mosquito"
131
+ 129,/m/0h2mp,"Fly, housefly"
132
+ 130,/m/07pjwq1,"Buzz"
133
+ 131,/m/01h3n,"Bee, wasp, etc."
134
+ 132,/m/09ld4,"Frog"
135
+ 133,/m/07st88b,"Croak"
136
+ 134,/m/078jl,"Snake"
137
+ 135,/m/07qn4z3,"Rattle"
138
+ 136,/m/032n05,"Whale vocalization"
139
+ 137,/m/04rlf,"Music"
140
+ 138,/m/04szw,"Musical instrument"
141
+ 139,/m/0fx80y,"Plucked string instrument"
142
+ 140,/m/0342h,"Guitar"
143
+ 141,/m/02sgy,"Electric guitar"
144
+ 142,/m/018vs,"Bass guitar"
145
+ 143,/m/042v_gx,"Acoustic guitar"
146
+ 144,/m/06w87,"Steel guitar, slide guitar"
147
+ 145,/m/01glhc,"Tapping (guitar technique)"
148
+ 146,/m/07s0s5r,"Strum"
149
+ 147,/m/018j2,"Banjo"
150
+ 148,/m/0jtg0,"Sitar"
151
+ 149,/m/04rzd,"Mandolin"
152
+ 150,/m/01bns_,"Zither"
153
+ 151,/m/07xzm,"Ukulele"
154
+ 152,/m/05148p4,"Keyboard (musical)"
155
+ 153,/m/05r5c,"Piano"
156
+ 154,/m/01s0ps,"Electric piano"
157
+ 155,/m/013y1f,"Organ"
158
+ 156,/m/03xq_f,"Electronic organ"
159
+ 157,/m/03gvt,"Hammond organ"
160
+ 158,/m/0l14qv,"Synthesizer"
161
+ 159,/m/01v1d8,"Sampler"
162
+ 160,/m/03q5t,"Harpsichord"
163
+ 161,/m/0l14md,"Percussion"
164
+ 162,/m/02hnl,"Drum kit"
165
+ 163,/m/0cfdd,"Drum machine"
166
+ 164,/m/026t6,"Drum"
167
+ 165,/m/06rvn,"Snare drum"
168
+ 166,/m/03t3fj,"Rimshot"
169
+ 167,/m/02k_mr,"Drum roll"
170
+ 168,/m/0bm02,"Bass drum"
171
+ 169,/m/011k_j,"Timpani"
172
+ 170,/m/01p970,"Tabla"
173
+ 171,/m/01qbl,"Cymbal"
174
+ 172,/m/03qtq,"Hi-hat"
175
+ 173,/m/01sm1g,"Wood block"
176
+ 174,/m/07brj,"Tambourine"
177
+ 175,/m/05r5wn,"Rattle (instrument)"
178
+ 176,/m/0xzly,"Maraca"
179
+ 177,/m/0mbct,"Gong"
180
+ 178,/m/016622,"Tubular bells"
181
+ 179,/m/0j45pbj,"Mallet percussion"
182
+ 180,/m/0dwsp,"Marimba, xylophone"
183
+ 181,/m/0dwtp,"Glockenspiel"
184
+ 182,/m/0dwt5,"Vibraphone"
185
+ 183,/m/0l156b,"Steelpan"
186
+ 184,/m/05pd6,"Orchestra"
187
+ 185,/m/01kcd,"Brass instrument"
188
+ 186,/m/0319l,"French horn"
189
+ 187,/m/07gql,"Trumpet"
190
+ 188,/m/07c6l,"Trombone"
191
+ 189,/m/0l14_3,"Bowed string instrument"
192
+ 190,/m/02qmj0d,"String section"
193
+ 191,/m/07y_7,"Violin, fiddle"
194
+ 192,/m/0d8_n,"Pizzicato"
195
+ 193,/m/01xqw,"Cello"
196
+ 194,/m/02fsn,"Double bass"
197
+ 195,/m/085jw,"Wind instrument, woodwind instrument"
198
+ 196,/m/0l14j_,"Flute"
199
+ 197,/m/06ncr,"Saxophone"
200
+ 198,/m/01wy6,"Clarinet"
201
+ 199,/m/03m5k,"Harp"
202
+ 200,/m/0395lw,"Bell"
203
+ 201,/m/03w41f,"Church bell"
204
+ 202,/m/027m70_,"Jingle bell"
205
+ 203,/m/0gy1t2s,"Bicycle bell"
206
+ 204,/m/07n_g,"Tuning fork"
207
+ 205,/m/0f8s22,"Chime"
208
+ 206,/m/026fgl,"Wind chime"
209
+ 207,/m/0150b9,"Change ringing (campanology)"
210
+ 208,/m/03qjg,"Harmonica"
211
+ 209,/m/0mkg,"Accordion"
212
+ 210,/m/0192l,"Bagpipes"
213
+ 211,/m/02bxd,"Didgeridoo"
214
+ 212,/m/0l14l2,"Shofar"
215
+ 213,/m/07kc_,"Theremin"
216
+ 214,/m/0l14t7,"Singing bowl"
217
+ 215,/m/01hgjl,"Scratching (performance technique)"
218
+ 216,/m/064t9,"Pop music"
219
+ 217,/m/0glt670,"Hip hop music"
220
+ 218,/m/02cz_7,"Beatboxing"
221
+ 219,/m/06by7,"Rock music"
222
+ 220,/m/03lty,"Heavy metal"
223
+ 221,/m/05r6t,"Punk rock"
224
+ 222,/m/0dls3,"Grunge"
225
+ 223,/m/0dl5d,"Progressive rock"
226
+ 224,/m/07sbbz2,"Rock and roll"
227
+ 225,/m/05w3f,"Psychedelic rock"
228
+ 226,/m/06j6l,"Rhythm and blues"
229
+ 227,/m/0gywn,"Soul music"
230
+ 228,/m/06cqb,"Reggae"
231
+ 229,/m/01lyv,"Country"
232
+ 230,/m/015y_n,"Swing music"
233
+ 231,/m/0gg8l,"Bluegrass"
234
+ 232,/m/02x8m,"Funk"
235
+ 233,/m/02w4v,"Folk music"
236
+ 234,/m/06j64v,"Middle Eastern music"
237
+ 235,/m/03_d0,"Jazz"
238
+ 236,/m/026z9,"Disco"
239
+ 237,/m/0ggq0m,"Classical music"
240
+ 238,/m/05lls,"Opera"
241
+ 239,/m/02lkt,"Electronic music"
242
+ 240,/m/03mb9,"House music"
243
+ 241,/m/07gxw,"Techno"
244
+ 242,/m/07s72n,"Dubstep"
245
+ 243,/m/0283d,"Drum and bass"
246
+ 244,/m/0m0jc,"Electronica"
247
+ 245,/m/08cyft,"Electronic dance music"
248
+ 246,/m/0fd3y,"Ambient music"
249
+ 247,/m/07lnk,"Trance music"
250
+ 248,/m/0g293,"Music of Latin America"
251
+ 249,/m/0ln16,"Salsa music"
252
+ 250,/m/0326g,"Flamenco"
253
+ 251,/m/0155w,"Blues"
254
+ 252,/m/05fw6t,"Music for children"
255
+ 253,/m/02v2lh,"New-age music"
256
+ 254,/m/0y4f8,"Vocal music"
257
+ 255,/m/0z9c,"A capella"
258
+ 256,/m/0164x2,"Music of Africa"
259
+ 257,/m/0145m,"Afrobeat"
260
+ 258,/m/02mscn,"Christian music"
261
+ 259,/m/016cjb,"Gospel music"
262
+ 260,/m/028sqc,"Music of Asia"
263
+ 261,/m/015vgc,"Carnatic music"
264
+ 262,/m/0dq0md,"Music of Bollywood"
265
+ 263,/m/06rqw,"Ska"
266
+ 264,/m/02p0sh1,"Traditional music"
267
+ 265,/m/05rwpb,"Independent music"
268
+ 266,/m/074ft,"Song"
269
+ 267,/m/025td0t,"Background music"
270
+ 268,/m/02cjck,"Theme music"
271
+ 269,/m/03r5q_,"Jingle (music)"
272
+ 270,/m/0l14gg,"Soundtrack music"
273
+ 271,/m/07pkxdp,"Lullaby"
274
+ 272,/m/01z7dr,"Video game music"
275
+ 273,/m/0140xf,"Christmas music"
276
+ 274,/m/0ggx5q,"Dance music"
277
+ 275,/m/04wptg,"Wedding music"
278
+ 276,/t/dd00031,"Happy music"
279
+ 277,/t/dd00032,"Funny music"
280
+ 278,/t/dd00033,"Sad music"
281
+ 279,/t/dd00034,"Tender music"
282
+ 280,/t/dd00035,"Exciting music"
283
+ 281,/t/dd00036,"Angry music"
284
+ 282,/t/dd00037,"Scary music"
285
+ 283,/m/03m9d0z,"Wind"
286
+ 284,/m/09t49,"Rustling leaves"
287
+ 285,/t/dd00092,"Wind noise (microphone)"
288
+ 286,/m/0jb2l,"Thunderstorm"
289
+ 287,/m/0ngt1,"Thunder"
290
+ 288,/m/0838f,"Water"
291
+ 289,/m/06mb1,"Rain"
292
+ 290,/m/07r10fb,"Raindrop"
293
+ 291,/t/dd00038,"Rain on surface"
294
+ 292,/m/0j6m2,"Stream"
295
+ 293,/m/0j2kx,"Waterfall"
296
+ 294,/m/05kq4,"Ocean"
297
+ 295,/m/034srq,"Waves, surf"
298
+ 296,/m/06wzb,"Steam"
299
+ 297,/m/07swgks,"Gurgling"
300
+ 298,/m/02_41,"Fire"
301
+ 299,/m/07pzfmf,"Crackle"
302
+ 300,/m/07yv9,"Vehicle"
303
+ 301,/m/019jd,"Boat, Water vehicle"
304
+ 302,/m/0hsrw,"Sailboat, sailing ship"
305
+ 303,/m/056ks2,"Rowboat, canoe, kayak"
306
+ 304,/m/02rlv9,"Motorboat, speedboat"
307
+ 305,/m/06q74,"Ship"
308
+ 306,/m/012f08,"Motor vehicle (road)"
309
+ 307,/m/0k4j,"Car"
310
+ 308,/m/0912c9,"Vehicle horn, car horn, honking"
311
+ 309,/m/07qv_d5,"Toot"
312
+ 310,/m/02mfyn,"Car alarm"
313
+ 311,/m/04gxbd,"Power windows, electric windows"
314
+ 312,/m/07rknqz,"Skidding"
315
+ 313,/m/0h9mv,"Tire squeal"
316
+ 314,/t/dd00134,"Car passing by"
317
+ 315,/m/0ltv,"Race car, auto racing"
318
+ 316,/m/07r04,"Truck"
319
+ 317,/m/0gvgw0,"Air brake"
320
+ 318,/m/05x_td,"Air horn, truck horn"
321
+ 319,/m/02rhddq,"Reversing beeps"
322
+ 320,/m/03cl9h,"Ice cream truck, ice cream van"
323
+ 321,/m/01bjv,"Bus"
324
+ 322,/m/03j1ly,"Emergency vehicle"
325
+ 323,/m/04qvtq,"Police car (siren)"
326
+ 324,/m/012n7d,"Ambulance (siren)"
327
+ 325,/m/012ndj,"Fire engine, fire truck (siren)"
328
+ 326,/m/04_sv,"Motorcycle"
329
+ 327,/m/0btp2,"Traffic noise, roadway noise"
330
+ 328,/m/06d_3,"Rail transport"
331
+ 329,/m/07jdr,"Train"
332
+ 330,/m/04zmvq,"Train whistle"
333
+ 331,/m/0284vy3,"Train horn"
334
+ 332,/m/01g50p,"Railroad car, train wagon"
335
+ 333,/t/dd00048,"Train wheels squealing"
336
+ 334,/m/0195fx,"Subway, metro, underground"
337
+ 335,/m/0k5j,"Aircraft"
338
+ 336,/m/014yck,"Aircraft engine"
339
+ 337,/m/04229,"Jet engine"
340
+ 338,/m/02l6bg,"Propeller, airscrew"
341
+ 339,/m/09ct_,"Helicopter"
342
+ 340,/m/0cmf2,"Fixed-wing aircraft, airplane"
343
+ 341,/m/0199g,"Bicycle"
344
+ 342,/m/06_fw,"Skateboard"
345
+ 343,/m/02mk9,"Engine"
346
+ 344,/t/dd00065,"Light engine (high frequency)"
347
+ 345,/m/08j51y,"Dental drill, dentist's drill"
348
+ 346,/m/01yg9g,"Lawn mower"
349
+ 347,/m/01j4z9,"Chainsaw"
350
+ 348,/t/dd00066,"Medium engine (mid frequency)"
351
+ 349,/t/dd00067,"Heavy engine (low frequency)"
352
+ 350,/m/01h82_,"Engine knocking"
353
+ 351,/t/dd00130,"Engine starting"
354
+ 352,/m/07pb8fc,"Idling"
355
+ 353,/m/07q2z82,"Accelerating, revving, vroom"
356
+ 354,/m/02dgv,"Door"
357
+ 355,/m/03wwcy,"Doorbell"
358
+ 356,/m/07r67yg,"Ding-dong"
359
+ 357,/m/02y_763,"Sliding door"
360
+ 358,/m/07rjzl8,"Slam"
361
+ 359,/m/07r4wb8,"Knock"
362
+ 360,/m/07qcpgn,"Tap"
363
+ 361,/m/07q6cd_,"Squeak"
364
+ 362,/m/0642b4,"Cupboard open or close"
365
+ 363,/m/0fqfqc,"Drawer open or close"
366
+ 364,/m/04brg2,"Dishes, pots, and pans"
367
+ 365,/m/023pjk,"Cutlery, silverware"
368
+ 366,/m/07pn_8q,"Chopping (food)"
369
+ 367,/m/0dxrf,"Frying (food)"
370
+ 368,/m/0fx9l,"Microwave oven"
371
+ 369,/m/02pjr4,"Blender"
372
+ 370,/m/02jz0l,"Water tap, faucet"
373
+ 371,/m/0130jx,"Sink (filling or washing)"
374
+ 372,/m/03dnzn,"Bathtub (filling or washing)"
375
+ 373,/m/03wvsk,"Hair dryer"
376
+ 374,/m/01jt3m,"Toilet flush"
377
+ 375,/m/012xff,"Toothbrush"
378
+ 376,/m/04fgwm,"Electric toothbrush"
379
+ 377,/m/0d31p,"Vacuum cleaner"
380
+ 378,/m/01s0vc,"Zipper (clothing)"
381
+ 379,/m/03v3yw,"Keys jangling"
382
+ 380,/m/0242l,"Coin (dropping)"
383
+ 381,/m/01lsmm,"Scissors"
384
+ 382,/m/02g901,"Electric shaver, electric razor"
385
+ 383,/m/05rj2,"Shuffling cards"
386
+ 384,/m/0316dw,"Typing"
387
+ 385,/m/0c2wf,"Typewriter"
388
+ 386,/m/01m2v,"Computer keyboard"
389
+ 387,/m/081rb,"Writing"
390
+ 388,/m/07pp_mv,"Alarm"
391
+ 389,/m/07cx4,"Telephone"
392
+ 390,/m/07pp8cl,"Telephone bell ringing"
393
+ 391,/m/01hnzm,"Ringtone"
394
+ 392,/m/02c8p,"Telephone dialing, DTMF"
395
+ 393,/m/015jpf,"Dial tone"
396
+ 394,/m/01z47d,"Busy signal"
397
+ 395,/m/046dlr,"Alarm clock"
398
+ 396,/m/03kmc9,"Siren"
399
+ 397,/m/0dgbq,"Civil defense siren"
400
+ 398,/m/030rvx,"Buzzer"
401
+ 399,/m/01y3hg,"Smoke detector, smoke alarm"
402
+ 400,/m/0c3f7m,"Fire alarm"
403
+ 401,/m/04fq5q,"Foghorn"
404
+ 402,/m/0l156k,"Whistle"
405
+ 403,/m/06hck5,"Steam whistle"
406
+ 404,/t/dd00077,"Mechanisms"
407
+ 405,/m/02bm9n,"Ratchet, pawl"
408
+ 406,/m/01x3z,"Clock"
409
+ 407,/m/07qjznt,"Tick"
410
+ 408,/m/07qjznl,"Tick-tock"
411
+ 409,/m/0l7xg,"Gears"
412
+ 410,/m/05zc1,"Pulleys"
413
+ 411,/m/0llzx,"Sewing machine"
414
+ 412,/m/02x984l,"Mechanical fan"
415
+ 413,/m/025wky1,"Air conditioning"
416
+ 414,/m/024dl,"Cash register"
417
+ 415,/m/01m4t,"Printer"
418
+ 416,/m/0dv5r,"Camera"
419
+ 417,/m/07bjf,"Single-lens reflex camera"
420
+ 418,/m/07k1x,"Tools"
421
+ 419,/m/03l9g,"Hammer"
422
+ 420,/m/03p19w,"Jackhammer"
423
+ 421,/m/01b82r,"Sawing"
424
+ 422,/m/02p01q,"Filing (rasp)"
425
+ 423,/m/023vsd,"Sanding"
426
+ 424,/m/0_ksk,"Power tool"
427
+ 425,/m/01d380,"Drill"
428
+ 426,/m/014zdl,"Explosion"
429
+ 427,/m/032s66,"Gunshot, gunfire"
430
+ 428,/m/04zjc,"Machine gun"
431
+ 429,/m/02z32qm,"Fusillade"
432
+ 430,/m/0_1c,"Artillery fire"
433
+ 431,/m/073cg4,"Cap gun"
434
+ 432,/m/0g6b5,"Fireworks"
435
+ 433,/g/122z_qxw,"Firecracker"
436
+ 434,/m/07qsvvw,"Burst, pop"
437
+ 435,/m/07pxg6y,"Eruption"
438
+ 436,/m/07qqyl4,"Boom"
439
+ 437,/m/083vt,"Wood"
440
+ 438,/m/07pczhz,"Chop"
441
+ 439,/m/07pl1bw,"Splinter"
442
+ 440,/m/07qs1cx,"Crack"
443
+ 441,/m/039jq,"Glass"
444
+ 442,/m/07q7njn,"Chink, clink"
445
+ 443,/m/07rn7sz,"Shatter"
446
+ 444,/m/04k94,"Liquid"
447
+ 445,/m/07rrlb6,"Splash, splatter"
448
+ 446,/m/07p6mqd,"Slosh"
449
+ 447,/m/07qlwh6,"Squish"
450
+ 448,/m/07r5v4s,"Drip"
451
+ 449,/m/07prgkl,"Pour"
452
+ 450,/m/07pqc89,"Trickle, dribble"
453
+ 451,/t/dd00088,"Gush"
454
+ 452,/m/07p7b8y,"Fill (with liquid)"
455
+ 453,/m/07qlf79,"Spray"
456
+ 454,/m/07ptzwd,"Pump (liquid)"
457
+ 455,/m/07ptfmf,"Stir"
458
+ 456,/m/0dv3j,"Boiling"
459
+ 457,/m/0790c,"Sonar"
460
+ 458,/m/0dl83,"Arrow"
461
+ 459,/m/07rqsjt,"Whoosh, swoosh, swish"
462
+ 460,/m/07qnq_y,"Thump, thud"
463
+ 461,/m/07rrh0c,"Thunk"
464
+ 462,/m/0b_fwt,"Electronic tuner"
465
+ 463,/m/02rr_,"Effects unit"
466
+ 464,/m/07m2kt,"Chorus effect"
467
+ 465,/m/018w8,"Basketball bounce"
468
+ 466,/m/07pws3f,"Bang"
469
+ 467,/m/07ryjzk,"Slap, smack"
470
+ 468,/m/07rdhzs,"Whack, thwack"
471
+ 469,/m/07pjjrj,"Smash, crash"
472
+ 470,/m/07pc8lb,"Breaking"
473
+ 471,/m/07pqn27,"Bouncing"
474
+ 472,/m/07rbp7_,"Whip"
475
+ 473,/m/07pyf11,"Flap"
476
+ 474,/m/07qb_dv,"Scratch"
477
+ 475,/m/07qv4k0,"Scrape"
478
+ 476,/m/07pdjhy,"Rub"
479
+ 477,/m/07s8j8t,"Roll"
480
+ 478,/m/07plct2,"Crushing"
481
+ 479,/t/dd00112,"Crumpling, crinkling"
482
+ 480,/m/07qcx4z,"Tearing"
483
+ 481,/m/02fs_r,"Beep, bleep"
484
+ 482,/m/07qwdck,"Ping"
485
+ 483,/m/07phxs1,"Ding"
486
+ 484,/m/07rv4dm,"Clang"
487
+ 485,/m/07s02z0,"Squeal"
488
+ 486,/m/07qh7jl,"Creak"
489
+ 487,/m/07qwyj0,"Rustle"
490
+ 488,/m/07s34ls,"Whir"
491
+ 489,/m/07qmpdm,"Clatter"
492
+ 490,/m/07p9k1k,"Sizzle"
493
+ 491,/m/07qc9xj,"Clicking"
494
+ 492,/m/07rwm0c,"Clickety-clack"
495
+ 493,/m/07phhsh,"Rumble"
496
+ 494,/m/07qyrcz,"Plop"
497
+ 495,/m/07qfgpx,"Jingle, tinkle"
498
+ 496,/m/07rcgpl,"Hum"
499
+ 497,/m/07p78v5,"Zing"
500
+ 498,/t/dd00121,"Boing"
501
+ 499,/m/07s12q4,"Crunch"
502
+ 500,/m/028v0c,"Silence"
503
+ 501,/m/01v_m0,"Sine wave"
504
+ 502,/m/0b9m1,"Harmonic"
505
+ 503,/m/0hdsk,"Chirp tone"
506
+ 504,/m/0c1dj,"Sound effect"
507
+ 505,/m/07pt_g0,"Pulse"
508
+ 506,/t/dd00125,"Inside, small room"
509
+ 507,/t/dd00126,"Inside, large room or hall"
510
+ 508,/t/dd00127,"Inside, public space"
511
+ 509,/t/dd00128,"Outside, urban or manmade"
512
+ 510,/t/dd00129,"Outside, rural or natural"
513
+ 511,/m/01b9nn,"Reverberation"
514
+ 512,/m/01jnbd,"Echo"
515
+ 513,/m/096m7z,"Noise"
516
+ 514,/m/06_y0by,"Environmental noise"
517
+ 515,/m/07rgkc5,"Static"
518
+ 516,/m/06xkwv,"Mains hum"
519
+ 517,/m/0g12c5,"Distortion"
520
+ 518,/m/08p9q4,"Sidetone"
521
+ 519,/m/07szfh9,"Cacophony"
522
+ 520,/m/0chx_,"White noise"
523
+ 521,/m/0cj0r,"Pink noise"
524
+ 522,/m/07p_0gm,"Throbbing"
525
+ 523,/m/01jwx6,"Vibration"
526
+ 524,/m/07c52,"Television"
527
+ 525,/m/06bz3,"Radio"
528
+ 526,/m/07hvw1,"Field recording"
audio_detection/audio_infer/pytorch/__pycache__/models.cpython-38.pyc ADDED
Binary file (24.6 kB). View file
audio_detection/audio_infer/pytorch/__pycache__/pytorch_utils.cpython-38.pyc ADDED
Binary file (7.3 kB). View file
audio_detection/audio_infer/pytorch/evaluate.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn import metrics
2
+
3
+ from pytorch_utils import forward
4
+
5
+
6
class Evaluator(object):
    """Compute clip-level evaluation statistics for an audio tagging model."""

    def __init__(self, model):
        """Evaluator.

        Args:
            model: object, the model to be evaluated.
        """
        self.model = model

    def evaluate(self, data_loader):
        """Run the model over the evaluation data and compute metrics.

        Args:
            data_loader: object yielding evaluation batches.

        Returns:
            statistics: dict,
                {'average_precision': (classes_num,), 'auc': (classes_num,)}
        """
        # Forward pass over the whole evaluation set (targets included).
        output_dict = forward(
            model=self.model,
            generator=data_loader,
            return_target=True)

        predictions = output_dict['clipwise_output']  # (audios_num, classes_num)
        targets = output_dict['target']               # (audios_num, classes_num)

        # Per-class metrics (average=None keeps one value per class).
        statistics = {
            'average_precision': metrics.average_precision_score(
                targets, predictions, average=None),
            'auc': metrics.roc_auc_score(targets, predictions, average=None),
        }

        return statistics
audio_detection/audio_infer/pytorch/finetune_template.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ sys.path.insert(1, os.path.join(sys.path[0], '../utils'))
4
+ import numpy as np
5
+ import argparse
6
+ import h5py
7
+ import math
8
+ import time
9
+ import logging
10
+ import matplotlib.pyplot as plt
11
+
12
+ import torch
13
+ torch.backends.cudnn.benchmark=True
14
+ torch.manual_seed(0)
15
+ import torch.nn as nn
16
+ import torch.nn.functional as F
17
+ import torch.optim as optim
18
+ import torch.utils.data
19
+
20
+ from utilities import get_filename
21
+ from models import *
22
+ import config
23
+
24
+
25
class Transfer_Cnn14(nn.Module):
    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num, freeze_base):
        """Classifier for a new task using pretrained Cnn14 as a sub module.

        Args:
            sample_rate: int, audio sample rate in Hz.
            window_size: int, STFT window size.
            hop_size: int, STFT hop size.
            mel_bins: int, number of mel filterbank bins.
            fmin: int, lowest mel frequency.
            fmax: int, highest mel frequency.
            classes_num: int, number of classes of the NEW task.
            freeze_base: bool, if True only the transfer head is trainable.
        """
        super(Transfer_Cnn14, self).__init__()
        # The backbone is always built with the AudioSet class count so
        # pretrained checkpoints load without shape mismatches.
        audioset_classes_num = 527

        self.base = Cnn14(sample_rate, window_size, hop_size, mel_bins, fmin,
            fmax, audioset_classes_num)

        # Transfer to another task layer
        self.fc_transfer = nn.Linear(2048, classes_num, bias=True)

        if freeze_base:
            # Freeze AudioSet pretrained layers
            for param in self.base.parameters():
                param.requires_grad = False

        self.init_weights()

    def init_weights(self):
        """Initialize the transfer head weights."""
        init_layer(self.fc_transfer)

    def load_from_pretrain(self, pretrained_checkpoint_path):
        """Load AudioSet-pretrained weights into the Cnn14 backbone.

        map_location='cpu' so checkpoints saved on GPU also load on
        CPU-only machines; the model is moved to the target device later.
        """
        checkpoint = torch.load(pretrained_checkpoint_path, map_location='cpu')
        self.base.load_state_dict(checkpoint['model'])

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, data_length)

        Returns the backbone's output dict with 'clipwise_output' replaced
        by log-softmax scores of the transfer head.
        """
        output_dict = self.base(input, mixup_lambda)
        embedding = output_dict['embedding']

        clipwise_output = torch.log_softmax(self.fc_transfer(embedding), dim=-1)
        output_dict['clipwise_output'] = clipwise_output

        return output_dict
63
+
64
+
65
def train(args):
    """Instantiate a transfer-learning model and prepare it for training.

    Builds the model class named by ``args.model_type``, optionally loads
    AudioSet-pretrained weights, wraps it in DataParallel and moves it to
    the selected device.

    Args:
        args: argparse.Namespace produced by the ``train`` sub-parser.
    """
    # Arguments & parameters
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'
    classes_num = config.classes_num
    pretrain = bool(args.pretrained_checkpoint_path)

    # Model
    # NOTE(review): eval() on a CLI string executes arbitrary code; only use
    # with trusted command lines (a dict of model classes would be safer).
    Model = eval(args.model_type)
    model = Model(args.sample_rate, args.window_size, args.hop_size,
                  args.mel_bins, args.fmin, args.fmax, classes_num,
                  args.freeze_base)

    # Load pretrained model
    if pretrain:
        logging.info('Load pretrained model from {}'.format(
            args.pretrained_checkpoint_path))
        model.load_from_pretrain(args.pretrained_checkpoint_path)

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in device:
        model.to(device)

    print('Load pretrained model successfully!')
100
+
101
+
102
if __name__ == '__main__':
    # Command-line interface with a single 'train' sub-command.
    parser = argparse.ArgumentParser(description='Example of parser. ')
    subparsers = parser.add_subparsers(dest='mode')

    # Train
    parser_train = subparsers.add_parser('train')
    for int_opt in ('sample_rate', 'window_size', 'hop_size',
                    'mel_bins', 'fmin', 'fmax'):
        parser_train.add_argument('--' + int_opt, type=int, required=True)
    parser_train.add_argument('--model_type', type=str, required=True)
    parser_train.add_argument('--pretrained_checkpoint_path', type=str)
    parser_train.add_argument('--freeze_base', action='store_true', default=False)
    parser_train.add_argument('--cuda', action='store_true', default=False)

    # Parse arguments
    args = parser.parse_args()
    args.filename = get_filename(__file__)

    if args.mode == 'train':
        train(args)
    else:
        raise Exception('Error argument!')
audio_detection/audio_infer/pytorch/inference.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ sys.path.insert(1, os.path.join(sys.path[0], '../utils'))
4
+ import numpy as np
5
+ import argparse
6
+ import librosa
7
+ import matplotlib.pyplot as plt
8
+ import torch
9
+
10
+ from utilities import create_folder, get_filename
11
+ from models import *
12
+ from pytorch_utils import move_data_to_device
13
+ import config
14
+
15
def audio_tagging(args):
    """Inference audio tagging (clip-level) result of an audio clip.

    Loads the checkpointed model, runs one forward pass over the whole clip
    and prints the top-10 class probabilities.

    Args:
        args: argparse.Namespace from the ``audio_tagging`` sub-parser.

    Returns:
        (clipwise_output, labels): per-class probabilities of shape
        (classes_num,) and the matching label names.
    """
    # Arguments & parameters
    sample_rate = args.sample_rate
    device = torch.device('cuda') if args.cuda and torch.cuda.is_available() \
        else torch.device('cpu')

    classes_num = config.classes_num
    labels = config.labels

    # Model
    # NOTE(review): eval() on a CLI string executes arbitrary code; only use
    # with trusted command lines.
    Model = eval(args.model_type)
    model = Model(sample_rate=sample_rate, window_size=args.window_size,
        hop_size=args.hop_size, mel_bins=args.mel_bins, fmin=args.fmin,
        fmax=args.fmax, classes_num=classes_num)

    checkpoint = torch.load(args.checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model'])

    # Parallel (only when CUDA is actually in use)
    if 'cuda' in str(device):
        model.to(device)
        print('GPU number: {}'.format(torch.cuda.device_count()))
        model = torch.nn.DataParallel(model)
    else:
        print('Using CPU.')

    # Load audio (resampled to sample_rate, mono)
    waveform, _ = librosa.core.load(args.audio_path, sr=sample_rate, mono=True)

    waveform = waveform[None, :]  # (1, audio_length)
    waveform = move_data_to_device(waveform, device)

    # Forward
    with torch.no_grad():
        model.eval()
        batch_output_dict = model(waveform, None)

    clipwise_output = batch_output_dict['clipwise_output'].data.cpu().numpy()[0]
    """(classes_num,)"""

    # Print audio tagging top probabilities
    for idx in np.argsort(clipwise_output)[::-1][:10]:
        print('{}: {:.3f}'.format(np.array(labels)[idx], clipwise_output[idx]))

    # Print embedding shape, if the model exposes one
    if 'embedding' in batch_output_dict.keys():
        embedding = batch_output_dict['embedding'].data.cpu().numpy()[0]
        print('embedding: {}'.format(embedding.shape))

    return clipwise_output, labels
78
+
79
+
80
def sound_event_detection(args):
    """Inference sound event detection (frame-level) result of an audio clip.

    Runs the checkpointed model on the audio file, plots the log spectrogram
    together with the top-10 framewise class probabilities, and saves the
    figure under ``results/``.

    Args:
        args: argparse.Namespace from the ``sound_event_detection`` sub-parser.

    Returns:
        (framewise_output, labels): per-frame probabilities of shape
        (time_steps, classes_num) and the matching label names.
    """
    # Arguments & parameters
    sample_rate = args.sample_rate
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    checkpoint_path = args.checkpoint_path
    audio_path = args.audio_path
    device = torch.device('cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu')

    classes_num = config.classes_num
    labels = config.labels
    frames_per_second = sample_rate // hop_size

    # Paths
    fig_path = os.path.join('results', '{}.png'.format(get_filename(audio_path)))
    create_folder(os.path.dirname(fig_path))

    # Model
    # NOTE(review): eval() on a CLI string executes arbitrary code; only use
    # with trusted command lines.
    Model = eval(model_type)
    model = Model(sample_rate=sample_rate, window_size=window_size,
        hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax,
        classes_num=classes_num)

    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model'])

    # Parallel. Only wrap in DataParallel when CUDA is in use, consistent
    # with audio_tagging() (the original wrapped unconditionally and printed
    # a GPU count even on CPU-only machines).
    if 'cuda' in str(device):
        model.to(device)
        print('GPU number: {}'.format(torch.cuda.device_count()))
        model = torch.nn.DataParallel(model)
    else:
        print('Using CPU.')

    # Load audio (resampled to sample_rate, mono)
    (waveform, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)

    waveform = waveform[None, :]  # (1, audio_length)
    waveform = move_data_to_device(waveform, device)

    # Forward
    with torch.no_grad():
        model.eval()
        batch_output_dict = model(waveform, None)

    framewise_output = batch_output_dict['framewise_output'].data.cpu().numpy()[0]
    """(time_steps, classes_num)"""

    print('Sound event detection result (time_steps x classes_num): {}'.format(
        framewise_output.shape))

    # Rank classes by their peak framewise probability.
    sorted_indexes = np.argsort(np.max(framewise_output, axis=0))[::-1]

    top_k = 10  # Show top results
    top_result_mat = framewise_output[:, sorted_indexes[0 : top_k]]
    """(time_steps, top_k)"""

    # Plot result.
    # NOTE(review): np.log of a zero STFT bin below yields -inf; acceptable
    # for visualization but emits a runtime warning on silent frames.
    stft = librosa.core.stft(y=waveform[0].data.cpu().numpy(), n_fft=window_size,
        hop_length=hop_size, window='hann', center=True)
    frames_num = stft.shape[-1]

    fig, axs = plt.subplots(2, 1, sharex=True, figsize=(10, 4))
    axs[0].matshow(np.log(np.abs(stft)), origin='lower', aspect='auto', cmap='jet')
    axs[0].set_ylabel('Frequency bins')
    axs[0].set_title('Log spectrogram')
    axs[1].matshow(top_result_mat.T, origin='upper', aspect='auto', cmap='jet', vmin=0, vmax=1)
    axs[1].xaxis.set_ticks(np.arange(0, frames_num, frames_per_second))
    axs[1].xaxis.set_ticklabels(np.arange(0, frames_num / frames_per_second))
    axs[1].yaxis.set_ticks(np.arange(0, top_k))
    axs[1].yaxis.set_ticklabels(np.array(labels)[sorted_indexes[0 : top_k]])
    axs[1].yaxis.grid(color='k', linestyle='solid', linewidth=0.3, alpha=0.3)
    axs[1].set_xlabel('Seconds')
    axs[1].xaxis.set_ticks_position('bottom')

    plt.tight_layout()
    plt.savefig(fig_path)
    print('Save sound event detection visualization to {}'.format(fig_path))

    return framewise_output, labels
166
+
167
+
168
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Example of parser. ')
    subparsers = parser.add_subparsers(dest='mode')

    # Both sub-commands take exactly the same arguments; register them once.
    def _add_common_args(sub):
        sub.add_argument('--sample_rate', type=int, default=32000)
        sub.add_argument('--window_size', type=int, default=1024)
        sub.add_argument('--hop_size', type=int, default=320)
        sub.add_argument('--mel_bins', type=int, default=64)
        sub.add_argument('--fmin', type=int, default=50)
        sub.add_argument('--fmax', type=int, default=14000)
        sub.add_argument('--model_type', type=str, required=True)
        sub.add_argument('--checkpoint_path', type=str, required=True)
        sub.add_argument('--audio_path', type=str, required=True)
        sub.add_argument('--cuda', action='store_true', default=False)

    _add_common_args(subparsers.add_parser('audio_tagging'))
    _add_common_args(subparsers.add_parser('sound_event_detection'))

    args = parser.parse_args()

    if args.mode == 'audio_tagging':
        audio_tagging(args)
    elif args.mode == 'sound_event_detection':
        sound_event_detection(args)
    else:
        raise Exception('Error argument!')
audio_detection/audio_infer/pytorch/losses.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+
5
def clip_bce(output_dict, target_dict):
    """Binary cross-entropy between clipwise predictions and targets.

    Args:
        output_dict: dict with key 'clipwise_output' (probabilities in [0, 1]).
        target_dict: dict with key 'target' (same shape as the predictions).

    Returns:
        Scalar tensor: mean binary cross-entropy loss.
    """
    predictions = output_dict['clipwise_output']
    targets = target_dict['target']
    return F.binary_cross_entropy(predictions, targets)
10
+
11
+
12
def get_loss_func(loss_type):
    """Return the loss function for *loss_type*.

    Args:
        loss_type: str, currently only 'clip_bce' is supported.

    Returns:
        Callable(output_dict, target_dict) -> scalar loss tensor.

    Raises:
        ValueError: if *loss_type* is unknown. The original implicitly
            returned None, which only failed later at call time with an
            unhelpful TypeError.
    """
    if loss_type == 'clip_bce':
        return clip_bce
    raise ValueError('Unknown loss_type: {}'.format(loss_type))
audio_detection/audio_infer/pytorch/main.py ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ sys.path.insert(1, os.path.join(sys.path[0], '../utils'))
4
+ import numpy as np
5
+ import argparse
6
+ import time
7
+ import logging
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+ import torch.optim as optim
13
+ import torch.utils.data
14
+
15
+ from utilities import (create_folder, get_filename, create_logging, Mixup,
16
+ StatisticsContainer)
17
+ from models import (PVT, PVT2, PVT_lr, PVT_nopretrain, PVT_2layer, Cnn14, Cnn14_no_specaug, Cnn14_no_dropout,
18
+ Cnn6, Cnn10, ResNet22, ResNet38, ResNet54, Cnn14_emb512, Cnn14_emb128,
19
+ Cnn14_emb32, MobileNetV1, MobileNetV2, LeeNet11, LeeNet24, DaiNet19,
20
+ Res1dNet31, Res1dNet51, Wavegram_Cnn14, Wavegram_Logmel_Cnn14,
21
+ Wavegram_Logmel128_Cnn14, Cnn14_16k, Cnn14_8k, Cnn14_mel32, Cnn14_mel128,
22
+ Cnn14_mixup_time_domain, Cnn14_DecisionLevelMax, Cnn14_DecisionLevelAtt, Cnn6_Transformer, GLAM, GLAM2, GLAM3, Cnn4, EAT)
23
+ #from models_test import (PVT_test)
24
+ #from models1 import (PVT1)
25
+ #from models_vig import (VIG, VIG2)
26
+ #from models_vvt import (VVT)
27
+ #from models2 import (MPVIT, MPVIT2)
28
+ #from models_reshape import (PVT_reshape, PVT_tscam)
29
+ #from models_swin import (Swin, Swin_nopretrain)
30
+ #from models_swin2 import (Swin2)
31
+ #from models_van import (Van, Van_tiny)
32
+ #from models_focal import (Focal)
33
+ #from models_cross import (Cross)
34
+ #from models_cov import (Cov)
35
+ #from models_cnn import (Cnn_light)
36
+ #from models_twins import (Twins)
37
+ #from models_cmt import (Cmt, Cmt1)
38
+ #from models_shunted import (Shunted)
39
+ #from models_quadtree import (Quadtree, Quadtree2, Quadtree_nopretrain)
40
+ #from models_davit import (Davit_tscam, Davit, Davit_nopretrain)
41
+ from pytorch_utils import (move_data_to_device, count_parameters, count_flops,
42
+ do_mixup)
43
+ from data_generator import (AudioSetDataset, TrainSampler, BalancedTrainSampler,
44
+ AlternateTrainSampler, EvaluateSampler, collate_fn)
45
+ from evaluate import Evaluator
46
+ import config
47
+ from losses import get_loss_func
48
+
49
+
50
def train(args):
    """Train an AudioSet tagging model.

    Builds the dataset/sampler/loader pipeline from HDF5 index files under
    *workspace*, instantiates the model named by ``args.model_type``,
    optionally resumes from a saved checkpoint, then runs the training loop
    with periodic evaluation (every 2000 iterations), checkpointing and
    ReduceLROnPlateau scheduling until ``early_stop`` iterations.

    Args:
      dataset_dir: str
      workspace: str
      data_type: 'balanced_train' | 'full_train'
      window_size: int
      hop_size: int
      mel_bins: int
      model_type: str
      loss_type: 'clip_bce'
      balanced: 'none' | 'balanced' | 'alternate'
      augmentation: 'none' | 'mixup'
      batch_size: int
      learning_rate: float
      resume_iteration: int
      early_stop: int
      accumulation_steps: int
      cuda: bool
    """

    # Arguments & parameters
    workspace = args.workspace
    data_type = args.data_type
    sample_rate = args.sample_rate
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    loss_type = args.loss_type
    balanced = args.balanced
    augmentation = args.augmentation
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    resume_iteration = args.resume_iteration
    early_stop = args.early_stop
    device = torch.device('cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu')
    filename = args.filename

    num_workers = 8
    clip_samples = config.clip_samples
    classes_num = config.classes_num
    loss_func = get_loss_func(loss_type)

    # Paths. black_list_csv stays None here: no clips are excluded.
    black_list_csv = None

    train_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
        '{}.h5'.format(data_type))

    eval_bal_indexes_hdf5_path = os.path.join(workspace,
        'hdf5s', 'indexes', 'balanced_train.h5')

    eval_test_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
        'eval.h5')

    # Checkpoints / statistics / logs all share the same hyperparameter-keyed
    # sub-directory layout so runs with different settings never collide.
    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'.format(
        sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size))
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(workspace, 'statistics', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'.format(
        sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size),
        'statistics.pkl')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'.format(
        sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size))

    create_logging(logs_dir, filemode='w')
    logging.info(args)

    if 'cuda' in str(device):
        logging.info('Using GPU.')
        device = 'cuda'
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')
        device = 'cpu'

    # Model. NOTE(review): eval() on a CLI string — model_type must come from
    # a trusted caller; argparse does not restrict it to known class names.
    Model = eval(model_type)
    model = Model(sample_rate=sample_rate, window_size=window_size,
        hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax,
        classes_num=classes_num)
    total = sum(p.numel() for p in model.parameters())
    print("Total params: %.2fM" % (total/1e6))
    logging.info("Total params: %.2fM" % (total/1e6))
    #params_num = count_parameters(model)
    # flops_num = count_flops(model, clip_samples)
    #logging.info('Parameters num: {}'.format(params_num))
    # logging.info('Flops num: {:.3f} G'.format(flops_num / 1e9))

    # Dataset will be used by DataLoader later. Dataset takes a meta as input
    # and return a waveform and a target.
    dataset = AudioSetDataset(sample_rate=sample_rate)

    # Train sampler
    if balanced == 'none':
        Sampler = TrainSampler
    elif balanced == 'balanced':
        Sampler = BalancedTrainSampler
    elif balanced == 'alternate':
        Sampler = AlternateTrainSampler

    # Mixup consumes pairs of clips, so the sampler yields double batches.
    train_sampler = Sampler(
        indexes_hdf5_path=train_indexes_hdf5_path,
        batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size,
        black_list_csv=black_list_csv)

    # Evaluate sampler
    eval_bal_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_bal_indexes_hdf5_path, batch_size=batch_size)

    eval_test_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_test_indexes_hdf5_path, batch_size=batch_size)

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=train_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True)

    eval_bal_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=eval_bal_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True)

    eval_test_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=eval_test_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True)
    # Mixup Beta-distribution alpha (hard-coded here, not a CLI flag).
    mix=0.5
    if 'mixup' in augmentation:
        mixup_augmenter = Mixup(mixup_alpha=mix)
        print(mix)
        logging.info(mix)

    # Evaluator
    evaluator = Evaluator(model=model)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    # Optimizer
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.05, amsgrad=True)
    # mode='max' because the scheduler is stepped with validation mAP below.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=4, min_lr=1e-06, verbose=True)
    train_bgn_time = time.time()

    # Resume training: reload model/sampler/statistics state from the
    # checkpoint written at iteration `resume_iteration`.
    if resume_iteration > 0:
        resume_checkpoint_path = os.path.join(workspace, 'checkpoints', filename,
            'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'.format(
            sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
            'data_type={}'.format(data_type), model_type,
            'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
            'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size),
            '{}_iterations.pth'.format(resume_iteration))

        logging.info('Loading checkpoint {}'.format(resume_checkpoint_path))
        checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(checkpoint['model'])
        train_sampler.load_state_dict(checkpoint['sampler'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = checkpoint['iteration']

    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in str(device):
        model.to(device)

    # Optimizer/scheduler state is restored only after DataParallel wrapping
    # and device placement, so their tensors land on the right device.
    if resume_iteration:
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        print(optimizer.state_dict()['param_groups'][0]['lr'])

    time1 = time.time()

    # The sampler is infinite; the loop exits via the early_stop check below.
    for batch_data_dict in train_loader:
        """batch_data_dict: {
            'audio_name': (batch_size [*2 if mixup],),
            'waveform': (batch_size [*2 if mixup], clip_samples),
            'target': (batch_size [*2 if mixup], classes_num),
            (ifexist) 'mixup_lambda': (batch_size * 2,)}
        """

        # Evaluate
        if (iteration % 2000 == 0 and iteration >= resume_iteration) or (iteration == 0):
            train_fin_time = time.time()

            bal_statistics = evaluator.evaluate(eval_bal_loader)
            test_statistics = evaluator.evaluate(eval_test_loader)

            logging.info('Validate bal mAP: {:.3f}'.format(
                np.mean(bal_statistics['average_precision'])))

            logging.info('Validate test mAP: {:.3f}'.format(
                np.mean(test_statistics['average_precision'])))

            statistics_container.append(iteration, bal_statistics, data_type='bal')
            statistics_container.append(iteration, test_statistics, data_type='test')
            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))

            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model
        if iteration % 2000 == 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'sampler': train_sampler.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Mixup lambda
        if 'mixup' in augmentation:
            batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda(
                batch_size=len(batch_data_dict['waveform']))

        # Move data to device
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device)

        # Forward
        model.train()

        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                batch_data_dict['mixup_lambda'])
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            # Targets are mixed with the same lambdas the model used.
            batch_target_dict = {'target': do_mixup(batch_data_dict['target'],
                batch_data_dict['mixup_lambda'])}
            """{'target': (batch_size, classes_num)}"""
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {'target': batch_data_dict['target']}
            """{'target': (batch_size, classes_num)}"""

        # Loss
        loss = loss_func(batch_output_dict, batch_target_dict)
        # Backward
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        if iteration % 10 == 0:
            print(iteration, loss)
            #print('--- Iteration: {}, train time: {:.3f} s / 10 iterations ---'\
            #    .format(iteration, time.time() - time1))
            #time1 = time.time()

        # NOTE(review): relies on test_statistics having been set by the
        # evaluation branch above in this same iteration (both gates fire on
        # iteration % 2000 == 0, so it is defined when this runs).
        if iteration % 2000 == 0:
            scheduler.step(np.mean(test_statistics['average_precision']))
            print(optimizer.state_dict()['param_groups'][0]['lr'])
            logging.info(optimizer.state_dict()['param_groups'][0]['lr'])

        # Stop learning
        if iteration == early_stop:
            break

        iteration += 1
345
+
346
+
347
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Example of parser. ')
    subparsers = parser.add_subparsers(dest='mode')

    # Declarative table of the 'train' sub-command flags. Registration order
    # matches the original one-call-per-flag version, so parsing behavior and
    # --help output are identical.
    _train_arg_specs = [
        ('--workspace', dict(type=str, required=True)),
        ('--data_type', dict(type=str, default='full_train',
                             choices=['balanced_train', 'full_train'])),
        ('--sample_rate', dict(type=int, default=32000)),
        ('--window_size', dict(type=int, default=1024)),
        ('--hop_size', dict(type=int, default=320)),
        ('--mel_bins', dict(type=int, default=64)),
        ('--fmin', dict(type=int, default=50)),
        ('--fmax', dict(type=int, default=14000)),
        ('--model_type', dict(type=str, required=True)),
        ('--loss_type', dict(type=str, default='clip_bce', choices=['clip_bce'])),
        ('--balanced', dict(type=str, default='balanced',
                            choices=['none', 'balanced', 'alternate'])),
        ('--augmentation', dict(type=str, default='mixup',
                                choices=['none', 'mixup'])),
        ('--batch_size', dict(type=int, default=32)),
        ('--learning_rate', dict(type=float, default=1e-3)),
        ('--resume_iteration', dict(type=int, default=0)),
        ('--early_stop', dict(type=int, default=1000000)),
        ('--cuda', dict(action='store_true', default=False)),
    ]

    parser_train = subparsers.add_parser('train')
    for flag, options in _train_arg_specs:
        parser_train.add_argument(flag, **options)

    args = parser.parse_args()
    args.filename = get_filename(__file__)

    if args.mode == 'train':
        train(args)

    else:
        raise Exception('Error argument!')
audio_detection/audio_infer/pytorch/models.py ADDED
@@ -0,0 +1,951 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torchlibrosa.stft import Spectrogram, LogmelFilterBank
5
+ from torchlibrosa.augmentation import SpecAugmentation
6
+
7
+ from audio_infer.pytorch.pytorch_utils import do_mixup, interpolate, pad_framewise_output
8
+ import os
9
+ import sys
10
+ import math
11
+ import numpy as np
12
+
13
+ import torch
14
+ import torch.nn as nn
15
+ import torch.nn.functional as F
16
+ from torch.nn.parameter import Parameter
17
+ from torchlibrosa.stft import Spectrogram, LogmelFilterBank
18
+ from torchlibrosa.augmentation import SpecAugmentation
19
+ from audio_infer.pytorch.pytorch_utils import do_mixup
20
+ import torch.utils.checkpoint as checkpoint
21
+ from timm.models.layers import DropPath, to_2tuple, trunc_normal_
22
+ import warnings
23
+ from functools import partial
24
+ #from mmdet.models.builder import BACKBONES
25
+ from mmdet.utils import get_root_logger
26
+ from mmcv.runner import load_checkpoint
27
+ os.environ['TORCH_HOME'] = '../pretrained_models'
28
+ from copy import deepcopy
29
+ from timm.models.helpers import load_pretrained
30
+ from torch.cuda.amp import autocast
31
+ from collections import OrderedDict
32
+ import io
33
+ import re
34
+ from mmcv.runner import _load_checkpoint, load_state_dict
35
+ import mmcv.runner
36
+ import copy
37
+ import random
38
+ from einops import rearrange
39
+ from einops.layers.torch import Rearrange, Reduce
40
+ from torch import nn, einsum
41
+
42
+
43
# NOTE(review): this deliberately shadows mmcv.runner.load_checkpoint
# (imported above) with a variant that adapts an RGB-pretrained PVT
# checkpoint to this repo's single-channel (log-mel spectrogram) input.
# NOTE(review): revise_keys uses a mutable default argument; harmless here
# because it is never mutated, but worth confirming before reuse.
def load_checkpoint(model,
                    filename,
                    map_location=None,
                    strict=False,
                    logger=None,
                    revise_keys=[(r'^module\.', '')]):
    """Load checkpoint from a file or URI.

    Args:
        model (Module): Module to load checkpoint.
        filename (str): Accept local filepath, URL, ``torchvision://xxx``,
            ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
            details.
        map_location (str): Same as :func:`torch.load`.
        strict (bool): Whether to allow different params for the model and
            checkpoint.
        logger (:mod:`logging.Logger` or None): The logger for error message.
        revise_keys (list): A list of customized keywords to modify the
            state_dict in checkpoint. Each item is a (pattern, replacement)
            pair of the regular expression operations. Default: strip
            the prefix 'module.' by [(r'^module\\.', '')].

    Returns:
        dict or OrderedDict: The loaded checkpoint.
    """

    checkpoint = _load_checkpoint(filename, map_location, logger)
    # Collapse the pretrained 3-channel patch-embed conv to 1 input channel
    # by summing over the channel dim; requires the checkpoint to contain
    # 'patch_embed1.proj.weight' (raises KeyError otherwise).
    new_proj = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(4, 4), padding=(2, 2))
    new_proj.weight = torch.nn.Parameter(torch.sum(checkpoint['patch_embed1.proj.weight'], dim=1).unsqueeze(1))
    checkpoint['patch_embed1.proj.weight'] = new_proj.weight
    # OrderedDict is a subclass of dict
    if not isinstance(checkpoint, dict):
        raise RuntimeError(
            f'No state_dict found in checkpoint file (unknown)')
    # get state_dict from checkpoint
    if 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        state_dict = checkpoint

    # strip prefix of state_dict
    metadata = getattr(state_dict, '_metadata', OrderedDict())
    for p, r in revise_keys:
        state_dict = OrderedDict(
            {re.sub(p, r, k): v
             for k, v in state_dict.items()})
    # Also drop any 'backbone.' prefix left over from mmdet-style wrappers.
    state_dict = OrderedDict({k.replace('backbone.',''):v for k,v in state_dict.items()})
    # Keep metadata in state_dict
    state_dict._metadata = metadata

    # load state_dict
    load_state_dict(model, state_dict, strict, logger)
    return checkpoint
96
+
97
def init_layer(layer):
    """Initialize a Linear or Convolutional layer. """
    nn.init.xavier_uniform_(layer.weight)

    # Zero the bias when the layer has one (some layers are bias-free).
    if getattr(layer, 'bias', None) is not None:
        layer.bias.data.fill_(0.)
104
+
105
+
106
def init_bn(bn):
    """Initialize a Batchnorm layer. """
    # Identity affine transform: zero shift, unit scale.
    nn.init.zeros_(bn.bias)
    nn.init.ones_(bn.weight)
110
+
111
+
112
+
113
+
114
class TimeShift(nn.Module):
    """Randomly roll the input along dim 2 (time axis) during training.

    The shift is drawn from N(mean, std) and truncated to an int; at eval
    time the input passes through unchanged.
    """

    def __init__(self, mean, std):
        super().__init__()
        self.mean = mean
        self.std = std

    def forward(self, x):
        if not self.training:
            return x
        offset = int(torch.empty(1).normal_(self.mean, self.std).item())
        return torch.roll(x, offset, dims=2)
125
+
126
class LinearSoftPool(nn.Module):
    """LinearSoftPool
    Linear softmax, takes logits and returns a probability, near to the actual maximum value.
    Taken from the paper:
        A Comparison of Five Multiple Instance Learning Pooling Functions for Sound Event Detection with Weak Labeling
    https://arxiv.org/abs/1810.09050
    """

    def __init__(self, pooldim=1):
        super().__init__()
        self.pooldim = pooldim

    def forward(self, logits, time_decision):
        # Self-weighted average: sum(p^2) / sum(p) along the pooling dim.
        # `logits` is accepted for interface compatibility but unused.
        weighted = time_decision * time_decision
        return weighted.sum(self.pooldim) / time_decision.sum(self.pooldim)
140
+
141
class PVT(nn.Module):
    """Sound event detection model: log-mel frontend + PVTv2 backbone.

    Pipeline: STFT -> log-mel -> BatchNorm over mel bins -> (train-time
    TimeShift + SpecAugment + optional mixup) -> PyramidVisionTransformerV2
    -> per-frame sigmoid classifier. Clipwise output is the temporal average
    (AdaptiveAvgPool1d) of the framewise probabilities. This variant does
    NOT load pretrained backbone weights (pretrained kwarg commented out).
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num):

        super(PVT, self).__init__()

        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        # Spectrogram extractor
        self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
            win_length=window_size, window=window, center=center, pad_mode=pad_mode,
            freeze_parameters=True)

        # Logmel feature extractor
        self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size,
            n_mels=mel_bins, fmin=fmin, fmax=fmax, ref=ref, amin=amin, top_db=top_db,
            freeze_parameters=True)

        self.time_shift = TimeShift(0, 10)
        # Spec augmenter
        self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
            freq_drop_width=8, freq_stripes_num=2)

        # bn0 normalizes across the 64 mel bins (input is transposed so the
        # mel axis sits in the channel position).
        self.bn0 = nn.BatchNorm2d(64)
        # NOTE(review): tdim=1001 ties the backbone to 10-s clips at this
        # frontend's hop size — confirm against config.clip_samples.
        self.pvt_transformer = PyramidVisionTransformerV2(tdim=1001,
                        fdim=64,
                        patch_size=7,
                        stride=4,
                        in_chans=1,
                        num_classes=classes_num,
                        embed_dims=[64, 128, 320, 512],
                        depths=[3, 4, 6, 3],
                        num_heads=[1, 2, 5, 8],
                        mlp_ratios=[8, 8, 4, 4],
                        qkv_bias=True,
                        qk_scale=None,
                        drop_rate=0.0,
                        drop_path_rate=0.1,
                        sr_ratios=[8, 4, 2, 1],
                        norm_layer=partial(nn.LayerNorm, eps=1e-6),
                        num_stages=4,
                        #pretrained='https://github.com/whai362/PVT/releases/download/v2/pvt_v2_b2.pth'
                        )
        #self.temp_pool = LinearSoftPool()
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.fc_audioset = nn.Linear(512, classes_num, bias=True)

        self.init_weights()

    def init_weights(self):
        # Only the frontend BN and the classifier head are re-initialized;
        # the backbone handles its own initialization.
        init_bn(self.bn0)
        init_layer(self.fc_audioset)

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, times_steps, freq_bins)"""

        # Framewise output is upsampled by this factor to roughly recover
        # the original frame resolution lost to backbone downsampling.
        interpolate_ratio = 32

        x = self.spectrogram_extractor(input)   # (batch_size, 1, time_steps, freq_bins)
        x = self.logmel_extractor(x)    # (batch_size, 1, time_steps, mel_bins)
        frames_num = x.shape[2]
        # Move mel bins into the channel axis for bn0, then restore.
        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            x = self.time_shift(x)
            x = self.spec_augmenter(x)

        # Mixup on spectrogram
        if self.training and mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)
        #print(x.shape) #torch.Size([10, 1, 1001, 64])
        x = self.pvt_transformer(x)
        #print(x.shape) #torch.Size([10, 800, 128])
        # Average over the last backbone axis — presumably the frequency
        # dimension of the feature map; TODO confirm in PyramidVisionTransformerV2.
        x = torch.mean(x, dim=3)

        x = x.transpose(1, 2).contiguous()
        framewise_output = torch.sigmoid(self.fc_audioset(x))
        #clipwise_output = torch.mean(framewise_output, dim=1)
        #clipwise_output = self.temp_pool(x, framewise_output).clamp(1e-7, 1.).squeeze(1)
        # Clipwise score = temporal mean of framewise probabilities.
        x = framewise_output.transpose(1, 2).contiguous()
        x = self.avgpool(x)
        clipwise_output = torch.flatten(x, 1)
        #print(framewise_output.shape) #torch.Size([10, 100, 17])
        framewise_output = interpolate(framewise_output, interpolate_ratio)
        #framewise_output = framewise_output[:,:1000,:]
        #framewise_output = pad_framewise_output(framewise_output, frames_num)
        output_dict = {'framewise_output': framewise_output,
            'clipwise_output': clipwise_output}

        return output_dict
238
+
239
class PVT2(nn.Module):
    """Variant of PVT that loads ImageNet-pretrained PVTv2-b2 weights.

    Differences from PVT: the backbone is initialized from the pvt_v2_b2
    checkpoint, TimeShift is disabled at train time (commented out), and the
    clipwise output is computed with torch.mean over time instead of
    AdaptiveAvgPool1d (numerically the same reduction).
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num):

        super(PVT2, self).__init__()

        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        # Spectrogram extractor
        self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
            win_length=window_size, window=window, center=center, pad_mode=pad_mode,
            freeze_parameters=True)

        # Logmel feature extractor
        self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size,
            n_mels=mel_bins, fmin=fmin, fmax=fmax, ref=ref, amin=amin, top_db=top_db,
            freeze_parameters=True)

        self.time_shift = TimeShift(0, 10)
        # Spec augmenter
        self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
            freq_drop_width=8, freq_stripes_num=2)

        self.bn0 = nn.BatchNorm2d(64)
        self.pvt_transformer = PyramidVisionTransformerV2(tdim=1001,
                        fdim=64,
                        patch_size=7,
                        stride=4,
                        in_chans=1,
                        num_classes=classes_num,
                        embed_dims=[64, 128, 320, 512],
                        depths=[3, 4, 6, 3],
                        num_heads=[1, 2, 5, 8],
                        mlp_ratios=[8, 8, 4, 4],
                        qkv_bias=True,
                        qk_scale=None,
                        drop_rate=0.0,
                        drop_path_rate=0.1,
                        sr_ratios=[8, 4, 2, 1],
                        norm_layer=partial(nn.LayerNorm, eps=1e-6),
                        num_stages=4,
                        pretrained='https://github.com/whai362/PVT/releases/download/v2/pvt_v2_b2.pth'
                        )
        #self.temp_pool = LinearSoftPool()
        self.fc_audioset = nn.Linear(512, classes_num, bias=True)

        self.init_weights()

    def init_weights(self):
        init_bn(self.bn0)
        init_layer(self.fc_audioset)

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, times_steps, freq_bins)"""

        interpolate_ratio = 32

        x = self.spectrogram_extractor(input)   # (batch_size, 1, time_steps, freq_bins)
        x = self.logmel_extractor(x)    # (batch_size, 1, time_steps, mel_bins)
        frames_num = x.shape[2]
        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            #x = self.time_shift(x)
            x = self.spec_augmenter(x)

        # Mixup on spectrogram
        if self.training and mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)
        #print(x.shape) #torch.Size([10, 1, 1001, 64])
        x = self.pvt_transformer(x)
        #print(x.shape) #torch.Size([10, 800, 128])
        # Average over the last backbone axis — presumably frequency;
        # TODO confirm in PyramidVisionTransformerV2.
        x = torch.mean(x, dim=3)

        x = x.transpose(1, 2).contiguous()
        framewise_output = torch.sigmoid(self.fc_audioset(x))
        # Clipwise score = temporal mean of framewise probabilities.
        clipwise_output = torch.mean(framewise_output, dim=1)
        #clipwise_output = self.temp_pool(x, framewise_output).clamp(1e-7, 1.).squeeze(1)
        #print(framewise_output.shape) #torch.Size([10, 100, 17])
        framewise_output = interpolate(framewise_output, interpolate_ratio)
        #framewise_output = framewise_output[:,:1000,:]
        #framewise_output = pad_framewise_output(framewise_output, frames_num)
        output_dict = {'framewise_output': framewise_output,
            'clipwise_output': clipwise_output}

        return output_dict
332
+
333
class PVT_2layer(nn.Module):
    """Lightweight PVT variant using only the first 2 backbone stages.

    Compared to PVT: embed_dims [64, 128], depths [3, 4], num_stages=2,
    pretrained pvt_v2_b2 weights, a 128-d classifier head, and a smaller
    framewise upsampling factor (8 instead of 32) because the shallower
    backbone downsamples time less aggressively.
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num):

        super(PVT_2layer, self).__init__()

        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        # Spectrogram extractor
        self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
            win_length=window_size, window=window, center=center, pad_mode=pad_mode,
            freeze_parameters=True)

        # Logmel feature extractor
        self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size,
            n_mels=mel_bins, fmin=fmin, fmax=fmax, ref=ref, amin=amin, top_db=top_db,
            freeze_parameters=True)

        self.time_shift = TimeShift(0, 10)
        # Spec augmenter
        self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
            freq_drop_width=8, freq_stripes_num=2)

        self.bn0 = nn.BatchNorm2d(64)
        self.pvt_transformer = PyramidVisionTransformerV2(tdim=1001,
                        fdim=64,
                        patch_size=7,
                        stride=4,
                        in_chans=1,
                        num_classes=classes_num,
                        embed_dims=[64, 128],
                        depths=[3, 4],
                        num_heads=[1, 2],
                        mlp_ratios=[8, 8],
                        qkv_bias=True,
                        qk_scale=None,
                        drop_rate=0.0,
                        drop_path_rate=0.1,
                        sr_ratios=[8, 4],
                        norm_layer=partial(nn.LayerNorm, eps=1e-6),
                        num_stages=2,
                        pretrained='https://github.com/whai362/PVT/releases/download/v2/pvt_v2_b2.pth'
                        )
        #self.temp_pool = LinearSoftPool()
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.fc_audioset = nn.Linear(128, classes_num, bias=True)

        self.init_weights()

    def init_weights(self):
        init_bn(self.bn0)
        init_layer(self.fc_audioset)

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, times_steps, freq_bins)"""

        # Shallower backbone -> smaller temporal downsampling -> smaller
        # upsampling factor than the 4-stage PVT (8 vs 32).
        interpolate_ratio = 8

        x = self.spectrogram_extractor(input)   # (batch_size, 1, time_steps, freq_bins)
        x = self.logmel_extractor(x)    # (batch_size, 1, time_steps, mel_bins)
        frames_num = x.shape[2]
        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            x = self.time_shift(x)
            x = self.spec_augmenter(x)

        # Mixup on spectrogram
        if self.training and mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)
        #print(x.shape) #torch.Size([10, 1, 1001, 64])
        x = self.pvt_transformer(x)
        #print(x.shape) #torch.Size([10, 800, 128])
        # Average over the last backbone axis — presumably frequency;
        # TODO confirm in PyramidVisionTransformerV2.
        x = torch.mean(x, dim=3)

        x = x.transpose(1, 2).contiguous()
        framewise_output = torch.sigmoid(self.fc_audioset(x))
        #clipwise_output = torch.mean(framewise_output, dim=1)
        #clipwise_output = self.temp_pool(x, framewise_output).clamp(1e-7, 1.).squeeze(1)
        # Clipwise score = temporal mean of framewise probabilities.
        x = framewise_output.transpose(1, 2).contiguous()
        x = self.avgpool(x)
        clipwise_output = torch.flatten(x, 1)
        #print(framewise_output.shape) #torch.Size([10, 100, 17])
        framewise_output = interpolate(framewise_output, interpolate_ratio)
        #framewise_output = framewise_output[:,:1000,:]
        #framewise_output = pad_framewise_output(framewise_output, frames_num)
        output_dict = {'framewise_output': framewise_output,
            'clipwise_output': clipwise_output}

        return output_dict
430
+
431
class PVT_lr(nn.Module):
    """Sound event detection model: log-mel front end + 4-stage PVTv2 backbone.

    Framewise sigmoid scores are upsampled x32 to compensate the backbone's
    temporal downsampling; the clipwise score is obtained with LinearSoftPool
    over the frame scores. The backbone loads ImageNet PVTv2-b2 weights.
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num):

        super(PVT_lr, self).__init__()

        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        # Spectrogram extractor (frozen: STFT parameters are not trained)
        self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
            win_length=window_size, window=window, center=center, pad_mode=pad_mode,
            freeze_parameters=True)

        # Logmel feature extractor (frozen mel filterbank)
        self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size,
            n_mels=mel_bins, fmin=fmin, fmax=fmax, ref=ref, amin=amin, top_db=top_db,
            freeze_parameters=True)

        # Training-time augmentations: random time shift + SpecAugment
        self.time_shift = TimeShift(0, 10)
        # Spec augmenter
        self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
            freq_drop_width=8, freq_stripes_num=2)

        # BatchNorm over the 64 mel bins (input is transposed so bins sit on
        # the channel axis before this layer is applied).
        self.bn0 = nn.BatchNorm2d(64)
        # Full 4-stage PVTv2 backbone on the (time=1001, mel=64) "image".
        self.pvt_transformer = PyramidVisionTransformerV2(tdim=1001,
            fdim=64,
            patch_size=7,
            stride=4,
            in_chans=1,
            num_classes=classes_num,
            embed_dims=[64, 128, 320, 512],
            depths=[3, 4, 6, 3],
            num_heads=[1, 2, 5, 8],
            mlp_ratios=[8, 8, 4, 4],
            qkv_bias=True,
            qk_scale=None,
            drop_rate=0.0,
            drop_path_rate=0.1,
            sr_ratios=[8, 4, 2, 1],
            norm_layer=partial(nn.LayerNorm, eps=1e-6),
            num_stages=4,
            pretrained='https://github.com/whai362/PVT/releases/download/v2/pvt_v2_b2.pth'
        )
        self.temp_pool = LinearSoftPool()
        # Classifier head on the 512-dim stage-4 features.
        self.fc_audioset = nn.Linear(512, classes_num, bias=True)

        self.init_weights()

    def init_weights(self):
        """Initialize the input batch-norm and the classifier head."""
        init_bn(self.bn0)
        init_layer(self.fc_audioset)

    def forward(self, input, mixup_lambda=None):
        """input: (batch_size, samples) waveform.

        Returns a dict with 'framewise_output' (batch, frames, classes_num)
        and 'clipwise_output' (batch, classes_num).
        """

        # Upsampling factor compensating the 4-stage backbone's time reduction.
        interpolate_ratio = 32

        x = self.spectrogram_extractor(input)  # (batch_size, 1, time_steps, freq_bins)
        x = self.logmel_extractor(x)  # (batch_size, 1, time_steps, mel_bins)
        frames_num = x.shape[2]
        # BatchNorm over mel bins: move bins onto the channel axis and back.
        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            x = self.time_shift(x)
            x = self.spec_augmenter(x)

        # Mixup on spectrogram
        if self.training and mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)
        x = self.pvt_transformer(x)  # (batch, channels, t', f') feature map
        x = torch.mean(x, dim=3)  # collapse the frequency axis

        x = x.transpose(1, 2).contiguous()  # (batch, t', channels)
        framewise_output = torch.sigmoid(self.fc_audioset(x))
        # Clipwise score: linear-softmax pooling over time of the frame scores.
        clipwise_output = self.temp_pool(x, framewise_output).clamp(1e-7, 1.).squeeze(1)
        framewise_output = interpolate(framewise_output, interpolate_ratio)
        output_dict = {'framewise_output': framewise_output,
            'clipwise_output': clipwise_output}

        return output_dict
523
+
524
+
525
class PVT_nopretrain(nn.Module):
    """PVT_lr variant whose backbone is trained from scratch (no pretrained
    checkpoint is loaded).

    Also differs from PVT_lr in that the upsampled framewise output is
    truncated to the first 1000 frames.
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num):

        super(PVT_nopretrain, self).__init__()

        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        # Spectrogram extractor (frozen: STFT parameters are not trained)
        self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
            win_length=window_size, window=window, center=center, pad_mode=pad_mode,
            freeze_parameters=True)

        # Logmel feature extractor (frozen mel filterbank)
        self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size,
            n_mels=mel_bins, fmin=fmin, fmax=fmax, ref=ref, amin=amin, top_db=top_db,
            freeze_parameters=True)

        # Training-time augmentations: random time shift + SpecAugment
        self.time_shift = TimeShift(0, 10)
        # Spec augmenter
        self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
            freq_drop_width=8, freq_stripes_num=2)

        # BatchNorm over the 64 mel bins (input transposed so bins are channels).
        self.bn0 = nn.BatchNorm2d(64)
        # Full 4-stage PVTv2 backbone; note: ``pretrained`` deliberately omitted.
        self.pvt_transformer = PyramidVisionTransformerV2(tdim=1001,
            fdim=64,
            patch_size=7,
            stride=4,
            in_chans=1,
            num_classes=classes_num,
            embed_dims=[64, 128, 320, 512],
            depths=[3, 4, 6, 3],
            num_heads=[1, 2, 5, 8],
            mlp_ratios=[8, 8, 4, 4],
            qkv_bias=True,
            qk_scale=None,
            drop_rate=0.0,
            drop_path_rate=0.1,
            sr_ratios=[8, 4, 2, 1],
            norm_layer=partial(nn.LayerNorm, eps=1e-6),
            num_stages=4,
        )
        self.temp_pool = LinearSoftPool()
        self.fc_audioset = nn.Linear(512, classes_num, bias=True)

        self.init_weights()

    def init_weights(self):
        """Initialize the input batch-norm and the classifier head."""
        init_bn(self.bn0)
        init_layer(self.fc_audioset)

    def forward(self, input, mixup_lambda=None):
        """input: (batch_size, samples) waveform.

        Returns a dict with 'framewise_output' (batch, <=1000, classes_num)
        and 'clipwise_output' (batch, classes_num).
        """

        # Upsampling factor compensating the 4-stage backbone's time reduction.
        interpolate_ratio = 32

        x = self.spectrogram_extractor(input)  # (batch_size, 1, time_steps, freq_bins)
        x = self.logmel_extractor(x)  # (batch_size, 1, time_steps, mel_bins)
        frames_num = x.shape[2]
        # BatchNorm over mel bins: move bins onto the channel axis and back.
        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            x = self.time_shift(x)
            x = self.spec_augmenter(x)

        # Mixup on spectrogram
        if self.training and mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)
        x = self.pvt_transformer(x)  # (batch, channels, t', f') feature map
        x = torch.mean(x, dim=3)  # collapse the frequency axis

        x = x.transpose(1, 2).contiguous()  # (batch, t', channels)
        framewise_output = torch.sigmoid(self.fc_audioset(x))
        # Clipwise score: linear-softmax pooling over time of the frame scores.
        clipwise_output = self.temp_pool(x, framewise_output).clamp(1e-7, 1.).squeeze(1)
        framewise_output = interpolate(framewise_output, interpolate_ratio)
        # Keep only the first 1000 upsampled frames (fixed 10 s output grid).
        framewise_output = framewise_output[:,:1000,:]
        output_dict = {'framewise_output': framewise_output,
            'clipwise_output': clipwise_output}

        return output_dict
617
+
618
+
619
class Mlp(nn.Module):
    """PVTv2 feed-forward block: Linear -> DWConv -> GELU -> Dropout -> Linear.

    With ``linear=True`` an extra ReLU is applied right after ``fc1`` (the
    "linear SRA" variant of PVTv2).
    """

    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0., linear=False):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        # Depth-wise 3x3 conv mixes neighbouring tokens on the 2-D grid.
        self.dwconv = DWConv(hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)
        self.linear = linear
        if self.linear:
            self.relu = nn.ReLU()
        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Trunc-normal for Linear, ones/zeros for LayerNorm, fan-out normal for Conv2d."""
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        """x: (batch, H*W, channels) token sequence; H, W give its 2-D layout."""
        x = self.fc1(x)
        if self.linear:
            x = self.relu(x)
        x = self.dwconv(x, H, W)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x
659
+
660
+
661
class Attention(nn.Module):
    """Spatial-reduction attention (SRA) from PVTv2.

    Queries come from the full token sequence; keys/values are computed from
    a spatially reduced copy. With ``linear=False`` and ``sr_ratio > 1`` a
    strided conv downsamples by ``sr_ratio``; with ``linear=True`` an adaptive
    7x7 average pool (plus GELU) is used instead.
    """

    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1, linear=False):
        super().__init__()
        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."

        self.dim = dim
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # Default scaling is 1/sqrt(head_dim) unless overridden by qk_scale.
        self.scale = qk_scale or head_dim ** -0.5

        self.q = nn.Linear(dim, dim, bias=qkv_bias)
        # Single projection produces both keys and values (split in forward).
        self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        self.linear = linear
        self.sr_ratio = sr_ratio
        if not linear:
            if sr_ratio > 1:
                # Strided conv reduces the K/V token grid by sr_ratio per axis.
                self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
                self.norm = nn.LayerNorm(dim)
        else:
            # Linear SRA: pool K/V tokens to a fixed 7x7 grid.
            self.pool = nn.AdaptiveAvgPool2d(7)
            self.sr = nn.Conv2d(dim, dim, kernel_size=1, stride=1)
            self.norm = nn.LayerNorm(dim)
            self.act = nn.GELU()
        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Trunc-normal for Linear, ones/zeros for LayerNorm, fan-out normal for Conv2d."""
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        """x: (batch, N=H*W, channels). Returns a tensor of the same shape."""
        B, N, C = x.shape
        # (B, heads, N, head_dim)
        q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)

        if not self.linear:
            if self.sr_ratio > 1:
                # Reduce tokens spatially before computing K/V.
                x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
                x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1)
                x_ = self.norm(x_)
                kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
            else:
                kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        else:
            # Linear SRA: fixed-size pooled K/V regardless of input resolution.
            x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
            x_ = self.sr(self.pool(x_)).reshape(B, C, -1).permute(0, 2, 1)
            x_ = self.norm(x_)
            x_ = self.act(x_)
            kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        k, v = kv[0], kv[1]

        # Scaled dot-product attention over the (possibly reduced) K/V tokens.
        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)

        return x
734
+
735
+
736
class Pooling(nn.Module):
    """Token mixer from PoolFormer: local average minus the input.

    Args:
        pool_size: side length of the square average-pooling window.
    """

    def __init__(self, pool_size=3):
        super().__init__()
        half_window = pool_size // 2
        # count_include_pad=False keeps border averages unbiased, so a
        # constant input maps exactly to zero after the subtraction.
        self.pool = nn.AvgPool2d(
            pool_size,
            stride=1,
            padding=half_window,
            count_include_pad=False,
        )

    def forward(self, x):
        smoothed = self.pool(x)
        return smoothed - x
748
+
749
class Block(nn.Module):
    """PVTv2 transformer block: pre-norm SRA attention + pre-norm MLP,
    each wrapped in a residual connection with stochastic depth.
    """

    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
        drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1, linear=False):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
            attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio, linear=linear)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop, linear=linear)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Trunc-normal for Linear, ones/zeros for LayerNorm, fan-out normal for Conv2d."""
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        """x: (batch, H*W, dim) token sequence; H, W give its 2-D layout."""
        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
        x = x + self.drop_path(self.mlp(self.norm2(x), H, W))
        return x
787
+
788
+
789
class OverlapPatchEmbed(nn.Module):
    """ Image to Patch Embedding

    Overlapping patch embedding via a strided conv, followed by LayerNorm on
    the flattened token sequence.

    NOTE(review): padding is ``patch_size // 3`` here, while the reference
    PVTv2 implementation uses ``patch_size // 2`` — confirm this is
    intentional (it changes the output grid size for patch_size=7).
    """

    def __init__(self, tdim, fdim, patch_size=7, stride=4, in_chans=3, embed_dim=768):
        super().__init__()
        img_size = (tdim, fdim)
        patch_size = to_2tuple(patch_size)

        self.img_size = img_size
        self.patch_size = patch_size
        # Nominal output grid; the actual H, W are taken from the conv output
        # in forward().
        self.H, self.W = img_size[0] // stride, img_size[1] // stride
        self.num_patches = self.H * self.W
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=stride,
            padding=(patch_size[0] // 3, patch_size[1] // 3))
        self.norm = nn.LayerNorm(embed_dim)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Trunc-normal for Linear, ones/zeros for LayerNorm, fan-out normal for Conv2d."""
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x):
        """x: (batch, in_chans, H_in, W_in) -> (tokens, H, W) where tokens is
        (batch, H*W, embed_dim) and H, W is the embedded grid size."""
        x = self.proj(x)
        _, _, H, W = x.shape
        x = x.flatten(2).transpose(1, 2)
        x = self.norm(x)

        return x, H, W
830
+
831
+
832
class PyramidVisionTransformerV2(nn.Module):
    """PVTv2 backbone adapted to spectrogram input (tdim x fdim "image").

    Builds ``num_stages`` stages of (OverlapPatchEmbed -> Block x depth ->
    LayerNorm); the classification head of the original model is disabled —
    forward() returns the final feature map, not logits.
    """

    def __init__(self, tdim=1001, fdim=64, patch_size=16, stride=4, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512],
        num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0.,
        attn_drop_rate=0., drop_path_rate=0.1, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 6, 3],
        sr_ratios=[8, 4, 2, 1], num_stages=2, linear=False, pretrained=None):
        super().__init__()
        self.depths = depths
        self.num_stages = num_stages
        self.linear = linear

        # Stochastic-depth rates increase linearly across all blocks.
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
        cur = 0

        for i in range(num_stages):
            # NOTE(review): for stages i > 0, fdim is derived from *tdim*
            # (tdim // 2**(i+1)), which looks like a typo for fdim. Harmless
            # in practice because forward() takes H, W from the actual conv
            # output, but confirm before relying on self.H/self.W/num_patches.
            patch_embed = OverlapPatchEmbed(tdim=tdim if i == 0 else tdim // (2 ** (i + 1)),
                fdim=fdim if i == 0 else tdim // (2 ** (i + 1)),
                patch_size=7 if i == 0 else 3,
                stride=stride if i == 0 else 2,
                in_chans=in_chans if i == 0 else embed_dims[i - 1],
                embed_dim=embed_dims[i])
            block = nn.ModuleList([Block(
                dim=embed_dims[i], num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + j], norm_layer=norm_layer,
                sr_ratio=sr_ratios[i], linear=linear)
                for j in range(depths[i])])
            norm = norm_layer(embed_dims[i])
            cur += depths[i]

            # Register per-stage modules as patch_embed1..N / block1..N / norm1..N.
            setattr(self, f"patch_embed{i + 1}", patch_embed)
            setattr(self, f"block{i + 1}", block)
            setattr(self, f"norm{i + 1}", norm)
        self.apply(self._init_weights)
        self.init_weights(pretrained)

    def _init_weights(self, m):
        """Trunc-normal for Linear, ones/zeros for LayerNorm, fan-out normal for Conv2d."""
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def init_weights(self, pretrained=None):
        """Load pretrained weights when ``pretrained`` is a checkpoint path/URL
        (non-strict, so shape mismatches with the audio input are tolerated)."""
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, map_location='cpu', strict=False, logger=logger)

    def freeze_patch_emb(self):
        self.patch_embed1.requires_grad = False

    @torch.jit.ignore
    def no_weight_decay(self):
        return {'pos_embed1', 'pos_embed2', 'pos_embed3', 'pos_embed4', 'cls_token'}  # has pos_embed may be better

    def get_classifier(self):
        # NOTE(review): self.head is never created in this adaptation (the
        # head construction is disabled) — calling this will raise.
        return self.head

    def reset_classifier(self, num_classes, global_pool=''):
        # NOTE(review): self.embed_dim is never set here either — verify
        # before using this method.
        self.num_classes = num_classes
        self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()

    def forward_features(self, x):
        """Run all stages; returns the last stage's features as (B, C, H, W)."""
        B = x.shape[0]

        for i in range(self.num_stages):
            patch_embed = getattr(self, f"patch_embed{i + 1}")
            block = getattr(self, f"block{i + 1}")
            norm = getattr(self, f"norm{i + 1}")
            x, H, W = patch_embed(x)
            for blk in block:
                x = blk(x, H, W)
            x = norm(x)
            # Restore the 2-D layout for the next stage's patch embedding.
            x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
        return x

    def forward(self, x):
        """Return the backbone feature map (classification head disabled)."""
        x = self.forward_features(x)

        return x
928
+
929
class DWConv(nn.Module):
    """Depth-wise 3x3 convolution applied to a flattened token sequence.

    Tokens are reshaped to their (H, W) grid, convolved per channel, and
    flattened back, so the interface stays (batch, H*W, dim).
    """

    def __init__(self, dim=768):
        super(DWConv, self).__init__()
        # groups=dim makes the convolution depth-wise (one filter per channel).
        self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim)

    def forward(self, x, H, W):
        batch, _tokens, channels = x.shape
        grid = x.transpose(1, 2).view(batch, channels, H, W)
        grid = self.dwconv(grid)
        return grid.flatten(2).transpose(1, 2)
941
+
942
+
943
+ def _conv_filter(state_dict, patch_size=16):
944
+ """ convert patch embedding weight from manual patchify + linear proj to conv"""
945
+ out_dict = {}
946
+ for k, v in state_dict.items():
947
+ if 'patch_embed.proj.weight' in k:
948
+ v = v.reshape((v.shape[0], 3, patch_size, patch_size))
949
+ out_dict[k] = v
950
+
951
+ return out_dict
audio_detection/audio_infer/pytorch/pytorch_utils.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import time
3
+ import torch
4
+ import torch.nn as nn
5
+
6
+
7
def move_data_to_device(x, device):
    """Convert a numpy array to a torch tensor of matching kind and move it
    to ``device``. Arrays of other dtypes are returned unchanged.
    """
    dtype_name = str(x.dtype)
    if 'float' in dtype_name:
        tensor = torch.Tensor(x)
    elif 'int' in dtype_name:
        tensor = torch.LongTensor(x)
    else:
        # Neither float nor int (e.g. strings): leave it on the host as-is.
        return x
    return tensor.to(device)
16
+
17
+
18
def do_mixup(x, mixup_lambda):
    """Mixup x of even indexes (0, 2, 4, ...) with x of odd indexes
    (1, 3, 5, ...).

    Args:
        x: (batch_size * 2, ...)
        mixup_lambda: (batch_size * 2,)

    Returns:
        out: (batch_size, ...)
    """
    # Transpose so the batch axis is last, which lets the 1-D lambda vector
    # broadcast across all remaining dimensions.
    even = x[0 :: 2].transpose(0, -1)
    odd = x[1 :: 2].transpose(0, -1)
    mixed = even * mixup_lambda[0 :: 2] + odd * mixup_lambda[1 :: 2]
    return mixed.transpose(0, -1)
32
+
33
+
34
def append_to_dict(dict, key, value):
    """Append ``value`` to the list stored under ``key``, creating the list
    on first use.

    (Parameter name ``dict`` shadows the builtin; kept for caller
    compatibility.)
    """
    dict.setdefault(key, []).append(value)
39
+
40
+
41
def forward(model, generator, return_input=False, 
    return_target=False):
    """Forward data to a model.

    Runs the model in eval mode, without gradients, over every mini-batch the
    generator yields, and collects the outputs per key.

    Args:
        model: object, torch module with at least one parameter (used to
            discover the target device)
        generator: object, yields dicts with 'audio_name', 'waveform' and
            optionally 'target'
        return_input: bool, also collect the raw input waveforms
        return_target: bool, also collect targets when the batch provides them

    Returns:
        audio_name: (audios_num,)
        clipwise_output: (audios_num, classes_num)
        (ifexist) segmentwise_output: (audios_num, segments_num, classes_num)
        (ifexist) framewise_output: (audios_num, frames_num, classes_num)
        (optional) return_input: (audios_num, segment_samples)
        (optional) return_target: (audios_num, classes_num)
    """
    output_dict = {}
    device = next(model.parameters()).device
    time1 = time.time()

    # Forward data to a model in mini-batches
    # (removed stray per-batch debug print of the batch index)
    for n, batch_data_dict in enumerate(generator):
        batch_waveform = move_data_to_device(batch_data_dict['waveform'], device)

        with torch.no_grad():
            model.eval()
            batch_output = model(batch_waveform)

        append_to_dict(output_dict, 'audio_name', batch_data_dict['audio_name'])

        append_to_dict(output_dict, 'clipwise_output',
            batch_output['clipwise_output'].data.cpu().numpy())

        if 'segmentwise_output' in batch_output.keys():
            append_to_dict(output_dict, 'segmentwise_output',
                batch_output['segmentwise_output'].data.cpu().numpy())

        if 'framewise_output' in batch_output.keys():
            append_to_dict(output_dict, 'framewise_output',
                batch_output['framewise_output'].data.cpu().numpy())

        if return_input:
            append_to_dict(output_dict, 'waveform', batch_data_dict['waveform'])

        if return_target:
            if 'target' in batch_data_dict.keys():
                append_to_dict(output_dict, 'target', batch_data_dict['target'])

        if n % 10 == 0:
            print(' --- Inference time: {:.3f} s / 10 iterations ---'.format(
                time.time() - time1))
            time1 = time.time()

    # Stack the per-batch arrays into one array per key.
    for key in output_dict.keys():
        output_dict[key] = np.concatenate(output_dict[key], axis=0)

    return output_dict
101
+
102
+
103
def interpolate(x, ratio):
    """Interpolate data in time domain. This is used to compensate the
    resolution reduction in downsampling of a CNN.

    Each time step is repeated ``ratio`` times (nearest-neighbour upsampling).

    Args:
        x: (batch_size, time_steps, classes_num)
        ratio: int, ratio to interpolate

    Returns:
        upsampled: (batch_size, time_steps * ratio, classes_num)
    """
    batch_size, time_steps, classes_num = x.shape
    expanded = x[:, :, None, :].repeat(1, 1, ratio, 1)
    return expanded.reshape(batch_size, time_steps * ratio, classes_num)
118
+
119
+
120
def pad_framewise_output(framewise_output, frames_num):
    """Pad framewise_output to the same length as input frames. The pad value
    is the same as the value of the last frame.

    Args:
        framewise_output: (batch_size, frames_num, classes_num)
        frames_num: int, number of frames to pad

    Outputs:
        output: (batch_size, frames_num, classes_num)
    """
    missing = frames_num - framewise_output.shape[1]
    # Repeat the final frame to fill the gap, then append along time.
    tail = framewise_output[:, -1 :, :].repeat(1, missing, 1)
    return torch.cat((framewise_output, tail), dim=1)
138
+
139
+
140
def count_parameters(model):
    """Return the total number of trainable parameters in ``model``."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
142
+
143
+
144
def count_flops(model, audio_length):
    """Count flops. Code modified from others' implementation.

    Registers forward hooks on every supported leaf module, runs a single
    dummy forward pass with a (1, audio_length) random input, and sums the
    per-layer operation counts. Unsupported leaf modules are reported with a
    warning and contribute zero.

    Args:
        model: nn.Module whose forward accepts a (1, audio_length) tensor.
        audio_length: int, number of samples in the dummy input.

    Returns:
        Estimated total operations (multiply-adds counted as 2 ops).
    """
    multiply_adds = True
    list_conv2d=[]
    def conv2d_hook(self, input, output):
        batch_size, input_channels, input_height, input_width = input[0].size()
        output_channels, output_height, output_width = output[0].size()

        kernel_ops = self.kernel_size[0] * self.kernel_size[1] * (self.in_channels / self.groups) * (2 if multiply_adds else 1)
        bias_ops = 1 if self.bias is not None else 0

        params = output_channels * (kernel_ops + bias_ops)
        flops = batch_size * params * output_height * output_width

        list_conv2d.append(flops)

    list_conv1d=[]
    def conv1d_hook(self, input, output):
        batch_size, input_channels, input_length = input[0].size()
        output_channels, output_length = output[0].size()

        kernel_ops = self.kernel_size[0] * (self.in_channels / self.groups) * (2 if multiply_adds else 1)
        bias_ops = 1 if self.bias is not None else 0

        params = output_channels * (kernel_ops + bias_ops)
        flops = batch_size * params * output_length

        list_conv1d.append(flops)

    list_linear=[]
    def linear_hook(self, input, output):
        # Inputs with more than 2 dims are treated as batch size 1.
        batch_size = input[0].size(0) if input[0].dim() == 2 else 1

        weight_ops = self.weight.nelement() * (2 if multiply_adds else 1)
        bias_ops = self.bias.nelement()

        flops = batch_size * (weight_ops + bias_ops)
        list_linear.append(flops)

    list_bn=[]
    def bn_hook(self, input, output):
        list_bn.append(input[0].nelement() * 2)

    list_relu=[]
    def relu_hook(self, input, output):
        list_relu.append(input[0].nelement() * 2)

    list_pooling2d=[]
    def pooling2d_hook(self, input, output):
        batch_size, input_channels, input_height, input_width = input[0].size()
        output_channels, output_height, output_width = output[0].size()

        kernel_ops = self.kernel_size * self.kernel_size
        bias_ops = 0
        params = output_channels * (kernel_ops + bias_ops)
        flops = batch_size * params * output_height * output_width

        list_pooling2d.append(flops)

    list_pooling1d=[]
    def pooling1d_hook(self, input, output):
        batch_size, input_channels, input_length = input[0].size()
        output_channels, output_length = output[0].size()

        kernel_ops = self.kernel_size[0]
        bias_ops = 0

        params = output_channels * (kernel_ops + bias_ops)
        flops = batch_size * params * output_length

        # NOTE: accumulated into list_pooling2d in the original code; kept so
        # totals are unchanged (both lists are summed below anyway).
        list_pooling2d.append(flops)

    def foo(net):
        """Recursively register the matching hook on every leaf module."""
        childrens = list(net.children())
        if not childrens:
            if isinstance(net, nn.Conv2d):
                net.register_forward_hook(conv2d_hook)
            elif isinstance(net, nn.Conv1d):
                net.register_forward_hook(conv1d_hook)
            elif isinstance(net, nn.Linear):
                net.register_forward_hook(linear_hook)
            elif isinstance(net, nn.BatchNorm2d) or isinstance(net, nn.BatchNorm1d):
                net.register_forward_hook(bn_hook)
            elif isinstance(net, nn.ReLU):
                net.register_forward_hook(relu_hook)
            elif isinstance(net, nn.AvgPool2d) or isinstance(net, nn.MaxPool2d):
                net.register_forward_hook(pooling2d_hook)
            elif isinstance(net, nn.AvgPool1d) or isinstance(net, nn.MaxPool1d):
                net.register_forward_hook(pooling1d_hook)
            else:
                print('Warning: flop of module {} is not counted!'.format(net))
            return
        for c in childrens:
            foo(c)

    # Register hook
    foo(model)

    # Fixed: was a duplicated assignment (`device = device = ...`).
    device = next(model.parameters()).device
    input = torch.rand(1, audio_length).to(device)

    out = model(input)

    total_flops = sum(list_conv2d) + sum(list_conv1d) + sum(list_linear) + \
        sum(list_bn) + sum(list_relu) + sum(list_pooling2d) + sum(list_pooling1d)

    return total_flops
audio_detection/audio_infer/results/YDlWd7Wmdi1E.png ADDED
audio_detection/audio_infer/useful_ckpts/audio_detection.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f909808f17d424dc29063a21953ff2be103489518a4f60a6c649d2e3e7d3e81
3
+ size 441042195
audio_detection/audio_infer/utils/__pycache__/config.cpython-38.pyc ADDED
Binary file (6.33 kB). View file
audio_detection/audio_infer/utils/config.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import csv
3
+
4
# AudioSet inference configuration: audio constants plus label/id lookup
# tables built from the class-label CSV.
sample_rate = 32000
clip_samples = sample_rate * 10 # Audio clips are 10-second

# Load label
# NOTE(review): path is relative to the process working directory — this
# module only imports cleanly when run from the repository root.
with open('./audio_detection/audio_infer/metadata/class_labels_indices.csv', 'r') as f:
    reader = csv.reader(f, delimiter=',')
    lines = list(reader)

labels = []
ids = []    # Each label has a unique id such as "/m/068hy"
# Skip the CSV header row; column 1 is the machine id, column 2 the display name.
for i1 in range(1, len(lines)):
    id = lines[i1][1]
    label = lines[i1][2]
    ids.append(id)
    labels.append(label)

classes_num = len(labels)

# Bidirectional lookup tables between labels/ids and class indexes.
lb_to_ix = {label : i for i, label in enumerate(labels)}
ix_to_lb = {i : label for i, label in enumerate(labels)}

id_to_ix = {id : i for i, id in enumerate(ids)}
ix_to_id = {i : id for i, id in enumerate(ids)}
27
+
28
+ full_samples_per_class = np.array([
29
+ 937432, 16344, 7822, 10271, 2043, 14420, 733, 1511,
30
+ 1258, 424, 1751, 704, 369, 590, 1063, 1375,
31
+ 5026, 743, 853, 1648, 714, 1497, 1251, 2139,
32
+ 1093, 133, 224, 39469, 6423, 407, 1559, 4546,
33
+ 6826, 7464, 2468, 549, 4063, 334, 587, 238,
34
+ 1766, 691, 114, 2153, 236, 209, 421, 740,
35
+ 269, 959, 137, 4192, 485, 1515, 655, 274,
36
+ 69, 157, 1128, 807, 1022, 346, 98, 680,
37
+ 890, 352, 4169, 2061, 1753, 9883, 1339, 708,
38
+ 37857, 18504, 12864, 2475, 2182, 757, 3624, 677,
39
+ 1683, 3583, 444, 1780, 2364, 409, 4060, 3097,
40
+ 3143, 502, 723, 600, 230, 852, 1498, 1865,
41
+ 1879, 2429, 5498, 5430, 2139, 1761, 1051, 831,
42
+ 2401, 2258, 1672, 1711, 987, 646, 794, 25061,
43
+ 5792, 4256, 96, 8126, 2740, 752, 513, 554,
44
+ 106, 254, 1592, 556, 331, 615, 2841, 737,
45
+ 265, 1349, 358, 1731, 1115, 295, 1070, 972,
46
+ 174, 937780, 112337, 42509, 49200, 11415, 6092, 13851,
47
+ 2665, 1678, 13344, 2329, 1415, 2244, 1099, 5024,
48
+ 9872, 10948, 4409, 2732, 1211, 1289, 4807, 5136,
49
+ 1867, 16134, 14519, 3086, 19261, 6499, 4273, 2790,
50
+ 8820, 1228, 1575, 4420, 3685, 2019, 664, 324,
51
+ 513, 411, 436, 2997, 5162, 3806, 1389, 899,
52
+ 8088, 7004, 1105, 3633, 2621, 9753, 1082, 26854,
53
+ 3415, 4991, 2129, 5546, 4489, 2850, 1977, 1908,
54
+ 1719, 1106, 1049, 152, 136, 802, 488, 592,
55
+ 2081, 2712, 1665, 1128, 250, 544, 789, 2715,
56
+ 8063, 7056, 2267, 8034, 6092, 3815, 1833, 3277,
57
+ 8813, 2111, 4662, 2678, 2954, 5227, 1472, 2591,
58
+ 3714, 1974, 1795, 4680, 3751, 6585, 2109, 36617,
59
+ 6083, 16264, 17351, 3449, 5034, 3931, 2599, 4134,
60
+ 3892, 2334, 2211, 4516, 2766, 2862, 3422, 1788,
61
+ 2544, 2403, 2892, 4042, 3460, 1516, 1972, 1563,
62
+ 1579, 2776, 1647, 4535, 3921, 1261, 6074, 2922,
63
+ 3068, 1948, 4407, 712, 1294, 1019, 1572, 3764,
64
+ 5218, 975, 1539, 6376, 1606, 6091, 1138, 1169,
65
+ 7925, 3136, 1108, 2677, 2680, 1383, 3144, 2653,
66
+ 1986, 1800, 1308, 1344, 122231, 12977, 2552, 2678,
67
+ 7824, 768, 8587, 39503, 3474, 661, 430, 193,
68
+ 1405, 1442, 3588, 6280, 10515, 785, 710, 305,
69
+ 206, 4990, 5329, 3398, 1771, 3022, 6907, 1523,
70
+ 8588, 12203, 666, 2113, 7916, 434, 1636, 5185,
71
+ 1062, 664, 952, 3490, 2811, 2749, 2848, 15555,
72
+ 363, 117, 1494, 1647, 5886, 4021, 633, 1013,
73
+ 5951, 11343, 2324, 243, 372, 943, 734, 242,
74
+ 3161, 122, 127, 201, 1654, 768, 134, 1467,
75
+ 642, 1148, 2156, 1368, 1176, 302, 1909, 61,
76
+ 223, 1812, 287, 422, 311, 228, 748, 230,
77
+ 1876, 539, 1814, 737, 689, 1140, 591, 943,
78
+ 353, 289, 198, 490, 7938, 1841, 850, 457,
79
+ 814, 146, 551, 728, 1627, 620, 648, 1621,
80
+ 2731, 535, 88, 1736, 736, 328, 293, 3170,
81
+ 344, 384, 7640, 433, 215, 715, 626, 128,
82
+ 3059, 1833, 2069, 3732, 1640, 1508, 836, 567,
83
+ 2837, 1151, 2068, 695, 1494, 3173, 364, 88,
84
+ 188, 740, 677, 273, 1533, 821, 1091, 293,
85
+ 647, 318, 1202, 328, 532, 2847, 526, 721,
86
+ 370, 258, 956, 1269, 1641, 339, 1322, 4485,
87
+ 286, 1874, 277, 757, 1393, 1330, 380, 146,
88
+ 377, 394, 318, 339, 1477, 1886, 101, 1435,
89
+ 284, 1425, 686, 621, 221, 117, 87, 1340,
90
+ 201, 1243, 1222, 651, 1899, 421, 712, 1016,
91
+ 1279, 124, 351, 258, 7043, 368, 666, 162,
92
+ 7664, 137, 70159, 26179, 6321, 32236, 33320, 771,
93
+ 1169, 269, 1103, 444, 364, 2710, 121, 751,
94
+ 1609, 855, 1141, 2287, 1940, 3943, 289])
audio_detection/audio_infer/utils/crash.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
import sys

class ExceptionHook:
    """sys.excepthook replacement that opens an IPython post-mortem
    debugger on any uncaught exception."""

    # Lazily-built ultratb.FormattedTB; shared by all invocations.
    instance = None

    def __call__(self, *args, **kwargs):
        if self.instance is not None:
            return self.instance(*args, **kwargs)
        # Import IPython only when an exception actually occurs, so the
        # dependency is not paid on normal runs.
        from IPython.core import ultratb
        self.instance = ultratb.FormattedTB(mode='Plain',
            color_scheme='Linux', call_pdb=1)
        return self.instance(*args, **kwargs)

# Install the hook for every uncaught exception in this process.
sys.excepthook = ExceptionHook()
audio_detection/audio_infer/utils/create_black_list.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import csv
3
+ import os
4
+
5
+ from utilities import create_folder
6
+
7
+
8
def dcase2017task4(args):
    """Create black list. Black list is a list of audio ids that will be
    skipped in training.

    Args:
        args: argparse.Namespace with attribute `workspace`, the directory
            under which `black_list/dcase2017task4.csv` is written.
    """

    # Arguments & parameters
    workspace = args.workspace

    # Black list from DCASE 2017 Task 4
    test_weak_csv = 'metadata/black_list/groundtruth_weak_label_testing_set.csv'
    evaluation_weak_csv = 'metadata/black_list/groundtruth_weak_label_evaluation_set.csv'

    black_list_csv = os.path.join(workspace, 'black_list', 'dcase2017task4.csv')
    create_folder(os.path.dirname(black_list_csv))

    def get_id_sets(csv_path):
        """Return the unique 11-character YouTube ids in a tab-separated
        weak-label csv."""
        with open(csv_path, 'r') as fr:
            reader = csv.reader(fr, delimiter='\t')
            lines = list(reader)

        # line, e.g.: ['-5QrBL6MzLg_60.000_70.000.wav', '60.000', '70.000', 'Train horn']
        # The first 11 characters of the file name are the YouTube id.
        ids_set = list(set(line[0][0 : 11] for line in lines))
        return ids_set

    test_ids_set = get_id_sets(test_weak_csv)
    evaluation_ids_set = get_id_sets(evaluation_weak_csv)

    full_ids_set = test_ids_set + evaluation_ids_set

    # Write black list. Fix: use a context manager so the output file is
    # always closed (the original never called close()); also avoid
    # shadowing the builtin `id`.
    with open(black_list_csv, 'w') as fw:
        for audio_id in full_ids_set:
            fw.write('{}\n'.format(audio_id))

    print('Write black list to {}'.format(black_list_csv))
49
+
50
+
51
if __name__ == '__main__':
    # Command-line entry point: one sub-command per task.
    parser = argparse.ArgumentParser(description='')
    subparsers = parser.add_subparsers(dest='mode')

    parser_dcase2017task4 = subparsers.add_parser('dcase2017task4')
    parser_dcase2017task4.add_argument('--workspace', type=str, required=True)

    args = parser.parse_args()

    # Dispatch on the selected sub-command.
    if args.mode == 'dcase2017task4':
        dcase2017task4(args)
    else:
        raise Exception('Error argument!')
audio_detection/audio_infer/utils/create_indexes.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import argparse
3
+ import csv
4
+ import os
5
+ import glob
6
+ import datetime
7
+ import time
8
+ import logging
9
+ import h5py
10
+ import librosa
11
+
12
+ from utilities import create_folder, get_sub_filepaths
13
+ import config
14
+
15
+
16
def create_indexes(args):
    """Create indexes a for dataloader to read for training. When users have
    a new task and their own data, they need to create similar indexes. The
    indexes contain meta information of "where to find the data for training".

    Args:
        args: argparse.Namespace with attributes:
            waveforms_hdf5_path: str, path of the packed waveforms hdf5.
            indexes_hdf5_path: str, path to write out the indexes hdf5.
    """

    # Arguments & parameters
    waveforms_hdf5_path = args.waveforms_hdf5_path
    indexes_hdf5_path = args.indexes_hdf5_path

    # Paths
    create_folder(os.path.dirname(indexes_hdf5_path))

    with h5py.File(waveforms_hdf5_path, 'r') as hr:
        with h5py.File(indexes_hdf5_path, 'w') as hw:
            audios_num = len(hr['audio_name'])
            hw.create_dataset('audio_name', data=hr['audio_name'][:], dtype='S20')
            # Fix: use builtin `bool` — the `np.bool` alias was deprecated
            # in NumPy 1.20 and removed in NumPy 1.24.
            hw.create_dataset('target', data=hr['target'][:], dtype=bool)
            hw.create_dataset('hdf5_path', data=[waveforms_hdf5_path.encode()] * audios_num, dtype='S200')
            hw.create_dataset('index_in_hdf5', data=np.arange(audios_num), dtype=np.int32)

    print('Write to {}'.format(indexes_hdf5_path))
38
+
39
+
40
def combine_full_indexes(args):
    """Combine all balanced and unbalanced indexes hdf5s to a single hdf5. This
    combined indexes hdf5 is used for training with full data (~20k balanced
    audio clips + ~1.9m unbalanced audio clips).

    Args:
        args: argparse.Namespace with attributes:
            indexes_hdf5s_dir: str, directory containing the partial indexes hdf5s.
            full_indexes_hdf5_path: str, path to write out the combined hdf5.
    """

    # Arguments & parameters
    indexes_hdf5s_dir = args.indexes_hdf5s_dir
    full_indexes_hdf5_path = args.full_indexes_hdf5_path

    classes_num = config.classes_num

    # Paths: combine only training splits; skip already-combined hdf5s and
    # debugging ("mini") hdf5s.
    paths = get_sub_filepaths(indexes_hdf5s_dir)
    paths = [path for path in paths if (
        'train' in path and 'full_train' not in path and 'mini' not in path)]

    print('Total {} hdf5 to combine.'.format(len(paths)))

    with h5py.File(full_indexes_hdf5_path, 'w') as full_hf:
        # Create empty, resizable datasets that are grown as each partial
        # hdf5 is appended below.
        full_hf.create_dataset(
            name='audio_name',
            shape=(0,),
            maxshape=(None,),
            dtype='S20')

        # Fix: use builtin `bool` — the `np.bool` alias was deprecated in
        # NumPy 1.20 and removed in NumPy 1.24.
        full_hf.create_dataset(
            name='target',
            shape=(0, classes_num),
            maxshape=(None, classes_num),
            dtype=bool)

        full_hf.create_dataset(
            name='hdf5_path',
            shape=(0,),
            maxshape=(None,),
            dtype='S200')

        full_hf.create_dataset(
            name='index_in_hdf5',
            shape=(0,),
            maxshape=(None,),
            dtype=np.int32)

        for path in paths:
            with h5py.File(path, 'r') as part_hf:
                print(path)
                # Read dataset shapes instead of materialising whole arrays
                # just to count their rows.
                n = full_hf['audio_name'].shape[0]
                new_n = n + part_hf['audio_name'].shape[0]

                full_hf['audio_name'].resize((new_n,))
                full_hf['audio_name'][n : new_n] = part_hf['audio_name'][:]

                full_hf['target'].resize((new_n, classes_num))
                full_hf['target'][n : new_n] = part_hf['target'][:]

                full_hf['hdf5_path'].resize((new_n,))
                full_hf['hdf5_path'][n : new_n] = part_hf['hdf5_path'][:]

                full_hf['index_in_hdf5'].resize((new_n,))
                full_hf['index_in_hdf5'][n : new_n] = part_hf['index_in_hdf5'][:]

    print('Write combined full hdf5 to {}'.format(full_indexes_hdf5_path))
103
+
104
+
105
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='mode')

    parser_create_indexes = subparsers.add_parser('create_indexes')
    parser_create_indexes.add_argument('--waveforms_hdf5_path', type=str, required=True, help='Path of packed waveforms hdf5.')
    parser_create_indexes.add_argument('--indexes_hdf5_path', type=str, required=True, help='Path to write out indexes hdf5.')

    parser_combine_full_indexes = subparsers.add_parser('combine_full_indexes')
    parser_combine_full_indexes.add_argument('--indexes_hdf5s_dir', type=str, required=True, help='Directory containing indexes hdf5s to be combined.')
    parser_combine_full_indexes.add_argument('--full_indexes_hdf5_path', type=str, required=True, help='Path to write out full indexes hdf5 file.')

    args = parser.parse_args()

    # Dispatch to the selected sub-command.
    handlers = {
        'create_indexes': create_indexes,
        'combine_full_indexes': combine_full_indexes,
    }
    if args.mode in handlers:
        handlers[args.mode](args)
    else:
        raise Exception('Incorrect arguments!')
audio_detection/audio_infer/utils/data_generator.py ADDED
@@ -0,0 +1,421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import h5py
3
+ import csv
4
+ import time
5
+ import logging
6
+
7
+ from utilities import int16_to_float32
8
+
9
+
10
def read_black_list(black_list_csv):
    """Read audio names from black list.

    Args:
        black_list_csv: str, path of a csv whose first column holds
            11-character YouTube ids.

    Returns:
        list of str, audio file names, e.g. ['Y-5QrBL6MzLg.wav', ...]
    """
    with open(black_list_csv, 'r') as fr:
        rows = list(csv.reader(fr))

    # Prepend 'Y' and append '.wav' to match the packed audio file names.
    return ['Y{}.wav'.format(row[0]) for row in rows]
19
+
20
+
21
class AudioSetDataset(object):
    def __init__(self, sample_rate=32000):
        """This class takes the meta of an audio clip as input, and return
        the waveform and target of the audio clip. This class is used by DataLoader.

        Args:
            sample_rate: int, one of 32000, 16000 or 8000; waveforms stored
                at 32 kHz are decimated to this rate on load.
        """
        self.sample_rate = sample_rate

    def __getitem__(self, meta):
        """Load waveform and target of an audio clip.

        Args:
            meta: {
                'hdf5_path': str,
                'index_in_hdf5': int}

        Returns:
            data_dict: {
                'audio_name': str,
                'waveform': (clip_samples,),
                'target': (classes_num,)}
        """
        row = meta['index_in_hdf5']
        with h5py.File(meta['hdf5_path'], 'r') as hf:
            audio_name = hf['audio_name'][row].decode()
            waveform = self.resample(int16_to_float32(hf['waveform'][row]))
            target = hf['target'][row].astype(np.float32)

        return {
            'audio_name': audio_name, 'waveform': waveform, 'target': target}

    def resample(self, waveform):
        """Resample by decimation (waveforms are stored at 32 kHz).

        Args:
            waveform: (clip_samples,)

        Returns:
            (resampled_clip_samples,)
        """
        if self.sample_rate == 32000:
            return waveform
        # Plain decimation: keep every 2nd (16 kHz) or 4th (8 kHz) sample.
        # NOTE(review): no anti-aliasing filter is applied before
        # decimation — presumably acceptable for this model; confirm
        # before reusing elsewhere.
        step = {16000: 2, 8000: 4}.get(self.sample_rate)
        if step is None:
            raise Exception('Incorrect sample rate!')
        return waveform[0 :: step]
72
+
73
+
74
class Base(object):
    def __init__(self, indexes_hdf5_path, batch_size, black_list_csv, random_seed):
        """Base class of train sampler.

        Args:
            indexes_hdf5_path: string
            batch_size: int
            black_list_csv: string
            random_seed: int
        """
        self.batch_size = batch_size
        self.random_state = np.random.RandomState(random_seed)

        # Audio names to skip during training (optional black list).
        self.black_list_names = read_black_list(black_list_csv) if black_list_csv else []
        logging.info('Black list samples: {}'.format(len(self.black_list_names)))

        # Load index metadata: clip names, source hdf5 paths, per-clip
        # row offsets and multi-hot targets.
        load_time = time.time()

        with h5py.File(indexes_hdf5_path, 'r') as hf:
            self.audio_names = [name.decode() for name in hf['audio_name'][:]]
            self.hdf5_paths = [path.decode() for path in hf['hdf5_path'][:]]
            self.indexes_in_hdf5 = hf['index_in_hdf5'][:]
            self.targets = hf['target'][:].astype(np.float32)

        (self.audios_num, self.classes_num) = self.targets.shape
        logging.info('Training number: {}'.format(self.audios_num))
        logging.info('Load target time: {:.3f} s'.format(time.time() - load_time))
107
+
108
+
109
class TrainSampler(Base):
    def __init__(self, indexes_hdf5_path, batch_size, black_list_csv=None,
        random_seed=1234):
        """Balanced sampler. Generate batch meta for training.

        Args:
            indexes_hdf5_path: string
            batch_size: int
            black_list_csv: string
            random_seed: int
        """
        super(TrainSampler, self).__init__(indexes_hdf5_path, batch_size,
            black_list_csv, random_seed)

        # Shuffled visiting order over all clips; reshuffled each epoch.
        self.indexes = np.arange(self.audios_num)
        self.random_state.shuffle(self.indexes)
        self.pointer = 0

    def __iter__(self):
        """Generate batch meta for training.

        Returns:
            batch_meta: e.g.: [
                {'hdf5_path': string, 'index_in_hdf5': int},
                ...]
        """
        while True:
            batch_meta = []
            while len(batch_meta) < self.batch_size:
                index = self.indexes[self.pointer]
                self.pointer += 1

                # End of epoch: reset pointer and reshuffle.
                if self.pointer >= self.audios_num:
                    self.pointer = 0
                    self.random_state.shuffle(self.indexes)

                # Skip black-listed clips.
                if self.audio_names[index] in self.black_list_names:
                    continue
                batch_meta.append({
                    'hdf5_path': self.hdf5_paths[index],
                    'index_in_hdf5': self.indexes_in_hdf5[index]})

            yield batch_meta

    def state_dict(self):
        """Snapshot of sampler state for checkpointing."""
        return {
            'indexes': self.indexes,
            'pointer': self.pointer}

    def load_state_dict(self, state):
        """Restore sampler state produced by state_dict()."""
        self.indexes = state['indexes']
        self.pointer = state['pointer']
172
+
173
+
174
class BalancedTrainSampler(Base):
    def __init__(self, indexes_hdf5_path, batch_size, black_list_csv=None,
        random_seed=1234):
        """Balanced sampler. Generate batch meta for training. Data are equally
        sampled from different sound classes.

        Args:
            indexes_hdf5_path: string
            batch_size: int
            black_list_csv: string
            random_seed: int
        """
        super(BalancedTrainSampler, self).__init__(indexes_hdf5_path,
            batch_size, black_list_csv, random_seed)

        self.samples_num_per_class = np.sum(self.targets, axis=0)
        logging.info('samples_num_per_class: {}'.format(
            self.samples_num_per_class.astype(np.int32)))

        # Per-class lists of training indexes, each shuffled independently.
        # E.g.: [[0, 11, 12, ...], [3, 4, 15, 16, ...], [7, 8, ...], ...]
        self.indexes_per_class = [
            np.where(self.targets[:, k] == 1)[0]
            for k in range(self.classes_num)]

        for class_indexes in self.indexes_per_class:
            self.random_state.shuffle(class_indexes)

        # Queue of class ids deciding which class the next sample is
        # drawn from; per-class pointers track epoch progress.
        self.queue = []
        self.pointers_of_classes = [0] * self.classes_num

    def expand_queue(self, queue):
        """Append one shuffled pass over all class ids to the queue."""
        classes_set = np.arange(self.classes_num).tolist()
        self.random_state.shuffle(classes_set)
        queue += classes_set
        return queue

    def __iter__(self):
        """Generate batch meta for training.

        Returns:
            batch_meta: e.g.: [
                {'hdf5_path': string, 'index_in_hdf5': int},
                ...]
        """
        while True:
            batch_meta = []
            while len(batch_meta) < self.batch_size:
                if not self.queue:
                    self.queue = self.expand_queue(self.queue)

                class_id = self.queue.pop(0)
                pointer = self.pointers_of_classes[class_id]
                self.pointers_of_classes[class_id] += 1
                index = self.indexes_per_class[class_id][pointer]

                # Finished one pass over this class: reshuffle its indexes
                # and reset the pointer.
                if self.pointers_of_classes[class_id] >= self.samples_num_per_class[class_id]:
                    self.pointers_of_classes[class_id] = 0
                    self.random_state.shuffle(self.indexes_per_class[class_id])

                # Skip black-listed clips.
                if self.audio_names[index] in self.black_list_names:
                    continue
                batch_meta.append({
                    'hdf5_path': self.hdf5_paths[index],
                    'index_in_hdf5': self.indexes_in_hdf5[index]})

            yield batch_meta

    def state_dict(self):
        """Snapshot of sampler state for checkpointing."""
        return {
            'indexes_per_class': self.indexes_per_class,
            'queue': self.queue,
            'pointers_of_classes': self.pointers_of_classes}

    def load_state_dict(self, state):
        """Restore sampler state produced by state_dict()."""
        self.indexes_per_class = state['indexes_per_class']
        self.queue = state['queue']
        self.pointers_of_classes = state['pointers_of_classes']
263
+
264
+
265
class AlternateTrainSampler(Base):
    def __init__(self, indexes_hdf5_path, batch_size, black_list_csv=None,
        random_seed=1234):
        """AlternateSampler is a combination of Sampler and Balanced Sampler.
        AlternateSampler alternately sample data from Sampler and Blanced Sampler.

        Args:
            indexes_hdf5_path: string
            batch_size: int
            black_list_csv: string
            random_seed: int
        """
        self.sampler1 = TrainSampler(indexes_hdf5_path, batch_size,
            black_list_csv, random_seed)

        self.sampler2 = BalancedTrainSampler(indexes_hdf5_path, batch_size,
            black_list_csv, random_seed)

        self.batch_size = batch_size
        self.count = 0

    def _uniform_batch(self):
        """Draw one batch with sampler1's (uniform) strategy, mutating
        sampler1's state exactly as sampler1 itself would."""
        s = self.sampler1
        batch_meta = []
        while len(batch_meta) < self.batch_size:
            index = s.indexes[s.pointer]
            s.pointer += 1

            # End of epoch: reset pointer and reshuffle.
            if s.pointer >= s.audios_num:
                s.pointer = 0
                s.random_state.shuffle(s.indexes)

            # Skip black-listed clips.
            if s.audio_names[index] in s.black_list_names:
                continue
            batch_meta.append({
                'hdf5_path': s.hdf5_paths[index],
                'index_in_hdf5': s.indexes_in_hdf5[index]})
        return batch_meta

    def _balanced_batch(self):
        """Draw one batch with sampler2's (class-balanced) strategy,
        mutating sampler2's state exactly as sampler2 itself would."""
        s = self.sampler2
        batch_meta = []
        while len(batch_meta) < self.batch_size:
            if len(s.queue) == 0:
                s.queue = s.expand_queue(s.queue)

            class_id = s.queue.pop(0)
            pointer = s.pointers_of_classes[class_id]
            s.pointers_of_classes[class_id] += 1
            index = s.indexes_per_class[class_id][pointer]

            # Finished one pass over this class: reshuffle, reset pointer.
            if s.pointers_of_classes[class_id] >= s.samples_num_per_class[class_id]:
                s.pointers_of_classes[class_id] = 0
                s.random_state.shuffle(s.indexes_per_class[class_id])

            # Skip black-listed clips.
            if s.audio_names[index] in s.black_list_names:
                continue
            batch_meta.append({
                'hdf5_path': s.hdf5_paths[index],
                'index_in_hdf5': s.indexes_in_hdf5[index]})
        return batch_meta

    def __iter__(self):
        """Generate batch meta for training, alternating between the two
        drawing strategies on successive batches.

        Returns:
            batch_meta: e.g.: [
                {'hdf5_path': string, 'index_in_hdf5': int},
                ...]
        """
        while True:
            self.count += 1
            # count starts at 1, so odd batches come from the balanced
            # sampler and even batches from the uniform sampler.
            if self.count % 2 == 0:
                yield self._uniform_batch()
            else:
                yield self._balanced_batch()

    def state_dict(self):
        """Snapshot of both child samplers' state."""
        return {
            'sampler1': self.sampler1.state_dict(),
            'sampler2': self.sampler2.state_dict()}

    def load_state_dict(self, state):
        """Restore state produced by state_dict()."""
        self.sampler1.load_state_dict(state['sampler1'])
        self.sampler2.load_state_dict(state['sampler2'])
357
+
358
+
359
class EvaluateSampler(object):
    def __init__(self, indexes_hdf5_path, batch_size):
        """Evaluate sampler. Generate batch meta for evaluation.

        Args:
            indexes_hdf5_path: string
            batch_size: int
        """
        self.batch_size = batch_size

        with h5py.File(indexes_hdf5_path, 'r') as hf:
            self.audio_names = [name.decode() for name in hf['audio_name'][:]]
            self.hdf5_paths = [path.decode() for path in hf['hdf5_path'][:]]
            self.indexes_in_hdf5 = hf['index_in_hdf5'][:]
            self.targets = hf['target'][:].astype(np.float32)

        self.audios_num = len(self.audio_names)

    def __iter__(self):
        """Generate batch meta for evaluation: a single sequential pass,
        no shuffling; the last batch may be smaller than batch_size.

        Returns:
            batch_meta: e.g.: [
                {'hdf5_path': string,
                 'index_in_hdf5': int}
                ...]
        """
        for start in range(0, self.audios_num, self.batch_size):
            stop = min(start + self.batch_size, self.audios_num)
            yield [{
                'audio_name': self.audio_names[index],
                'hdf5_path': self.hdf5_paths[index],
                'index_in_hdf5': self.indexes_in_hdf5[index],
                'target': self.targets[index]}
                for index in range(start, stop)]
404
+
405
+
406
def collate_fn(list_data_dict):
    """Collate per-clip dicts into a single dict of batched numpy arrays.

    Args:
        list_data_dict, e.g., [{'audio_name': str, 'waveform': (clip_samples,), ...},
                               {'audio_name': str, 'waveform': (clip_samples,), ...},
                               ...]
    Returns:
        np_data_dict, dict, e.g.,
        {'audio_name': (batch_size,), 'waveform': (batch_size, clip_samples), ...}
    """
    # All dicts share the same keys; stack each field across the batch.
    return {
        key: np.array([data_dict[key] for data_dict in list_data_dict])
        for key in list_data_dict[0].keys()}
audio_detection/audio_infer/utils/dataset.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import argparse
3
+ import csv
4
+ import os
5
+ import glob
6
+ import datetime
7
+ import time
8
+ import logging
9
+ import h5py
10
+ import librosa
11
+
12
+ from utilities import (create_folder, get_filename, create_logging,
13
+ float32_to_int16, pad_or_truncate, read_metadata)
14
+ import config
15
+
16
+
17
def split_unbalanced_csv_to_partial_csvs(args):
    """Split unbalanced csv to part csvs. Each part csv contains up to 50000 ids.

    Args:
        args: argparse.Namespace with attributes:
            unbalanced_csv: str, path of the csv to split.
            unbalanced_partial_csvs_dir: str, directory for the output parts.
    """

    unbalanced_csv_path = args.unbalanced_csv
    unbalanced_partial_csvs_dir = args.unbalanced_partial_csvs_dir

    create_folder(unbalanced_partial_csvs_dir)

    with open(unbalanced_csv_path, 'r') as f:
        lines = f.readlines()[3:]   # Drop the 3-line csv header.

    audios_num_per_file = 50000
    files_num = int(np.ceil(len(lines) / float(audios_num_per_file)))

    for r in range(files_num):
        chunk = lines[r * audios_num_per_file : (r + 1) * audios_num_per_file]

        out_csv_path = os.path.join(unbalanced_partial_csvs_dir,
            'unbalanced_train_segments_part{:02d}.csv'.format(r))

        with open(out_csv_path, 'w') as f:
            # Keep 3 placeholder header lines so downstream readers can
            # always skip the first 3 lines.
            f.write('empty\n')
            f.write('empty\n')
            f.write('empty\n')
            f.writelines(chunk)

        print('Write out csv to {}'.format(out_csv_path))
49
+
50
+
51
def download_wavs(args):
    """Download videos and extract audio in wav format.

    Args:
        args: argparse.Namespace with attributes:
            csv_path: str, csv listing audio ids and segment times.
            audios_dir: str, directory to write extracted wavs.
            mini_data: bool, if True only download the first 10 clips.
    """

    # Paths
    csv_path = args.csv_path
    audios_dir = args.audios_dir
    mini_data = args.mini_data

    # Fix: the two branches were swapped — the "minidata" log directory
    # was used for full downloads and vice versa.
    if mini_data:
        logs_dir = '_logs/download_dataset_minidata/{}'.format(get_filename(csv_path))
    else:
        logs_dir = '_logs/download_dataset/{}'.format(get_filename(csv_path))

    create_folder(audios_dir)
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Download log is saved to {}'.format(logs_dir))

    # Read csv
    with open(csv_path, 'r') as f:
        lines = f.readlines()

    lines = lines[3:]   # Remove csv head info

    if mini_data:
        lines = lines[0 : 10]   # Download partial data for debug

    download_time = time.time()

    # Download
    for (n, line) in enumerate(lines):

        items = line.split(', ')
        audio_id = items[0]
        start_time = float(items[1])
        end_time = float(items[2])
        duration = end_time - start_time

        logging.info('{} {} start_time: {:.1f}, end_time: {:.1f}'.format(
            n, audio_id, start_time, end_time))

        # Download full video of whatever format.
        # NOTE(review): ids/paths are interpolated into shell commands;
        # this is only safe while inputs come from trusted AudioSet csvs.
        video_name = os.path.join(audios_dir, '_Y{}.%(ext)s'.format(audio_id))
        os.system("youtube-dl --quiet -o '{}' -x https://www.youtube.com/watch?v={}"\
            .format(video_name, audio_id))

        video_paths = glob.glob(os.path.join(audios_dir, '_Y' + audio_id + '.*'))

        # If download successful
        if len(video_paths) > 0:
            video_path = video_paths[0]   # Choose one video

            # Add 'Y' to the head because some video ids are started with '-'
            # which will cause problem
            audio_path = os.path.join(audios_dir, 'Y' + audio_id + '.wav')

            # Extract the labelled segment as mono 32 kHz wav
            os.system("ffmpeg -loglevel panic -i {} -ac 1 -ar 32000 -ss {} -t 00:00:{} {} "\
                .format(video_path,
                str(datetime.timedelta(seconds=start_time)), duration,
                audio_path))

            # Remove downloaded video
            os.system("rm {}".format(video_path))

            logging.info("Download and convert to {}".format(audio_path))

    logging.info('Download finished! Time spent: {:.3f} s'.format(
        time.time() - download_time))

    logging.info('Logs can be viewed in {}'.format(logs_dir))
123
+
124
+
125
def pack_waveforms_to_hdf5(args):
    """Pack waveform and target of several audio clips to a single hdf5 file.
    This can speed up loading and training.

    Args:
        args: argparse.Namespace with attributes:
            audios_dir: str, directory of downloaded wavs.
            csv_path: str, csv with audio names and labels.
            waveforms_hdf5_path: str, path to write out the packed hdf5.
            mini_data: bool, if True only pack the first 10 clips.
    """

    # Arguments & parameters
    audios_dir = args.audios_dir
    csv_path = args.csv_path
    waveforms_hdf5_path = args.waveforms_hdf5_path
    mini_data = args.mini_data

    clip_samples = config.clip_samples
    classes_num = config.classes_num
    sample_rate = config.sample_rate
    id_to_ix = config.id_to_ix

    # Paths
    if mini_data:
        prefix = 'mini_'
        waveforms_hdf5_path += '.mini'
    else:
        prefix = ''

    create_folder(os.path.dirname(waveforms_hdf5_path))

    logs_dir = '_logs/pack_waveforms_to_hdf5/{}{}'.format(prefix, get_filename(csv_path))
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Write logs to {}'.format(logs_dir))

    # Read csv file
    meta_dict = read_metadata(csv_path, classes_num, id_to_ix)

    if mini_data:
        mini_num = 10
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][0 : mini_num]

    audios_num = len(meta_dict['audio_name'])

    # Pack waveform to hdf5
    total_time = time.time()

    with h5py.File(waveforms_hdf5_path, 'w') as hf:
        hf.create_dataset('audio_name', shape=((audios_num,)), dtype='S20')
        hf.create_dataset('waveform', shape=((audios_num, clip_samples)), dtype=np.int16)
        # Fix: use builtin `bool` — the `np.bool` alias was deprecated in
        # NumPy 1.20 and removed in NumPy 1.24.
        hf.create_dataset('target', shape=((audios_num, classes_num)), dtype=bool)
        hf.attrs.create('sample_rate', data=sample_rate, dtype=np.int32)

        # Pack waveform & target of several audio clips to a single hdf5 file
        for n in range(audios_num):
            audio_path = os.path.join(audios_dir, meta_dict['audio_name'][n])

            if os.path.isfile(audio_path):
                logging.info('{} {}'.format(n, audio_path))
                (audio, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)
                audio = pad_or_truncate(audio, clip_samples)

                hf['audio_name'][n] = meta_dict['audio_name'][n].encode()
                hf['waveform'][n] = float32_to_int16(audio)
                hf['target'][n] = meta_dict['target'][n]
            else:
                logging.info('{} File does not exist! {}'.format(n, audio_path))

    logging.info('Write to {}'.format(waveforms_hdf5_path))
    logging.info('Pack hdf5 time: {:.3f}'.format(time.time() - total_time))
191
+
192
+
193
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='mode')

    parser_split = subparsers.add_parser('split_unbalanced_csv_to_partial_csvs')
    parser_split.add_argument('--unbalanced_csv', type=str, required=True, help='Path of unbalanced_csv file to read.')
    parser_split.add_argument('--unbalanced_partial_csvs_dir', type=str, required=True, help='Directory to save out split unbalanced partial csv.')

    parser_download_wavs = subparsers.add_parser('download_wavs')
    parser_download_wavs.add_argument('--csv_path', type=str, required=True, help='Path of csv file containing audio info to be downloaded.')
    parser_download_wavs.add_argument('--audios_dir', type=str, required=True, help='Directory to save out downloaded audio.')
    # NOTE(review): default=True with store_true means mini mode is always
    # on for download_wavs; preserved as-is since callers may rely on it.
    parser_download_wavs.add_argument('--mini_data', action='store_true', default=True, help='Set true to only download 10 audios for debugging.')

    parser_pack_wavs = subparsers.add_parser('pack_waveforms_to_hdf5')
    parser_pack_wavs.add_argument('--csv_path', type=str, required=True, help='Path of csv file containing audio info to be downloaded.')
    parser_pack_wavs.add_argument('--audios_dir', type=str, required=True, help='Directory to save out downloaded audio.')
    parser_pack_wavs.add_argument('--waveforms_hdf5_path', type=str, required=True, help='Path to save out packed hdf5.')
    parser_pack_wavs.add_argument('--mini_data', action='store_true', default=False, help='Set true to only download 10 audios for debugging.')

    args = parser.parse_args()

    # Dispatch to the selected sub-command.
    handlers = {
        'split_unbalanced_csv_to_partial_csvs': split_unbalanced_csv_to_partial_csvs,
        'download_wavs': download_wavs,
        'pack_waveforms_to_hdf5': pack_waveforms_to_hdf5,
    }
    if args.mode in handlers:
        handlers[args.mode](args)
    else:
        raise Exception('Incorrect arguments!')
audio_detection/audio_infer/utils/plot_for_paper.py ADDED
@@ -0,0 +1,565 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import numpy as np
4
+ import argparse
5
+ import h5py
6
+ import time
7
+ import pickle
8
+ import matplotlib.pyplot as plt
9
+ import csv
10
+ from sklearn import metrics
11
+
12
+ from utilities import (create_folder, get_filename, d_prime)
13
+ import config
14
+
15
+
16
def load_statistics(statistics_path):
    """Load pickled training statistics and return mean mAP curves.

    Args:
        statistics_path: str, path of a pickle holding
            {'bal': [...], 'test': [...]} where each entry contains an
            'average_precision' array over classes.

    Returns:
        (bal_map, test_map): two (N,) arrays, the class-averaged mAP at
        each evaluation step.
    """
    # Fix: use a context manager so the file handle is closed — the
    # original passed an open() straight into pickle.load and leaked it.
    with open(statistics_path, 'rb') as f:
        statistics_dict = pickle.load(f)

    bal_map = np.array([statistics['average_precision']
        for statistics in statistics_dict['bal']])    # (N, classes_num)
    bal_map = np.mean(bal_map, axis=-1)
    test_map = np.array([statistics['average_precision']
        for statistics in statistics_dict['test']])    # (N, classes_num)
    test_map = np.mean(test_map, axis=-1)

    return bal_map, test_map
25
+
26
+
27
def crop_label(label):
    """Shorten a label for plot legends: labels longer than 16 characters
    are truncated at a word boundary.

    Note: for long labels the result carries a leading space (and is ''
    when even the first word does not fit) — this mirrors the original
    accumulation behavior that downstream plots were tuned against.
    """
    limit = 16
    if len(label) <= limit:
        return label

    shortened = ''
    for word in label.split(' '):
        candidate = shortened + ' ' + word
        if len(candidate) > limit:
            break
        shortened = candidate
    return shortened
40
+
41
+
42
def add_comma(integer):
    """Format an integer with thousands separators.

    E.g., 1234567 -> '1,234,567'.

    The original implementation only split off the last group once and did
    not zero-pad it, so 1234567 became '1234,567' and 1005 became '1,5'.
    Python's format mini-language handles all group counts and padding.

    Args:
        integer: int (or anything accepted by int()).

    Returns:
        str, the comma-grouped decimal representation.
    """
    return '{:,}'.format(int(integer))
50
+
51
+
52
def plot_classwise_iteration_map(args):
    """Plot per-class AP versus training iteration for head/middle/tail classes.

    Loads the Wavegram-Logmel-CNN test statistics, sorts classes by training
    sample count (from config.full_samples_per_class), and draws AP curves
    for ten head, ten middle, and ten tail classes in three side-by-side
    panels. Saves the figure to results/classwise_iteration_map.pdf.

    Args:
        args: argparse.Namespace (unused; kept for the CLI dispatch).
    """

    # Paths
    save_out_path = 'results/classwise_iteration_map.pdf'
    create_folder(os.path.dirname(save_out_path))

    # Load statistics
    statistics_dict = pickle.load(open('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_WavegramLogmelCnn_balanced_mixup_bs32.pkl', 'rb'))

    # (evaluations, classes_num) matrix of test-set AP values.
    mAP_mat = np.array([e['average_precision'] for e in statistics_dict['test']])
    mAP_mat = mAP_mat[0 : 300, :]   # 300 * 2000 = 600k iterations
    # Class indices sorted by decreasing number of training samples.
    sorted_indexes = np.argsort(config.full_samples_per_class)[::-1]

    fig, axs = plt.subplots(1, 3, figsize=(20, 5))
    # Head (0-9), middle (250-259) and tail (517-526) positions of the
    # sample-count ranking — one panel each.
    ranges = [np.arange(0, 10), np.arange(250, 260), np.arange(517, 527)]
    axs[0].set_ylabel('AP')

    for col in range(0, 3):
        axs[col].set_ylim(0, 1.)
        axs[col].set_xlim(0, 301)
        axs[col].set_xlabel('Iterations')
        axs[col].set_ylabel('AP')
        # x axis counts statistics entries; one entry per 2000 iterations.
        axs[col].xaxis.set_ticks(np.arange(0, 301, 100))
        axs[col].xaxis.set_ticklabels(['0', '200k', '400k', '600k'])
        lines = []
        for _ix in ranges[col]:
            # Legend shows the (cropped) class name and its clip count.
            _label = crop_label(config.labels[sorted_indexes[_ix]]) + \
                ' ({})'.format(add_comma(config.full_samples_per_class[sorted_indexes[_ix]]))
            line, = axs[col].plot(mAP_mat[:, sorted_indexes[_ix]], label=_label)
            lines.append(line)
        box = axs[col].get_position()
        axs[col].set_position([box.x0, box.y0, box.width * 1., box.height])
        axs[col].legend(handles=lines, bbox_to_anchor=(1., 1.))
        axs[col].yaxis.grid(color='k', linestyle='solid', alpha=0.3, linewidth=0.3)

    plt.tight_layout(pad=4, w_pad=1, h_pad=1)
    plt.savefig(save_out_path)
    print(save_out_path)
90
+
91
+
92
def plot_six_figures(args):
    """Plot six mAP-versus-iteration comparison panels and save a PDF.

    Each panel overlays, for several trained systems, the class-averaged AP
    on the balanced training subset (faint curve) and on the evaluation set
    (solid curve), read from pre-computed statistics pickles. Panels compare
    (a) architectures, (b) training data/augmentation, (c) embedding size,
    (d) amount of training data, (e) sampling rate, (f) mel-bin count.
    Output: results/six_figures.pdf.

    Fixes relative to the original:
    - `plt.tight_layout(0, 1, 0)` used positional pad arguments, which were
      removed in Matplotlib 3.3; they are now passed by keyword.
    - Removed unused locals (classes_num, labels, class_labels_indices_path).
    - Panel (d): legend typo 'cnn14 (50% full)' -> 'CNN14 (50% full)'.
    - Panel (f): the two balanced curves previously omitted `linewidth`;
      all curves now use the shared linewidth for consistency.

    Args:
        args: argparse.Namespace (unused; kept for the CLI dispatch).
    """

    # Statistics are logged every 2000 iterations up to 540k.
    max_plot_iteration = 540000
    iterations = np.arange(0, max_plot_iteration, 2000)

    # Paths
    save_out_path = 'results/six_figures.pdf'
    create_folder(os.path.dirname(save_out_path))

    fig, ax = plt.subplots(2, 3, figsize=(14, 7))
    bal_alpha = 0.3     # balanced-subset curves drawn faint
    test_alpha = 1.0    # evaluation curves drawn solid
    linewidth = 1.

    def _path(mel, tail):
        # Statistics pickle path for one experiment variant.
        return ('paper_statistics/statistics_sr32000_window1024_hop320_'
                'mel{}_fmin50_fmax14000_{}.pkl'.format(mel, tail))

    def _draw(panel_ax, path, label, color):
        # Plot one system's balanced (faint) and test (solid) curves;
        # return the test-curve handle for the legend.
        bal_map, test_map = load_statistics(path)
        panel_ax.plot(bal_map, color=color, alpha=bal_alpha, linewidth=linewidth)
        line, = panel_ax.plot(test_map, label=label, color=color,
                              alpha=test_alpha, linewidth=linewidth)
        return line

    # (axis, title, legend kwargs, [(mel bins, experiment tail, label, color)])
    panels = [
        (ax[0, 0], '(a) Comparison of architectures', {}, [
            (64, 'full_train_WavegramLogmelCnn_balanced_mixup_bs32', 'Wavegram-Logmel-CNN', 'g'),
            (64, 'full_train_Cnn14_balanced_mixup_bs32', 'CNN14', 'r'),
            (64, 'full_train_MobileNetV1_balanced_mixup_bs32', 'MobileNetV1', 'b')]),
        (ax[0, 1], '(b) Comparison of training data and augmentation', {'fontsize': 8}, [
            (64, 'full_train_Cnn14_balanced_mixup_bs32', 'CNN14,bal,mixup (1.9m)', 'r'),
            (64, 'full_train_Cnn14_balanced_mixup_timedomain_bs32', 'CNN14,bal,mixup-wav (1.9m)', 'y'),
            (64, 'full_train_Cnn14_balanced_nomixup_bs32', 'CNN14,bal,no-mixup (1.9m)', 'g'),
            (64, 'full_train_Cnn14_nobalanced_nomixup_bs32', 'CNN14,no-bal,no-mixup (1.9m)', 'b'),
            (64, 'balanced_train_Cnn14_balanced_mixup_bs32', 'CNN14,bal,mixup (20k)', 'm'),
            (64, 'balanced_train_Cnn14_balanced_nomixup_bs32', 'CNN14,bal,no-mixup (20k)', 'k')]),
        (ax[0, 2], '(c) Comparison of embedding size', {}, [
            (64, 'full_train_Cnn14_balanced_mixup_bs32', 'CNN14,emb=2048', 'r'),
            (64, 'full_train_Cnn14_emb128_balanced_mixup_bs32', 'CNN14,emb=128', 'g'),
            (64, 'full_train_Cnn14_emb32_balanced_mixup_bs32', 'CNN14,emb=32', 'b')]),
        (ax[1, 0], '(d) Comparison of amount of training data', {}, [
            (64, 'full_train_Cnn14_balanced_mixup_bs32', 'CNN14 (100% full)', 'r'),
            (64, '0.8full_train_Cnn14_balanced_mixup_bs32', 'CNN14 (80% full)', 'b'),
            (64, '0.5full_train_Cnn14_balanced_mixup_bs32', 'CNN14 (50% full)', 'g')]),
        (ax[1, 1], '(e) Comparison of sampling rate', {}, [
            (64, 'full_train_Cnn14_balanced_mixup_bs32', 'CNN14,32kHz', 'r'),
            (64, 'full_train_Cnn14_16k_balanced_mixup_bs32', 'CNN14,16kHz', 'b'),
            (64, 'full_train_Cnn14_8k_balanced_mixup_bs32', 'CNN14,8kHz', 'g')]),
        (ax[1, 2], '(f) Comparison of mel bins number', {}, [
            (128, 'full_train_Cnn14_balanced_mixup_bs32', 'CNN14,128-melbins', 'g'),
            (64, 'full_train_Cnn14_balanced_mixup_bs32', 'CNN14,64-melbins', 'r'),
            (32, 'full_train_Cnn14_balanced_mixup_bs32', 'CNN14,32-melbins', 'b')]),
    ]

    for panel_ax, title, legend_kwargs, curves in panels:
        lines = [_draw(panel_ax, _path(mel, tail), label, color)
                 for mel, tail, label, color in curves]
        panel_ax.legend(handles=lines, loc=2, **legend_kwargs)
        panel_ax.set_title(title)

    # Shared axis cosmetics for all six panels.
    for i in range(2):
        for j in range(3):
            ax[i, j].set_ylim(0, 0.8)
            ax[i, j].set_xlim(0, len(iterations))
            ax[i, j].set_xlabel('Iterations')
            ax[i, j].set_ylabel('mAP')
            # One statistics entry per 2000 iterations; label every 100k.
            ax[i, j].xaxis.set_ticks(np.arange(0, len(iterations), 50))
            ax[i, j].xaxis.set_ticklabels(['0', '100k', '200k', '300k', '400k', '500k'])
            ax[i, j].yaxis.set_ticks(np.arange(0, 0.81, 0.05))
            ax[i, j].yaxis.set_ticklabels(['0', '', '0.1', '', '0.2', '', '0.3',
                '', '0.4', '', '0.5', '', '0.6', '', '0.7', '', '0.8'])
            ax[i, j].yaxis.grid(color='k', linestyle='solid', alpha=0.3, linewidth=0.3)
            ax[i, j].xaxis.grid(color='k', linestyle='solid', alpha=0.3, linewidth=0.3)

    # Positional pad arguments were removed in Matplotlib 3.3 — keywords only.
    plt.tight_layout(pad=0, w_pad=1, h_pad=0)
    plt.savefig(save_out_path)
    print('Save figure to {}'.format(save_out_path))
296
+
297
+
298
def plot_complexity_map(args):
    """Scatter evaluation mAP against model multiply-add count.

    Annotates each PANNs model and connects members of the same architecture
    family with a line. Output: results/complexity_mAP.pdf.

    Fixes relative to the original:
    - A throwaway `plt.figure(figsize=(5, 5))` was created and then abandoned
      by `plt.subplots(1, 1)`, so the saved figure used the default size;
      the size is now passed to `plt.subplots` directly.
    - Removed the unused, misnamed `sorted_indexes = np.sort(flops)`.
    - `plt.tight_layout(0, 0, 0)` used positional pad arguments, removed in
      Matplotlib 3.3; now passed by keyword.
    - The x label read 'Multi-load_statisticss (million)', apparently a
      botched search-and-replace; restored to 'Multi-adds (million)'.

    Args:
        args: argparse.Namespace (unused; kept for the CLI dispatch).
    """

    # Paths
    save_out_path = 'results/complexity_mAP.pdf'
    create_folder(os.path.dirname(save_out_path))

    fig, ax = plt.subplots(1, 1, figsize=(5, 5))

    model_types = np.array(['Cnn6', 'Cnn10', 'Cnn14', 'ResNet22', 'ResNet38', 'ResNet54',
        'MobileNetV1', 'MobileNetV2', 'DaiNet', 'LeeNet', 'LeeNet18',
        'Res1dNet30', 'Res1dNet44', 'Wavegram-CNN', 'Wavegram-\nLogmel-CNN'])
    # Multiply-adds (million) and evaluation mAP, aligned with model_types.
    flops = np.array([21.986, 28.166, 42.220, 30.081, 48.962, 54.563, 3.614, 2.810,
        30.395, 4.741, 26.369, 32.688, 61.833, 44.234, 53.510])
    mAPs = np.array([0.343, 0.380, 0.431, 0.430, 0.434, 0.429, 0.389, 0.383, 0.295,
        0.266, 0.336, 0.365, 0.355, 0.389, 0.439])

    ax.scatter(flops, mAPs)

    # Hand-tuned (x, y) annotation offsets so labels avoid the markers.
    shift = [[-5.5, -0.004], [1, -0.004], [-1, -0.014], [-2, 0.006], [-7, 0.006],
        [1, -0.01], [0.5, 0.004], [-1, -0.014], [1, -0.007], [0.8, -0.008],
        [1, -0.007], [1, 0.002], [-6, -0.015], [1, -0.008], [0.8, 0]]

    for i, model_type in enumerate(model_types):
        ax.annotate(model_type, (flops[i] + shift[i][0], mAPs[i] + shift[i][1]))

    # Connect models that belong to the same architecture family.
    for family in ([0, 1, 2], [3, 4, 5], [6, 7], [9, 10], [11, 12], [13, 14]):
        ax.plot(flops[family], mAPs[family])

    ax.set_xlim(0, 70)
    ax.set_ylim(0.2, 0.5)
    ax.set_xlabel('Multi-adds (million)', fontsize=15)
    ax.set_ylabel('mAP', fontsize=15)
    ax.tick_params(axis='x', labelsize=12)
    ax.tick_params(axis='y', labelsize=12)

    plt.tight_layout(pad=0, w_pad=0, h_pad=0)

    plt.savefig(save_out_path)
    print('Write out figure to {}'.format(save_out_path))
343
+
344
+
345
def plot_long_fig(args):
    """Plot per-class AP of four systems over all classes in four rows.

    Each row shows a slice of the class axis: log-scale bars of training-clip
    counts (left axes), per-class AP of Wavegram-Logmel-CNN / CNN14 /
    MobileNetV1 / the averaging-instances baseline, and the human label
    quality where available. Output: results/long_fig.pdf.

    Fixes relative to the original:
    - `plt.tight_layout(0, 0, 0)` used positional pad arguments, removed in
      Matplotlib 3.3; now passed by keyword.
    - The object-array comparisons `sorted_label_quality != None` (deprecated
      / fragile elementwise semantics in NumPy) are replaced by an explicit
      boolean mask built with `is not None`.

    Args:
        args: argparse.Namespace (unused; kept for the CLI dispatch).
    """

    # Pre-computed statistics bundle for this figure.
    stats = pickle.load(open('paper_statistics/stats_for_long_fig.pkl', 'rb'))

    save_out_path = 'results/long_fig.pdf'
    create_folder(os.path.dirname(save_out_path))

    # Plotting order of classes and their total official training clips.
    N = len(config.labels)
    sorted_indexes = stats['sorted_indexes_for_plot']
    sorted_labels = np.array(config.labels)[sorted_indexes]
    audio_clips_per_class = stats['official_balanced_training_samples'] + \
        stats['official_unbalanced_training_samples']
    audio_clips_per_class = audio_clips_per_class[sorted_indexes]

    # Four stacked rows; '*a' axes carry clip counts, twinned '*b' axes AP.
    (ax1a, ax2a, ax3a, ax4a, ax1b, ax2b, ax3b, ax4b) = prepare_plot_long_4_rows(sorted_labels)

    # Plot the number of training samples on each row.
    for axa in (ax1a, ax2a, ax3a, ax4a):
        axa.bar(np.arange(N), audio_clips_per_class, alpha=0.3)

    # Load mAP of different systems.
    """Average instance system of [1] with an mAP of 0.317.
    [1] Kong, Qiuqiang, Changsong Yu, Yong Xu, Turab Iqbal, Wenwu Wang, and
    Mark D. Plumbley. "Weakly labelled audioset tagging with attention neural
    networks." IEEE/ACM Transactions on Audio, Speech, and Language Processing
    27, no. 11 (2019): 1791-1802."""
    maps_avg_instances = stats['averaging_instance_system_avg_9_probs_from_10000_to_50000_iterations']['eval']['average_precision']
    maps_avg_instances = maps_avg_instances[sorted_indexes]

    # PANNs systems, reordered to the plotting order.
    maps_panns_cnn14 = stats['panns_cnn14']['eval']['average_precision'][sorted_indexes]
    maps_panns_mobilenetv1 = stats['panns_mobilenetv1']['eval']['average_precision'][sorted_indexes]
    maps_panns_wavegram_logmel_cnn14 = stats['panns_wavegram_logmel_cnn14']['eval']['average_precision'][sorted_indexes]

    # Per-class AP points.
    _scatter_4_rows(maps_panns_wavegram_logmel_cnn14, ax1b, ax2b, ax3b, ax4b, s=5, c='g')
    _scatter_4_rows(maps_panns_cnn14, ax1b, ax2b, ax3b, ax4b, s=5, c='r')
    _scatter_4_rows(maps_panns_mobilenetv1, ax1b, ax2b, ax3b, ax4b, s=5, c='b')
    _scatter_4_rows(maps_avg_instances, ax1b, ax2b, ax3b, ax4b, s=5, c='k')

    # Connecting lines; the returned handles feed the legend.
    linewidth = 0.7
    line0te = _plot_4_rows(maps_panns_wavegram_logmel_cnn14, ax1b, ax2b, ax3b, ax4b,
        c='g', linewidth=linewidth, label='AP with Wavegram-Logmel-CNN')
    line1te = _plot_4_rows(maps_panns_cnn14, ax1b, ax2b, ax3b, ax4b, c='r',
        linewidth=linewidth, label='AP with CNN14')
    line2te = _plot_4_rows(maps_panns_mobilenetv1, ax1b, ax2b, ax3b, ax4b, c='b',
        linewidth=linewidth, label='AP with MobileNetV1')
    line3te = _plot_4_rows(maps_avg_instances, ax1b, ax2b, ax3b, ax4b, c='k',
        linewidth=linewidth, label='AP with averaging instances (baseline)')

    # Label quality: entries are None where no human estimate exists; exact
    # 1.0 values are nudged to 0.99 so the marker stays inside the axes.
    label_quality = stats['label_quality']
    sorted_label_quality = np.array(label_quality)[sorted_indexes]
    for k in range(len(sorted_label_quality)):
        if sorted_label_quality[k] and sorted_label_quality[k] == 1:
            sorted_label_quality[k] = 0.99

    has_quality = np.array([q is not None for q in sorted_label_quality])
    quality_x = np.arange(N)[has_quality]
    quality_y = sorted_label_quality[has_quality]
    missing_x = np.arange(N)[~has_quality]

    # '+' markers where a quality estimate exists (legend handle from row 4).
    for axb in (ax1b, ax2b, ax3b):
        axb.scatter(quality_x, quality_y, s=12, c='r', linewidth=0.8, marker='+')
    line_label_quality = ax4b.scatter(quality_x, quality_y, s=12, c='r',
        linewidth=0.8, marker='+', label='Label quality')
    # '_' markers at 0.5 where no quality estimate exists.
    for axb in (ax1b, ax2b, ax3b, ax4b):
        axb.scatter(missing_x, 0.5 * np.ones(len(missing_x)), s=12, c='r',
            linewidth=0.8, marker='_')

    plt.legend(handles=[line0te, line1te, line2te, line3te, line_label_quality],
        fontsize=6, loc=1)
    plt.tight_layout(pad=0, w_pad=0, h_pad=0)
    plt.savefig(save_out_path)
    print('Save fig to {}'.format(save_out_path))
434
+
435
+
436
def prepare_plot_long_4_rows(sorted_lbs):
    """Create the four-row axis layout for the long per-class figure.

    Each row shows a contiguous slice of the class axis (132 classes per row,
    remainder in the last row). The primary ('a') axes are log-scaled for
    clip counts; each gets a twinned ('b') axis with a fixed [0, 1] range for
    AP values. Class names become rotated x tick labels.

    Args:
        sorted_lbs: sequence of class label strings, already in plot order.

    Returns:
        (ax1a, ax2a, ax3a, ax4a, ax1b, ax2b, ax3b, ax4b): primary and
        twinned axes for the four rows.
    """
    N = len(sorted_lbs)

    f,(ax1a, ax2a, ax3a, ax4a) = plt.subplots(4, 1, sharey=False, facecolor='w', figsize=(10, 10.5))

    fontsize = 5

    # Number of classes shown per row; the last row takes the remainder.
    K = 132
    ax1a.set_xlim(0, K)
    ax2a.set_xlim(K, 2 * K)
    ax3a.set_xlim(2 * K, 3 * K)
    ax4a.set_xlim(3 * K, N)

    # Truncate tick labels to 25 characters, and drop a trailing fragment
    # shorter than 3 characters left over from the cut.
    truncated_sorted_lbs = []
    for lb in sorted_lbs:
        lb = lb[0 : 25]
        words = lb.split(' ')
        if len(words[-1]) < 3:
            lb = ' '.join(words[0:-1])
        truncated_sorted_lbs.append(lb)

    ax1a.grid(which='major', axis='x', linestyle='-', alpha=0.3)
    ax2a.grid(which='major', axis='x', linestyle='-', alpha=0.3)
    ax3a.grid(which='major', axis='x', linestyle='-', alpha=0.3)
    ax4a.grid(which='major', axis='x', linestyle='-', alpha=0.3)

    # Clip counts span several orders of magnitude.
    ax1a.set_yscale('log')
    ax2a.set_yscale('log')
    ax3a.set_yscale('log')
    ax4a.set_yscale('log')

    # Twinned right axes for AP in [0, 1].
    ax1b = ax1a.twinx()
    ax2b = ax2a.twinx()
    ax3b = ax3a.twinx()
    ax4b = ax4a.twinx()
    ax1b.set_ylim(0., 1.)
    ax2b.set_ylim(0., 1.)
    ax3b.set_ylim(0., 1.)
    ax4b.set_ylim(0., 1.)
    ax1b.set_ylabel('Average precision')
    ax2b.set_ylabel('Average precision')
    ax3b.set_ylabel('Average precision')
    ax4b.set_ylabel('Average precision')

    ax1b.yaxis.grid(color='grey', linestyle='--', alpha=0.5)
    ax2b.yaxis.grid(color='grey', linestyle='--', alpha=0.5)
    ax3b.yaxis.grid(color='grey', linestyle='--', alpha=0.5)
    ax4b.yaxis.grid(color='grey', linestyle='--', alpha=0.5)

    # One rotated class-name tick per class, per row slice.
    ax1a.xaxis.set_ticks(np.arange(K))
    ax1a.xaxis.set_ticklabels(truncated_sorted_lbs[0:K], rotation=90, fontsize=fontsize)
    ax1a.xaxis.tick_bottom()
    ax1a.set_ylabel("Number of audio clips")

    ax2a.xaxis.set_ticks(np.arange(K, 2*K))
    ax2a.xaxis.set_ticklabels(truncated_sorted_lbs[K:2*K], rotation=90, fontsize=fontsize)
    ax2a.xaxis.tick_bottom()
    ax2a.set_ylabel("Number of audio clips")

    ax3a.xaxis.set_ticks(np.arange(2*K, 3*K))
    ax3a.xaxis.set_ticklabels(truncated_sorted_lbs[2*K:3*K], rotation=90, fontsize=fontsize)
    ax3a.xaxis.tick_bottom()
    ax3a.set_ylabel("Number of audio clips")

    ax4a.xaxis.set_ticks(np.arange(3*K, N))
    ax4a.xaxis.set_ticklabels(truncated_sorted_lbs[3*K:], rotation=90, fontsize=fontsize)
    ax4a.xaxis.tick_bottom()
    ax4a.set_ylabel("Number of audio clips")

    # Hide inner spines so the rows read as one continuous strip.
    ax1a.spines['right'].set_visible(False)
    ax1b.spines['right'].set_visible(False)
    ax2a.spines['left'].set_visible(False)
    ax2b.spines['left'].set_visible(False)
    ax2a.spines['right'].set_visible(False)
    ax2b.spines['right'].set_visible(False)
    ax3a.spines['left'].set_visible(False)
    ax3b.spines['left'].set_visible(False)
    ax3a.spines['right'].set_visible(False)
    ax3b.spines['right'].set_visible(False)
    ax4a.spines['left'].set_visible(False)
    ax4b.spines['left'].set_visible(False)

    plt.subplots_adjust(hspace = 0.8)

    return ax1a, ax2a, ax3a, ax4a, ax1b, ax2b, ax3b, ax4b
521
+
522
+
523
+ def _scatter_4_rows(x, ax, ax2, ax3, ax4, s, c, marker='.', alpha=1.):
524
+ N = len(x)
525
+ ax.scatter(np.arange(N), x, s=s, c=c, marker=marker, alpha=alpha)
526
+ ax2.scatter(np.arange(N), x, s=s, c=c, marker=marker, alpha=alpha)
527
+ ax3.scatter(np.arange(N), x, s=s, c=c, marker=marker, alpha=alpha)
528
+ ax4.scatter(np.arange(N), x, s=s, c=c, marker=marker, alpha=alpha)
529
+
530
+
531
+ def _plot_4_rows(x, ax, ax2, ax3, ax4, c, linewidth=1.0, alpha=1.0, label=""):
532
+ N = len(x)
533
+ ax.plot(x, c=c, linewidth=linewidth, alpha=alpha)
534
+ ax2.plot(x, c=c, linewidth=linewidth, alpha=alpha)
535
+ ax3.plot(x, c=c, linewidth=linewidth, alpha=alpha)
536
+ line, = ax4.plot(x, c=c, linewidth=linewidth, alpha=alpha, label=label)
537
+ return line
538
+
539
+
540
if __name__ == '__main__':

    # One no-argument sub-command per plotting routine.
    parser = argparse.ArgumentParser(description='')
    subparsers = parser.add_subparsers(dest='mode')

    dispatch = {
        'plot_classwise_iteration_map': plot_classwise_iteration_map,
        'plot_six_figures': plot_six_figures,
        'plot_complexity_map': plot_complexity_map,
        'plot_long_fig': plot_long_fig,
    }
    for mode_name in dispatch:
        subparsers.add_parser(mode_name)

    args = parser.parse_args()

    try:
        handler = dispatch[args.mode]
    except KeyError:
        raise Exception('Incorrect argument!')
    handler(args)
audio_detection/audio_infer/utils/plot_statistics.py ADDED
The diff for this file is too large to render. See raw diff
audio_detection/audio_infer/utils/utilities.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ import h5py
4
+ import soundfile
5
+ import librosa
6
+ import numpy as np
7
+ import pandas as pd
8
+ from scipy import stats
9
+ import datetime
10
+ import pickle
11
+
12
+
13
def create_folder(fd):
    """Create directory *fd* (including parents) if it does not exist.

    Uses ``exist_ok=True`` instead of the original exists-then-makedirs
    check, which was racy: another process creating the directory between
    the check and the call made ``os.makedirs`` raise.
    """
    os.makedirs(fd, exist_ok=True)
16
+
17
+
18
def get_filename(path):
    """Return the base file name of *path* without its extension.

    The original split the resolved path on '/' by hand, which breaks on
    platforms with a different separator; ``os.path.basename`` is portable.

    Args:
        path: str, any file path.

    Returns:
        str, e.g. '/data/audio/clip.wav' -> 'clip'.
    """
    path = os.path.realpath(path)
    return os.path.splitext(os.path.basename(path))[0]
23
+
24
+
25
def get_sub_filepaths(folder):
    """Recursively collect the paths of all files under *folder*.

    Args:
        folder: str, root directory to walk.

    Returns:
        list of str, one entry per regular file, in os.walk order.
    """
    return [os.path.join(root, name)
            for root, _dirs, files in os.walk(folder)
            for name in files]
32
+
33
+
34
def create_logging(log_dir, filemode):
    """Configure root logging to a numbered file plus the console.

    Creates *log_dir* if needed, picks the first unused '%04d.log' file name
    in it, and configures the root logger (a process-global side effect) to
    write DEBUG-level records to that file and echo INFO-level records to
    the console.

    Args:
        log_dir: str, directory receiving the numbered log files.
        filemode: str, open mode for the log file, e.g. 'w' or 'a'.

    Returns:
        The ``logging`` module itself (already configured).
    """
    create_folder(log_dir)
    i1 = 0

    # Find the first index whose log file does not exist yet.
    while os.path.isfile(os.path.join(log_dir, '{:04d}.log'.format(i1))):
        i1 += 1

    log_path = os.path.join(log_dir, '{:04d}.log'.format(i1))
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
        datefmt='%a, %d %b %Y %H:%M:%S',
        filename=log_path,
        filemode=filemode)

    # Print to console
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    return logging
57
+
58
+
59
def read_metadata(csv_path, classes_num, id_to_ix):
    """Read metadata of AudioSet from a csv file.

    The csv has three header lines, then rows of the form:
    ``--4gqARaEJE, 0.000, 10.000, "/m/068hy,/m/07q6cd_,/m/0bt9lr"``.

    Note: the original used ``dtype=np.bool``, an alias removed in
    NumPy 1.24 — the builtin ``bool`` is used instead.

    Args:
        csv_path: str, path of the AudioSet segments csv.
        classes_num: int, total number of classes.
        id_to_ix: dict mapping a label id such as '/m/068hy' to a class index.

    Returns:
        meta_dict: {'audio_name': (audios_num,), 'target': (audios_num, classes_num)}
    """

    with open(csv_path, 'r') as fr:
        lines = fr.readlines()
        lines = lines[3:]   # Remove heads

    audios_num = len(lines)
    targets = np.zeros((audios_num, classes_num), dtype=bool)
    audio_names = []

    for n, line in enumerate(lines):
        items = line.split(', ')
        """items: ['--4gqARaEJE', '0.000', '10.000', '"/m/068hy,/m/07q6cd_,/m/0bt9lr,/m/0jbk"\n']"""

        audio_name = 'Y{}.wav'.format(items[0])   # Audios are started with an extra 'Y' when downloading
        label_ids = items[3].split('"')[1].split(',')

        audio_names.append(audio_name)

        # Multi-hot target vector ('id' renamed to avoid shadowing the builtin).
        for label_id in label_ids:
            targets[n, id_to_ix[label_id]] = 1

    meta_dict = {'audio_name': np.array(audio_names), 'target': targets}
    return meta_dict
93
+
94
+
95
def float32_to_int16(x):
    """Convert a float waveform in roughly [-1, 1] to int16 PCM.

    Values are clipped to [-1, 1] before scaling; inputs with peaks above
    1.2 are rejected as likely mis-scaled.
    """
    assert np.max(np.abs(x)) <= 1.2
    clipped = np.clip(x, -1, 1)
    return (clipped * 32767.).astype(np.int16)
99
+
100
def int16_to_float32(x):
    """Convert int16 PCM samples back to float32 in roughly [-1, 1]."""
    scaled = x / 32767.
    return scaled.astype(np.float32)
102
+
103
+
104
def pad_or_truncate(x, audio_length):
    """Zero-pad or cut *x* so its length is exactly *audio_length*.

    Padding uses float zeros (matching the original), so padded output is
    floating point regardless of the input dtype.
    """
    if len(x) > audio_length:
        return x[:audio_length]
    padding = np.zeros(audio_length - len(x))
    return np.concatenate((x, padding), axis=0)
110
+
111
+
112
def d_prime(auc):
    """Convert an AUC score to the d-prime sensitivity index.

    d' = sqrt(2) * Phi^{-1}(auc), where Phi^{-1} is the standard normal
    quantile function.
    """
    return np.sqrt(2.0) * stats.norm().ppf(auc)
115
+
116
+
117
class Mixup(object):
    """Generates mixup interpolation coefficients from a Beta distribution."""

    def __init__(self, mixup_alpha, random_seed=1234):
        """Mixup coefficient generator.

        Args:
            mixup_alpha: float, both shape parameters of the Beta distribution.
            random_seed: int, seed of the private RandomState.
        """
        self.mixup_alpha = mixup_alpha
        self.random_state = np.random.RandomState(random_seed)

    def get_lambda(self, batch_size):
        """Get mixup random coefficients.

        Consecutive pairs of entries sum to 1, so each even-indexed sample
        is mixed with the following odd-indexed one.

        Args:
            batch_size: int
        Returns:
            mixup_lambdas: (batch_size,)
        """
        lambdas = []
        for _ in range(0, batch_size, 2):
            # One Beta draw per pair; same RNG call sequence as the original.
            lam = self.random_state.beta(self.mixup_alpha, self.mixup_alpha, 1)[0]
            lambdas.extend((lam, 1. - lam))

        return np.array(lambdas)
138
+
139
+
140
class StatisticsContainer(object):
    """Accumulates evaluation statistics over training and persists them."""

    def __init__(self, statistics_path):
        """Contain statistics of different training iterations.

        Args:
            statistics_path: str, primary pickle path; a timestamped backup
                path is derived from it.
        """
        self.statistics_path = statistics_path

        # Timestamped sibling file used as a backup on every dump.
        stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        self.backup_statistics_path = '{}_{}.pkl'.format(
            os.path.splitext(self.statistics_path)[0], stamp)

        self.statistics_dict = {'bal': [], 'test': []}

    def append(self, iteration, statistics, data_type):
        """Record one evaluation, tagging it with its iteration number."""
        statistics['iteration'] = iteration
        self.statistics_dict[data_type].append(statistics)

    def dump(self):
        """Write the accumulated statistics to the primary and backup pickles."""
        for path in (self.statistics_path, self.backup_statistics_path):
            with open(path, 'wb') as f:
                pickle.dump(self.statistics_dict, f)
            logging.info('    Dump statistics to {}'.format(path))

    def load_state_dict(self, resume_iteration):
        """Reload saved statistics, keeping entries up to *resume_iteration*."""
        with open(self.statistics_path, 'rb') as f:
            self.statistics_dict = pickle.load(f)

        resumed = {'bal': [], 'test': []}
        for key in self.statistics_dict:
            resumed[key] = [s for s in self.statistics_dict[key]
                            if s['iteration'] <= resume_iteration]

        self.statistics_dict = resumed
audio_detection/target_sound_detection/src/__pycache__/models.cpython-38.pyc ADDED
Binary file (37.9 kB). View file
audio_detection/target_sound_detection/src/__pycache__/utils.cpython-38.pyc ADDED
Binary file (11.1 kB). View file
audio_detection/target_sound_detection/src/models.py ADDED
@@ -0,0 +1,1288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # !/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # @Time : 2021/3/9 16:33
4
+ # @Author : dongchao yang
5
+ # @File : train.py
6
+ from itertools import zip_longest
7
+ import numpy as np
8
+ from scipy import ndimage
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+ import time
13
+ from torchlibrosa.augmentation import SpecAugmentation
14
+ from torchlibrosa.stft import Spectrogram, LogmelFilterBank
15
+ import math
16
+ from sklearn.cluster import KMeans
17
+ import os
18
+ import time
19
+ from functools import partial
20
+ # import timm
21
+ # from timm.models.layers import DropPath, to_2tuple, trunc_normal_
22
+ import warnings
23
+ from functools import partial
24
+ # from timm.models.registry import register_model
25
+ # from timm.models.vision_transformer import _cfg
26
+ # from mmdet.utils import get_root_logger
27
+ # from mmcv.runner import load_checkpoint
28
+ # from mmcv.runner import _load_checkpoint, load_state_dict
29
+ # import mmcv.runner
30
+ import copy
31
+ from collections import OrderedDict
32
+ import io
33
+ import re
34
+ DEBUG=0
35
+ event_labels = ['Alarm', 'Alarm_clock', 'Animal', 'Applause', 'Arrow', 'Artillery_fire',
36
+ 'Babbling', 'Baby_laughter', 'Bark', 'Basketball_bounce', 'Battle_cry',
37
+ 'Bell', 'Bird', 'Bleat', 'Bouncing', 'Breathing', 'Buzz', 'Camera',
38
+ 'Cap_gun', 'Car', 'Car_alarm', 'Cat', 'Caw', 'Cheering', 'Child_singing',
39
+ 'Choir', 'Chop', 'Chopping_(food)', 'Clapping', 'Clickety-clack', 'Clicking',
40
+ 'Clip-clop', 'Cluck', 'Coin_(dropping)', 'Computer_keyboard', 'Conversation',
41
+ 'Coo', 'Cough', 'Cowbell', 'Creak', 'Cricket', 'Croak', 'Crow', 'Crowd', 'DTMF',
42
+ 'Dog', 'Door', 'Drill', 'Drip', 'Engine', 'Engine_starting', 'Explosion', 'Fart',
43
+ 'Female_singing', 'Filing_(rasp)', 'Finger_snapping', 'Fire', 'Fire_alarm', 'Firecracker',
44
+ 'Fireworks', 'Frog', 'Gasp', 'Gears', 'Giggle', 'Glass', 'Glass_shatter', 'Gobble', 'Groan',
45
+ 'Growling', 'Hammer', 'Hands', 'Hiccup', 'Honk', 'Hoot', 'Howl', 'Human_sounds', 'Human_voice',
46
+ 'Insect', 'Laughter', 'Liquid', 'Machine_gun', 'Male_singing', 'Mechanisms', 'Meow', 'Moo',
47
+ 'Motorcycle', 'Mouse', 'Music', 'Oink', 'Owl', 'Pant', 'Pant_(dog)', 'Patter', 'Pig', 'Plop',
48
+ 'Pour', 'Power_tool', 'Purr', 'Quack', 'Radio', 'Rain_on_surface', 'Rapping', 'Rattle',
49
+ 'Reversing_beeps', 'Ringtone', 'Roar', 'Run', 'Rustle', 'Scissors', 'Scrape', 'Scratch',
50
+ 'Screaming', 'Sewing_machine', 'Shout', 'Shuffle', 'Shuffling_cards', 'Singing',
51
+ 'Single-lens_reflex_camera', 'Siren', 'Skateboard', 'Sniff', 'Snoring', 'Speech',
52
+ 'Speech_synthesizer', 'Spray', 'Squeak', 'Squeal', 'Steam', 'Stir', 'Surface_contact',
53
+ 'Tap', 'Tap_dance', 'Telephone_bell_ringing', 'Television', 'Tick', 'Tick-tock', 'Tools',
54
+ 'Train', 'Train_horn', 'Train_wheels_squealing', 'Truck', 'Turkey', 'Typewriter', 'Typing',
55
+ 'Vehicle', 'Video_game_sound', 'Water', 'Whimper_(dog)', 'Whip', 'Whispering', 'Whistle',
56
+ 'Whistling', 'Whoop', 'Wind', 'Writing', 'Yip', 'and_pans', 'bird_song', 'bleep', 'clink',
57
+ 'cock-a-doodle-doo', 'crinkling', 'dove', 'dribble', 'eructation', 'faucet', 'flapping_wings',
58
+ 'footsteps', 'gunfire', 'heartbeat', 'infant_cry', 'kid_speaking', 'man_speaking', 'mastication',
59
+ 'mice', 'river', 'rooster', 'silverware', 'skidding', 'smack', 'sobbing', 'speedboat', 'splatter',
60
+ 'surf', 'thud', 'thwack', 'toot', 'truck_horn', 'tweet', 'vroom', 'waterfowl', 'woman_speaking']
61
def load_checkpoint(model,
                    filename,
                    map_location=None,
                    strict=False,
                    logger=None,
                    revise_keys=[(r'^module\.', '')]):
    """Load checkpoint from a file or URI.
    Args:
        model (Module): Module to load checkpoint.
        filename (str): Accept local filepath, URL, ``torchvision://xxx``,
            ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
            details.
        map_location (str): Same as :func:`torch.load`.
        strict (bool): Whether to allow different params for the model and
            checkpoint.
        logger (:mod:`logging.Logger` or None): The logger for error message.
        revise_keys (list): A list of customized keywords to modify the
            state_dict in checkpoint. Each item is a (pattern, replacement)
            pair of the regular expression operations. Default: strip
            the prefix 'module.' by [(r'^module\\.', '')].
    Returns:
        dict or OrderedDict: The loaded checkpoint.
    """
    # NOTE(review): `_load_checkpoint` and `load_state_dict` come from
    # mmcv.runner, whose imports are commented out at the top of this file;
    # this function raises NameError unless those imports are restored.
    # NOTE(review): `revise_keys` is a mutable default argument — safe only
    # because it is never mutated here; consider a tuple default.
    checkpoint = _load_checkpoint(filename, map_location, logger)
    '''
    new_proj = torch.nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    new_proj.weight = torch.nn.Parameter(torch.sum(checkpoint['patch_embed1.proj.weight'], dim=1).unsqueeze(1))
    checkpoint['patch_embed1.proj.weight'] = new_proj.weight
    new_proj.weight = torch.nn.Parameter(torch.sum(checkpoint['patch_embed1.proj.weight'], dim=2).unsqueeze(2).repeat(1,1,3,1))
    checkpoint['patch_embed1.proj.weight'] = new_proj.weight
    new_proj.weight = torch.nn.Parameter(torch.sum(checkpoint['patch_embed1.proj.weight'], dim=3).unsqueeze(3).repeat(1,1,1,3))
    checkpoint['patch_embed1.proj.weight'] = new_proj.weight
    '''
    # Collapse the pretrained 3-channel patch-embedding kernel to a single
    # input channel by summing over the channel dim (spectrogram input is
    # one-channel). The Conv2d is only used as a convenient Parameter holder.
    new_proj = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(4, 4), padding=(2, 2))
    new_proj.weight = torch.nn.Parameter(torch.sum(checkpoint['patch_embed1.proj.weight'], dim=1).unsqueeze(1))
    checkpoint['patch_embed1.proj.weight'] = new_proj.weight
    # OrderedDict is a subclass of dict
    if not isinstance(checkpoint, dict):
        raise RuntimeError(
            f'No state_dict found in checkpoint file (unknown)')
    # get state_dict from checkpoint
    if 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        state_dict = checkpoint

    # strip prefix of state_dict
    metadata = getattr(state_dict, '_metadata', OrderedDict())
    for p, r in revise_keys:
        state_dict = OrderedDict(
            {re.sub(p, r, k): v
             for k, v in state_dict.items()})
    # Also drop any 'backbone.' prefix left by detection-style wrappers.
    state_dict = OrderedDict({k.replace('backbone.',''):v for k,v in state_dict.items()})
    # Keep metadata in state_dict
    state_dict._metadata = metadata

    # load state_dict
    load_state_dict(model, state_dict, strict, logger)
    return checkpoint
121
+
122
def init_weights(m):
    """Initialize a module in place: Kaiming-normal for conv weights,
    identity (weight=1) for BatchNorm2d, Kaiming-uniform for Linear;
    any existing bias is zeroed."""
    def _zero_bias(module):
        # Biases, when present, always start at zero.
        if module.bias is not None:
            nn.init.constant_(module.bias, 0)

    if isinstance(m, (nn.Conv2d, nn.Conv1d)):
        nn.init.kaiming_normal_(m.weight)
        _zero_bias(m)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        _zero_bias(m)
    if isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight)
        _zero_bias(m)
135
def init_layer(layer):
    """Xavier-initialize a Linear or Convolutional layer's weight and
    zero its bias (if the layer has one)."""
    nn.init.xavier_uniform_(layer.weight)
    bias = getattr(layer, 'bias', None)
    if bias is not None:
        bias.data.fill_(0.)
141
+
142
+
143
def init_bn(bn):
    """Reset a batch-norm layer to the identity affine transform
    (weight = 1, bias = 0)."""
    bn.weight.data.fill_(1.)
    bn.bias.data.fill_(0.)
147
+
148
class MaxPool(nn.Module):
    """Temporal max pooling: reduce `decision` by max over `pooldim`.
    The `logits` argument is accepted for pooling-API uniformity but unused."""
    def __init__(self, pooldim=1):
        super().__init__()
        self.pooldim = pooldim

    def forward(self, logits, decision):
        pooled, _ = decision.max(dim=self.pooldim)
        return pooled
155
+
156
+
157
class LinearSoftPool(nn.Module):
    """LinearSoftPool
    Linear softmax pooling: weights each frame by its own probability, so the
    pooled value lies near the actual maximum.
    Taken from the paper:
    A Comparison of Five Multiple Instance Learning Pooling Functions for
    Sound Event Detection with Weak Labeling
    https://arxiv.org/abs/1810.09050
    """
    def __init__(self, pooldim=1):
        super().__init__()
        self.pooldim = pooldim

    def forward(self, logits, time_decision):
        # sum(p^2) / sum(p); epsilon guards against an all-zero decision.
        numerator = torch.sum(time_decision * time_decision, dim=self.pooldim)
        denominator = torch.sum(time_decision, dim=self.pooldim) + 1e-7
        return numerator / denominator
171
+
172
class ConvBlock(nn.Module):
    """Two 3x3 conv + BN + ReLU layers followed by a configurable 2D pooling."""

    def __init__(self, in_channels, out_channels):
        super(ConvBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=out_channels,
                               kernel_size=(3, 3), stride=(1, 1),
                               padding=(1, 1), bias=False)
        self.conv2 = nn.Conv2d(in_channels=out_channels,
                               out_channels=out_channels,
                               kernel_size=(3, 3), stride=(1, 1),
                               padding=(1, 1), bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.init_weight()

    def init_weight(self):
        """Xavier-init both convolutions; reset both batch norms to identity."""
        for conv in (self.conv1, self.conv2):
            init_layer(conv)
        for bn in (self.bn1, self.bn2):
            init_bn(bn)

    def forward(self, input, pool_size=(2, 2), pool_type='avg'):
        """pool_type: 'max', 'avg' or 'avg+max' (element-wise sum of both)."""
        x = F.relu_(self.bn1(self.conv1(input)))
        x = F.relu_(self.bn2(self.conv2(x)))
        if pool_type == 'max':
            return F.max_pool2d(x, kernel_size=pool_size)
        if pool_type == 'avg':
            return F.avg_pool2d(x, kernel_size=pool_size)
        if pool_type == 'avg+max':
            return (F.avg_pool2d(x, kernel_size=pool_size)
                    + F.max_pool2d(x, kernel_size=pool_size))
        raise Exception('Incorrect argument!')
216
+
217
class ConvBlock_GLU(nn.Module):
    """Conv + BN followed by a GLU gate: the sigmoid of the first half of the
    channels gates the second half, so the output has out_channels // 2
    channels.

    NOTE(review): padding is fixed at (1, 1) regardless of kernel_size, so
    only kernel_size=(3, 3) preserves spatial size exactly; callers
    compensate by cropping/padding.
    """
    def __init__(self, in_channels, out_channels, kernel_size=(3, 3)):
        super(ConvBlock_GLU, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=out_channels,
                               kernel_size=kernel_size, stride=(1, 1),
                               padding=(1, 1), bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.sigmoid = nn.Sigmoid()
        self.init_weight()

    def init_weight(self):
        init_layer(self.conv1)
        init_bn(self.bn1)

    def forward(self, input, pool_size=(2, 2), pool_type='avg'):
        pre = self.bn1(self.conv1(input))
        half = pre.shape[1] // 2
        gate = self.sigmoid(pre[:, :half, :, :])
        x = gate * pre[:, half:, :, :]
        if pool_type == 'max':
            x = F.max_pool2d(x, kernel_size=pool_size)
        elif pool_type == 'avg':
            x = F.avg_pool2d(x, kernel_size=pool_size)
        elif pool_type == 'avg+max':
            x = (F.avg_pool2d(x, kernel_size=pool_size)
                 + F.max_pool2d(x, kernel_size=pool_size))
        elif pool_type in ('None', 'LP'):
            # Intentional no-op ('LP' pooling was never enabled:
            # nn.LPPool2d(4, pool_size)).
            pass
        else:
            raise Exception('Incorrect argument!')
        return x
254
+
255
class Mul_scale_GLU(nn.Module):
    """Multi-scale GLU front-end: three parallel GLU conv stems (1x1, 3x3 and
    5x5 kernels) over the input spectrogram are concatenated, then refined by
    a stack of GLU conv blocks with progressive time/frequency pooling.

    NOTE(review): branch alignment relies on hard-coded sizes — the 1x1
    branch is cropped to [:500, :32] and the 5x5 branch is replication-padded
    — which assumes a specific input time/frequency size. Confirm before
    reusing with other input shapes.
    """
    def __init__(self):
        super(Mul_scale_GLU,self).__init__()
        # Each ConvBlock_GLU halves out_channels via the GLU gate, so each
        # stem contributes 32 channels (3 x 32 = 96 after concatenation).
        self.conv_block1_1 = ConvBlock_GLU(in_channels=1, out_channels=64,kernel_size=(1,1)) # 1*1
        self.conv_block1_2 = ConvBlock_GLU(in_channels=1, out_channels=64,kernel_size=(3,3)) # 3*3
        self.conv_block1_3 = ConvBlock_GLU(in_channels=1, out_channels=64,kernel_size=(5,5)) # 5*5
        self.conv_block2 = ConvBlock_GLU(in_channels=96, out_channels=128*2)
        # self.conv_block3 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock_GLU(in_channels=128, out_channels=128*2)
        self.conv_block4 = ConvBlock_GLU(in_channels=128, out_channels=256*2)
        self.conv_block5 = ConvBlock_GLU(in_channels=256, out_channels=256*2)
        self.conv_block6 = ConvBlock_GLU(in_channels=256, out_channels=512*2)
        self.conv_block7 = ConvBlock_GLU(in_channels=512, out_channels=512*2)
        # Pads one row/column (bottom/right) to undo the 5x5 stem's shrink.
        self.padding = nn.ReplicationPad2d((0,1,0,1))

    def forward(self, input, fi=None):
        """
        Input: (batch_size, data_length)"""
        x1 = self.conv_block1_1(input, pool_size=(2, 2), pool_type='avg')
        # Crop the 1x1 branch so all three branches share one spatial size.
        x1 = x1[:,:,:500,:32]
        #print('x1 ',x1.shape)
        x2 = self.conv_block1_2(input,pool_size=(2,2),pool_type='avg')
        #print('x2 ',x2.shape)
        x3 = self.conv_block1_3(input,pool_size=(2,2),pool_type='avg')
        x3 = self.padding(x3)
        #print('x3 ',x3.shape)
        # assert 1==2
        x = torch.cat([x1,x2],dim=1)
        x = torch.cat([x,x3],dim=1)
        #print('x ',x.shape)
        # Alternate unpooled / pooled GLU blocks with dropout in between.
        x = self.conv_block2(x, pool_size=(2, 2), pool_type='None')
        x = self.conv_block3(x,pool_size=(2,2),pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training) #
        #print('x2,3 ',x.shape)
        x = self.conv_block4(x, pool_size=(2, 4), pool_type='None')
        x = self.conv_block5(x,pool_size=(2,4),pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        #print('x4,5 ',x.shape)

        x = self.conv_block6(x, pool_size=(1, 4), pool_type='None')
        x = self.conv_block7(x, pool_size=(1, 4), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        # print('x6,7 ',x.shape)
        # assert 1==2
        return x
300
+
301
class Cnn14(nn.Module):
    """CNN14 trunk (PANNs-style) producing a frame-level 128-d embedding.

    The spectrogram / log-mel / SpecAugment front-end modules are constructed
    (for checkpoint compatibility) but are not applied in forward(); the
    input is used directly as a (batch, time, mel) feature map.
    """
    def __init__(self, sample_rate=32000, window_size=1024, hop_size=320, mel_bins=64, fmin=50,
                 fmax=14000, classes_num=527):
        super(Cnn14, self).__init__()

        # Front-end configuration (kept identical for checkpoint parity).
        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        # Spectrogram extractor
        self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
            win_length=window_size, window=window, center=center, pad_mode=pad_mode,
            freeze_parameters=True)

        # Logmel feature extractor
        self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size,
            n_mels=mel_bins, fmin=fmin, fmax=fmax, ref=ref, amin=amin, top_db=top_db,
            freeze_parameters=True)

        # Spec augmenter
        self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
            freq_drop_width=8, freq_stripes_num=2)

        self.bn0 = nn.BatchNorm2d(64)

        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)
        self.conv_block5 = ConvBlock(in_channels=512, out_channels=1024)
        self.conv_block6 = ConvBlock(in_channels=1024, out_channels=2048)

        self.fc1 = nn.Linear(2048, 128, bias=True)
        self.fc_audioset = nn.Linear(128, classes_num, bias=True)

        self.init_weight()

    def init_weight(self):
        init_layer(self.fc1)
        init_layer(self.fc_audioset)

    def forward(self, input_, mixup_lambda=None):
        """
        Input: (batch_size, data_length)"""
        x = input_.unsqueeze(1)  # add channel axis: (B, 1, T, F)
        # Six conv stages; the first three also pool along time.
        stages = (
            (self.conv_block1, (2, 2)),
            (self.conv_block2, (2, 2)),
            (self.conv_block3, (2, 2)),
            (self.conv_block4, (1, 2)),
            (self.conv_block5, (1, 2)),
            (self.conv_block6, (1, 2)),
        )
        for block, pool in stages:
            x = block(x, pool_size=pool, pool_type='avg')
            x = F.dropout(x, p=0.2, training=self.training)
        # (B, C, T', F') -> (B, T', C*F'), then project each frame to 128-d.
        x = x.transpose(1, 2).contiguous().flatten(-2)
        return self.fc1(x)
375
+
376
class Cnn10_fi(nn.Module):
    """CNN10 trunk with optional FiLM conditioning.

    When `fi` is given, each conv stage's output is modulated as
    gamma * x + beta, with gamma = fi[:, 0] and beta = fi[:, 1]
    broadcast over channels, time and frequency.
    """
    def __init__(self):
        super(Cnn10_fi, self).__init__()
        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)

        # self.fc1 = nn.Linear(512, 512, bias=True)
        # self.fc_audioset = nn.Linear(512, classes_num, bias=True)

        # self.init_weight()

    @staticmethod
    def _film(x, fi):
        """Feature-wise affine modulation: fi[:, 0] scales, fi[:, 1] shifts."""
        gamma = fi[:, 0].unsqueeze(1).unsqueeze(2).unsqueeze(3).expand_as(x)
        beta = fi[:, 1].unsqueeze(1).unsqueeze(2).unsqueeze(3).expand_as(x)
        return gamma * x + beta

    def forward(self, input, fi=None):
        """
        Input: (batch_size, 1, time_steps, freq_bins)

        Note: the None-check uses `is not None` — comparing a tensor with
        `!=` is the element-wise operator and must not be used for this.
        """
        x = self.conv_block1(input, pool_size=(2, 2), pool_type='avg')
        if fi is not None:
            x = self._film(x, fi)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block2(x, pool_size=(2, 2), pool_type='avg')
        if fi is not None:
            x = self._film(x, fi)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block3(x, pool_size=(2, 4), pool_type='avg')
        if fi is not None:
            x = self._film(x, fi)
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block4(x, pool_size=(1, 4), pool_type='avg')
        if fi is not None:
            x = self._film(x, fi)
        x = F.dropout(x, p=0.2, training=self.training)
        return x
418
+
419
class Cnn10_mul_scale(nn.Module):
    """CNN10 variant with a multi-scale GLU stem: 1x1 / 3x3 / 5x5 GLU conv
    branches are cropped/padded to a common size, concatenated (96 channels)
    and fed through three plain ConvBlocks. `scale` (8/4/2/other) selects the
    total temporal downsampling via the per-stage pooling schedule.

    NOTE(review): the [:500, :32] crop and the replication padding hard-code
    an expected input size — confirm before reusing with other inputs.
    """
    def __init__(self,scale=8):
        super(Cnn10_mul_scale, self).__init__()
        self.conv_block1_1 = ConvBlock_GLU(in_channels=1, out_channels=64,kernel_size=(1,1))
        self.conv_block1_2 = ConvBlock_GLU(in_channels=1, out_channels=64,kernel_size=(3,3))
        self.conv_block1_3 = ConvBlock_GLU(in_channels=1, out_channels=64,kernel_size=(5,5))
        self.conv_block2 = ConvBlock(in_channels=96, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)
        self.scale = scale
        # Pads one row/column (bottom/right) to undo the 5x5 stem's shrink.
        self.padding = nn.ReplicationPad2d((0,1,0,1))
    def forward(self, input, pool_size=(2, 2), pool_type='avg'):
        """
        Input: (batch_size, data_length)"""
        # Per-stage pooling schedule chosen by the requested downsampling.
        if self.scale == 8:
            pool_size1 = (2,2)
            pool_size2 = (2,2)
            pool_size3 = (2,4)
            pool_size4 = (1,4)
        elif self.scale == 4:
            pool_size1 = (2,2)
            pool_size2 = (2,2)
            pool_size3 = (1,4)
            pool_size4 = (1,4)
        elif self.scale == 2:
            pool_size1 = (2,2)
            pool_size2 = (1,2)
            pool_size3 = (1,4)
            pool_size4 = (1,4)
        else:
            pool_size1 = (1,2)
            pool_size2 = (1,2)
            pool_size3 = (1,4)
            pool_size4 = (1,4)
        # print('input ',input.shape)
        x1 = self.conv_block1_1(input, pool_size=pool_size1, pool_type='avg')
        # Crop the 1x1 branch so all branches share one spatial size.
        x1 = x1[:,:,:500,:32]
        #print('x1 ',x1.shape)
        x2 = self.conv_block1_2(input, pool_size=pool_size1, pool_type='avg')
        #print('x2 ',x2.shape)
        x3 = self.conv_block1_3(input, pool_size=pool_size1, pool_type='avg')
        x3 = self.padding(x3)
        #print('x3 ',x3.shape)
        # assert 1==2
        # Truncate every branch to the shortest time length before concat.
        m_i = min(x3.shape[2],min(x1.shape[2],x2.shape[2]))
        #print('m_i ', m_i)
        x = torch.cat([x1[:,:,:m_i,:],x2[:,:, :m_i,:],x3[:,:, :m_i,:]],dim=1)
        # x = torch.cat([x,x3],dim=1)

        # x = self.conv_block1(input, pool_size=pool_size1, pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block2(x, pool_size=pool_size2, pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block3(x, pool_size=pool_size3, pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block4(x, pool_size=pool_size4, pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        return x
477
+
478
+
479
class Cnn10(nn.Module):
    """Four-stage CNN10 trunk; `scale` (8/4/2/other) selects the total
    temporal downsampling via the per-stage pooling schedule."""
    def __init__(self, scale=8):
        super(Cnn10, self).__init__()
        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)
        self.scale = scale

    def forward(self, input, pool_size=(2, 2), pool_type='avg'):
        """
        Input: (batch_size, 1, time_steps, freq_bins)"""
        # Pooling schedule per stage, keyed by the requested downsampling;
        # any unknown scale falls back to the 1x plan.
        pool_plans = {
            8: ((2, 2), (2, 2), (2, 4), (1, 4)),
            4: ((2, 2), (2, 2), (1, 4), (1, 4)),
            2: ((2, 2), (1, 2), (1, 4), (1, 4)),
        }
        pools = pool_plans.get(self.scale, ((1, 2), (1, 2), (1, 4), (1, 4)))
        blocks = (self.conv_block1, self.conv_block2,
                  self.conv_block3, self.conv_block4)
        x = input
        for block, pool in zip(blocks, pools):
            x = block(x, pool_size=pool, pool_type='avg')
            x = F.dropout(x, p=0.2, training=self.training)
        return x
519
+
520
class MeanPool(nn.Module):
    """Average `decision` over `pooldim`; `logits` is accepted for
    pooling-API uniformity but unused."""
    def __init__(self, pooldim=1):
        super().__init__()
        self.pooldim = pooldim

    def forward(self, logits, decision):
        return decision.mean(dim=self.pooldim)
527
+
528
class ResPool(nn.Module):
    # NOTE(review): this class defines no forward(); calling an instance will
    # fail. It looks like an unfinished residual-pooling experiment built on
    # LinearSoftPool — confirm intent before use.
    def __init__(self, pooldim=1):
        super().__init__()
        self.pooldim = pooldim
        self.linPool = LinearSoftPool(pooldim=1)
533
+
534
class AutoExpPool(nn.Module):
    """Exponential auto-pooling: frames are weighted by exp(alpha * decision)
    with a learnable per-class scale alpha (initialized to 1)."""
    def __init__(self, outputdim=10, pooldim=1):
        super().__init__()
        self.outputdim = outputdim
        # torch.full with an int fill value yields an int64 tensor, and
        # integer tensors cannot require grad, so nn.Parameter would raise.
        # Use a float fill value to get a trainable float32 parameter.
        self.alpha = nn.Parameter(torch.full((outputdim, ), 1.0))
        self.pooldim = pooldim

    def forward(self, logits, decision):
        scaled = self.alpha * decision  # \alpha * P(Y|x) in the paper
        return (logits * torch.exp(scaled)).sum(
            self.pooldim) / torch.exp(scaled).sum(self.pooldim)
545
+
546
+
547
class SoftPool(nn.Module):
    """Softmax-weighted pooling of `decision` along `pooldim`, with
    temperature T controlling how peaked the weighting is."""
    def __init__(self, T=1, pooldim=1):
        super().__init__()
        self.pooldim = pooldim
        self.T = T

    def forward(self, logits, decision):
        weights = (decision / self.T).softmax(dim=self.pooldim)
        return (decision * weights).sum(dim=self.pooldim)
556
+
557
+
558
class AutoPool(nn.Module):
    """Auto-pool: softmax weights computed from a learnable per-class
    scale alpha applied to the decision itself."""
    def __init__(self, outputdim=10, pooldim=1):
        super().__init__()
        self.outputdim = outputdim
        self.alpha = nn.Parameter(torch.ones(outputdim))
        self.dim = pooldim

    def forward(self, logits, decision):
        # \alpha * P(Y|x) in the paper; softmax over `dim` gives the weights.
        weights = torch.softmax(self.alpha * decision, dim=self.dim)
        return (decision * weights).sum(dim=self.dim)  # B x C
570
+
571
+
572
class ExtAttentionPool(nn.Module):
    """Attention pooling with a zero-initialized linear scorer, so the
    attention distribution starts uniform and is learned from scratch.

    NOTE(review): the broadcast in forward multiplies a (B, D, 1, T) view of
    the logits by (B, T, O, 1) weights, which only aligns when the feature
    dimension D equals the number of time steps T — verify the intended
    input shapes before using this pooling.
    """
    def __init__(self, inputdim, outputdim=10, pooldim=1, **kwargs):
        super().__init__()
        self.inputdim = inputdim
        self.outputdim = outputdim
        self.pooldim = pooldim
        # Zero init => softmax yields uniform attention at the start.
        self.attention = nn.Linear(inputdim, outputdim)
        nn.init.zeros_(self.attention.weight)
        nn.init.zeros_(self.attention.bias)
        self.activ = nn.Softmax(dim=self.pooldim)

    def forward(self, logits, decision):
        # Logits of shape (B, T, D), decision of shape (B, T, C)
        w_x = self.activ(self.attention(logits) / self.outputdim)
        h = (logits.permute(0, 2, 1).contiguous().unsqueeze(-2) *
             w_x.unsqueeze(-1)).flatten(-2).contiguous()
        return torch.sum(h, self.pooldim)
589
+
590
+
591
class AttentionPool(nn.Module):
    """Attention pooling: per-frame weights are a softmax over a linear
    transform of the logits; decisions are averaged under those weights."""
    def __init__(self, inputdim, outputdim=10, pooldim=1, **kwargs):
        super().__init__()
        self.inputdim = inputdim
        self.outputdim = outputdim
        self.pooldim = pooldim
        self.transform = nn.Linear(inputdim, outputdim)
        self.activ = nn.Softmax(dim=self.pooldim)
        self.eps = 1e-7

    def forward(self, logits, decision):
        # logits: (B, T, D); decision: (B, T, C)
        # Clamping keeps the softmax numerically stable.
        attn = self.activ(self.transform(logits).clamp(-15, 15))
        weighted = (decision * attn).sum(self.pooldim)
        norm = attn.sum(self.pooldim) + self.eps
        return weighted / norm
610
+
611
class Block2D(nn.Module):
    """BN -> Conv2d -> LeakyReLU(0.1) block; spatial size is preserved with
    the default kernel_size=3 / padding=1.

    The layers are kept inside an nn.Sequential named `block` so existing
    checkpoints (keys `block.0.*`, `block.1.*`) remain loadable.
    """
    def __init__(self, cin, cout, kernel_size=3, padding=1):
        super().__init__()
        layers = (
            nn.BatchNorm2d(cin),
            nn.Conv2d(cin, cout, kernel_size=kernel_size,
                      padding=padding, bias=False),
            nn.LeakyReLU(inplace=True, negative_slope=0.1),
        )
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        return self.block(x)
625
+
626
class AudioCNN(nn.Module):
    """Four-block CNN classifier over (batch, time, freq) inputs; forward()
    returns (128-d embedding, class logits) and extract() the embedding only."""
    def __init__(self, classes_num):
        super(AudioCNN, self).__init__()
        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)
        self.fc1 = nn.Linear(512, 128, bias=True)
        self.fc = nn.Linear(128, classes_num, bias=True)
        self.init_weights()

    def init_weights(self):
        init_layer(self.fc)

    def _embed(self, input):
        """Shared trunk: (B, T, F) -> 128-d embedding."""
        x = input[:, None, :, :]  # (B, 1, T, F)
        for block in (self.conv_block1, self.conv_block2,
                      self.conv_block3, self.conv_block4):
            x = block(x, pool_size=(2, 2), pool_type='avg')
        # (B, C, T', F'): average away frequency, then global max over time.
        x = torch.mean(x, dim=3)
        (x, _) = torch.max(x, dim=2)
        return self.fc1(x)

    def forward(self, input):
        '''
        Input: (batch_size, times_steps, freq_bins)'''
        x = self._embed(input)
        output = self.fc(x)
        return x, output

    def extract(self, input):
        '''Input: (batch_size, times_steps, freq_bins)'''
        return self._embed(input)
669
+
670
def parse_poolingfunction(poolingfunction_name='mean', **kwargs):
    """parse_poolingfunction
    A helper function to parse any temporal pooling.
    Pooling is done on dimension 1.

    :param poolingfunction_name: one of 'mean', 'max', 'linear', 'expalpha',
        'soft', 'auto', 'attention' (case-insensitive).
    :param **kwargs: 'outputdim' for 'expalpha'/'auto'/'attention';
        'inputdim' additionally for 'attention'.
    :raises ValueError: for an unknown pooling name (previously the function
        silently returned None, deferring the failure to first use).
    """
    poolingfunction_name = poolingfunction_name.lower()
    if poolingfunction_name == 'mean':
        return MeanPool(pooldim=1)
    elif poolingfunction_name == 'max':
        return MaxPool(pooldim=1)
    elif poolingfunction_name == 'linear':
        return LinearSoftPool(pooldim=1)
    elif poolingfunction_name == 'expalpha':
        return AutoExpPool(outputdim=kwargs['outputdim'], pooldim=1)
    elif poolingfunction_name == 'soft':
        return SoftPool(pooldim=1)
    elif poolingfunction_name == 'auto':
        return AutoPool(outputdim=kwargs['outputdim'])
    elif poolingfunction_name == 'attention':
        return AttentionPool(inputdim=kwargs['inputdim'],
                             outputdim=kwargs['outputdim'])
    raise ValueError(
        'Unknown pooling function: {!r}'.format(poolingfunction_name))
694
class conv1d(nn.Module):
    """1-D convolution followed by ReLU, with TensorFlow-style padding modes:
    'VALID' (no padding) or 'SAME' (output length preserved for stride 1)."""
    def __init__(self, nin, nout, kernel_size=3, stride=1, padding='VALID', dilation=1):
        super(conv1d, self).__init__()
        if padding == 'VALID':
            pad = 0
        elif padding == 'SAME':
            pad = dilation * ((kernel_size - 1) // 2)
        else:
            raise ValueError("Padding Mode Error!")
        self.conv = nn.Conv1d(nin, nout, kernel_size=kernel_size,
                              stride=stride, padding=pad)
        self.act = nn.ReLU()
        self.init_layer(self.conv)

    def init_layer(self, layer, nonlinearity='relu'):
        """Initialize a Linear or Convolutional layer. """
        nn.init.kaiming_normal_(layer.weight, nonlinearity=nonlinearity)
        nn.init.constant_(layer.bias, 0.1)

    def forward(self, x):
        return self.act(self.conv(x))
715
+
716
class Atten_1(nn.Module):
    """Single-query attention over a context window: the frame at index
    `context` queries all frames, and its feature vector is augmented with
    the attention-weighted average of the window (residual connection)."""
    def __init__(self, input_dim, context=2, dropout_rate=0.2):
        super(Atten_1, self).__init__()
        self._matrix_k = nn.Linear(input_dim, input_dim // 4)
        self._matrix_q = nn.Linear(input_dim, input_dim // 4)
        self.relu = nn.ReLU()
        self.context = context
        self._dropout_layer = nn.Dropout(dropout_rate)
        self.init_layer(self._matrix_k)
        self.init_layer(self._matrix_q)

    def init_layer(self, layer, nonlinearity='leaky_relu'):
        """Initialize a Linear or Convolutional layer. """
        nn.init.kaiming_uniform_(layer.weight, nonlinearity=nonlinearity)
        if hasattr(layer, 'bias'):
            if layer.bias is not None:
                layer.bias.data.fill_(0.)

    def forward(self, input_x):
        # Keys from every frame in the window: (B, T, D/4)
        keys = self._dropout_layer(self.relu(self._matrix_k(input_x)))
        # Single query from the centre (`context`) frame: (B, 1, D/4)
        query = input_x[:, self.context, :][:, None, :]
        query = self._dropout_layer(self.relu(self._matrix_q(query)))
        # Scaled dot-product score per frame: (B, T)
        scores = torch.matmul(
            keys, query.transpose(-2, -1) / math.sqrt(keys.size(-1)))
        alpha = F.softmax(scores.squeeze(2), dim=-1)
        # Broadcast the weights across features and average over time.
        weighted = alpha.unsqueeze(2).repeat(1, 1, input_x.shape[2]) * input_x
        pooled = weighted.mean(1)
        # Residual connection with the centre frame.
        return input_x[:, self.context, :] + pooled
765
+
766
class Fusion(nn.Module):
    """Fuse a reference embedding with a mixture embedding.

    Each input is expanded to ``inputdim2 * n_fac`` channels by a 1x1
    conv1d, the expansions are gated against each other by element-wise
    multiplication, and the product is average-pooled along the feature
    axis back down to ``inputdim2`` channels.
    """

    def __init__(self, inputdim, inputdim2, n_fac):
        super().__init__()
        self.fuse_layer1 = conv1d(inputdim, inputdim2 * n_fac, 1)
        self.fuse_layer2 = conv1d(inputdim2, inputdim2 * n_fac, 1)
        # Pools along the last (feature) dimension of a (B, T, C) tensor.
        self.avg_pool = nn.AvgPool1d(n_fac, stride=n_fac)

    def forward(self, embedding, mix_embed):
        # conv1d expects (B, C, T), so permute around each projection.
        ref = self.fuse_layer1(embedding.permute(0, 2, 1)).permute(0, 2, 1)
        mix = self.fuse_layer2(mix_embed.permute(0, 2, 1)).permute(0, 2, 1)
        gated = ref * mix                 # (B, T, inputdim2 * n_fac)
        return self.avg_pool(gated)       # (B, T, inputdim2)
786
class CDur_fusion(nn.Module):
    """CDur detector that combines the reference embedding with CNN
    features through a Fusion block instead of concatenation.

    :param inputdim: feature (frequency) dimension of the input spectrogram
    :param outputdim: number of output classes (typically 2)
    """

    def __init__(self, inputdim, outputdim, **kwargs):
        super().__init__()
        self.features = nn.Sequential(
            Block2D(1, 32),
            nn.LPPool2d(4, (2, 4)),
            Block2D(32, 128),
            Block2D(128, 128),
            nn.LPPool2d(4, (2, 4)),
            Block2D(128, 128),
            Block2D(128, 128),
            nn.LPPool2d(4, (1, 4)),
            nn.Dropout(0.3),
        )
        # Probe the CNN once to learn its flattened output width.
        with torch.no_grad():
            rnn_input_dim = self.features(torch.randn(1, 1, 500, inputdim)).shape
            rnn_input_dim = rnn_input_dim[1] * rnn_input_dim[-1]

        self.gru = nn.GRU(128, 128, bidirectional=True, batch_first=True)
        # BUG FIX: Fusion.__init__ takes (inputdim, inputdim2, n_fac); the
        # original call `Fusion(128, 2)` was missing an argument and raised
        # TypeError at construction. Both the embedding and the CNN features
        # are 128-dim here, so fuse 128 -> 128 with factor 2.
        self.fusion = Fusion(128, 128, 2)
        self.fc = nn.Linear(256, 256)
        self.outputlayer = nn.Linear(256, outputdim)
        self.features.apply(init_weights)
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding):
        """x: (B, T, D) spectrogram; embedding: (B, 128) reference embedding.

        Returns (positive-class prob at CNN frame rate,
                 full per-class decisions upsampled back to T frames).
        """
        batch, time, dim = x.shape
        x = x.unsqueeze(1)                                    # (B, 1, T, D)
        x = self.features(x)
        x = x.transpose(1, 2).contiguous().flatten(-2)        # (B, T', 128)
        embedding = embedding.unsqueeze(1).repeat(1, x.shape[1], 1)
        x = self.fusion(embedding, x)                         # (B, T', 128)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        x, _ = self.gru(x)                                    # (B, T', 256)
        x = self.fc(x)
        decision_time = torch.softmax(self.outputlayer(x), dim=2)  # (B, T', C)
        # Upsample frame decisions back to the input time resolution.
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)              # (B, T, C)
        return decision_time[:, :, 0], decision_up
832
class CDur(nn.Module):
    """Baseline CDur detector: CNN feature extractor + BiGRU, with the
    reference embedding concatenated onto every CNN time step.

    ``time_resolution`` is accepted for interface compatibility; it is not
    used by this variant.
    """

    def __init__(self, inputdim, outputdim, time_resolution, **kwargs):
        super().__init__()
        self.features = nn.Sequential(
            Block2D(1, 32),
            nn.LPPool2d(4, (2, 4)),
            Block2D(32, 128),
            Block2D(128, 128),
            nn.LPPool2d(4, (2, 4)),
            Block2D(128, 128),
            Block2D(128, 128),
            nn.LPPool2d(4, (2, 4)),
            nn.Dropout(0.3),
        )
        # Probe the CNN once to learn its flattened output width (kept to
        # preserve the original initialization, including RNG consumption).
        with torch.no_grad():
            probe = self.features(torch.randn(1, 1, 500, inputdim)).shape
            rnn_input_dim = probe[1] * probe[-1]

        self.gru = nn.GRU(256, 256, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(512, 256)
        self.outputlayer = nn.Linear(256, outputdim)
        self.features.apply(init_weights)
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding, one_hot=None):
        """x: (B, T, D) features; embedding: (B, E) reference embedding.

        Returns (positive-class prob at CNN frame rate,
                 per-class decisions upsampled back to T frames).
        """
        batch, time, dim = x.shape
        feats = self.features(x.unsqueeze(1))                   # (B, C, T', F)
        feats = feats.transpose(1, 2).contiguous().flatten(-2)  # (B, T', C*F)
        ref = embedding.unsqueeze(1).repeat(1, feats.shape[1], 1)
        feats = torch.cat((feats, ref), dim=2)                  # (B, T', C*F+E)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        feats, _ = self.gru(feats)
        feats = self.fc(feats)
        decision_time = torch.softmax(self.outputlayer(feats), dim=2)
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)                # (B, T, C)
        return decision_time[:, :, 0], decision_up
876
class CDur_big(nn.Module):
    """Larger-capacity CDur: deeper CNN (up to 512 channels) + BiGRU, with
    the reference embedding concatenated onto every CNN time step."""

    def __init__(self, inputdim, outputdim, **kwargs):
        super().__init__()
        self.features = nn.Sequential(
            Block2D(1, 64),
            Block2D(64, 64),
            nn.LPPool2d(4, (2, 2)),
            Block2D(64, 128),
            Block2D(128, 128),
            nn.LPPool2d(4, (2, 2)),
            Block2D(128, 256),
            Block2D(256, 256),
            nn.LPPool2d(4, (2, 4)),
            Block2D(256, 512),
            Block2D(512, 512),
            nn.LPPool2d(4, (1, 4)),
            nn.Dropout(0.3),
        )
        # Probe the CNN once for its flattened output width (kept to
        # preserve the original initialization, including RNG consumption).
        with torch.no_grad():
            probe = self.features(torch.randn(1, 1, 500, inputdim)).shape
            rnn_input_dim = probe[1] * probe[-1]
        self.gru = nn.GRU(640, 512, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(1024, 256)
        self.outputlayer = nn.Linear(256, outputdim)
        self.features.apply(init_weights)
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding):
        """x: (B, T, D) features; embedding: (B, E) reference embedding.

        Returns (positive-class prob at CNN frame rate,
                 per-class decisions upsampled back to T frames).
        """
        batch, time, dim = x.shape
        feats = self.features(x.unsqueeze(1))                   # (B, C, T', F)
        feats = feats.transpose(1, 2).contiguous().flatten(-2)  # (B, T', C*F)
        ref = embedding.unsqueeze(1).repeat(1, feats.shape[1], 1)
        feats = torch.cat((feats, ref), dim=2)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        feats, _ = self.gru(feats)
        feats = self.fc(feats)
        decision_time = torch.softmax(self.outputlayer(feats), dim=2)
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up
922
class CDur_GLU(nn.Module):
    """CDur variant with a multi-scale GLU front-end (Mul_scale_GLU) in
    place of the plain CNN stack."""

    def __init__(self, inputdim, outputdim, **kwargs):
        super().__init__()
        self.features = Mul_scale_GLU()
        # GRU input: 512 CNN features + 128 embedding = 640.
        self.gru = nn.GRU(640, 512, 1, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(1024, 256)
        self.outputlayer = nn.Linear(256, outputdim)
        # NOTE: the GLU front-end keeps its own initialization; only the
        # output layer is re-initialized here.
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding, one_hot=None):
        """x: (B, T, D) features; embedding: (B, 128) reference embedding.

        Returns (positive-class prob at CNN frame rate,
                 per-class decisions upsampled back to T frames).
        """
        batch, time, dim = x.shape
        feats = self.features(x.unsqueeze(1))                   # (B, C, T', F)
        feats = feats.transpose(1, 2).contiguous().flatten(-2)  # (B, T', 512)
        ref = embedding.unsqueeze(1).repeat(1, feats.shape[1], 1)
        feats = torch.cat((feats, ref), dim=2)                  # (B, T', 640)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        feats, _ = self.gru(feats)
        feats = self.fc(feats)
        decision_time = torch.softmax(self.outputlayer(feats), dim=2)
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up
960
class CDur_CNN14(nn.Module):
    """CDur variant with a Cnn10 front-end whose temporal pooling factor is
    chosen from the requested output time resolution."""

    def __init__(self, inputdim, outputdim, time_resolution, **kwargs):
        super().__init__()
        # Desired frame count -> Cnn10 pooling factor; anything else uses 0.
        pooling_for_resolution = {125: 8, 250: 4, 500: 2}
        self.features = Cnn10(pooling_for_resolution.get(time_resolution, 0))
        # Probe the CNN once for its flattened output width (kept to
        # preserve the original initialization, including RNG consumption).
        with torch.no_grad():
            probe = self.features(torch.randn(1, 1, 500, inputdim)).shape
            rnn_input_dim = probe[1] * probe[-1]
        # GRU input: 512 CNN features + 128 embedding = 640.
        self.gru = nn.GRU(640, 512, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(1024, 256)
        self.outputlayer = nn.Linear(256, outputdim)
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding, one_hot=None):
        """x: (B, T, D) features; embedding: (B, 128) reference embedding.

        Returns (positive-class prob at CNN frame rate,
                 per-class decisions upsampled back to T frames).
        """
        batch, time, dim = x.shape
        feats = self.features(x.unsqueeze(1))                   # (B, C, T', F)
        feats = feats.transpose(1, 2).contiguous().flatten(-2)  # (B, T', 512)
        ref = embedding.unsqueeze(1).repeat(1, feats.shape[1], 1)
        feats = torch.cat((feats, ref), dim=2)                  # (B, T', 640)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        feats, _ = self.gru(feats)
        feats = self.fc(feats)
        decision_time = torch.softmax(self.outputlayer(feats), dim=2)
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up
1006
class CDur_CNN_mul_scale(nn.Module):
    """CDur variant with a multi-scale Cnn10 front-end; the temporal
    pooling factor follows the requested output time resolution."""

    def __init__(self, inputdim, outputdim, time_resolution, **kwargs):
        super().__init__()
        # Desired frame count -> pooling factor; anything else uses 0.
        pooling_for_resolution = {125: 8, 250: 4, 500: 2}
        self.features = Cnn10_mul_scale(
            pooling_for_resolution.get(time_resolution, 0))
        # GRU input: 512 CNN features + 128 embedding = 640.
        self.gru = nn.GRU(640, 512, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(1024, 256)
        self.outputlayer = nn.Linear(256, outputdim)
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding, one_hot=None):
        """x: (B, T, D) features; embedding: (B, 128) reference embedding.

        Returns (positive-class prob at CNN frame rate,
                 per-class decisions upsampled back to T frames).
        """
        batch, time, dim = x.shape
        feats = self.features(x.unsqueeze(1))                   # (B, C, T', F)
        feats = feats.transpose(1, 2).contiguous().flatten(-2)  # (B, T', 512)
        ref = embedding.unsqueeze(1).repeat(1, feats.shape[1], 1)
        feats = torch.cat((feats, ref), dim=2)                  # (B, T', 640)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        feats, _ = self.gru(feats)
        feats = self.fc(feats)
        decision_time = torch.softmax(self.outputlayer(feats), dim=2)
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up
1054
class CDur_CNN_mul_scale_fusion(nn.Module):
    """Multi-scale Cnn10 front-end combined with the reference embedding
    through a Fusion block (instead of concatenation)."""

    def __init__(self, inputdim, outputdim, time_resolution, **kwargs):
        super().__init__()
        # Desired frame count -> pooling factor; anything else uses 0.
        pooling_for_resolution = {125: 8, 250: 4, 500: 2}
        self.features = Cnn10_mul_scale(
            pooling_for_resolution.get(time_resolution, 0))
        # Fusion output keeps the 512-dim CNN features, so the GRU sees 512.
        self.gru = nn.GRU(512, 512, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(1024, 256)
        self.fusion = Fusion(128, 512, 2)
        self.outputlayer = nn.Linear(256, outputdim)
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding, one_hot=None):
        """x: (B, T, D) features; embedding: (B, 128) reference embedding.

        Returns (positive-class prob at CNN frame rate,
                 per-class decisions upsampled back to T frames).
        """
        batch, time, dim = x.shape
        feats = self.features(x.unsqueeze(1))                   # (B, C, T', F)
        feats = feats.transpose(1, 2).contiguous().flatten(-2)  # (B, T', 512)
        ref = embedding.unsqueeze(1).repeat(1, feats.shape[1], 1)
        feats = self.fusion(ref, feats)                         # (B, T', 512)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        feats, _ = self.gru(feats)
        feats = self.fc(feats)
        decision_time = torch.softmax(self.outputlayer(feats), dim=2)
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up
1105
class RaDur_fusion(nn.Module):
    """Reference-aware duration model with optional embedding enhancement.

    A Cnn14 encoder embeds the reference audio; a CDur_CNN_mul_scale_fusion
    detector localizes the target sound in the mixture. When
    ``model_config['enhancement']`` is set, a second detection pass is run
    with an embedding refined from the mixture's own top-scoring frames
    (``orcal_EE``), and the two passes are blended.
    """

    def __init__(self, model_config, inputdim, outputdim, time_resolution, **kwargs):
        super().__init__()
        self.encoder = Cnn14()
        self.detection = CDur_CNN_mul_scale_fusion(inputdim, outputdim, time_resolution)
        self.softmax = nn.Softmax(dim=2)
        # Optional pretrained weights (kept disabled in this revision):
        # if model_config['pre_train']:
        #     self.encoder.load_state_dict(torch.load(model_config['encoder_path'])['model'])
        #     self.detection.load_state_dict(torch.load(model_config['CDur_path']))

        # Query/key projections for attention pooling over the reference…
        self.q = nn.Linear(128,128)
        self.k = nn.Linear(128,128)
        # …and for the embedding-enhancement (EE) attention.
        self.q_ee = nn.Linear(128, 128)
        self.k_ee = nn.Linear(128, 128)
        self.temperature = 11.3 # sqrt(128), scaled dot-product attention
        self.att_pool = model_config['att_pool']        # use attention pooling of ref embeddings
        self.enhancement = model_config['enhancement']  # enable the EE second pass
        self.tao = model_config['tao']                  # score threshold for trusted frames
        self.top = model_config['top']                  # number of top frames to pool
        self.bn = nn.BatchNorm1d(128)
        self.EE_fusion = Fusion(128, 128, 4)

    def get_w(self,q,k):
        """Attention weights of query q (B, 128) over keys k (B, T, 128)."""
        q = self.q(q)
        k = self.k(k)
        q = q.unsqueeze(1)
        attn = torch.bmm(q, k.transpose(1, 2))
        attn = attn/self.temperature
        attn = self.softmax(attn)
        return attn

    def get_w_ee(self,q,k):
        """Same as get_w but with the EE-specific projections."""
        q = self.q_ee(q)
        k = self.k_ee(k)
        q = q.unsqueeze(1)
        attn = torch.bmm(q, k.transpose(1, 2))
        attn = attn/self.temperature
        attn = self.softmax(attn)
        return attn

    def attention_pooling(self, embeddings, mean_embedding):
        """Pool frame embeddings into one vector, weighted by attention of
        the mean embedding over the frames."""
        att_pool_w = self.get_w(mean_embedding,embeddings)
        embedding = torch.bmm(att_pool_w, embeddings).squeeze(1)
        return embedding

    def select_topk_embeddings(self, scores, embeddings, k):
        """Return the k frame embeddings with the highest scores, plus the
        scores themselves (both ordered by descending score)."""
        _, idx_DESC = scores.sort(descending=True, dim=1)
        top_k = _[:,:k]
        idx_topk = idx_DESC[:, :k]
        # Expand indices over the feature axis so gather picks whole frames.
        idx_topk = idx_topk.unsqueeze(2).expand([-1, -1, embeddings.shape[2]])
        selected_embeddings = torch.gather(embeddings, 1, idx_topk)
        return selected_embeddings,top_k

    def sum_with_attention(self, embedding, top_k, selected_embeddings):
        """Weight the selected frame embeddings by (EE attention x score),
        zeroing frames whose score does not exceed ``self.tao``."""
        att_1 = self.get_w_ee(embedding, selected_embeddings)
        att_1 = att_1.squeeze(1)
        larger = top_k > self.tao
        top_k = top_k*larger
        att_1 = att_1*top_k
        att_2 = att_1.unsqueeze(2).repeat(1,1,128)
        Es = selected_embeddings*att_2
        return Es

    def orcal_EE(self, x, embedding, label):
        """Embedding-enhancement pass.

        Runs a first detection with the reference embedding, pools the
        mixture's top-scoring frames into an enhanced embedding, runs a
        second detection with it, and blends both decisions with weights
        derived from the top-frame confidence. ``label`` is unused here.
        """
        batch, time, dim = x.shape

        # Frame-level mixture embeddings, batch-normalized over features.
        mixture_embedding = self.encoder(x)  # (B, T', 128)
        mixture_embedding = mixture_embedding.transpose(1,2)
        mixture_embedding = self.bn(mixture_embedding)
        mixture_embedding = mixture_embedding.transpose(1,2)

        # --- first-stage detection with the reference embedding ---
        x = x.unsqueeze(1) # (b,1,t,d)
        x = self.detection.features(x)
        x = x.transpose(1, 2).contiguous().flatten(-2)  # (B, T', C*F)
        embedding_pre = embedding.unsqueeze(1)
        embedding_pre = embedding_pre.repeat(1, x.shape[1], 1)
        f = self.detection.fusion(embedding_pre, x) # the first stage results
        if not hasattr(self, '_flattened'):
            self.detection.gru.flatten_parameters()
        f, _ = self.detection.gru(f)
        f = self.detection.fc(f)
        decision_time = torch.softmax(self.detection.outputlayer(f),dim=2)

        # Pool the most confident frames into an enhanced embedding.
        selected_embeddings, top_k = self.select_topk_embeddings(decision_time[:,:,0], mixture_embedding, self.top)

        selected_embeddings = self.sum_with_attention(embedding, top_k, selected_embeddings) # add the weight

        mix_embedding = selected_embeddings.mean(1).unsqueeze(1)
        mix_embedding = mix_embedding.repeat(1, x.shape[1], 1)
        embedding = embedding.unsqueeze(1)
        embedding = embedding.repeat(1, x.shape[1], 1)
        # Fuse the pooled mixture embedding with the reference embedding
        # through a learned Fusion block (rather than simple addition).
        mix_embedding = self.EE_fusion(mix_embedding, embedding)

        # --- second-stage detection with the enhanced embedding ---
        f_now = self.detection.fusion(mix_embedding, x)
        f_now, _ = self.detection.gru(f_now)
        f_now = self.detection.fc(f_now)
        decision_time_now = torch.softmax(self.detection.outputlayer(f_now), dim=2)

        # Blend the two passes: the higher the (thresholded) average top-k
        # confidence, the more weight the second pass receives (capped 0.5).
        top_k = top_k.mean(1) # get avg score,higher score will have more weight
        larger = top_k > self.tao
        top_k = top_k * larger
        top_k = top_k/2.0
        neg_w = top_k.unsqueeze(1).unsqueeze(2)
        neg_w = neg_w.repeat(1, decision_time_now.shape[1], decision_time_now.shape[2])
        pos_w = 1-neg_w
        decision_time_final = decision_time*pos_w + neg_w*decision_time_now
        return decision_time_final

    def forward(self, x, ref, label=None):
        """x: (B, T, D) mixture; ref: reference audio features.

        Returns (positive-class prob at CNN frame rate, decisions upsampled
        to T, placeholder logit). NOTE: requires CUDA (``.cuda()`` below).
        """
        batch, time, dim = x.shape
        logit = torch.zeros(1).cuda()  # placeholder, kept for interface parity
        embeddings = self.encoder(ref)
        mean_embedding = embeddings.mean(1)
        if self.att_pool == True:
            # Attention-pool the reference frames (both inputs batch-normed).
            mean_embedding = self.bn(mean_embedding)
            embeddings = embeddings.transpose(1,2)
            embeddings = self.bn(embeddings)
            embeddings = embeddings.transpose(1,2)
            embedding = self.attention_pooling(embeddings, mean_embedding)
        else:
            embedding = mean_embedding
        if self.enhancement == True:
            decision_time = self.orcal_EE(x, embedding, label)
            decision_up = torch.nn.functional.interpolate(
                decision_time.transpose(1, 2),
                time,
                mode='linear',
                align_corners=False).transpose(1, 2)  # upsample back to T frames
            return decision_time[:,:,0], decision_up, logit

        # Plain single-pass detection (no enhancement).
        x = x.unsqueeze(1) # (b,1,t,d)
        x = self.detection.features(x)
        x = x.transpose(1, 2).contiguous().flatten(-2)  # (B, T', C*F)
        embedding = embedding.unsqueeze(1)
        embedding = embedding.repeat(1, x.shape[1], 1)
        x = self.detection.fusion(embedding, x)
        if not hasattr(self, '_flattened'):
            self.detection.gru.flatten_parameters()
        x, _ = self.detection.gru(x)
        x = self.detection.fc(x)
        decision_time = torch.softmax(self.detection.outputlayer(x),dim=2)
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)  # upsample back to T frames
        return decision_time[:,:,0], decision_up, logit
audio_detection/target_sound_detection/src/utils.py ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # !/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # @Time : 2021/3/9 16:33
4
+ # @Author : dongchao yang
5
+ # @File : train.py
6
+
7
+ import collections
8
+ import sys
9
+ from loguru import logger
10
+ from pprint import pformat
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+ import scipy
15
+ import six
16
+ import sklearn.preprocessing as pre
17
+ import torch
18
+ import tqdm
19
+ import yaml
20
+
21
+ from scipy.interpolate import interp1d
22
+
23
def parse_config_or_kwargs(config_file, **kwargs):
    """Load a YAML config file and overlay keyword overrides.

    :param config_file: path to a YAML file holding the parameters
    :param kwargs: overrides/additions applied on top of the file values
    :return: merged parameter dict (kwargs win on key collisions)
    """
    with open(config_file) as handle:
        file_values = yaml.load(handle, Loader=yaml.FullLoader)
    return dict(file_values, **kwargs)
+
34
def find_contiguous_regions(activity_array):
    """Find contiguous True runs in a boolean numpy array.

    Adapted from dcase_util's DecisionEncoder (kept standalone to avoid the
    sndfile import chain that class drags in on clusters).

    :param activity_array: 1-d bool array of frame activity
    :return: (N, 2) int array of [onset, offset) index pairs
    """
    # Indices where the activity flips, shifted to point at the new value.
    boundaries = np.logical_xor(activity_array[1:],
                                activity_array[:-1]).nonzero()[0] + 1
    if activity_array[0]:
        # Array starts active: the first region begins at index 0.
        boundaries = np.r_[0, boundaries]
    if activity_array[-1]:
        # Array ends active: close the last region at the array length.
        boundaries = np.r_[boundaries, activity_array.size]
    # Pair up onsets and offsets.
    return boundaries.reshape((-1, 2))
+
55
def split_train_cv(
        data_frame: pd.DataFrame,
        frac: float = 0.9,
        y=None,  # Only for stratified, computes necessary split
        **kwargs):
    """Split a dataframe into train and cross-validation partitions.

    :param data_frame: source data
    :type data_frame: pd.DataFrame
    :param frac: fraction of the data assigned to training
    :type frac: float
    :param y: labels, only used when mode='stratified'

    Supported kwargs: mode='urbansed' groups rows by filename stem before
    splitting; mode='stratified' uses iterative stratified sampling;
    anything else falls back to a seeded random split.
    """
    mode = kwargs.get('mode', None)
    if mode == 'urbansed':  # Filenames are DATA_-1 DATA_-2 etc
        # Group all augmented copies of a clip under one id so a clip never
        # straddles the train/cv boundary.
        data_frame.loc[:, 'id'] = data_frame.groupby(
            data_frame['filename'].str.split('_').apply(
                lambda parts: '_'.join(parts[:-1]))).ngroup()
        order = np.random.permutation(data_frame['id'].nunique())
        cut = int(frac * len(order))
        train_data = data_frame[data_frame['id'].isin(order[:cut])]
        cv_data = data_frame[data_frame['id'].isin(order[cut:])]
        del train_data['id']
        del cv_data['id']
    elif mode == 'stratified':
        # Stratified multilabel sampling over the provided y.
        from skmultilearn.model_selection import iterative_train_test_split
        index_train, _, index_cv, _ = iterative_train_test_split(
            data_frame.index.values.reshape(-1, 1), y, test_size=1. - frac)
        train_data = data_frame[data_frame.index.isin(index_train.squeeze())]
        cv_data = data_frame[data_frame.index.isin(index_cv.squeeze())]
    else:
        # Simple random split with a fixed seed for reproducibility.
        train_data = data_frame.sample(frac=frac, random_state=10)
        cv_data = data_frame[~data_frame.index.isin(train_data.index)]
    return train_data, cv_data
+
95
def pprint_dict(in_dict, outputfun=sys.stdout.write, formatter='yaml'):
    """Pretty-print a dict line by line through ``outputfun``.

    :param in_dict: dict to print
    :param outputfun: callable invoked once per output line, defaults to sys.stdout
    :param formatter: 'yaml' (yaml.dump) or 'pretty' (pprint.pformat)
    :raises ValueError: for an unknown formatter (the original fell through
        with ``format_fun`` unbound and raised UnboundLocalError instead)
    """
    if formatter == 'yaml':
        format_fun = yaml.dump
    elif formatter == 'pretty':
        format_fun = pformat
    else:
        raise ValueError("Unknown formatter: {}".format(formatter))
    for line in format_fun(in_dict).split('\n'):
        outputfun(line)
+
108
def getfile_outlogger(outputfile):
    """Configure the loguru logger for stderr plus an optional file sink.

    :param outputfile: path of an extra log file; falsy keeps stderr only
    :return: the configured logger
    """
    fmt = "[<green>{time:YYYY-MM-DD HH:mm:ss}</green>] {message}"
    logger.configure(handlers=[{"sink": sys.stderr, "format": fmt}])
    if outputfile:
        # enqueue=True makes the file sink safe across processes/threads.
        logger.add(outputfile, enqueue=True, format=fmt)
    return logger
+ # according label, get encoder
116
def train_labelencoder(labels: pd.Series, sparse=True):
    """Fit a MultiLabelBinarizer on raw labels.

    :param labels: pd.Series of raw labels, e.g. "Speech,Water" strings,
        numpy arrays, or other iterables of label names
    :param sparse: whether the encoder emits sparse output
    :return: the fitted encoder
    :raises ValueError: if the label type is not recognized (the original
        fell through with ``label_array`` unbound and raised NameError)
    """
    from collections.abc import Iterable  # collections.Iterable was removed in Py3.10
    assert isinstance(labels, pd.Series), "Labels need to be series"
    # Use positional access like encode_labels does; labels[0] breaks on
    # a Series whose index does not contain the label 0.
    instance = labels.iloc[0]
    if isinstance(instance, str):
        # In case of using non processed strings, e.g., Vaccum, Speech
        label_array = labels.str.split(',').values.tolist()  # split label according to ','
    elif isinstance(instance, np.ndarray):
        # Encoder does not like to see numpy array
        label_array = [lab.tolist() for lab in labels]
    elif isinstance(instance, Iterable):
        label_array = labels
    else:
        raise ValueError(
            "Unsupported label type: {}".format(type(instance).__name__))
    encoder = pre.MultiLabelBinarizer(sparse_output=sparse)
    encoder.fit(label_array)
    return encoder
+
139
def encode_labels(labels: pd.Series, encoder=None, sparse=True):
    """Encode raw labels to a many-hot matrix.

    :param labels: pd.Series of raw labels, e.g. "Speech,Water" strings,
        numpy arrays, or other iterables of label names
    :param encoder: an already-fitted encoder; when None a new
        MultiLabelBinarizer is fitted on these labels
    :param sparse: whether a newly created encoder emits sparse output
    :return: (encoded label matrix, encoder)
    :raises ValueError: if the label type is not recognized (the original
        fell through with ``label_array`` unbound and raised NameError)
    """
    from collections.abc import Iterable  # collections.Iterable was removed in Py3.10
    assert isinstance(labels, pd.Series), "Labels need to be series"
    instance = labels.iloc[0]
    if isinstance(instance, str):
        # In case of using non processed strings, e.g., Vaccum, Speech
        label_array = labels.str.split(',').values.tolist()
    elif isinstance(instance, np.ndarray):
        # Encoder does not like to see numpy array
        label_array = [lab.tolist() for lab in labels]
    elif isinstance(instance, Iterable):
        label_array = labels
    else:
        raise ValueError(
            "Unsupported label type: {}".format(type(instance).__name__))
    # label_array is now a list of per-sample label collections (strings).
    if not encoder:
        encoder = pre.MultiLabelBinarizer(sparse_output=sparse)  # fit a fresh encoder when none is given
        encoder.fit(label_array)
    labels_encoded = encoder.transform(label_array)  # strings -> many-hot rows
    return labels_encoded, encoder
+
169
def decode_with_timestamps(events, labels: np.array):
    """Decode binary label array(s) into (event, onset, offset) tuples.

    :param events: event name (1-d case) or sequence of event names
        aligned with the rows of a 2-d ``labels`` array
    :param labels: 1-d activity array, or 2-d (events x time)
    :return: list of (event, onset, offset) tuples, or a list of such
        lists for 2-d input
    """
    if labels.ndim == 2:
        # One activity row per event: decode each row with its own name.
        return [_decode_with_timestamps(events[row], labels[row])
                for row in range(labels.shape[0])]
    return _decode_with_timestamps(events, labels)
+
189
def median_filter(x, window_size, threshold=0.5):
    """Binarize predictions, then median-filter along the time axis.

    :param x: prediction probabilities in [0, 1]; shape (B, T, C),
        (B, T), (T, C) or (T,)
    :param window_size: median window length (frames)
    :param threshold: binarization threshold
    :return: filtered binary array with the same shape as ``x``
    :raises ValueError: for unsupported shapes
    """
    x = binarize(x, threshold=threshold)  # transfer to 0 or 1
    if x.ndim == 3:
        size = (1, window_size, 1)
    elif x.ndim == 2 and x.shape[0] == 1:
        # Assume input is class-specific median filtering
        # E.g, Batch x Time [1, 501]
        size = (1, window_size)
    elif x.ndim == 2 and x.shape[0] > 1:
        # Assume input is standard median pooling, class-independent
        # E.g., Time x Class [501, 10]
        size = (window_size, 1)
    elif x.ndim == 1:
        # Plain 1-d filtering along the only axis. (The original left
        # `size` unbound here and crashed with UnboundLocalError.)
        size = (window_size,)
    else:
        raise ValueError("Unsupported input shape: {}".format(x.shape))
    return scipy.ndimage.median_filter(x, size=size)
+
210
def _decode_with_timestamps(events, labels):
    """Decode a single 1-d zero/one label sequence into event tuples.

    :param events: event name attached to every detected region
    :param labels: 1-d binary array
    :return: list of ``(events, onset_frame, offset_frame)`` tuples
    """
    change_indices = find_contiguous_regions(labels)
    return [(events, onset, offset) for onset, offset in change_indices]
221
+
222
def inverse_transform_labels(encoder, pred):
    """Map binary predictions back to label names via the fitted encoder.

    :param encoder: fitted MultiLabelBinarizer-like object
    :param pred: 2-d prediction array, or 3-d batch of such arrays
    :return: decoded labels (a list of per-sample results when 3-d)
    """
    if pred.ndim != 3:
        return encoder.inverse_transform(pred)
    return [encoder.inverse_transform(sample) for sample in pred]
227
+
228
+
229
def binarize(pred, threshold=0.5):
    """Threshold probabilities to a zero/one array.

    3-d inputs are treated as a batch and thresholded per sample.

    :param pred: 2-d probability array, or 3-d batch of them
    :param threshold: binarization threshold
    :return: binarized array
    """
    if pred.ndim != 3:
        return pre.binarize(pred, threshold=threshold)
    # Batch-wise: binarize each (time, class) slice independently
    return np.array(
        [pre.binarize(sample, threshold=threshold) for sample in pred])
236
+
237
+
238
def double_threshold(x, high_thres, low_thres, n_connect=1):
    """Apply hysteresis (double) thresholding over an n-dim array.

    :param x: input array (1-d, 2-d, or 3-d)
    :param high_thres: high threshold value
    :param low_thres: low threshold value
    :param n_connect: clusters at distance <= n_connect are merged
    :return: zero/one array of the same shape as ``x``
    """
    assert x.ndim <= 3, "Whoops something went wrong with the input ({}), check if its <= 3 dims".format(
        x.shape)
    # Time axis sits at 1 for (batch, time, dim) and at 0 for
    # (time, dim) and (time,) inputs.
    apply_dim = 1 if x.ndim == 3 else 0
    return np.apply_along_axis(
        lambda arr: _double_threshold(
            arr, high_thres, low_thres, n_connect=n_connect),
        axis=apply_dim,
        arr=x)
261
+
262
+
263
def _double_threshold(x, high_thres, low_thres, n_connect=1, return_arr=True):
    """Hysteresis thresholding over a 1-d array.

    Regions above ``low_thres`` survive only if they contain at least one
    value above ``high_thres``; surviving regions closer than
    ``n_connect`` are then merged.

    :param x: 1-d input array
    :param high_thres: high (seed) threshold
    :param low_thres: low (extension) threshold
    :param n_connect: maximal gap between clusters that still get merged
    :param return_arr: if True, return a zero/one array the same size as
        ``x``; otherwise return the list of (onset, offset) index pairs
    """
    assert x.ndim == 1, "Input needs to be 1d"
    seed_indices = np.where(x > high_thres)[0]  # positions above the high threshold
    candidate_regions = find_contiguous_regions(x > low_thres)
    # Keep only candidate regions that contain at least one seed position
    surviving = [
        pair for pair in candidate_regions
        if ((pair[0] <= seed_indices) & (seed_indices <= pair[1])).any()
    ]
    # Merge regions whose gap is <= n_connect
    surviving = connect_(surviving, n_connect)
    if not return_arr:
        return surviving
    zero_one_arr = np.zeros_like(x, dtype=int)
    for onset, offset in surviving:
        zero_one_arr[onset:offset] = 1
    return zero_one_arr
291
+
292
+
293
def connect_clusters(x, n=1):
    """Connect nearby zero/one clusters in ``x``, dispatching on dimensionality.

    :param x: zero/one array, 1-d or higher
    :param n: maximal gap (in frames) that still gets bridged
    """
    if x.ndim == 1:
        return connect_clusters_(x, n)
    if x.ndim >= 2:
        # Applied along axis -2 — presumably the time axis for
        # (time, class) shaped input; TODO confirm against callers.
        return np.apply_along_axis(lambda row: connect_clusters_(row, n=n), -2, x)
298
+
299
+
300
def connect_clusters_(x, n=1):
    """Connect clustered 1-d predictions (0/1) separated by <= ``n`` frames.

    :param x: input array in zero-one format
    :param n: number of frames to skip until connection can be made
    :return: zero/one array with small gaps filled in
    """
    assert x.ndim == 1, "input needs to be 1d"
    merged = connect_(find_contiguous_regions(x), n=n)
    zero_one_arr = np.zeros_like(x, dtype=int)
    for onset, offset in merged:
        zero_one_arr[onset:offset] = 1
    return zero_one_arr
314
+
315
+
316
def connect_(pairs, n=1):
    """Merge adjacent clusters whose mutual distance is <= ``n``.

    :param pairs: sequence of (start, end) clusters, e.g. [(1, 5), (7, 10)]
    :param n: maximal distance between two clusters that still get merged
    :return: list of merged (start, end) tuples
    """
    if len(pairs) == 0:
        return []
    merged = []
    run_start, run_end = pairs[0]
    for prev, nxt in zip(pairs, pairs[1:]):
        run_end = nxt[1]
        if nxt[0] - prev[1] > n:
            # Gap too large: close the current run and start a new one
            merged.append((run_start, prev[1]))
            run_start = nxt[0]
    merged.append((run_start, run_end))
    return merged
336
+
337
+
338
def predictions_to_time(df, ratio):
    """Scale frame-index onsets/offsets in ``df`` into time units, in place.

    :param df: DataFrame with ``onset`` and ``offset`` columns
    :param ratio: time units per frame
    :return: the same DataFrame, mutated
    """
    df["onset"] = df["onset"] * ratio
    df["offset"] = df["offset"] * ratio
    return df
342
+
343
def upgrade_resolution(arr, scale):
    """Linearly upsample ``arr`` along its first axis by a factor of ``scale``.

    :param arr: input array; interpolation runs along axis 0
    :param scale: upsampling factor; output has ``scale * arr.shape[0]`` rows
    :return: upsampled array
    """
    # Removed leftover debug print of arr.shape that polluted stdout.
    frame_idx = np.arange(0, arr.shape[0])
    f = interp1d(frame_idx, arr, kind='linear', axis=0, fill_value='extrapolate')
    # Sample at 1/scale spacing; points past the last frame are extrapolated
    scale_x = np.arange(0, arr.shape[0], 1 / scale)
    up_scale = f(scale_x)
    return up_scale
350
+ # a = [0.1,0.2,0.3,0.8,0.4,0.1,0.3,0.9,0.4]
351
+ # a = np.array(a)
352
+ # b = a>0.2
353
+ # _double_threshold(a,0.7,0.2)
audio_detection/target_sound_detection/useful_ckpts/tsd/ref_mel.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e4525ad12621117c3a0fcfe974fd55e51583cd219106bf510438f4bec4edc18
3
+ size 140604911
audio_detection/target_sound_detection/useful_ckpts/tsd/run_config.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1331dab1e4c3ac2bc5850156f2000a95fe333bdf06d08ce9b490550726548ab0
3
+ size 2479
audio_detection/target_sound_detection/useful_ckpts/tsd/run_model_7_loss=-0.0724.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9b44e30c4800462c177806bbd7009953d70d531c873e3791ca9aa85375d524d
3
+ size 343538489
audio_detection/target_sound_detection/useful_ckpts/tsd/text_emb.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de482358747778181e4dc530ec61ae94f53ae0b202ac92e99491fe4ceb3cbb1c
3
+ size 255398