diff --git a/data/meta_data/test-frequency-control_onoffFromGpt_multi-event.json b/data/meta_data/test-frequency-control_onoffFromGpt_multi-event.json new file mode 100644 index 0000000000000000000000000000000000000000..de9e98acac39a0935f7baee5baa9a6b98eb8cd8d --- /dev/null +++ b/data/meta_data/test-frequency-control_onoffFromGpt_multi-event.json @@ -0,0 +1,200 @@ +{"filepath": "data/multi_event_test/syn_1.wav", "onoffCaption": "cat meowing at 0.5-2.0, 3.0-4.5 and whistling at 5.0-6.5 and explosion at 7.0-8.0, 8.5-9.5", "frequencyCaption": "cat meowing two times and whistling one times and explosion two times"} +{"filepath": "data/multi_event_test/syn_6.wav", "onoffCaption": "whistling at 2.0-6.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_8.wav", "onoffCaption": "cow mooing at 1.954-4.954, 6.219-9.219", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_11.wav", "onoffCaption": "burping belching at 0.0-2.0, 2.5-4.5 and dog barking at 5.0-7.0", "frequencyCaption": "burping belching two times and dog barking one times"} +{"filepath": "data/multi_event_test/syn_16.wav", "onoffCaption": "duck quacking at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_18.wav", "onoffCaption": "door knocking at 0.138-2.518, 3.708-6.088 and door slamming at 2.798-4.798", "frequencyCaption": "door knocking two times and door slamming one times"} +{"filepath": "data/multi_event_test/syn_21.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_26.wav", "onoffCaption": "whistling at 0.2-4.2", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_28.wav", "onoffCaption": "cow mooing at 0.0-1.0 and spraying at 1.0-2.0", "frequencyCaption": "cow mooing one times and spraying one times"} +{"filepath": "data/multi_event_test/syn_32.wav", 
"onoffCaption": "duck quacking at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_35.wav", "onoffCaption": "car horn honking at 0.5-2.5, 3.0-5.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_test/syn_43.wav", "onoffCaption": "dog barking at 0.0-2.0, 2.5-4.5 and burping belching at 5.0-7.0, 7.5-9.5 and explosion at 4.8-7.8", "frequencyCaption": "dog barking two times and burping belching two times and explosion one times"} +{"filepath": "data/multi_event_test/syn_44.wav", "onoffCaption": "sneeze at 0.5-1.5", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_50.wav", "onoffCaption": "car horn honking at 0.0-2.0, 3.0-5.0 and sneeze at 6.0-7.0 and train horn at 8.0-10.0", "frequencyCaption": "car horn honking two times and sneeze one times and train horn one times"} +{"filepath": "data/multi_event_test/syn_57.wav", "onoffCaption": "dog barking at 0.0-2.0, 3.0-5.0 and cow mooing at 6.0-9.0", "frequencyCaption": "dog barking two times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_59.wav", "onoffCaption": "door slamming at 0.0-1.0 and explosion at 1.5-4.5, 5.0-8.0", "frequencyCaption": "door slamming one times and explosion two times"} +{"filepath": "data/multi_event_test/syn_60.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_test/syn_67.wav", "onoffCaption": "whistling at 0.204-5.379", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_69.wav", "onoffCaption": "door knocking at 0-1, 2-3, 4-5", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_73.wav", "onoffCaption": "door knocking at 0-1, 1-2, 2-3 and sneeze at 3-4, 4-5", "frequencyCaption": "door knocking three times and sneeze two times"} +{"filepath": "data/multi_event_test/syn_74.wav", "onoffCaption": "spraying at 
0.5-1.0, 1.5-2.0 and gunshot at 3.0-4.0, 5.0-6.0, 7.0-8.0", "frequencyCaption": "spraying two times and gunshot three times"} +{"filepath": "data/multi_event_test/syn_82.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_91.wav", "onoffCaption": "gunshot at 0-1, 2-3", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_test/syn_96.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_98.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_test/syn_101.wav", "onoffCaption": "dog barking at 0.464-2.464", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_test/syn_106.wav", "onoffCaption": "burping belching at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_test/syn_108.wav", "onoffCaption": "sneeze at 0.5-1.5", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_112.wav", "onoffCaption": "woman laughing at 0.004-2.372, 3.672-6.653", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_115.wav", "onoffCaption": "duck quacking at 0.3-2.3 and tapping clicking clanking at 2.5-5.5, 6.0-9.0", "frequencyCaption": "duck quacking one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_122.wav", "onoffCaption": "door knocking at 0-1, 3-4, 6-7", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_125.wav", "onoffCaption": "cow mooing at 1.5-4.5, 5.5-8.5", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_131.wav", "onoffCaption": "whistling at 0-1, 2-3 and cat meowing at 1-2", "frequencyCaption": "whistling two times and cat 
meowing one times"} +{"filepath": "data/multi_event_test/syn_136.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 3.5-5.5 and whistling at 6.0-8.0, 8.5-9.5 and woman laughing at 2.0-4.0", "frequencyCaption": "sheep goat bleating two times and whistling two times and woman laughing one times"} +{"filepath": "data/multi_event_test/syn_138.wav", "onoffCaption": "gunshot at 0.0-1.0 and tapping clicking clanking at 1.5-5.0", "frequencyCaption": "gunshot one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_140.wav", "onoffCaption": "door knocking at 0.00-2.00, 3.00-5.00, 6.00-8.00", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_147.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_149.wav", "onoffCaption": "car horn honking at 0.0-2.0, 3.0-5.0 and spraying at 5.5-6.0, 7.0-7.5", "frequencyCaption": "car horn honking two times and spraying two times"} +{"filepath": "data/multi_event_test/syn_153.wav", "onoffCaption": "cat meowing at 0-1.0", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_test/syn_154.wav", "onoffCaption": "cat meowing at 0.5-1.5 and door knocking at 2-3.5", "frequencyCaption": "cat meowing one times and door knocking one times"} +{"filepath": "data/multi_event_test/syn_163.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3, 4-5", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_test/syn_164.wav", "onoffCaption": "whistling at 0.204-5.379, 7.724-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_test/syn_170.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_177.wav", "onoffCaption": "thump thud at 0-1 and cow mooing at 1-2", "frequencyCaption": "thump thud one times and cow 
mooing one times"} +{"filepath": "data/multi_event_test/syn_179.wav", "onoffCaption": "cow mooing at 1.954-4.602, 5.719-8.729", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_181.wav", "onoffCaption": "cow mooing at 1.0-3.0, 4.0-6.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_186.wav", "onoffCaption": "gunshot at 0.0-1.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_test/syn_188.wav", "onoffCaption": "gunshot at 0-1, 1-2 and duck quacking at 2-3", "frequencyCaption": "gunshot two times and duck quacking one times"} +{"filepath": "data/multi_event_test/syn_192.wav", "onoffCaption": "spraying at 0.0-1.0, 2.0-3.0", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_test/syn_195.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_test/syn_3.wav", "onoffCaption": "tapping clicking clanking at 0.5-3.0, 4.0-7.5", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_4.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_13.wav", "onoffCaption": "duck quacking at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_14.wav", "onoffCaption": "sneeze at 0.38-1.38", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_23.wav", "onoffCaption": "sneeze at 0.5-1.5, 2.5-3.5", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_24.wav", "onoffCaption": "woman laughing at 2.782-5.368", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_test/syn_30.wav", "onoffCaption": "burping belching at 0.871-3.871, 4.871-7.871", "frequencyCaption": "burping belching two times"} 
+{"filepath": "data/multi_event_test/syn_37.wav", "onoffCaption": "thump thud at 0.0-1.5, 5.0-6.5 and door knocking at 1.5-3.5, 6.5-8.5 and burping belching at 3.5-4.5", "frequencyCaption": "thump thud two times and door knocking two times and burping belching one times"} +{"filepath": "data/multi_event_test/syn_39.wav", "onoffCaption": "train horn at 0.0-2.0, 2.5-4.5", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_test/syn_41.wav", "onoffCaption": "thump thud at 0.0-1.0", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_test/syn_48.wav", "onoffCaption": "cat meowing at 0-1, 1-2, 2-3", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_test/syn_52.wav", "onoffCaption": "gunshot at 0.0-1.0 and duck quacking at 1.5-2.5 and tapping clicking clanking at 3.0-4.0", "frequencyCaption": "gunshot one times and duck quacking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_55.wav", "onoffCaption": "sneeze at 1.3-2.403, 4.759-6.442", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_62.wav", "onoffCaption": "woman laughing at 0.004-2.372, 3.672-6.653", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_65.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.1-4.1", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_test/syn_71.wav", "onoffCaption": "door slamming at 0.0-1.0, 2.0-3.0 and whistling at 4.0-8.0", "frequencyCaption": "door slamming two times and whistling one times"} +{"filepath": "data/multi_event_test/syn_76.wav", "onoffCaption": "dog barking at 0.464-2.464", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_test/syn_78.wav", "onoffCaption": "explosion at 0.0-2.0, 2.5-4.5 and duck quacking at 5.0-7.0, 7.5-9.5", "frequencyCaption": "explosion two times and duck quacking two times"} +{"filepath": 
"data/multi_event_test/syn_80.wav", "onoffCaption": "door slamming at 0.0-1.0 and sheep goat bleating at 2.0-4.0", "frequencyCaption": "door slamming one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_85.wav", "onoffCaption": "door knocking at 2.047-4.422", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_test/syn_87.wav", "onoffCaption": "explosion at 1.773-4.034, 5.15-7.411", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_89.wav", "onoffCaption": "car horn honking at 0.0-2.0 and cat meowing at 2.5-4.0", "frequencyCaption": "car horn honking one times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_93.wav", "onoffCaption": "dog barking at 0-2, 2-4", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_94.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.5-4.5, 5.0-7.0 and thump thud at 1.0-3.0, 4.0-6.0 and sheep goat bleating at 2.0-4.0, 7.0-9.0", "frequencyCaption": "gunshot three times and thump thud two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_103.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_104.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_test/syn_110.wav", "onoffCaption": "train horn at 0-1 and duck quacking at 1-2 and cow mooing at 2-3", "frequencyCaption": "train horn one times and duck quacking one times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_117.wav", "onoffCaption": "sheep goat bleating at 1.0-3.0, 4.5-6.5", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_119.wav", "onoffCaption": "train horn at 0.0-2.0 and door knocking at 2.5-4.5, 5.0-7.0", "frequencyCaption": "train horn one times and door knocking two times"} 
+{"filepath": "data/multi_event_test/syn_120.wav", "onoffCaption": "burping belching at 0.871-2.871, 3.871-5.871", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_127.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_129.wav", "onoffCaption": "door knocking at 0-1, 1-2, 2-3", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_133.wav", "onoffCaption": "duck quacking at 2.203-4.203, 5.361-7.361", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_test/syn_134.wav", "onoffCaption": "car horn honking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_test/syn_142.wav", "onoffCaption": "sneeze at 0.5-1.5, 2.0-3.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_145.wav", "onoffCaption": "door knocking at 0.002-2.092, 2.842-5.601 and whistling at 1.9-10.0", "frequencyCaption": "door knocking two times and whistling one times"} +{"filepath": "data/multi_event_test/syn_151.wav", "onoffCaption": "dog barking at 0.121-2.121, 3.824-5.824, 7.767-9.767", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_156.wav", "onoffCaption": "car horn honking at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_test/syn_158.wav", "onoffCaption": "tapping clicking clanking at 1.5-4.5, 5.5-8.5", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_161.wav", "onoffCaption": "spraying at 0-1, 2-3", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_test/syn_166.wav", "onoffCaption": "woman laughing at 1.672-3.955", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_test/syn_168.wav", "onoffCaption": "sheep goat bleating at 
0.56-2.56", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_172.wav", "onoffCaption": "door knocking at 0-1, 1-2, 2-3", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_175.wav", "onoffCaption": "cow mooing at 0-3 and spraying at 3-6", "frequencyCaption": "cow mooing one times and spraying one times"} +{"filepath": "data/multi_event_test/syn_183.wav", "onoffCaption": "explosion at 0.0-2.0, 2.1-4.1", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_184.wav", "onoffCaption": "sheep goat bleating at 0-1", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_190.wav", "onoffCaption": "whistling at 0.0-1.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_197.wav", "onoffCaption": "tapping clicking clanking at 0.032-2.032, 2.532-4.532", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_199.wav", "onoffCaption": "duck quacking at 0.0-2.0 and cat meowing at 2.5-4.5", "frequencyCaption": "duck quacking one times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_200.wav", "onoffCaption": "explosion at 1.0-3.0, 4.0-6.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_2.wav", "onoffCaption": "door knocking at 0.0-1.0", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_test/syn_5.wav", "onoffCaption": "burping belching at 0.359-2.774", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_test/syn_12.wav", "onoffCaption": "sheep goat bleating at 0.0-2.0 and sneeze at 2.5-3.5", "frequencyCaption": "sheep goat bleating one times and sneeze one times"} +{"filepath": "data/multi_event_test/syn_15.wav", "onoffCaption": "tapping clicking clanking at 2.992-6.432", "frequencyCaption": "tapping clicking clanking one times"} 
+{"filepath": "data/multi_event_test/syn_22.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_25.wav", "onoffCaption": "burping belching at 0.871-3.871, 4.391-7.391", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_31.wav", "onoffCaption": "woman laughing at 0-1, 2-3", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_36.wav", "onoffCaption": "door slamming at 0.5-1.5, 2.0-3.0, 3.5-4.5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_38.wav", "onoffCaption": "cat meowing at 0-1, 2-3, 4-5", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_test/syn_40.wav", "onoffCaption": "door knocking at 0.138-2.518, 3.708-6.088 and cow mooing at 6.91-9.447", "frequencyCaption": "door knocking two times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_46.wav", "onoffCaption": "door slamming at 1.145-2.085, 3.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_47.wav", "onoffCaption": "spraying at 0.0-1.0, 2.0-3.0, 4.0-5.0 and cow mooing at 6.0-8.0, 8.5-10.0", "frequencyCaption": "spraying three times and cow mooing two times"} +{"filepath": "data/multi_event_test/syn_49.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 3.0-5.0 and tapping clicking clanking at 0.0-4.0, 5.5-9.5", "frequencyCaption": "sheep goat bleating two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_51.wav", "onoffCaption": "train horn at 0.873-4.633, 5.147-8.907", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_test/syn_53.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_54.wav", "onoffCaption": "train horn at 0.0-2.0, 2.5-4.5", 
"frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_test/syn_63.wav", "onoffCaption": "train horn at 0-1 and cat meowing at 2-3 and dog barking at 4-5", "frequencyCaption": "train horn one times and cat meowing one times and dog barking one times"} +{"filepath": "data/multi_event_test/syn_64.wav", "onoffCaption": "sheep goat bleating at 0.56-2.56", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_70.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_77.wav", "onoffCaption": "cow mooing at 0.0-3.0", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_test/syn_79.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_81.wav", "onoffCaption": "gunshot at 0.0-2.0, 3.0-5.0, 6.0-8.0", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_86.wav", "onoffCaption": "whistling at 0-1 and woman laughing at 1-3, 3-5", "frequencyCaption": "whistling one times and woman laughing two times"} +{"filepath": "data/multi_event_test/syn_88.wav", "onoffCaption": "sheep goat bleating at 1.0-3.0, 4.0-6.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_92.wav", "onoffCaption": "door slamming at 0.0-1.0, 2.0-3.0, 4.0-5.0 and tapping clicking clanking at 6.0-7.0", "frequencyCaption": "door slamming three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_95.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_102.wav", "onoffCaption": "door knocking at 1.973-5.029, 6.285-9.132", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_test/syn_105.wav", "onoffCaption": "train horn at 0.0-1.0, 2.0-3.0, 
4.0-5.0", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_test/syn_111.wav", "onoffCaption": "whistling at 0.204-2.79, 4.0-6.586 and door slamming at 7.0-8.0", "frequencyCaption": "whistling two times and door slamming one times"} +{"filepath": "data/multi_event_test/syn_116.wav", "onoffCaption": "burping belching at 1.0-3.0, 4.0-6.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_118.wav", "onoffCaption": "sneeze at 0.0-1.0", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_121.wav", "onoffCaption": "car horn honking at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_test/syn_123.wav", "onoffCaption": "sheep goat bleating at 0.65-2.65, 3.65-5.65", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_126.wav", "onoffCaption": "sneeze at 0.373-2.332 and car horn honking at 1.03-5.542, 6.081-10.0", "frequencyCaption": "sneeze one times and car horn honking two times"} +{"filepath": "data/multi_event_test/syn_128.wav", "onoffCaption": "sheep goat bleating at 1.0-3.0 and door knocking at 3.5-5.5, 6.0-8.0", "frequencyCaption": "sheep goat bleating one times and door knocking two times"} +{"filepath": "data/multi_event_test/syn_132.wav", "onoffCaption": "sheep goat bleating at 0.0-2.0 and spraying at 2.5-3.0, 4.0-4.5, 5.5-6.0 and duck quacking at 6.5-7.5, 8.0-9.0, 9.5-10.0", "frequencyCaption": "sheep goat bleating one times and spraying three times and duck quacking three times"} +{"filepath": "data/multi_event_test/syn_135.wav", "onoffCaption": "tapping clicking clanking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_139.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_test/syn_143.wav", 
"onoffCaption": "spraying at 0.0-1.0 and explosion at 1.5-4.5", "frequencyCaption": "spraying one times and explosion one times"} +{"filepath": "data/multi_event_test/syn_144.wav", "onoffCaption": "duck quacking at 0-1, 2-3, 4-5", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_150.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_157.wav", "onoffCaption": "train horn at 0-3.5", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_test/syn_159.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_160.wav", "onoffCaption": "spraying at 0.0-1.0 and whistling at 1.0-3.0", "frequencyCaption": "spraying one times and whistling one times"} +{"filepath": "data/multi_event_test/syn_167.wav", "onoffCaption": "burping belching at 0.0-2.0, 2.5-4.5 and gunshot at 5.0-7.0", "frequencyCaption": "burping belching two times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_169.wav", "onoffCaption": "sneeze at 0.373-2.332, 3.255-5.716", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_173.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3, 4-5", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_test/syn_174.wav", "onoffCaption": "dog barking at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_176.wav", "onoffCaption": "woman laughing at 1.625-3.98, 4.735-6.981", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_182.wav", "onoffCaption": "cow mooing at 0.0-3.0 and gunshot at 4.0-5.0", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_185.wav", "onoffCaption": "spraying at 
0.22-0.847 and door knocking at 2.797-5.334", "frequencyCaption": "spraying one times and door knocking one times"} +{"filepath": "data/multi_event_test/syn_189.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_191.wav", "onoffCaption": "burping belching at 0.0-2.0, 2.5-4.5", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_193.wav", "onoffCaption": "cow mooing at 1.0-3.0, 4.0-6.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_196.wav", "onoffCaption": "spraying at 0.0-1.0", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_test/syn_198.wav", "onoffCaption": "gunshot at 0.0-2.0, 3.0-5.0, 6.0-8.0", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_7.wav", "onoffCaption": "spraying at 0.0-1.0 and burping belching at 1.5-2.5", "frequencyCaption": "spraying one times and burping belching one times"} +{"filepath": "data/multi_event_test/syn_9.wav", "onoffCaption": "cow mooing at 0.0-3.0", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_test/syn_10.wav", "onoffCaption": "door knocking at 2-4, 5-7", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_test/syn_17.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_19.wav", "onoffCaption": "gunshot at 0.0-1.0 and spraying at 1.5-2.5", "frequencyCaption": "gunshot one times and spraying one times"} +{"filepath": "data/multi_event_test/syn_20.wav", "onoffCaption": "tapping clicking clanking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_27.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": 
"data/multi_event_test/syn_29.wav", "onoffCaption": "tapping clicking clanking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_33.wav", "onoffCaption": "dog barking at 0.0-2.0, 2.5-4.5 and car horn honking at 5.0-7.0", "frequencyCaption": "dog barking two times and car horn honking one times"} +{"filepath": "data/multi_event_test/syn_34.wav", "onoffCaption": "sheep goat bleating at 1.575-3.575", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_42.wav", "onoffCaption": "tapping clicking clanking at 0.0-2.0, 2.5-4.5", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_45.wav", "onoffCaption": "cat meowing at 0.5-1.5 and train horn at 2.0-6.0", "frequencyCaption": "cat meowing one times and train horn one times"} +{"filepath": "data/multi_event_test/syn_56.wav", "onoffCaption": "tapping clicking clanking at 0.961-4.401, 6.37-9.81", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_58.wav", "onoffCaption": "door slamming at 0.355-2.581", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_test/syn_61.wav", "onoffCaption": "explosion at 0.5-3.5, 4.0-7.0 and train horn at 7.5-10.0 and woman laughing at 1.0-4.0", "frequencyCaption": "explosion two times and train horn one times and woman laughing one times"} +{"filepath": "data/multi_event_test/syn_66.wav", "onoffCaption": "sheep goat bleating at 0.56-2.56", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_68.wav", "onoffCaption": "car horn honking at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_test/syn_72.wav", "onoffCaption": "spraying at 0.0-0.6, 1.0-1.6 and thump thud at 2.0-3.6 and dog barking at 4.0-6.0", "frequencyCaption": "spraying two times and thump thud one 
times and dog barking one times"} +{"filepath": "data/multi_event_test/syn_75.wav", "onoffCaption": "explosion at 0.5-2.5, 2.501-4.501", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_83.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_84.wav", "onoffCaption": "burping belching at 0.871-3.871, 4.642-7.642", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_90.wav", "onoffCaption": "gunshot at 0.2-1.2", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_test/syn_97.wav", "onoffCaption": "cat meowing at 0.5-1.5", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_test/syn_99.wav", "onoffCaption": "duck quacking at 0.0-2.0, 2.0-4.0", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_test/syn_100.wav", "onoffCaption": "cat meowing at 0.0-2.0 and sheep goat bleating at 3.0-5.0, 6.0-8.0, 9.0-10.0", "frequencyCaption": "cat meowing one times and sheep goat bleating three times"} +{"filepath": "data/multi_event_test/syn_107.wav", "onoffCaption": "spraying at 0.0-1.5, 2.0-3.5 and dog barking at 4.0-6.0, 7.0-9.0 and tapping clicking clanking at 1.6-3.1, 3.6-5.1", "frequencyCaption": "spraying two times and dog barking two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_109.wav", "onoffCaption": "cow mooing at 0.0-3.0 and gunshot at 3.5-4.5", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_113.wav", "onoffCaption": "whistling at 0.742-5.917 and tapping clicking clanking at 2.992-6.432", "frequencyCaption": "whistling one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_114.wav", "onoffCaption": "car horn honking at 0-2 and door knocking at 2-4", "frequencyCaption": "car horn honking one times and door 
knocking one times"} +{"filepath": "data/multi_event_test/syn_124.wav", "onoffCaption": "gunshot at 0.0-2.0, 3.0-5.0", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_test/syn_130.wav", "onoffCaption": "dog barking at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_137.wav", "onoffCaption": "door knocking at 0-1 and cow mooing at 2-3 and gunshot at 4-5", "frequencyCaption": "door knocking one times and cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_141.wav", "onoffCaption": "sneeze at 0.33-1.403, 2.759-3.832", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_146.wav", "onoffCaption": "sneeze at 0.0-1.0, 2.0-3.0 and cat meowing at 4.0-5.0", "frequencyCaption": "sneeze two times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_148.wav", "onoffCaption": "duck quacking at 0-1, 2-3 and cow mooing at 4-5", "frequencyCaption": "duck quacking two times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_152.wav", "onoffCaption": "tapping clicking clanking at 0.0-1.0, 1.5-2.5 and train horn at 3.0-7.0", "frequencyCaption": "tapping clicking clanking two times and train horn one times"} +{"filepath": "data/multi_event_test/syn_155.wav", "onoffCaption": "tapping clicking clanking at 0.0-1.0 and gunshot at 2.0-3.0 and cat meowing at 4.0-5.0", "frequencyCaption": "tapping clicking clanking one times and gunshot one times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_162.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_165.wav", "onoffCaption": "thump thud at 0.0-1.5, 2.0-3.5 and whistling at 4.0-7.0", "frequencyCaption": "thump thud two times and whistling one times"} +{"filepath": "data/multi_event_test/syn_171.wav", "onoffCaption": "spraying at 0.0-0.5, 1.5-2.0, 
3.0-3.5 and thump thud at 4.0-5.0 and sheep goat bleating at 5.5-6.5, 7.0-8.0", "frequencyCaption": "spraying three times and thump thud one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_178.wav", "onoffCaption": "door slamming at 0.355-2.581 and woman laughing at 0.964-3.319", "frequencyCaption": "door slamming one times and woman laughing one times"} +{"filepath": "data/multi_event_test/syn_180.wav", "onoffCaption": "spraying at 0.0-1.0 and cow mooing at 2.0-5.0", "frequencyCaption": "spraying one times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_187.wav", "onoffCaption": "sneeze at 1.3-2.403, 4.759-6.442", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_194.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} diff --git a/data/meta_data/test-frequency-control_onoffFromGpt_single-event.json b/data/meta_data/test-frequency-control_onoffFromGpt_single-event.json new file mode 100644 index 0000000000000000000000000000000000000000..9310b20b6657974a4e9a39f0ddf9c0ab7c252c6f --- /dev/null +++ b/data/meta_data/test-frequency-control_onoffFromGpt_single-event.json @@ -0,0 +1,400 @@ +{"filepath": "data/single_event_multi_identity_test/syn_1.wav", "onoffCaption": "cat meowing at 1.674-5.019", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_6.wav", "onoffCaption": "tapping clicking clanking at 0.536-3.976", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_8.wav", "onoffCaption": "door slamming at 0-1", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_11.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_16.wav", "onoffCaption": "thump thud at 0-1", 
"frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_multi_identity_test/syn_18.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_21.wav", "onoffCaption": "sheep goat bleating at 0.56-2.56", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_26.wav", "onoffCaption": "tapping clicking clanking at 0.536-3.976", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_28.wav", "onoffCaption": "sneeze at 0-1, 2-3", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_32.wav", "onoffCaption": "cow mooing at 0-3.309", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_35.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_43.wav", "onoffCaption": "thump thud at 0-1, 2-3", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_44.wav", "onoffCaption": "burping belching at 0-1, 2-3, 4-5", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_multi_identity_test/syn_50.wav", "onoffCaption": "car horn honking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_57.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_59.wav", "onoffCaption": "woman laughing at 2.0-4.5, 5.0-7.5", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_60.wav", "onoffCaption": "cat meowing at 1-2, 3-4", "frequencyCaption": "cat meowing two 
times"} +{"filepath": "data/single_event_multi_identity_test/syn_67.wav", "onoffCaption": "cow mooing at 0-3", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_69.wav", "onoffCaption": "burping belching at 2.0-3.0", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_multi_identity_test/syn_73.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_74.wav", "onoffCaption": "cat meowing at 0-1", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_82.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-8.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_91.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_96.wav", "onoffCaption": "cat meowing at 0-1, 2-3, 4-5", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_98.wav", "onoffCaption": "woman laughing at 0.0-2.0, 2.5-4.5", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_101.wav", "onoffCaption": "burping belching at 0.871-1.871, 2.871-3.871", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_106.wav", "onoffCaption": "gunshot at 0-1", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_108.wav", "onoffCaption": "cat meowing at 0.0-2.0, 2.5-4.5", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_112.wav", "onoffCaption": "train horn at 0-1", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_115.wav", "onoffCaption": "sheep goat bleating 
at 0.5-2.5, 3.0-5.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_122.wav", "onoffCaption": "tapping clicking clanking at 1.0-4.0, 5.0-8.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_125.wav", "onoffCaption": "car horn honking at 0.0-2.0, 3.0-5.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_131.wav", "onoffCaption": "cow mooing at 1.954-4.602, 6.719-9.729", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_136.wav", "onoffCaption": "tapping clicking clanking at 1-3, 6-8", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_138.wav", "onoffCaption": "burping belching at 0-1, 2-3, 4-5", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_multi_identity_test/syn_140.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_147.wav", "onoffCaption": "burping belching at 0.5-2.5, 3.5-5.5", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_149.wav", "onoffCaption": "gunshot at 0-1", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_153.wav", "onoffCaption": "cow mooing at 0-1, 2-3", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_154.wav", "onoffCaption": "train horn at 0-1, 2-3", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_multi_identity_test/syn_163.wav", "onoffCaption": "cow mooing at 1.954-4.602, 6.719-9.729", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_164.wav", "onoffCaption": 
"door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_170.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_177.wav", "onoffCaption": "door knocking at 1-2, 3-4", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_179.wav", "onoffCaption": "gunshot at 0.0-2.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_181.wav", "onoffCaption": "door knocking at 0-1.0", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_186.wav", "onoffCaption": "sneeze at 0.3-1.3, 2.3-3.3", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_188.wav", "onoffCaption": "explosion at 0-2", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_192.wav", "onoffCaption": "cat meowing at 0.0-2.0, 2.5-4.5", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_195.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_3.wav", "onoffCaption": "burping belching at 0.5-2.5, 3.0-5.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_4.wav", "onoffCaption": "cat meowing at 0-1", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_13.wav", "onoffCaption": "tapping clicking clanking at 0.032-1.032, 2.032-3.032, 4.032-5.032", "frequencyCaption": "tapping clicking clanking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_14.wav", "onoffCaption": "tapping clicking clanking at 0-1, 2-3", "frequencyCaption": 
"tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_23.wav", "onoffCaption": "cow mooing at 1.954-4.602, 6.719-9.729", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_24.wav", "onoffCaption": "thump thud at 1-2, 3-4", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_30.wav", "onoffCaption": "explosion at 0-1, 2-3", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_37.wav", "onoffCaption": "tapping clicking clanking at 0.5-2.5, 3-5", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_39.wav", "onoffCaption": "burping belching at 0.5-2.5, 3.0-5.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_41.wav", "onoffCaption": "car horn honking at 1-2, 3-4", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_48.wav", "onoffCaption": "train horn at 0-1", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_52.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_55.wav", "onoffCaption": "spraying at 0-1, 2-3, 4-5", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_62.wav", "onoffCaption": "woman laughing at 2.782-5.368, 6.831-8.912", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_65.wav", "onoffCaption": "tapping clicking clanking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_71.wav", "onoffCaption": "train horn at 0-1", "frequencyCaption": "train horn one times"} 
+{"filepath": "data/single_event_multi_identity_test/syn_76.wav", "onoffCaption": "door knocking at 0-1", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_78.wav", "onoffCaption": "door knocking at 1-2, 3-4", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_80.wav", "onoffCaption": "car horn honking at 0-1", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_85.wav", "onoffCaption": "gunshot at 0.0-2.0, 3.0-5.0, 6.0-8.0", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_multi_identity_test/syn_87.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_89.wav", "onoffCaption": "door knocking at 1.973-5.029, 6.285-9.132", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_93.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_94.wav", "onoffCaption": "burping belching at 0.871-2.871, 5.218-7.218", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_103.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_104.wav", "onoffCaption": "thump thud at 0-2, 3-5", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_110.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_117.wav", "onoffCaption": "tapping clicking clanking at 0-2", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_119.wav", "onoffCaption": "duck quacking at 0.235-2.235, 3.085-5.085", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_120.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_127.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_129.wav", "onoffCaption": "burping belching at 0.5-1.5, 2.5-3.5", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_133.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_134.wav", "onoffCaption": "spraying at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_142.wav", "onoffCaption": "cow mooing at 1-2, 3-4", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_145.wav", "onoffCaption": "cow mooing at 0.0-2.0, 3.0-5.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_151.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_156.wav", "onoffCaption": "thump thud at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "thump thud three times"} +{"filepath": "data/single_event_multi_identity_test/syn_158.wav", "onoffCaption": "burping belching at 1.5-3.5", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_multi_identity_test/syn_161.wav", "onoffCaption": "car horn honking at 1.5-3.5, 4.0-6.0", "frequencyCaption": "car horn honking two times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_166.wav", "onoffCaption": "burping belching at 0-1, 1-2, 2-3", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_multi_identity_test/syn_168.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_multi_identity_test/syn_172.wav", "onoffCaption": "woman laughing at 0.0-2.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_175.wav", "onoffCaption": "spraying at 0-1", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_multi_identity_test/syn_183.wav", "onoffCaption": "woman laughing at 2.782-5.368, 6.831-8.912", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_184.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_190.wav", "onoffCaption": "explosion at 1.773-4.034, 5.15-7.411", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_197.wav", "onoffCaption": "car horn honking at 1.817-4.404, 5.85-8.437", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_199.wav", "onoffCaption": "car horn honking at 0.664-3.129", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_200.wav", "onoffCaption": "train horn at 0-2", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_2.wav", "onoffCaption": "cat meowing at 0.5-2.5", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_5.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_12.wav", "onoffCaption": "tapping clicking clanking at 0.536-3.976", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_15.wav", "onoffCaption": "explosion at 1.773-4.034, 5.15-7.411", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_22.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3, 4-5", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/single_event_multi_identity_test/syn_25.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.1-4.1", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_31.wav", "onoffCaption": "sneeze at 0.0-1.0", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_multi_identity_test/syn_36.wav", "onoffCaption": "sheep goat bleating at 0-1, 1-2", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_38.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_40.wav", "onoffCaption": "woman laughing at 0.0-2.0, 2.5-4.5", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_46.wav", "onoffCaption": "tapping clicking clanking at 1.5-5.0", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_47.wav", "onoffCaption": "cow mooing at 0.0-2.0, 3.0-5.0, 6.0-8.0", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_49.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_51.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_53.wav", "onoffCaption": "whistling at 0-1, 2-3", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_54.wav", "onoffCaption": "cow mooing at 1.954-6.383, 7.52-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_63.wav", "onoffCaption": "explosion at 0.0-3.0", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_64.wav", "onoffCaption": "whistling at 0-1, 2-3", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_70.wav", "onoffCaption": "dog barking at 1-2", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_77.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_79.wav", "onoffCaption": "train horn at 0-2.5", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_81.wav", "onoffCaption": "tapping clicking clanking at 0-1", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_86.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.1-4.1", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_88.wav", "onoffCaption": "car horn honking at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_92.wav", "onoffCaption": "door slamming at 0-1, 2-4", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_95.wav", "onoffCaption": "woman laughing at 1.5-3.5, 4.0-6.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_102.wav", "onoffCaption": "duck quacking at 0-1, 2-3", 
"frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_105.wav", "onoffCaption": "door slamming at 0.355-2.581", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_111.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_116.wav", "onoffCaption": "sheep goat bleating at 0-1", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_118.wav", "onoffCaption": "sheep goat bleating at 0.5-1.5", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_121.wav", "onoffCaption": "cat meowing at 0-1", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_123.wav", "onoffCaption": "sheep goat bleating at 0.0-2.0, 3.0-5.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_126.wav", "onoffCaption": "burping belching at 0.5-2.5, 3.0-5.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_128.wav", "onoffCaption": "train horn at 0-1", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_132.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_135.wav", "onoffCaption": "whistling at 0-1, 2-3", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_139.wav", "onoffCaption": "spraying at 0-1, 1-2", "frequencyCaption": "spraying two times"} +{"filepath": "data/single_event_multi_identity_test/syn_143.wav", "onoffCaption": "door knocking at 0.645-2.772, 3.875-6.782, 7.405-9.692", "frequencyCaption": "door knocking three times"} 
+{"filepath": "data/single_event_multi_identity_test/syn_144.wav", "onoffCaption": "spraying at 0-1, 2-3, 4-5", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_150.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_157.wav", "onoffCaption": "explosion at 0.5-1.5, 2-3", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_159.wav", "onoffCaption": "sneeze at 0.5-1.5, 2.5-3.5", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_160.wav", "onoffCaption": "woman laughing at 0.0-2.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_167.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_169.wav", "onoffCaption": "gunshot at 0.0-2.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_173.wav", "onoffCaption": "explosion at 0-3", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_174.wav", "onoffCaption": "duck quacking at 0.2-2.2, 3.2-5.2", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_176.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.5-4.5", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_182.wav", "onoffCaption": "car horn honking at 0.653-3.872", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_185.wav", "onoffCaption": "dog barking at 0-2, 3-5", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_189.wav", "onoffCaption": "burping belching at 0-1, 2-3", 
"frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_191.wav", "onoffCaption": "tapping clicking clanking at 0.0-4.0", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_193.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_196.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_198.wav", "onoffCaption": "sheep goat bleating at 0.56-2.56", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_7.wav", "onoffCaption": "door slamming at 0-2, 2-4", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_9.wav", "onoffCaption": "sneeze at 0.5-1.5, 2.0-3.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_10.wav", "onoffCaption": "door slamming at 0.0-1.0", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_17.wav", "onoffCaption": "gunshot at 0.0-2.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_19.wav", "onoffCaption": "thump thud at 1.9-4.5, 5.5-8.1", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_20.wav", "onoffCaption": "dog barking at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_27.wav", "onoffCaption": "whistling at 0-1, 2-3", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_29.wav", "onoffCaption": "woman laughing at 0-1", "frequencyCaption": "woman laughing one times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_33.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_34.wav", "onoffCaption": "dog barking at 0-1, 1.5-2.5, 3-4", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_42.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_45.wav", "onoffCaption": "woman laughing at 0-1", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_56.wav", "onoffCaption": "cat meowing at 0-1", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_58.wav", "onoffCaption": "spraying at 0.5-1.5", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_multi_identity_test/syn_61.wav", "onoffCaption": "sheep goat bleating at 0.8-2.8, 3.8-5.8", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_66.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_68.wav", "onoffCaption": "door slamming at 0.355-2.581", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_72.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_75.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_multi_identity_test/syn_83.wav", "onoffCaption": "spraying at 0-1, 2-3, 4-5", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_84.wav", "onoffCaption": "burping 
belching at 0-3", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_multi_identity_test/syn_90.wav", "onoffCaption": "whistling at 1-2", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_97.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_99.wav", "onoffCaption": "gunshot at 0.5-2.5, 3.0-5.0", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_100.wav", "onoffCaption": "gunshot at 0-1, 2-3", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_107.wav", "onoffCaption": "cat meowing at 0-1, 2-3, 4-5", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_109.wav", "onoffCaption": "cat meowing at 0-1, 2-3, 4-5", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_113.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_114.wav", "onoffCaption": "explosion at 0.0-3.0", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_124.wav", "onoffCaption": "woman laughing at 0-1, 1-2", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_130.wav", "onoffCaption": "gunshot at 0.0-2.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_137.wav", "onoffCaption": "train horn at 0-1, 2-3, 4-5", "frequencyCaption": "train horn three times"} +{"filepath": "data/single_event_multi_identity_test/syn_141.wav", "onoffCaption": "woman laughing at 0.5-3.5", "frequencyCaption": "woman laughing one times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_146.wav", "onoffCaption": "sneeze at 0.8-1.8", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_multi_identity_test/syn_148.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_152.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_155.wav", "onoffCaption": "spraying at 0.033-1.519", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_multi_identity_test/syn_162.wav", "onoffCaption": "explosion at 0-1, 2-3", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_165.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_171.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_178.wav", "onoffCaption": "tapping clicking clanking at 1-3, 4-6", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_180.wav", "onoffCaption": "cow mooing at 0-3", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_187.wav", "onoffCaption": "explosion at 1.5-4.5", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_194.wav", "onoffCaption": "gunshot at 0-1", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_11.wav", "onoffCaption": "door knocking at 1-2, 3-4", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_16.wav", "onoffCaption": "burping belching at 0.5-3.5", "frequencyCaption": "burping belching one times"} 
+{"filepath": "data/single_event_single_identity_test/syn_18.wav", "onoffCaption": "burping belching at 1.0-2.0", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_21.wav", "onoffCaption": "burping belching at 0.5-1.5", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_26.wav", "onoffCaption": "spraying at 0-1", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_28.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_32.wav", "onoffCaption": "gunshot at 0-1, 2-3, 4-5", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_single_identity_test/syn_35.wav", "onoffCaption": "woman laughing at 0-2, 3-5", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_43.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_44.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_50.wav", "onoffCaption": "door knocking at 0-1, 2-3", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_57.wav", "onoffCaption": "train horn at 0-1", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_59.wav", "onoffCaption": "gunshot at 0-1, 2-3, 4-5", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_single_identity_test/syn_60.wav", "onoffCaption": "cow mooing at 0.0-2.0, 2.5-4.5", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_67.wav", "onoffCaption": "cow mooing at 
1.0-3.0, 4.0-6.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_69.wav", "onoffCaption": "burping belching at 0.0-2.0, 2.1-4.1", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_73.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_74.wav", "onoffCaption": "cow mooing at 0-2, 3-5", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_82.wav", "onoffCaption": "woman laughing at 2.0-6.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_85.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_91.wav", "onoffCaption": "tapping clicking clanking at 1.0-4.0", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_96.wav", "onoffCaption": "door knocking at 0-1", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_98.wav", "onoffCaption": "door slamming at 0.355-2.581", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_single_identity_test/syn_101.wav", "onoffCaption": "spraying at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_106.wav", "onoffCaption": "spraying at 0.0-1.0", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_108.wav", "onoffCaption": "gunshot at 0.2-2.2, 3.2-5.2", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_112.wav", "onoffCaption": "burping belching at 0.871-2.871, 3.871-5.871", "frequencyCaption": "burping 
belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_115.wav", "onoffCaption": "explosion at 0-1", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_122.wav", "onoffCaption": "tapping clicking clanking at 1.0-5.0", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_125.wav", "onoffCaption": "explosion at 1.0-3.0, 3.5-5.5", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_single_identity_test/syn_131.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_single_identity_test/syn_136.wav", "onoffCaption": "car horn honking at 0-1", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_single_identity_test/syn_138.wav", "onoffCaption": "explosion at 0-1", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_140.wav", "onoffCaption": "train horn at 0-1, 1-2", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_single_identity_test/syn_147.wav", "onoffCaption": "explosion at 0-1, 2-3, 4-5", "frequencyCaption": "explosion three times"} +{"filepath": "data/single_event_single_identity_test/syn_149.wav", "onoffCaption": "spraying at 0.1-1.1, 1.2-2.2, 3.3-4.3", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_153.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_154.wav", "onoffCaption": "explosion at 0.0-1.0", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_163.wav", "onoffCaption": "sneeze at 0-1", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_164.wav", "onoffCaption": 
"sneeze at 0-1", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_170.wav", "onoffCaption": "burping belching at 0.5-2.5", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_175.wav", "onoffCaption": "explosion at 2.941-5.813", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_177.wav", "onoffCaption": "door knocking at 0-1, 2-3", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_179.wav", "onoffCaption": "explosion at 0.0-4.0", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_181.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_186.wav", "onoffCaption": "sheep goat bleating at 0.0-2.0, 2.5-4.5", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_188.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_190.wav", "onoffCaption": "woman laughing at 0-1, 2-3", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_192.wav", "onoffCaption": "door knocking at 1-2, 3-4", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_195.wav", "onoffCaption": "cow mooing at 1.5-3.5, 4.0-6.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_13.wav", "onoffCaption": "tapping clicking clanking at 0.536-3.976", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_14.wav", "onoffCaption": "woman laughing at 0-2", "frequencyCaption": "woman laughing one times"} 
+{"filepath": "data/single_event_single_identity_test/syn_23.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_24.wav", "onoffCaption": "dog barking at 0.311-2.711", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_30.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_37.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_39.wav", "onoffCaption": "whistling at 0-1, 2-3", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_single_identity_test/syn_41.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 2.75-4.75", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_46.wav", "onoffCaption": "car horn honking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_single_identity_test/syn_48.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_single_identity_test/syn_52.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_55.wav", "onoffCaption": "gunshot at 0.0-2.0, 3.0-5.0", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_62.wav", "onoffCaption": "burping belching at 1.5-3.5, 4.0-6.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_65.wav", "onoffCaption": "sheep goat bleating at 1.0-3.0, 4.0-6.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": 
"data/single_event_single_identity_test/syn_71.wav", "onoffCaption": "tapping clicking clanking at 0.5-3.0", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_76.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 3.0-5.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_78.wav", "onoffCaption": "train horn at 0-1, 2-3", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_single_identity_test/syn_80.wav", "onoffCaption": "whistling at 0-1, 2-3", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_single_identity_test/syn_87.wav", "onoffCaption": "car horn honking at 1.817-4.12, 6.106-8.453", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_single_identity_test/syn_89.wav", "onoffCaption": "train horn at 0-1", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_93.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_94.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_103.wav", "onoffCaption": "gunshot at 0.0-2.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_104.wav", "onoffCaption": "cat meowing at 0-1, 2-3, 4-5", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_110.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_117.wav", "onoffCaption": "cat meowing at 1.0-3.0, 4.0-6.0", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_119.wav", 
"onoffCaption": "car horn honking at 0.0-2.0", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_single_identity_test/syn_120.wav", "onoffCaption": "door knocking at 0-1", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_127.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 3-5", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_129.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3, 4-5", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/single_event_single_identity_test/syn_133.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.5-4.5", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_134.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_142.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_145.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_151.wav", "onoffCaption": "burping belching at 1-3", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_156.wav", "onoffCaption": "cow mooing at 0-3", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_single_identity_test/syn_158.wav", "onoffCaption": "door knocking at 0-1", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_161.wav", "onoffCaption": "spraying at 0-1, 2-3, 4-5", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_166.wav", "onoffCaption": "tapping clicking clanking at 0.032-3.472, 
4.758-7.489", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_168.wav", "onoffCaption": "explosion at 2.941-5.813", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_172.wav", "onoffCaption": "gunshot at 0.0-2.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_183.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_184.wav", "onoffCaption": "spraying at 0-1", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_197.wav", "onoffCaption": "sheep goat bleating at 0.0-2.0, 3.0-5.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_199.wav", "onoffCaption": "dog barking at 0-2, 2-4", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_200.wav", "onoffCaption": "thump thud at 2.224-5.891, 7.389-9.889", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_single_identity_test/syn_12.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_15.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_22.wav", "onoffCaption": "whistling at 2.603-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_25.wav", "onoffCaption": "explosion at 0.0-2.0, 2.5-4.5", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_single_identity_test/syn_31.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "gunshot three times"} +{"filepath": 
"data/single_event_single_identity_test/syn_36.wav", "onoffCaption": "dog barking at 0.5-1.5", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_38.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_40.wav", "onoffCaption": "sheep goat bleating at 0-1", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_single_identity_test/syn_47.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_single_identity_test/syn_49.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_51.wav", "onoffCaption": "cat meowing at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_53.wav", "onoffCaption": "cat meowing at 0-2", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_54.wav", "onoffCaption": "gunshot at 0-1, 2-3", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_63.wav", "onoffCaption": "door slamming at 0.355-2.581", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_single_identity_test/syn_64.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3, 4-5", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/single_event_single_identity_test/syn_70.wav", "onoffCaption": "sneeze at 1.3-2.403, 4.759-6.442", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_single_identity_test/syn_77.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_79.wav", 
"onoffCaption": "tapping clicking clanking at 0.536-3.976", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_81.wav", "onoffCaption": "spraying at 0-1", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_86.wav", "onoffCaption": "door knocking at 1-2", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_88.wav", "onoffCaption": "cow mooing at 1.0-3.0, 4.0-6.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_92.wav", "onoffCaption": "train horn at 0.0-2.0, 2.5-4.5", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_single_identity_test/syn_95.wav", "onoffCaption": "thump thud at 0.0-1.0", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_single_identity_test/syn_102.wav", "onoffCaption": "thump thud at 0-1, 2-3", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_single_identity_test/syn_105.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_111.wav", "onoffCaption": "door knocking at 0-1, 2-3", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_116.wav", "onoffCaption": "gunshot at 0-1, 2-3", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_118.wav", "onoffCaption": "cat meowing at 0-3", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_121.wav", "onoffCaption": "door knocking at 1.155-5.305", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_126.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 3.0-5.0", "frequencyCaption": "sheep 
goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_128.wav", "onoffCaption": "tapping clicking clanking at 0-1", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_132.wav", "onoffCaption": "cat meowing at 0-1, 2-3, 4-5", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_135.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_139.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_single_identity_test/syn_143.wav", "onoffCaption": "cat meowing at 0-1", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_144.wav", "onoffCaption": "cow mooing at 1.954-4.602, 6.719-9.729", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_150.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_157.wav", "onoffCaption": "sneeze at 0-1", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_159.wav", "onoffCaption": "sneeze at 0-1", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_160.wav", "onoffCaption": "tapping clicking clanking at 1.5-3.5, 5-7", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_167.wav", "onoffCaption": "cat meowing at 0-1, 2-3", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_169.wav", "onoffCaption": "train horn at 0-3.5", "frequencyCaption": "train horn one times"} +{"filepath": 
"data/single_event_single_identity_test/syn_173.wav", "onoffCaption": "thump thud at 0-1", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_single_identity_test/syn_174.wav", "onoffCaption": "cat meowing at 0-1.2, 2-3.2, 4-5.2", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_182.wav", "onoffCaption": "door knocking at 1-3, 4-6", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_185.wav", "onoffCaption": "gunshot at 0.0-2.0, 3.0-5.0", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_189.wav", "onoffCaption": "door knocking at 2.047-4.422", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_191.wav", "onoffCaption": "cow mooing at 0-3", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_single_identity_test/syn_196.wav", "onoffCaption": "door knocking at 0-1, 2-3, 4-5", "frequencyCaption": "door knocking three times"} +{"filepath": "data/single_event_single_identity_test/syn_198.wav", "onoffCaption": "explosion at 0.0-1.0", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_10.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_17.wav", "onoffCaption": "burping belching at 0.871-5.871, 7.218-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_19.wav", "onoffCaption": "cat meowing at 0-1, 2-3", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_20.wav", "onoffCaption": "tapping clicking clanking at 1.851-5.291, 7.569-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": 
"data/single_event_single_identity_test/syn_27.wav", "onoffCaption": "spraying at 0-1, 2-3, 4-5", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_29.wav", "onoffCaption": "tapping clicking clanking at 1-3, 4-6", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_33.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_34.wav", "onoffCaption": "burping belching at 0-1, 2-3", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_42.wav", "onoffCaption": "dog barking at 2.579-4.579", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_45.wav", "onoffCaption": "cat meowing at 0-1", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_56.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_58.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_single_identity_test/syn_61.wav", "onoffCaption": "spraying at 0.0-1.0", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_66.wav", "onoffCaption": "cat meowing at 0-1, 2-3", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_68.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_72.wav", "onoffCaption": "sneeze at 1.3-2.403, 4.759-6.442", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_single_identity_test/syn_75.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", 
"frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_83.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_84.wav", "onoffCaption": "woman laughing at 0-2, 2-4", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_90.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_97.wav", "onoffCaption": "gunshot at 0-1", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_99.wav", "onoffCaption": "door knocking at 1-3, 4-6", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_100.wav", "onoffCaption": "burping belching at 0.871-2.871, 3.891-5.891", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_107.wav", "onoffCaption": "woman laughing at 0-2, 5-7", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_109.wav", "onoffCaption": "cat meowing at 0.0-2.0, 3.0-5.0, 6.0-8.0", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_113.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_114.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_single_identity_test/syn_123.wav", "onoffCaption": "woman laughing at 2.777-6.165", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_124.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door 
slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_130.wav", "onoffCaption": "duck quacking at 0.0-2.0, 3.0-5.0, 6.0-8.0", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/single_event_single_identity_test/syn_137.wav", "onoffCaption": "dog barking at 0.5-2.5, 3.0-5.0", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_141.wav", "onoffCaption": "woman laughing at 2.782-5.368", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_146.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_148.wav", "onoffCaption": "thump thud at 2-3", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_single_identity_test/syn_152.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_155.wav", "onoffCaption": "woman laughing at 0.5-2.5, 3.0-5.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_162.wav", "onoffCaption": "door knocking at 0-1, 1-2", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_165.wav", "onoffCaption": "door slamming at 0.355-2.581", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_single_identity_test/syn_171.wav", "onoffCaption": "woman laughing at 2.672-5.672", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_176.wav", "onoffCaption": "burping belching at 0.5-3.5, 4.5-7.5", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_178.wav", "onoffCaption": "sheep goat bleating at 1-3, 4-7", "frequencyCaption": "sheep goat bleating 
two times"} +{"filepath": "data/single_event_single_identity_test/syn_180.wav", "onoffCaption": "cow mooing at 0-3", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_single_identity_test/syn_187.wav", "onoffCaption": "gunshot at 0.5-2.5, 3.0-5.0", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_193.wav", "onoffCaption": "tapping clicking clanking at 1.851-5.291, 7.569-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_194.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_1.wav", "onoffCaption": "cat meowing at 1.0-3.0, 4.0-6.0", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_2.wav", "onoffCaption": "cat meowing at 0.5-1.5, 2.5-3.5", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_3.wav", "onoffCaption": "burping belching at 0-1, 2-3, 4-5", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_single_identity_test/syn_4.wav", "onoffCaption": "car horn honking at 0.664-3.129, 4.357-7.014", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_single_identity_test/syn_5.wav", "onoffCaption": "dog barking at 0.0-2.0", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_6.wav", "onoffCaption": "explosion at 0-1", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_7.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_8.wav", "onoffCaption": "burping belching at 2.861-8.462", "frequencyCaption": "burping belching one times"} +{"filepath": 
"data/single_event_single_identity_test/syn_9.wav", "onoffCaption": "burping belching at 0.569-4.438", "frequencyCaption": "burping belching one times"} diff --git a/data/meta_data/test-onoff-control_multi-event.json b/data/meta_data/test-onoff-control_multi-event.json new file mode 100644 index 0000000000000000000000000000000000000000..7c57e688ed2805ae7ac07be7b61dc10e52d3b82e --- /dev/null +++ b/data/meta_data/test-onoff-control_multi-event.json @@ -0,0 +1,200 @@ +{"filepath": "data/multi_event_test/syn_1.wav", "onoffCaption": "cat meowing at 0.393-1.783, 3.975-5.365 and whistling at 0.861-5.455 and explosion at 2.089-4.841, 5.738-8.538", "frequencyCaption": "cat meowing two times and whistling one times and explosion two times"} +{"filepath": "data/multi_event_test/syn_6.wav", "onoffCaption": "whistling at 2.093-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_8.wav", "onoffCaption": "cow mooing at 1.177-3.977, 5.15-7.774", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_11.wav", "onoffCaption": "burping belching at 1.039-3.198, 4.085-6.244 and dog barking at 3.119-5.119", "frequencyCaption": "burping belching two times and dog barking one times"} +{"filepath": "data/multi_event_test/syn_16.wav", "onoffCaption": "duck quacking at 0.799-2.799, 3.634-5.634, 6.976-8.976", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_18.wav", "onoffCaption": "door knocking at 1.225-3.352, 5.173-7.3 and door slamming at 5.439-7.678", "frequencyCaption": "door knocking two times and door slamming one times"} +{"filepath": "data/multi_event_test/syn_21.wav", "onoffCaption": "dog barking at 2.947-4.947, 6.186-8.186", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_26.wav", "onoffCaption": "whistling at 2.848-7.442", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_28.wav", "onoffCaption": "cow 
mooing at 2.639-5.263 and spraying at 8.565-9.667", "frequencyCaption": "cow mooing one times and spraying one times"} +{"filepath": "data/multi_event_test/syn_32.wav", "onoffCaption": "duck quacking at 0.039-2.039, 3.171-5.171, 5.938-7.938", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_35.wav", "onoffCaption": "car horn honking at 2.31-5.271, 5.91-8.871", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_test/syn_43.wav", "onoffCaption": "dog barking at 2.157-4.157, 5.953-7.953 and burping belching at 2.431-5.388, 6.452-8.611 and explosion at 4.8-7.552", "frequencyCaption": "dog barking two times and burping belching two times and explosion one times"} +{"filepath": "data/multi_event_test/syn_44.wav", "onoffCaption": "sneeze at 2.638-6.791", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_50.wav", "onoffCaption": "car horn honking at 0.874-3.835, 4.429-7.39 and sneeze at 1.814-5.167 and train horn at 2.818-7.898", "frequencyCaption": "car horn honking two times and sneeze one times and train horn one times"} +{"filepath": "data/multi_event_test/syn_57.wav", "onoffCaption": "dog barking at 3.007-5.007, 6.103-8.103 and cow mooing at 3.017-5.641", "frequencyCaption": "dog barking two times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_59.wav", "onoffCaption": "door slamming at 0.035-2.274 and explosion at 3.857-6.609, 7.377-10.0", "frequencyCaption": "door slamming one times and explosion two times"} +{"filepath": "data/multi_event_test/syn_60.wav", "onoffCaption": "train horn at 0.062-3.062", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_test/syn_67.wav", "onoffCaption": "whistling at 1.616-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_69.wav", "onoffCaption": "door knocking at 0.237-2.801, 4.117-6.681, 7.378-9.942", "frequencyCaption": "door knocking three 
times"} +{"filepath": "data/multi_event_test/syn_73.wav", "onoffCaption": "door knocking at 0.045-2.172, 2.718-5.282, 6.027-8.591 and sneeze at 2.92-6.273, 6.847-9.032", "frequencyCaption": "door knocking three times and sneeze two times"} +{"filepath": "data/multi_event_test/syn_74.wav", "onoffCaption": "spraying at 0.38-1.176, 3.06-3.856 and gunshot at 1.729-3.729, 4.367-6.367, 7.031-9.031", "frequencyCaption": "spraying two times and gunshot three times"} +{"filepath": "data/multi_event_test/syn_82.wav", "onoffCaption": "dog barking at 0.497-2.497, 4.187-6.187", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_91.wav", "onoffCaption": "gunshot at 0.501-2.501, 3.148-5.148", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_test/syn_96.wav", "onoffCaption": "door slamming at 0.154-2.393, 3.23-4.641, 5.232-7.471", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_98.wav", "onoffCaption": "thump thud at 1.835-4.135, 6.505-9.18", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_test/syn_101.wav", "onoffCaption": "dog barking at 0.681-2.681", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_test/syn_106.wav", "onoffCaption": "burping belching at 0.093-3.05, 3.962-6.121, 7.309-9.468", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_test/syn_108.wav", "onoffCaption": "sneeze at 3.287-7.44", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_112.wav", "onoffCaption": "woman laughing at 1.823-4.587, 6.243-9.007", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_115.wav", "onoffCaption": "duck quacking at 0.044-1.862 and tapping clicking clanking at 0.436-3.876, 5.547-7.6", "frequencyCaption": "duck quacking one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_122.wav", 
"onoffCaption": "door knocking at 1.266-3.83, 4.854-7.418, 7.929-10.0", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_125.wav", "onoffCaption": "cow mooing at 2.954-5.754, 6.384-9.008", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_131.wav", "onoffCaption": "whistling at 0.666-5.26, 5.984-8.335 and cat meowing at 0.904-2.294", "frequencyCaption": "whistling two times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_136.wav", "onoffCaption": "sheep goat bleating at 0.226-2.226, 3.707-5.707 and whistling at 1.058-5.652, 6.943-10.0 and woman laughing at 2.749-7.207", "frequencyCaption": "sheep goat bleating two times and whistling two times and woman laughing one times"} +{"filepath": "data/multi_event_test/syn_138.wav", "onoffCaption": "gunshot at 0.785-2.785 and tapping clicking clanking at 5.685-9.125", "frequencyCaption": "gunshot one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_140.wav", "onoffCaption": "door knocking at 0.341-2.468, 3.382-5.946, 7.206-9.77", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_147.wav", "onoffCaption": "door slamming at 0.305-1.716, 2.95-4.361, 5.691-7.102", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_149.wav", "onoffCaption": "car horn honking at 0.666-3.35, 5.748-8.432 and spraying at 7.494-8.29, 8.904-9.7", "frequencyCaption": "car horn honking two times and spraying two times"} +{"filepath": "data/multi_event_test/syn_153.wav", "onoffCaption": "cat meowing at 3.029-4.355", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_test/syn_154.wav", "onoffCaption": "cat meowing at 2.044-3.37 and door knocking at 2.866-5.43", "frequencyCaption": "cat meowing one times and door knocking one times"} +{"filepath": "data/multi_event_test/syn_163.wav", "onoffCaption": "sheep goat 
bleating at 0.139-2.139, 3.188-5.188, 6.077-8.077", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_test/syn_164.wav", "onoffCaption": "whistling at 0.15-4.744, 6.868-8.971", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_test/syn_170.wav", "onoffCaption": "dog barking at 0.286-2.286, 3.801-5.801", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_177.wav", "onoffCaption": "thump thud at 0.593-2.893 and cow mooing at 4.617-7.241", "frequencyCaption": "thump thud one times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_179.wav", "onoffCaption": "cow mooing at 2.754-5.378, 6.145-8.769", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_181.wav", "onoffCaption": "cow mooing at 3.381-6.181, 7.936-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_186.wav", "onoffCaption": "gunshot at 0.131-2.131", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_test/syn_188.wav", "onoffCaption": "gunshot at 0.785-2.785, 3.847-5.847 and duck quacking at 2.99-4.99", "frequencyCaption": "gunshot two times and duck quacking one times"} +{"filepath": "data/multi_event_test/syn_192.wav", "onoffCaption": "spraying at 1.763-2.865, 5.335-6.437", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_test/syn_195.wav", "onoffCaption": "thump thud at 2.422-5.097, 5.945-8.245", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_test/syn_3.wav", "onoffCaption": "tapping clicking clanking at 2.711-6.151, 7.783-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_4.wav", "onoffCaption": "door slamming at 3.076-4.487, 6.877-8.288", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_13.wav", "onoffCaption": "duck quacking at 0.012-2.012, 
3.202-5.202, 7.582-9.582", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_14.wav", "onoffCaption": "sneeze at 1.853-6.006", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_23.wav", "onoffCaption": "sneeze at 0.109-4.262, 6.151-8.608", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_24.wav", "onoffCaption": "woman laughing at 3.051-7.509", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_test/syn_30.wav", "onoffCaption": "burping belching at 3.234-6.191, 7.597-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_37.wav", "onoffCaption": "thump thud at 1.883-4.558, 6.153-8.453 and door knocking at 2.227-4.791, 5.771-8.335 and burping belching at 6.746-8.905", "frequencyCaption": "thump thud two times and door knocking two times and burping belching one times"} +{"filepath": "data/multi_event_test/syn_39.wav", "onoffCaption": "train horn at 2.197-5.197, 5.755-8.755", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_test/syn_41.wav", "onoffCaption": "thump thud at 1.465-3.765", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_test/syn_48.wav", "onoffCaption": "cat meowing at 0.07-1.396, 3.738-5.064, 6.912-8.238", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_test/syn_52.wav", "onoffCaption": "gunshot at 0.761-2.761 and duck quacking at 0.994-2.994 and tapping clicking clanking at 5.144-8.584", "frequencyCaption": "gunshot one times and duck quacking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_55.wav", "onoffCaption": "sneeze at 2.529-6.682, 7.206-9.677", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_62.wav", "onoffCaption": "woman laughing at 0.152-2.916, 5.112-7.934", "frequencyCaption": "woman laughing two times"} 
+{"filepath": "data/multi_event_test/syn_65.wav", "onoffCaption": "gunshot at 3.755-5.755, 6.54-8.54", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_test/syn_71.wav", "onoffCaption": "door slamming at 0.023-2.262, 4.712-6.123 and whistling at 1.979-6.573", "frequencyCaption": "door slamming two times and whistling one times"} +{"filepath": "data/multi_event_test/syn_76.wav", "onoffCaption": "dog barking at 0.741-2.741", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_test/syn_78.wav", "onoffCaption": "explosion at 0.11-2.862, 4.292-7.044 and duck quacking at 2.338-4.156, 5.898-7.716", "frequencyCaption": "explosion two times and duck quacking two times"} +{"filepath": "data/multi_event_test/syn_80.wav", "onoffCaption": "door slamming at 0.695-2.106 and sheep goat bleating at 0.985-2.985", "frequencyCaption": "door slamming one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_85.wav", "onoffCaption": "door knocking at 4.074-6.201", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_test/syn_87.wav", "onoffCaption": "explosion at 0.371-3.123, 5.335-8.087", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_89.wav", "onoffCaption": "car horn honking at 2.099-5.06 and cat meowing at 5.989-7.315", "frequencyCaption": "car horn honking one times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_93.wav", "onoffCaption": "dog barking at 0.988-2.988, 5.289-7.289", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_94.wav", "onoffCaption": "gunshot at 1.463-3.463, 4.41-6.41, 7.226-9.226 and thump thud at 1.729-4.404, 6.318-8.993 and sheep goat bleating at 1.895-3.895, 5.909-7.909", "frequencyCaption": "gunshot three times and thump thud two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_103.wav", "onoffCaption": "whistling at 2.759-10.0", 
"frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_104.wav", "onoffCaption": "duck quacking at 4.149-5.967", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_test/syn_110.wav", "onoffCaption": "train horn at 0.111-5.191 and duck quacking at 0.894-2.894 and cow mooing at 5.062-7.862", "frequencyCaption": "train horn one times and duck quacking one times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_117.wav", "onoffCaption": "sheep goat bleating at 3.487-5.487, 7.705-9.705", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_119.wav", "onoffCaption": "train horn at 2.056-5.056 and door knocking at 2.912-5.039, 5.997-8.124", "frequencyCaption": "train horn one times and door knocking two times"} +{"filepath": "data/multi_event_test/syn_120.wav", "onoffCaption": "burping belching at 2.114-5.071, 5.723-8.68", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_127.wav", "onoffCaption": "whistling at 1.653-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_129.wav", "onoffCaption": "door knocking at 0.592-2.719, 3.326-5.453, 6.255-8.382", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_133.wav", "onoffCaption": "duck quacking at 1.444-3.262, 4.595-6.413", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_test/syn_134.wav", "onoffCaption": "car horn honking at 0.439-3.123, 5.193-7.877", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_test/syn_142.wav", "onoffCaption": "sneeze at 0.338-4.491, 5.776-7.91", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_145.wav", "onoffCaption": "door knocking at 0.308-2.872, 4.395-6.959 and whistling at 0.583-9.383", "frequencyCaption": "door knocking two times and whistling one times"} +{"filepath": 
"data/multi_event_test/syn_151.wav", "onoffCaption": "dog barking at 0.368-2.368, 3.112-5.112, 5.983-7.983", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_156.wav", "onoffCaption": "car horn honking at 0.03-2.714, 3.401-6.085, 6.775-9.459", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_test/syn_158.wav", "onoffCaption": "tapping clicking clanking at 3.057-6.497, 7.876-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_161.wav", "onoffCaption": "spraying at 0.049-1.151, 2.004-2.8", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_test/syn_166.wav", "onoffCaption": "woman laughing at 1.442-5.9", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_test/syn_168.wav", "onoffCaption": "sheep goat bleating at 0.016-2.016", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_172.wav", "onoffCaption": "door knocking at 0.153-2.28, 3.142-5.706, 6.305-8.869", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_175.wav", "onoffCaption": "cow mooing at 0.61-3.41 and spraying at 3.012-4.114", "frequencyCaption": "cow mooing one times and spraying one times"} +{"filepath": "data/multi_event_test/syn_183.wav", "onoffCaption": "explosion at 0.192-5.114, 5.844-8.596", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_184.wav", "onoffCaption": "sheep goat bleating at 0.322-2.322", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_190.wav", "onoffCaption": "whistling at 2.571-7.165", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_197.wav", "onoffCaption": "tapping clicking clanking at 1.043-4.483, 5.786-9.226", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": 
"data/multi_event_test/syn_199.wav", "onoffCaption": "duck quacking at 3.246-5.246 and cat meowing at 7.245-8.635", "frequencyCaption": "duck quacking one times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_200.wav", "onoffCaption": "explosion at 3.045-5.797, 7.133-9.196", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_2.wav", "onoffCaption": "door knocking at 2.42-4.984", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_test/syn_5.wav", "onoffCaption": "burping belching at 3.676-5.835", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_test/syn_12.wav", "onoffCaption": "sheep goat bleating at 1.611-3.611 and sneeze at 5.808-9.161", "frequencyCaption": "sheep goat bleating one times and sneeze one times"} +{"filepath": "data/multi_event_test/syn_15.wav", "onoffCaption": "tapping clicking clanking at 0.807-4.247", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_22.wav", "onoffCaption": "whistling at 3.354-7.948", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_25.wav", "onoffCaption": "burping belching at 2.316-5.273, 6.42-9.377", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_31.wav", "onoffCaption": "woman laughing at 0.674-5.132, 6.464-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_36.wav", "onoffCaption": "door slamming at 0.106-2.345, 2.885-5.124, 5.997-8.236", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_38.wav", "onoffCaption": "cat meowing at 0.245-1.571, 3.125-4.451, 5.016-6.342", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_test/syn_40.wav", "onoffCaption": "door knocking at 2.051-4.178, 4.942-7.506 and cow mooing at 2.928-5.728", "frequencyCaption": "door knocking two times and 
cow mooing one times"} +{"filepath": "data/multi_event_test/syn_46.wav", "onoffCaption": "door slamming at 0.382-1.793, 2.674-4.913", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_47.wav", "onoffCaption": "spraying at 0.719-1.515, 2.813-3.915, 4.469-5.265 and cow mooing at 1.592-4.392, 4.998-7.798", "frequencyCaption": "spraying three times and cow mooing two times"} +{"filepath": "data/multi_event_test/syn_49.wav", "onoffCaption": "sheep goat bleating at 0.44-2.44, 3.141-5.141 and tapping clicking clanking at 1.283-4.723, 6.144-8.215", "frequencyCaption": "sheep goat bleating two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_51.wav", "onoffCaption": "train horn at 0.258-3.258, 4.737-7.277", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_test/syn_53.wav", "onoffCaption": "dog barking at 0.072-2.072, 3.076-5.076, 6.003-8.003", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_54.wav", "onoffCaption": "train horn at 0.347-3.347, 4.652-7.652", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_test/syn_63.wav", "onoffCaption": "train horn at 0.507-3.507 and cat meowing at 7.463-8.789 and dog barking at 7.612-9.612", "frequencyCaption": "train horn one times and cat meowing one times and dog barking one times"} +{"filepath": "data/multi_event_test/syn_64.wav", "onoffCaption": "sheep goat bleating at 1.521-3.521", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_70.wav", "onoffCaption": "whistling at 2.267-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_77.wav", "onoffCaption": "cow mooing at 0.75-3.55", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_test/syn_79.wav", "onoffCaption": "dog barking at 1.282-3.282, 4.117-6.117, 6.789-8.789", "frequencyCaption": "dog barking three 
times"} +{"filepath": "data/multi_event_test/syn_81.wav", "onoffCaption": "gunshot at 0.019-2.019, 2.851-4.851, 5.918-7.918", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_86.wav", "onoffCaption": "whistling at 1.438-6.032 and woman laughing at 2.351-5.115, 6.601-9.365", "frequencyCaption": "whistling one times and woman laughing two times"} +{"filepath": "data/multi_event_test/syn_88.wav", "onoffCaption": "sheep goat bleating at 3.021-5.021, 6.26-8.26", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_92.wav", "onoffCaption": "door slamming at 0.346-1.757, 2.569-3.98, 5.839-7.25 and tapping clicking clanking at 2.508-5.948", "frequencyCaption": "door slamming three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_95.wav", "onoffCaption": "door slamming at 2.522-3.933, 5.673-7.084, 8.486-9.897", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_102.wav", "onoffCaption": "door knocking at 2.145-4.272, 4.881-7.008", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_test/syn_105.wav", "onoffCaption": "train horn at 0.682-3.682, 4.465-6.698, 7.809-10.0", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_test/syn_111.wav", "onoffCaption": "whistling at 0.032-4.626, 6.182-10.0 and door slamming at 0.753-2.164", "frequencyCaption": "whistling two times and door slamming one times"} +{"filepath": "data/multi_event_test/syn_116.wav", "onoffCaption": "burping belching at 3.577-5.736, 6.261-9.218", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_118.wav", "onoffCaption": "sneeze at 3.124-6.477", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_121.wav", "onoffCaption": "car horn honking at 0.782-3.743, 4.51-7.194, 7.76-10.0", "frequencyCaption": "car horn honking three times"} 
+{"filepath": "data/multi_event_test/syn_123.wav", "onoffCaption": "sheep goat bleating at 2.222-4.222, 6.493-8.493", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_126.wav", "onoffCaption": "sneeze at 2.136-6.289 and car horn honking at 2.473-5.434, 7.027-9.711", "frequencyCaption": "sneeze one times and car horn honking two times"} +{"filepath": "data/multi_event_test/syn_128.wav", "onoffCaption": "sheep goat bleating at 0.291-2.291 and door knocking at 0.293-2.42, 3.227-5.791", "frequencyCaption": "sheep goat bleating one times and door knocking two times"} +{"filepath": "data/multi_event_test/syn_132.wav", "onoffCaption": "sheep goat bleating at 0.295-2.295 and spraying at 0.328-1.124, 2.065-3.167, 4.421-5.217 and duck quacking at 0.387-2.387, 2.967-4.785, 5.384-7.384", "frequencyCaption": "sheep goat bleating one times and spraying three times and duck quacking three times"} +{"filepath": "data/multi_event_test/syn_135.wav", "onoffCaption": "tapping clicking clanking at 0.458-3.898, 5.425-8.865", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_139.wav", "onoffCaption": "thump thud at 2.477-4.777, 6.095-8.77", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_test/syn_143.wav", "onoffCaption": "spraying at 2.679-3.475 and explosion at 5.945-10.0", "frequencyCaption": "spraying one times and explosion one times"} +{"filepath": "data/multi_event_test/syn_144.wav", "onoffCaption": "duck quacking at 1.162-2.98, 3.994-5.994, 8.158-9.976", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_150.wav", "onoffCaption": "gunshot at 1.946-3.946, 4.6-6.6, 7.322-9.322", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_157.wav", "onoffCaption": "train horn at 1.991-7.071", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_test/syn_159.wav", 
"onoffCaption": "door slamming at 3.182-5.421, 7.675-9.086", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_160.wav", "onoffCaption": "spraying at 0.179-0.975 and whistling at 3.947-10.0", "frequencyCaption": "spraying one times and whistling one times"} +{"filepath": "data/multi_event_test/syn_167.wav", "onoffCaption": "burping belching at 0.386-3.343, 4.105-6.264 and gunshot at 4.772-6.772", "frequencyCaption": "burping belching two times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_169.wav", "onoffCaption": "sneeze at 0.56-4.713, 5.69-7.783", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_173.wav", "onoffCaption": "sheep goat bleating at 0.834-2.834, 3.932-5.932, 6.656-8.656", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_test/syn_174.wav", "onoffCaption": "dog barking at 0.021-2.021, 2.529-4.529, 5.505-7.505", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_176.wav", "onoffCaption": "woman laughing at 2.645-5.409, 7.198-9.435", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_182.wav", "onoffCaption": "cow mooing at 0.007-2.807 and gunshot at 1.124-3.124", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_185.wav", "onoffCaption": "spraying at 2.564-3.666 and door knocking at 6.756-9.32", "frequencyCaption": "spraying one times and door knocking one times"} +{"filepath": "data/multi_event_test/syn_189.wav", "onoffCaption": "door slamming at 2.717-4.956, 5.586-6.997", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_191.wav", "onoffCaption": "burping belching at 2.833-4.992, 6.271-8.43", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_193.wav", "onoffCaption": "cow mooing at 0.942-3.742, 4.83-7.454", 
"frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_196.wav", "onoffCaption": "spraying at 3.461-4.563", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_test/syn_198.wav", "onoffCaption": "gunshot at 1.546-3.546, 4.501-6.501, 7.428-9.428", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_7.wav", "onoffCaption": "spraying at 0.113-0.909 and burping belching at 0.623-3.58", "frequencyCaption": "spraying one times and burping belching one times"} +{"filepath": "data/multi_event_test/syn_9.wav", "onoffCaption": "cow mooing at 1.06-3.86", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_test/syn_10.wav", "onoffCaption": "door knocking at 0.3-2.864, 5.022-7.586", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_test/syn_17.wav", "onoffCaption": "dog barking at 3.791-5.791, 7.757-9.757", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_19.wav", "onoffCaption": "gunshot at 0.007-2.007 and spraying at 4.251-5.047", "frequencyCaption": "gunshot one times and spraying one times"} +{"filepath": "data/multi_event_test/syn_20.wav", "onoffCaption": "tapping clicking clanking at 0.017-3.457, 5.475-7.882", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_27.wav", "onoffCaption": "dog barking at 2.012-4.012, 4.76-6.76", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_29.wav", "onoffCaption": "tapping clicking clanking at 2.18-5.62, 6.49-9.93", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_33.wav", "onoffCaption": "dog barking at 2.805-4.805, 5.866-7.866 and car horn honking at 5.136-8.097", "frequencyCaption": "dog barking two times and car horn honking one times"} +{"filepath": "data/multi_event_test/syn_34.wav", "onoffCaption": "sheep goat bleating at 
1.113-3.113", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_42.wav", "onoffCaption": "tapping clicking clanking at 2.443-5.883, 7.179-9.684", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_45.wav", "onoffCaption": "cat meowing at 0.324-1.65 and train horn at 4.186-9.266", "frequencyCaption": "cat meowing one times and train horn one times"} +{"filepath": "data/multi_event_test/syn_56.wav", "onoffCaption": "tapping clicking clanking at 1.696-5.136, 6.886-9.533", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_58.wav", "onoffCaption": "door slamming at 2.48-3.891", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_test/syn_61.wav", "onoffCaption": "explosion at 2.489-5.241, 5.792-8.521 and train horn at 2.512-7.592 and woman laughing at 6.424-9.188", "frequencyCaption": "explosion two times and train horn one times and woman laughing one times"} +{"filepath": "data/multi_event_test/syn_66.wav", "onoffCaption": "sheep goat bleating at 1.634-3.634", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_68.wav", "onoffCaption": "car horn honking at 0.051-3.012, 4.062-6.746, 7.319-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_test/syn_72.wav", "onoffCaption": "spraying at 0.013-0.809, 1.742-2.844 and thump thud at 1.117-3.792 and dog barking at 6.065-8.065", "frequencyCaption": "spraying two times and thump thud one times and dog barking one times"} +{"filepath": "data/multi_event_test/syn_75.wav", "onoffCaption": "explosion at 0.266-5.188, 6.431-9.183", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_83.wav", "onoffCaption": "whistling at 2.863-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_84.wav", "onoffCaption": "burping 
belching at 2.009-4.966, 6.768-8.927", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_90.wav", "onoffCaption": "gunshot at 0.175-2.175", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_test/syn_97.wav", "onoffCaption": "cat meowing at 3.666-5.056", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_test/syn_99.wav", "onoffCaption": "duck quacking at 0.697-2.515, 3.677-5.677", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_test/syn_100.wav", "onoffCaption": "cat meowing at 0.122-1.512 and sheep goat bleating at 0.564-2.564, 3.078-5.078, 5.762-7.762", "frequencyCaption": "cat meowing one times and sheep goat bleating three times"} +{"filepath": "data/multi_event_test/syn_107.wav", "onoffCaption": "spraying at 0.005-1.107, 3.385-4.487 and dog barking at 1.269-3.269, 4.85-6.85 and tapping clicking clanking at 1.455-4.895, 5.47-8.91", "frequencyCaption": "spraying two times and dog barking two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_109.wav", "onoffCaption": "cow mooing at 1.573-4.373 and gunshot at 7.482-9.482", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_113.wav", "onoffCaption": "whistling at 0.12-4.714 and tapping clicking clanking at 0.731-4.171", "frequencyCaption": "whistling one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_114.wav", "onoffCaption": "car horn honking at 3.216-5.9 and door knocking at 3.814-6.378", "frequencyCaption": "car horn honking one times and door knocking one times"} +{"filepath": "data/multi_event_test/syn_124.wav", "onoffCaption": "gunshot at 2.794-4.794, 5.712-7.712", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_test/syn_130.wav", "onoffCaption": "dog barking at 0.835-2.835, 3.911-5.911, 6.459-8.459", "frequencyCaption": "dog 
barking three times"} +{"filepath": "data/multi_event_test/syn_137.wav", "onoffCaption": "door knocking at 0.152-2.716 and cow mooing at 1.559-4.183 and gunshot at 5.826-7.826", "frequencyCaption": "door knocking one times and cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_141.wav", "onoffCaption": "sneeze at 0.816-4.969, 5.643-9.796", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_146.wav", "onoffCaption": "sneeze at 0.145-4.298, 5.107-8.031 and cat meowing at 1.128-2.454", "frequencyCaption": "sneeze two times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_148.wav", "onoffCaption": "duck quacking at 3.185-5.003, 5.701-7.701 and cow mooing at 3.469-6.093", "frequencyCaption": "duck quacking two times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_152.wav", "onoffCaption": "tapping clicking clanking at 0.851-4.291, 4.863-7.054 and train horn at 5.524-8.524", "frequencyCaption": "tapping clicking clanking two times and train horn one times"} +{"filepath": "data/multi_event_test/syn_155.wav", "onoffCaption": "tapping clicking clanking at 0.869-4.309 and gunshot at 1.402-3.402 and cat meowing at 6.9-8.226", "frequencyCaption": "tapping clicking clanking one times and gunshot one times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_162.wav", "onoffCaption": "gunshot at 0.5-2.5, 3.074-5.074, 5.829-7.829", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_165.wav", "onoffCaption": "thump thud at 0.322-2.622, 4.239-6.914 and whistling at 0.361-9.161", "frequencyCaption": "thump thud two times and whistling one times"} +{"filepath": "data/multi_event_test/syn_171.wav", "onoffCaption": "spraying at 1.23-2.332, 3.511-4.613, 5.79-6.892 and thump thud at 1.604-3.904 and sheep goat bleating at 1.985-3.985, 4.796-6.796", "frequencyCaption": "spraying three times and thump thud one times and sheep goat bleating 
two times"} +{"filepath": "data/multi_event_test/syn_178.wav", "onoffCaption": "door slamming at 0.233-2.472 and woman laughing at 6.658-10.0", "frequencyCaption": "door slamming one times and woman laughing one times"} +{"filepath": "data/multi_event_test/syn_180.wav", "onoffCaption": "spraying at 2.203-3.305 and cow mooing at 4.398-7.198", "frequencyCaption": "spraying one times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_187.wav", "onoffCaption": "sneeze at 2.13-6.283, 6.866-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_194.wav", "onoffCaption": "duck quacking at 2.028-3.846, 5.612-7.43", "frequencyCaption": "duck quacking two times"} diff --git a/data/meta_data/test-onoff-control_single-event.json b/data/meta_data/test-onoff-control_single-event.json new file mode 100644 index 0000000000000000000000000000000000000000..6071ff006d0d2a2618a7aaf573812f9a058ca4ac --- /dev/null +++ b/data/meta_data/test-onoff-control_single-event.json @@ -0,0 +1,400 @@ +{"filepath": "data/single_event_multi_identity_test/syn_1.wav", "onoffCaption": "cat meowing at 0.258-1.584", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_6.wav", "onoffCaption": "tapping clicking clanking at 1.246-4.686", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_8.wav", "onoffCaption": "door slamming at 2.564-4.803", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_11.wav", "onoffCaption": "dog barking at 0.084-2.084, 2.908-4.908", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_16.wav", "onoffCaption": "thump thud at 0.776-3.451", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_multi_identity_test/syn_18.wav", "onoffCaption": "sheep goat bleating at 3.833-5.833", "frequencyCaption": 
"sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_21.wav", "onoffCaption": "sheep goat bleating at 2.491-4.491", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_26.wav", "onoffCaption": "tapping clicking clanking at 0.89-4.33", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_28.wav", "onoffCaption": "sneeze at 0.109-4.262, 6.151-8.608", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_32.wav", "onoffCaption": "cow mooing at 1.486-4.11", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_35.wav", "onoffCaption": "door slamming at 0.085-2.324, 4.153-5.564", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_43.wav", "onoffCaption": "thump thud at 2.551-4.851, 5.601-8.276", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_44.wav", "onoffCaption": "burping belching at 0.979-3.138, 4.115-7.072, 7.609-9.768", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_multi_identity_test/syn_50.wav", "onoffCaption": "car horn honking at 1.566-4.25, 6.473-9.434", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_57.wav", "onoffCaption": "train horn at 3.341-8.421", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_59.wav", "onoffCaption": "woman laughing at 2.439-5.203, 6.08-8.827", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_60.wav", "onoffCaption": "cat meowing at 0.074-1.464, 3.742-5.068", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_67.wav", 
"onoffCaption": "cow mooing at 3.535-6.159", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_69.wav", "onoffCaption": "burping belching at 0.799-2.958", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_multi_identity_test/syn_73.wav", "onoffCaption": "whistling at 2.868-7.462", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_74.wav", "onoffCaption": "cat meowing at 1.655-3.045", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_82.wav", "onoffCaption": "thump thud at 1.925-4.6, 5.398-7.698", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_91.wav", "onoffCaption": "duck quacking at 0.497-2.497", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_96.wav", "onoffCaption": "cat meowing at 0.044-1.37, 3.201-4.591, 5.458-6.848", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_98.wav", "onoffCaption": "woman laughing at 2.458-6.916, 7.905-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_101.wav", "onoffCaption": "burping belching at 1.697-4.654, 5.403-7.562", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_106.wav", "onoffCaption": "gunshot at 0.047-2.047", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_108.wav", "onoffCaption": "cat meowing at 1.96-3.35, 4.662-5.988", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_112.wav", "onoffCaption": "train horn at 3.416-8.496", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_115.wav", "onoffCaption": "sheep 
goat bleating at 3.021-5.021, 6.26-8.26", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_122.wav", "onoffCaption": "tapping clicking clanking at 1.126-4.566, 6.974-9.783", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_125.wav", "onoffCaption": "car horn honking at 3.106-5.79, 6.31-9.271", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_131.wav", "onoffCaption": "cow mooing at 2.423-5.047, 6.252-9.052", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_136.wav", "onoffCaption": "tapping clicking clanking at 0.672-4.112, 5.733-7.916", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_138.wav", "onoffCaption": "burping belching at 0.093-3.05, 3.962-6.121, 7.309-9.468", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_multi_identity_test/syn_140.wav", "onoffCaption": "duck quacking at 1.928-3.928, 5.108-6.926", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_147.wav", "onoffCaption": "burping belching at 2.269-4.428, 5.085-8.042", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_149.wav", "onoffCaption": "gunshot at 0.434-2.434", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_153.wav", "onoffCaption": "cow mooing at 3.209-5.833, 6.681-9.481", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_154.wav", "onoffCaption": "train horn at 3.48-6.48, 7.121-9.68", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_multi_identity_test/syn_163.wav", "onoffCaption": "cow mooing at 1.335-3.959, 
6.377-9.177", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_164.wav", "onoffCaption": "door slamming at 3.391-4.802, 5.918-8.157", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_170.wav", "onoffCaption": "whistling at 0.053-8.853", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_177.wav", "onoffCaption": "door knocking at 0.585-2.712, 4.192-6.756", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_179.wav", "onoffCaption": "gunshot at 2.477-4.477", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_181.wav", "onoffCaption": "door knocking at 2.753-5.317", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_186.wav", "onoffCaption": "sneeze at 2.336-6.489, 7.757-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_188.wav", "onoffCaption": "explosion at 1.933-6.855", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_192.wav", "onoffCaption": "cat meowing at 0.139-1.465, 2.845-4.235", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_195.wav", "onoffCaption": "duck quacking at 3.185-5.003, 5.701-7.701", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_3.wav", "onoffCaption": "burping belching at 0.203-3.16, 3.696-5.855", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_4.wav", "onoffCaption": "cat meowing at 1.562-2.888", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_13.wav", "onoffCaption": "tapping clicking clanking at 0.838-4.278, 
4.839-6.935, 7.732-9.827", "frequencyCaption": "tapping clicking clanking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_14.wav", "onoffCaption": "tapping clicking clanking at 0.51-3.95, 5.245-8.17", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_23.wav", "onoffCaption": "cow mooing at 0.467-3.267, 4.388-7.012", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_24.wav", "onoffCaption": "thump thud at 3.239-5.539, 6.108-8.783", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_30.wav", "onoffCaption": "explosion at 2.75-5.502, 7.44-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_37.wav", "onoffCaption": "tapping clicking clanking at 2.357-5.797, 7.176-9.79", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_39.wav", "onoffCaption": "burping belching at 1.038-3.197, 4.613-7.57", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_41.wav", "onoffCaption": "car horn honking at 2.524-5.485, 6.594-9.278", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_48.wav", "onoffCaption": "train horn at 2.211-7.291", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_52.wav", "onoffCaption": "dog barking at 2.157-4.157, 5.953-7.953", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_55.wav", "onoffCaption": "spraying at 1.616-2.718, 3.653-4.449, 5.396-6.498", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_62.wav", "onoffCaption": "woman laughing at 0.881-5.339, 6.657-9.421", "frequencyCaption": "woman 
laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_65.wav", "onoffCaption": "tapping clicking clanking at 1.976-5.416, 6.573-9.12", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_71.wav", "onoffCaption": "train horn at 2.442-7.522", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_76.wav", "onoffCaption": "door knocking at 0.618-3.182", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_78.wav", "onoffCaption": "door knocking at 0.065-2.192, 3.439-6.003", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_80.wav", "onoffCaption": "car horn honking at 3.533-6.494", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_85.wav", "onoffCaption": "gunshot at 1.931-3.931, 4.716-6.716, 7.891-9.891", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_multi_identity_test/syn_87.wav", "onoffCaption": "thump thud at 1.759-4.059, 6.133-8.808", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_89.wav", "onoffCaption": "door knocking at 0.065-2.192, 3.164-5.728", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_93.wav", "onoffCaption": "whistling at 0.042-8.842", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_94.wav", "onoffCaption": "burping belching at 0.167-2.326, 3.873-6.83", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_103.wav", "onoffCaption": "dog barking at 1.282-3.282, 4.117-6.117, 6.789-8.789", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_104.wav", "onoffCaption": 
"thump thud at 1.988-4.663, 7.028-9.328", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_110.wav", "onoffCaption": "whistling at 1.555-6.149", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_117.wav", "onoffCaption": "tapping clicking clanking at 0.487-3.927", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_119.wav", "onoffCaption": "duck quacking at 2.537-4.355, 5.889-7.889", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_120.wav", "onoffCaption": "dog barking at 0.013-2.013, 3.064-5.064, 5.694-7.694", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_127.wav", "onoffCaption": "duck quacking at 0.78-2.78, 5.24-7.058", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_129.wav", "onoffCaption": "burping belching at 1.965-4.922, 6.696-8.855", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_133.wav", "onoffCaption": "train horn at 3.059-8.139", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_134.wav", "onoffCaption": "spraying at 0.184-0.98, 2.498-3.6, 4.402-5.198", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_142.wav", "onoffCaption": "cow mooing at 2.715-5.339, 6.568-9.368", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_145.wav", "onoffCaption": "cow mooing at 0.071-2.695, 4.586-7.386", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_151.wav", "onoffCaption": "duck quacking at 0.425-2.425, 4.73-6.548", "frequencyCaption": "duck quacking two times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_156.wav", "onoffCaption": "thump thud at 0.071-2.746, 3.838-6.138, 7.435-9.735", "frequencyCaption": "thump thud three times"} +{"filepath": "data/single_event_multi_identity_test/syn_158.wav", "onoffCaption": "burping belching at 0.027-2.186", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_multi_identity_test/syn_161.wav", "onoffCaption": "car horn honking at 0.937-3.898, 5.036-7.72", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_166.wav", "onoffCaption": "burping belching at 0.323-3.28, 4.07-6.229, 7.049-9.208", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_multi_identity_test/syn_168.wav", "onoffCaption": "door slamming at 0.115-1.526, 2.595-4.834, 5.389-7.628", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_multi_identity_test/syn_172.wav", "onoffCaption": "woman laughing at 3.125-5.889", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_175.wav", "onoffCaption": "spraying at 0.007-0.803", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_multi_identity_test/syn_183.wav", "onoffCaption": "woman laughing at 2.259-6.717, 7.786-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_184.wav", "onoffCaption": "door slamming at 3.397-4.808, 6.096-8.335", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_190.wav", "onoffCaption": "explosion at 0.228-5.15, 6.074-8.826", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_197.wav", "onoffCaption": "car horn honking at 3.732-6.416, 7.567-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_199.wav", "onoffCaption": 
"car horn honking at 1.911-4.872", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_200.wav", "onoffCaption": "train horn at 0.413-3.413", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_2.wav", "onoffCaption": "cat meowing at 1.299-2.689", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_5.wav", "onoffCaption": "dog barking at 3.791-5.791, 6.571-8.571", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_12.wav", "onoffCaption": "tapping clicking clanking at 1.245-4.685", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_15.wav", "onoffCaption": "explosion at 3.815-6.567, 7.214-9.546", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_22.wav", "onoffCaption": "sheep goat bleating at 0.26-2.26, 3.592-5.592, 7.325-9.325", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/single_event_multi_identity_test/syn_25.wav", "onoffCaption": "gunshot at 0.166-2.166, 3.749-5.749", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_31.wav", "onoffCaption": "sneeze at 3.917-8.07", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_multi_identity_test/syn_36.wav", "onoffCaption": "sheep goat bleating at 2.86-4.86, 7.119-9.119", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_38.wav", "onoffCaption": "whistling at 2.996-7.59", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_40.wav", "onoffCaption": "woman laughing at 0.024-4.482, 5.882-8.646", "frequencyCaption": "woman laughing two times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_46.wav", "onoffCaption": "tapping clicking clanking at 2.067-5.507", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_47.wav", "onoffCaption": "cow mooing at 0.008-2.808, 3.956-6.58, 7.995-10.0", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_49.wav", "onoffCaption": "dog barking at 3.464-5.464", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_51.wav", "onoffCaption": "whistling at 0.26-9.06", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_53.wav", "onoffCaption": "whistling at 0.748-5.342, 6.45-8.456", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_54.wav", "onoffCaption": "cow mooing at 0.48-3.28, 4.237-6.861", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_63.wav", "onoffCaption": "explosion at 1.214-6.136", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_64.wav", "onoffCaption": "whistling at 0.012-4.606, 5.649-8.052", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_70.wav", "onoffCaption": "dog barking at 4.239-6.239", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_77.wav", "onoffCaption": "train horn at 2.336-7.416", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_79.wav", "onoffCaption": "train horn at 2.15-7.23", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_81.wav", "onoffCaption": "tapping clicking clanking at 3.241-6.681", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_86.wav", "onoffCaption": "gunshot at 0.406-2.406, 4.136-6.136", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_88.wav", "onoffCaption": "car horn honking at 0.051-3.012, 4.062-6.746, 7.319-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_92.wav", "onoffCaption": "door slamming at 1.032-2.443, 4.422-6.661", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_95.wav", "onoffCaption": "woman laughing at 0.147-4.605, 5.939-8.703", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_102.wav", "onoffCaption": "duck quacking at 0.363-2.363, 2.979-4.797", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_105.wav", "onoffCaption": "door slamming at 0.253-1.664", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_111.wav", "onoffCaption": "dog barking at 0.562-2.562, 4.25-6.25", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_116.wav", "onoffCaption": "sheep goat bleating at 2.658-4.658", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_118.wav", "onoffCaption": "sheep goat bleating at 2.634-4.634", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_121.wav", "onoffCaption": "cat meowing at 2.182-3.508", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_123.wav", "onoffCaption": "sheep goat bleating at 2.042-4.042, 5.044-7.044", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_126.wav", "onoffCaption": "burping belching at 
0.139-3.096, 4.403-6.562", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_128.wav", "onoffCaption": "train horn at 1.814-4.814", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_132.wav", "onoffCaption": "duck quacking at 1.582-3.582, 4.673-6.491", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_135.wav", "onoffCaption": "whistling at 1.414-6.008, 7.012-9.463", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_139.wav", "onoffCaption": "spraying at 1.819-2.615, 3.181-4.283", "frequencyCaption": "spraying two times"} +{"filepath": "data/single_event_multi_identity_test/syn_143.wav", "onoffCaption": "door knocking at 0.495-3.059, 4.039-6.166, 7.128-9.692", "frequencyCaption": "door knocking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_144.wav", "onoffCaption": "spraying at 0.584-1.686, 2.49-3.286, 3.892-4.688", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_150.wav", "onoffCaption": "duck quacking at 2.654-4.654", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_157.wav", "onoffCaption": "explosion at 2.478-5.23, 6.261-9.209", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_159.wav", "onoffCaption": "sneeze at 1.342-4.695, 6.662-9.384", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_160.wav", "onoffCaption": "woman laughing at 0.352-3.116", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_167.wav", "onoffCaption": "thump thud at 0.177-2.852, 4.459-6.759", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_169.wav", 
"onoffCaption": "gunshot at 0.088-2.088", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_173.wav", "onoffCaption": "explosion at 0.195-5.117", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_174.wav", "onoffCaption": "duck quacking at 0.089-2.089, 4.166-5.984", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_176.wav", "onoffCaption": "gunshot at 3.54-5.54, 7.238-9.238", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_182.wav", "onoffCaption": "car horn honking at 0.14-2.824", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_185.wav", "onoffCaption": "dog barking at 3.434-5.434, 6.333-8.333", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_189.wav", "onoffCaption": "burping belching at 0.432-3.389, 4.403-6.562", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_191.wav", "onoffCaption": "tapping clicking clanking at 2.168-5.608", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_193.wav", "onoffCaption": "dog barking at 3.219-5.219", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_196.wav", "onoffCaption": "duck quacking at 1.8-3.618", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_198.wav", "onoffCaption": "sheep goat bleating at 0.073-2.073", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_7.wav", "onoffCaption": "door slamming at 2.809-4.22, 6.263-8.502", "frequencyCaption": "door slamming two times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_9.wav", "onoffCaption": "sneeze at 0.07-4.223, 4.927-7.216", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_10.wav", "onoffCaption": "door slamming at 2.191-3.602", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_17.wav", "onoffCaption": "gunshot at 0.033-2.033", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_19.wav", "onoffCaption": "thump thud at 2.571-4.871, 6.726-9.401", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_20.wav", "onoffCaption": "dog barking at 2.557-4.557, 5.093-7.093, 7.963-9.963", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_27.wav", "onoffCaption": "whistling at 2.141-6.735, 7.84-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_29.wav", "onoffCaption": "woman laughing at 3.051-7.509", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_33.wav", "onoffCaption": "dog barking at 1.397-3.397, 5.014-7.014", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_34.wav", "onoffCaption": "dog barking at 0.691-2.691, 4.339-6.339, 7.597-9.597", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_42.wav", "onoffCaption": "door slamming at 0.111-1.522, 2.919-5.158", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_45.wav", "onoffCaption": "woman laughing at 0.913-5.371", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_56.wav", "onoffCaption": "cat meowing at 3.25-4.576", "frequencyCaption": "cat meowing one times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_58.wav", "onoffCaption": "spraying at 2.012-2.808", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_multi_identity_test/syn_61.wav", "onoffCaption": "sheep goat bleating at 0.44-2.44, 3.141-5.141", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_66.wav", "onoffCaption": "duck quacking at 0.199-2.199", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_68.wav", "onoffCaption": "door slamming at 0.555-1.966", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_72.wav", "onoffCaption": "duck quacking at 3.008-5.008", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_75.wav", "onoffCaption": "door slamming at 2.007-4.246, 5.403-6.814, 7.324-9.563", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_multi_identity_test/syn_83.wav", "onoffCaption": "spraying at 0.42-1.522, 2.179-2.975, 4.216-5.012", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_84.wav", "onoffCaption": "burping belching at 1.998-4.955", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_multi_identity_test/syn_90.wav", "onoffCaption": "whistling at 0.292-9.092", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_97.wav", "onoffCaption": "dog barking at 1.995-3.995", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_99.wav", "onoffCaption": "gunshot at 1.846-3.846, 5.067-7.067", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_100.wav", "onoffCaption": "gunshot at 2.965-4.965, 5.836-7.836", "frequencyCaption": "gunshot two times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_107.wav", "onoffCaption": "cat meowing at 0.382-1.772, 4.195-5.521, 7.481-8.871", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_109.wav", "onoffCaption": "cat meowing at 1.827-3.217, 5.396-6.722, 8.387-9.777", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_113.wav", "onoffCaption": "door slamming at 1.281-3.52, 4.645-6.056", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_114.wav", "onoffCaption": "explosion at 2.267-7.189", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_124.wav", "onoffCaption": "woman laughing at 0.666-5.124, 7.521-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_130.wav", "onoffCaption": "gunshot at 2.672-4.672", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_137.wav", "onoffCaption": "train horn at 0.682-3.682, 4.465-6.698, 7.809-10.0", "frequencyCaption": "train horn three times"} +{"filepath": "data/single_event_multi_identity_test/syn_141.wav", "onoffCaption": "woman laughing at 0.105-2.869", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_146.wav", "onoffCaption": "sneeze at 1.102-4.455", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_multi_identity_test/syn_148.wav", "onoffCaption": "dog barking at 0.061-2.061, 3.265-5.265, 6.197-8.197", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_152.wav", "onoffCaption": "dog barking at 0.127-2.127", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_155.wav", "onoffCaption": "spraying at 3.549-4.651", "frequencyCaption": "spraying one 
times"} +{"filepath": "data/single_event_multi_identity_test/syn_162.wav", "onoffCaption": "explosion at 0.391-3.143, 3.673-5.706", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_165.wav", "onoffCaption": "whistling at 3.448-8.042", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_171.wav", "onoffCaption": "duck quacking at 2.752-4.752", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_178.wav", "onoffCaption": "tapping clicking clanking at 1.713-5.153, 6.827-9.222", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_180.wav", "onoffCaption": "cow mooing at 0.181-2.981", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_187.wav", "onoffCaption": "explosion at 2.424-5.176", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_194.wav", "onoffCaption": "gunshot at 2.339-4.339", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_11.wav", "onoffCaption": "door knocking at 3.808-5.935, 6.708-8.835", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_16.wav", "onoffCaption": "burping belching at 2.569-5.526", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_18.wav", "onoffCaption": "burping belching at 2.907-5.066", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_21.wav", "onoffCaption": "burping belching at 0.64-2.799", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_26.wav", "onoffCaption": "spraying at 0.127-0.923", "frequencyCaption": "spraying one times"} +{"filepath": 
"data/single_event_single_identity_test/syn_28.wav", "onoffCaption": "train horn at 3.589-8.669", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_32.wav", "onoffCaption": "gunshot at 1.173-3.173, 3.96-5.96, 6.617-8.617", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_single_identity_test/syn_35.wav", "onoffCaption": "woman laughing at 0.948-5.406, 7.602-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_43.wav", "onoffCaption": "door slamming at 3.246-4.657, 6.312-7.723", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_44.wav", "onoffCaption": "dog barking at 1.211-3.211, 4.206-6.206, 6.728-8.728", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_50.wav", "onoffCaption": "door knocking at 0.488-3.052, 5.244-7.808", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_57.wav", "onoffCaption": "train horn at 0.177-5.257", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_59.wav", "onoffCaption": "gunshot at 0.24-2.24, 3.277-5.277, 7.394-9.394", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_single_identity_test/syn_60.wav", "onoffCaption": "cow mooing at 1.847-4.471, 6.336-8.96", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_67.wav", "onoffCaption": "cow mooing at 2.819-5.443, 6.06-8.684", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_69.wav", "onoffCaption": "burping belching at 1.971-4.928, 6.428-9.385", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_73.wav", "onoffCaption": "dog barking at 0.094-2.094, 
3.294-5.294, 6.771-8.771", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_74.wav", "onoffCaption": "cow mooing at 2.351-4.975, 5.558-8.182", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_82.wav", "onoffCaption": "woman laughing at 2.876-7.334", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_85.wav", "onoffCaption": "dog barking at 2.785-4.785", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_91.wav", "onoffCaption": "tapping clicking clanking at 1.295-4.735", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_96.wav", "onoffCaption": "door knocking at 0.452-2.579", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_98.wav", "onoffCaption": "door slamming at 2.339-4.578", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_single_identity_test/syn_101.wav", "onoffCaption": "spraying at 0.013-1.115, 1.805-2.907, 5.09-6.192", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_106.wav", "onoffCaption": "spraying at 2.518-3.314", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_108.wav", "onoffCaption": "gunshot at 3.946-5.946, 6.959-8.959", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_112.wav", "onoffCaption": "burping belching at 3.346-6.303, 7.74-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_115.wav", "onoffCaption": "explosion at 0.084-5.006", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_122.wav", "onoffCaption": "tapping 
clicking clanking at 0.407-3.847", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_125.wav", "onoffCaption": "explosion at 0.371-3.123, 5.335-8.087", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_single_identity_test/syn_131.wav", "onoffCaption": "door slamming at 0.346-1.757, 2.569-3.98, 5.839-7.25", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_single_identity_test/syn_136.wav", "onoffCaption": "car horn honking at 0.066-2.75", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_single_identity_test/syn_138.wav", "onoffCaption": "explosion at 2.129-4.881", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_140.wav", "onoffCaption": "train horn at 1.872-6.952, 7.829-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_single_identity_test/syn_147.wav", "onoffCaption": "explosion at 0.38-3.132, 4.352-7.104, 7.977-10.0", "frequencyCaption": "explosion three times"} +{"filepath": "data/single_event_single_identity_test/syn_149.wav", "onoffCaption": "spraying at 0.031-1.133, 1.86-2.962, 3.961-5.063", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_153.wav", "onoffCaption": "dog barking at 0.435-2.435, 4.016-6.016", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_154.wav", "onoffCaption": "explosion at 1.704-6.626", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_163.wav", "onoffCaption": "sneeze at 2.736-6.889", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_164.wav", "onoffCaption": "sneeze at 2.624-6.777", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_170.wav", 
"onoffCaption": "burping belching at 3.451-6.408", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_175.wav", "onoffCaption": "explosion at 1.902-6.824", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_177.wav", "onoffCaption": "door knocking at 3.219-5.346, 7.058-9.185", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_179.wav", "onoffCaption": "explosion at 2.521-5.273", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_181.wav", "onoffCaption": "train horn at 0.212-3.212", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_186.wav", "onoffCaption": "sheep goat bleating at 0.651-2.651, 3.512-5.512", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_188.wav", "onoffCaption": "whistling at 0.87-9.67", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_190.wav", "onoffCaption": "woman laughing at 0.484-3.248, 4.163-6.927", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_192.wav", "onoffCaption": "door knocking at 1.863-3.99, 5.187-7.314", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_195.wav", "onoffCaption": "cow mooing at 0.958-3.582, 5.272-7.896", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_13.wav", "onoffCaption": "tapping clicking clanking at 3.109-6.549", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_14.wav", "onoffCaption": "woman laughing at 0.127-2.891", "frequencyCaption": "woman laughing one times"} +{"filepath": 
"data/single_event_single_identity_test/syn_23.wav", "onoffCaption": "whistling at 0.074-8.874", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_24.wav", "onoffCaption": "dog barking at 0.91-2.91", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_30.wav", "onoffCaption": "whistling at 0.978-5.572", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_37.wav", "onoffCaption": "whistling at 2.107-6.701", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_39.wav", "onoffCaption": "whistling at 0.165-4.759, 5.362-9.956", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_single_identity_test/syn_41.wav", "onoffCaption": "sheep goat bleating at 0.023-2.023, 3.507-5.507", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_46.wav", "onoffCaption": "car horn honking at 1.978-4.939, 5.578-8.539", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_single_identity_test/syn_48.wav", "onoffCaption": "thump thud at 1.392-4.067, 5.357-8.032", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_single_identity_test/syn_52.wav", "onoffCaption": "dog barking at 0.25-2.25, 3.486-5.486, 6.439-8.439", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_55.wav", "onoffCaption": "gunshot at 2.722-4.722, 6.936-8.936", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_62.wav", "onoffCaption": "burping belching at 0.459-3.416, 4.188-7.145", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_65.wav", "onoffCaption": "sheep goat bleating at 0.55-2.55, 4.457-6.457", "frequencyCaption": "sheep goat 
bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_71.wav", "onoffCaption": "tapping clicking clanking at 3.396-6.836", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_76.wav", "onoffCaption": "sheep goat bleating at 0.056-2.056, 3.47-5.47", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_78.wav", "onoffCaption": "train horn at 0.083-5.163, 6.748-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_single_identity_test/syn_80.wav", "onoffCaption": "whistling at 1.269-5.863, 6.498-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_single_identity_test/syn_87.wav", "onoffCaption": "car horn honking at 1.885-4.569, 5.797-8.481", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_single_identity_test/syn_89.wav", "onoffCaption": "train horn at 0.507-3.507", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_93.wav", "onoffCaption": "dog barking at 3.063-5.063, 6.381-8.381", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_94.wav", "onoffCaption": "duck quacking at 2.332-4.15", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_103.wav", "onoffCaption": "gunshot at 1.066-3.066", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_104.wav", "onoffCaption": "cat meowing at 0.488-1.878, 4.297-5.687, 6.263-7.653", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_110.wav", "onoffCaption": "whistling at 0.407-5.001", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_117.wav", "onoffCaption": "cat meowing at 
1.091-2.481, 3.509-4.899", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_119.wav", "onoffCaption": "car horn honking at 0.202-2.886", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_single_identity_test/syn_120.wav", "onoffCaption": "door knocking at 2.729-5.293", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_127.wav", "onoffCaption": "sheep goat bleating at 2.262-4.262, 5.801-7.801", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_129.wav", "onoffCaption": "sheep goat bleating at 0.602-2.602, 4.548-6.548, 7.151-9.151", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/single_event_single_identity_test/syn_133.wav", "onoffCaption": "gunshot at 1.679-3.679, 5.98-7.98", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_134.wav", "onoffCaption": "sheep goat bleating at 0.091-2.091, 3.322-5.322", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_142.wav", "onoffCaption": "dog barking at 0.622-2.622, 5.087-7.087", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_145.wav", "onoffCaption": "train horn at 2.269-5.269", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_151.wav", "onoffCaption": "burping belching at 0.193-2.352", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_156.wav", "onoffCaption": "cow mooing at 1.573-4.373", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_single_identity_test/syn_158.wav", "onoffCaption": "door knocking at 1.174-3.301", "frequencyCaption": "door knocking one times"} +{"filepath": 
"data/single_event_single_identity_test/syn_161.wav", "onoffCaption": "spraying at 0.159-1.261, 2.033-3.135, 4.44-5.542", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_166.wav", "onoffCaption": "tapping clicking clanking at 1.641-5.081, 6.146-9.586", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_168.wav", "onoffCaption": "explosion at 3.277-8.199", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_172.wav", "onoffCaption": "gunshot at 1.58-3.58", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_183.wav", "onoffCaption": "duck quacking at 0.511-2.511", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_184.wav", "onoffCaption": "spraying at 0.044-0.84", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_197.wav", "onoffCaption": "sheep goat bleating at 2.317-4.317, 6.052-8.052", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_199.wav", "onoffCaption": "dog barking at 3.728-5.728, 6.93-8.93", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_200.wav", "onoffCaption": "thump thud at 1.717-4.017, 5.949-8.249", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_single_identity_test/syn_12.wav", "onoffCaption": "dog barking at 2.048-4.048", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_15.wav", "onoffCaption": "dog barking at 0.046-2.046, 4.09-6.09", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_22.wav", "onoffCaption": "whistling at 2.136-6.73", "frequencyCaption": "whistling one times"} 
+{"filepath": "data/single_event_single_identity_test/syn_25.wav", "onoffCaption": "explosion at 1.944-4.696, 6.227-8.979", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_single_identity_test/syn_31.wav", "onoffCaption": "gunshot at 0.269-2.269, 3.559-5.559, 6.243-8.243", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_single_identity_test/syn_36.wav", "onoffCaption": "dog barking at 0.991-2.991", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_38.wav", "onoffCaption": "dog barking at 3.368-5.368, 6.043-8.043", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_40.wav", "onoffCaption": "sheep goat bleating at 0.185-2.185", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_single_identity_test/syn_47.wav", "onoffCaption": "door slamming at 0.106-2.345, 2.885-5.124, 5.997-8.236", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_single_identity_test/syn_49.wav", "onoffCaption": "duck quacking at 0.37-2.37", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_51.wav", "onoffCaption": "cat meowing at 0.245-1.571, 3.125-4.451, 5.016-6.342", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_53.wav", "onoffCaption": "cat meowing at 0.277-1.603", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_54.wav", "onoffCaption": "gunshot at 0.17-2.17, 4.644-6.644", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_63.wav", "onoffCaption": "door slamming at 1.788-4.027", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_single_identity_test/syn_64.wav", "onoffCaption": "sheep goat bleating at 1.736-3.736, 4.735-6.735, 
7.944-9.944", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/single_event_single_identity_test/syn_70.wav", "onoffCaption": "sneeze at 0.231-4.384, 5.433-9.586", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_single_identity_test/syn_77.wav", "onoffCaption": "dog barking at 3.416-5.416, 5.973-7.973", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_79.wav", "onoffCaption": "tapping clicking clanking at 0.931-4.371", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_81.wav", "onoffCaption": "spraying at 2.201-2.997", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_86.wav", "onoffCaption": "door knocking at 0.221-2.785", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_88.wav", "onoffCaption": "cow mooing at 2.087-4.887, 6.12-8.92", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_92.wav", "onoffCaption": "train horn at 0.429-5.509, 6.408-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_single_identity_test/syn_95.wav", "onoffCaption": "thump thud at 2.906-5.581", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_single_identity_test/syn_102.wav", "onoffCaption": "thump thud at 2.581-4.881, 6.222-8.522", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_single_identity_test/syn_105.wav", "onoffCaption": "door slamming at 0.833-3.072, 4.449-6.688", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_111.wav", "onoffCaption": "door knocking at 1.124-3.688, 6.152-8.716", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_116.wav", "onoffCaption": 
"gunshot at 0.875-2.875, 4.735-6.735", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_118.wav", "onoffCaption": "cat meowing at 0.483-1.809", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_121.wav", "onoffCaption": "door knocking at 1.619-4.183", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_126.wav", "onoffCaption": "sheep goat bleating at 3.885-5.885, 7.836-9.836", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_128.wav", "onoffCaption": "tapping clicking clanking at 2.571-6.011", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_132.wav", "onoffCaption": "cat meowing at 2.927-4.317, 5.007-6.397, 6.922-8.312", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_135.wav", "onoffCaption": "door slamming at 3.195-5.434, 6.893-9.132", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_139.wav", "onoffCaption": "duck quacking at 2.765-4.583, 6.906-8.724", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_single_identity_test/syn_143.wav", "onoffCaption": "cat meowing at 2.231-3.621", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_144.wav", "onoffCaption": "cow mooing at 0.562-3.186, 4.31-6.934", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_150.wav", "onoffCaption": "dog barking at 0.436-2.436", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_157.wav", "onoffCaption": "sneeze at 3.222-7.375", "frequencyCaption": "sneeze one times"} +{"filepath": 
"data/single_event_single_identity_test/syn_159.wav", "onoffCaption": "sneeze at 2.417-6.57", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_160.wav", "onoffCaption": "tapping clicking clanking at 0.262-3.702, 5.703-9.143", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_167.wav", "onoffCaption": "cat meowing at 0.205-1.595, 2.703-4.093", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_169.wav", "onoffCaption": "train horn at 3.293-8.373", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_173.wav", "onoffCaption": "thump thud at 3.392-5.692", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_single_identity_test/syn_174.wav", "onoffCaption": "cat meowing at 2.478-3.804, 4.701-6.027, 7.098-8.424", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_182.wav", "onoffCaption": "door knocking at 2.598-4.725, 5.428-7.555", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_185.wav", "onoffCaption": "gunshot at 3.329-5.329, 6.811-8.811", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_189.wav", "onoffCaption": "door knocking at 2.566-4.693", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_191.wav", "onoffCaption": "cow mooing at 2.094-4.894", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_single_identity_test/syn_196.wav", "onoffCaption": "door knocking at 0.398-2.525, 3.558-5.685, 6.802-8.929", "frequencyCaption": "door knocking three times"} +{"filepath": "data/single_event_single_identity_test/syn_198.wav", "onoffCaption": "explosion at 3.575-6.327", "frequencyCaption": 
"explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_10.wav", "onoffCaption": "duck quacking at 0.107-1.925", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_17.wav", "onoffCaption": "burping belching at 0.839-2.998, 4.442-6.601", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_19.wav", "onoffCaption": "cat meowing at 2.357-3.683, 5.023-6.349", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_20.wav", "onoffCaption": "tapping clicking clanking at 2.446-5.886, 7.886-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_27.wav", "onoffCaption": "spraying at 0.301-1.403, 2.423-3.525, 4.539-5.641", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_29.wav", "onoffCaption": "tapping clicking clanking at 0.69-4.13, 5.59-9.03", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_33.wav", "onoffCaption": "train horn at 2.016-7.096", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_34.wav", "onoffCaption": "burping belching at 3.636-5.795, 7.726-9.885", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_42.wav", "onoffCaption": "dog barking at 2.092-4.092", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_45.wav", "onoffCaption": "cat meowing at 2.902-4.228", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_56.wav", "onoffCaption": "train horn at 0.125-3.125", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_58.wav", 
"onoffCaption": "duck quacking at 0.179-2.179, 4.629-6.629", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_single_identity_test/syn_61.wav", "onoffCaption": "spraying at 2.685-3.787", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_66.wav", "onoffCaption": "cat meowing at 0.1-1.426, 2.691-4.017", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_68.wav", "onoffCaption": "duck quacking at 0.259-2.077", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_72.wav", "onoffCaption": "sneeze at 0.32-3.673, 4.809-8.162", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_single_identity_test/syn_75.wav", "onoffCaption": "door slamming at 3.048-4.459, 6.382-7.793", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_83.wav", "onoffCaption": "dog barking at 1.005-3.005, 5.367-7.367", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_84.wav", "onoffCaption": "woman laughing at 0.34-4.798, 6.685-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_90.wav", "onoffCaption": "dog barking at 0.965-2.965, 3.842-5.842, 7.713-9.713", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_97.wav", "onoffCaption": "gunshot at 1.924-3.924", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_99.wav", "onoffCaption": "door knocking at 3.167-5.294, 6.941-9.068", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_100.wav", "onoffCaption": "burping belching at 2.361-4.52, 5.23-7.389", "frequencyCaption": "burping belching two times"} +{"filepath": 
"data/single_event_single_identity_test/syn_107.wav", "onoffCaption": "woman laughing at 2.849-5.613, 6.83-9.594", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_109.wav", "onoffCaption": "cat meowing at 0.321-1.647, 2.314-3.64, 4.695-6.021", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_113.wav", "onoffCaption": "duck quacking at 1.194-3.012", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_114.wav", "onoffCaption": "duck quacking at 0.737-2.737, 3.972-5.972", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_single_identity_test/syn_123.wav", "onoffCaption": "woman laughing at 3.064-7.522", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_124.wav", "onoffCaption": "door slamming at 0.317-2.556, 3.904-6.143", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_130.wav", "onoffCaption": "duck quacking at 1.714-3.532, 4.074-5.892, 6.517-8.335", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/single_event_single_identity_test/syn_137.wav", "onoffCaption": "dog barking at 0.126-2.126, 2.714-4.714", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_141.wav", "onoffCaption": "woman laughing at 3.098-7.556", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_146.wav", "onoffCaption": "dog barking at 0.087-2.087, 4.127-6.127", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_148.wav", "onoffCaption": "thump thud at 2.712-5.387", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_single_identity_test/syn_152.wav", "onoffCaption": "sheep goat bleating at 
1.645-3.645, 5.29-7.29", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_155.wav", "onoffCaption": "woman laughing at 0.079-4.537, 5.539-9.997", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_162.wav", "onoffCaption": "door knocking at 0.465-2.592, 4.247-6.374", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_165.wav", "onoffCaption": "door slamming at 0.439-2.678", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_single_identity_test/syn_171.wav", "onoffCaption": "woman laughing at 0.467-3.231", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_176.wav", "onoffCaption": "burping belching at 0.432-2.591, 5.061-7.22", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_178.wav", "onoffCaption": "sheep goat bleating at 4.036-6.036, 6.704-8.704", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_180.wav", "onoffCaption": "cow mooing at 0.178-2.802", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_single_identity_test/syn_187.wav", "onoffCaption": "gunshot at 0.523-2.523, 3.427-5.427", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_193.wav", "onoffCaption": "tapping clicking clanking at 1.074-4.514, 6.811-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_194.wav", "onoffCaption": "train horn at 1.729-6.809", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_1.wav", "onoffCaption": "cat meowing at 0.393-1.783, 3.975-5.365", "frequencyCaption": "cat meowing two times"} +{"filepath": 
"data/single_event_single_identity_test/syn_2.wav", "onoffCaption": "cat meowing at 2.278-3.668, 5.204-6.594", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_3.wav", "onoffCaption": "burping belching at 0.042-2.999, 4.324-7.281, 7.849-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_single_identity_test/syn_4.wav", "onoffCaption": "car horn honking at 0.38-3.341, 4.605-7.566", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_single_identity_test/syn_5.wav", "onoffCaption": "dog barking at 0.088-2.088", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_6.wav", "onoffCaption": "explosion at 2.796-7.718", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_7.wav", "onoffCaption": "dog barking at 2.565-4.565", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_8.wav", "onoffCaption": "burping belching at 0.45-3.407", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_9.wav", "onoffCaption": "burping belching at 0.775-3.732", "frequencyCaption": "burping belching one times"} diff --git a/data/meta_data/train.json b/data/meta_data/train.json new file mode 100644 index 0000000000000000000000000000000000000000..c14872a174e0c402586c8572b0104c8929c5df1e --- /dev/null +++ b/data/meta_data/train.json @@ -0,0 +1,5000 @@ +{"filepath": "data/multi_event_train/syn_21.wav", "onoffCaption": "door slamming at 0.45-1.991, 3.019-5.8, 6.623-8.102", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_74.wav", "onoffCaption": "train horn at 2.817-5.697, 6.941-9.151", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_91.wav", "onoffCaption": "door knocking at 1.155-5.305", 
"frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_106.wav", "onoffCaption": "duck quacking at 0.309-2.309, 3.42-5.42, 6.714-8.714 and cow mooing at 2.038-5.007, 6.542-8.58", "frequencyCaption": "duck quacking three times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_153.wav", "onoffCaption": "cow mooing at 1.592-4.602, 6.719-9.729 and explosion at 3.329-6.882", "frequencyCaption": "cow mooing two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_320.wav", "onoffCaption": "train horn at 3.325-5.48, 6.561-9.201", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_375.wav", "onoffCaption": "whistling at 0.48-4.964, 7.059-9.934", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_390.wav", "onoffCaption": "dog barking at 3.038-5.038, 5.585-7.585", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_505.wav", "onoffCaption": "thump thud at 1.59-6.04, 6.614-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_550.wav", "onoffCaption": "explosion at 0.707-5.707 and woman laughing at 3.44-5.677, 6.23-8.467", "frequencyCaption": "explosion one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_638.wav", "onoffCaption": "door knocking at 1.973-5.029, 6.285-9.132", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_688.wav", "onoffCaption": "burping belching at 0.159-4.028, 6.032-8.977 and door knocking at 0.525-2.902, 4.436-6.813", "frequencyCaption": "burping belching two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_723.wav", "onoffCaption": "burping belching at 0.211-2.336, 2.942-5.466, 6.496-8.59 and dog barking at 0.78-2.78", "frequencyCaption": "burping belching three times and dog barking one times"} +{"filepath": 
"data/multi_event_train/syn_776.wav", "onoffCaption": "woman laughing at 2.782-5.368, 6.831-8.912", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_793.wav", "onoffCaption": "explosion at 2.941-5.813 and spraying at 4.494-5.575, 6.097-7.161", "frequencyCaption": "explosion one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_801.wav", "onoffCaption": "whistling at 2.268-8.349 and door slamming at 2.584-4.78, 5.618-7.814", "frequencyCaption": "whistling one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_854.wav", "onoffCaption": "woman laughing at 0.638-3.925, 5.136-7.364 and dog barking at 1.269-3.269", "frequencyCaption": "woman laughing two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1035.wav", "onoffCaption": "tapping clicking clanking at 0.961-4.401, 6.37-9.81 and sheep goat bleating at 1.484-4.78 and door knocking at 1.608-4.664, 5.492-7.703", "frequencyCaption": "tapping clicking clanking two times and sheep goat bleating one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1060.wav", "onoffCaption": "spraying at 0.669-1.238, 2.488-3.057, 4.351-4.92 and whistling at 0.918-3.793, 5.909-8.784", "frequencyCaption": "spraying three times and whistling two times"} +{"filepath": "data/multi_event_train/syn_1085.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1209.wav", "onoffCaption": "spraying at 1.431-2.181, 2.756-3.34, 4.475-5.722", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1213.wav", "onoffCaption": "gunshot at 3.324-5.324, 6.563-8.563", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1246.wav", "onoffCaption": "cow mooing at 1.954-6.383, 7.52-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": 
"data/multi_event_train/syn_1308.wav", "onoffCaption": "duck quacking at 0.295-2.295, 3.085-5.085, 5.734-7.734 and cat meowing at 3.868-4.879, 5.645-7.2 and cow mooing at 5.281-8.579", "frequencyCaption": "duck quacking three times and cat meowing two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1312.wav", "onoffCaption": "dog barking at 3.089-5.089, 6.258-8.258", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1436.wav", "onoffCaption": "train horn at 2.446-5.806, 7.457-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1463.wav", "onoffCaption": "sneeze at 0.309-2.016 and car horn honking at 1.819-4.637 and cow mooing at 7.987-10.0", "frequencyCaption": "sneeze one times and car horn honking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1486.wav", "onoffCaption": "sheep goat bleating at 0.657-4.297, 4.952-8.592 and door slamming at 1.974-4.403, 5.259-7.688", "frequencyCaption": "sheep goat bleating two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1578.wav", "onoffCaption": "dog barking at 0.121-2.121, 3.824-5.824, 7.767-9.767", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_1610.wav", "onoffCaption": "tapping clicking clanking at 1.851-5.291, 7.569-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1645.wav", "onoffCaption": "door knocking at 0.645-2.772, 3.875-6.782, 7.405-9.692", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1744.wav", "onoffCaption": "spraying at 0.033-1.519, 2.5-3.986, 4.812-6.298 and burping belching at 0.275-7.443", "frequencyCaption": "spraying three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1829.wav", "onoffCaption": "duck quacking at 0.235-2.235, 4.037-6.037 and burping belching at 
1.845-5.024, 6.206-8.841", "frequencyCaption": "duck quacking two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1899.wav", "onoffCaption": "woman laughing at 2.777-6.165, 7.557-10.0 and gunshot at 2.778-4.778", "frequencyCaption": "woman laughing two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1932.wav", "onoffCaption": "sheep goat bleating at 3.113-5.113, 5.767-7.767 and car horn honking at 3.453-5.453, 7.291-9.291 and spraying at 6.748-7.375", "frequencyCaption": "sheep goat bleating two times and car horn honking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1967.wav", "onoffCaption": "train horn at 0.91-4.39 and tapping clicking clanking at 1.016-4.456, 6.672-10.0", "frequencyCaption": "train horn one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1982.wav", "onoffCaption": "tapping clicking clanking at 2.58-6.02, 7.81-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3002.wav", "onoffCaption": "gunshot at 0.025-2.025, 3.214-5.214, 6.486-8.486", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3057.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3119.wav", "onoffCaption": "sheep goat bleating at 0.524-2.524, 3.378-5.378, 6.204-8.892", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3224.wav", "onoffCaption": "burping belching at 0.304-2.719 and woman laughing at 6.258-10.0", "frequencyCaption": "burping belching one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3271.wav", "onoffCaption": "door knocking at 2.047-4.422", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3294.wav", "onoffCaption": "cat meowing at 
1.674-5.019 and spraying at 2.493-4.188", "frequencyCaption": "cat meowing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3395.wav", "onoffCaption": "whistling at 0.204-5.379, 7.724-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3401.wav", "onoffCaption": "sneeze at 0.283-4.783", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3454.wav", "onoffCaption": "cow mooing at 0.34-3.309", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3627.wav", "onoffCaption": "sneeze at 2.845-4.091", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3672.wav", "onoffCaption": "duck quacking at 2.203-4.203, 5.361-7.361", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3697.wav", "onoffCaption": "cow mooing at 1.882-5.18", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3769.wav", "onoffCaption": "dog barking at 2.579-4.579", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3905.wav", "onoffCaption": "door knocking at 0.141-2.362, 2.877-5.098, 5.687-7.908", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_3950.wav", "onoffCaption": "whistling at 2.603-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_149.wav", "onoffCaption": "door knocking at 0.413-2.54, 3.538-5.665, 6.803-8.93 and burping belching at 6.659-9.282", "frequencyCaption": "door knocking three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_221.wav", "onoffCaption": "cow mooing at 2.969-6.267, 7.423-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_250.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": 
"data/multi_event_train/syn_274.wav", "onoffCaption": "tapping clicking clanking at 0.404-3.844, 5.186-8.626", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_291.wav", "onoffCaption": "dog barking at 1.244-3.244, 4.164-6.164 and door knocking at 3.598-6.168", "frequencyCaption": "dog barking two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_404.wav", "onoffCaption": "gunshot at 0.415-2.545, 4.002-6.132", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_451.wav", "onoffCaption": "car horn honking at 0.154-3.067, 3.794-6.707, 7.909-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_606.wav", "onoffCaption": "cow mooing at 2.907-7.887", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_622.wav", "onoffCaption": "thump thud at 3.172-7.622", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_677.wav", "onoffCaption": "car horn honking at 0.664-3.129, 4.357-7.014", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_692.wav", "onoffCaption": "door slamming at 0.005-1.396 and burping belching at 0.843-4.349, 4.994-7.088 and cat meowing at 6.136-7.136, 8.336-9.336", "frequencyCaption": "door slamming one times and burping belching two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_739.wav", "onoffCaption": "car horn honking at 1.773-4.12, 6.106-8.453 and gunshot at 2.478-4.478", "frequencyCaption": "car horn honking two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_789.wav", "onoffCaption": "train horn at 0.264-3.064, 3.64-6.44 and door slamming at 4.141-6.361, 7.801-10.0", "frequencyCaption": "train horn two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_900.wav", "onoffCaption": "door slamming at 0.098-1.098, 
1.787-3.787, 4.856-7.735", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_955.wav", "onoffCaption": "thump thud at 2.224-5.891, 7.389-9.889", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_994.wav", "onoffCaption": "sneeze at 0.59-2.297", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_1134.wav", "onoffCaption": "cow mooing at 3.267-6.236", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1145.wav", "onoffCaption": "train horn at 3.352-6.832", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1161.wav", "onoffCaption": "cow mooing at 2.373-5.671, 7.577-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1184.wav", "onoffCaption": "burping belching at 2.861-8.462", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1336.wav", "onoffCaption": "cow mooing at 4.099-7.109, 7.72-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1347.wav", "onoffCaption": "woman laughing at 1.672-3.955", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1408.wav", "onoffCaption": "spraying at 0.161-0.736, 2.006-4.59, 6.215-7.296 and burping belching at 1.503-5.503 and sheep goat bleating at 6.745-8.745", "frequencyCaption": "spraying three times and burping belching one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1479.wav", "onoffCaption": "car horn honking at 2.125-5.779, 7.871-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1513.wav", "onoffCaption": "door knocking at 3.332-5.635, 6.643-9.518", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1537.wav", "onoffCaption": "explosion at 1.773-4.034, 5.15-7.411", 
"frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1562.wav", "onoffCaption": "whistling at 3.153-8.328", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1587.wav", "onoffCaption": "cat meowing at 1.159-2.743, 4.625-6.753", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1711.wav", "onoffCaption": "gunshot at 0.349-2.349, 3.25-5.25 and whistling at 7.288-10.0", "frequencyCaption": "gunshot two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1760.wav", "onoffCaption": "explosion at 2.27-5.138, 5.861-8.729", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1785.wav", "onoffCaption": "train horn at 1.794-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1833.wav", "onoffCaption": "thump thud at 0.115-2.454, 3.071-5.41", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1866.wav", "onoffCaption": "door knocking at 3.895-6.625", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1883.wav", "onoffCaption": "door knocking at 0.406-3.253 and woman laughing at 0.635-7.369 and explosion at 1.903-6.903", "frequencyCaption": "door knocking one times and woman laughing one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1928.wav", "onoffCaption": "woman laughing at 0.004-2.372, 3.672-6.653", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1998.wav", "onoffCaption": "cat meowing at 0.728-2.283, 3.385-4.94, 5.621-7.176", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_3018.wav", "onoffCaption": "burping belching at 2.502-4.625, 6.121-8.244", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3103.wav", "onoffCaption": "thump thud at 1.211-4.878, 
7.154-9.492", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3156.wav", "onoffCaption": "cat meowing at 1.931-3.071, 4.724-5.864 and door knocking at 2.857-7.559", "frequencyCaption": "cat meowing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3325.wav", "onoffCaption": "thump thud at 2.22-4.72 and dog barking at 3.589-5.589, 7.361-9.361", "frequencyCaption": "thump thud one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3354.wav", "onoffCaption": "gunshot at 0.26-2.26, 4.679-6.679", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3370.wav", "onoffCaption": "gunshot at 1.812-3.812", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3500.wav", "onoffCaption": "woman laughing at 0.391-3.091, 4.456-7.156", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3555.wav", "onoffCaption": "cow mooing at 1.331-4.3, 5.203-8.172", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3594.wav", "onoffCaption": "train horn at 0.651-4.051, 4.742-8.142", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3619.wav", "onoffCaption": "gunshot at 0.367-2.367", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3668.wav", "onoffCaption": "whistling at 2.959-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3702.wav", "onoffCaption": "gunshot at 1.811-3.811 and duck quacking at 7.641-9.641", "frequencyCaption": "gunshot one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3726.wav", "onoffCaption": "woman laughing at 1.491-3.728 and whistling at 2.496-6.98", "frequencyCaption": "woman laughing one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3773.wav", "onoffCaption": "dog barking at 
1.003-3.003 and door knocking at 5.52-9.288", "frequencyCaption": "dog barking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3796.wav", "onoffCaption": "cat meowing at 0.365-1.781, 2.545-3.82", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3804.wav", "onoffCaption": "explosion at 3.305-8.305 and spraying at 6.953-7.557", "frequencyCaption": "explosion one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3851.wav", "onoffCaption": "spraying at 0.101-0.952 and whistling at 1.043-6.218, 7.564-10.0", "frequencyCaption": "spraying one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_138.wav", "onoffCaption": "train horn at 3.379-5.819, 6.721-8.914", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_177.wav", "onoffCaption": "car horn honking at 0.74-3.24, 5.316-7.829 and thump thud at 1.929-4.429", "frequencyCaption": "car horn honking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_188.wav", "onoffCaption": "woman laughing at 0.562-2.799 and sheep goat bleating at 2.676-4.676, 6.672-8.672", "frequencyCaption": "woman laughing one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_192.wav", "onoffCaption": "thump thud at 3.782-6.829", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_205.wav", "onoffCaption": "cow mooing at 0.72-3.702, 5.102-8.084", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_304.wav", "onoffCaption": "tapping clicking clanking at 3.198-6.638", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_420.wav", "onoffCaption": "door slamming at 0.344-1.597, 3.048-4.026 and thump thud at 7.971-10.0", "frequencyCaption": "door slamming two times and thump thud one times"} +{"filepath": 
"data/multi_event_train/syn_475.wav", "onoffCaption": "gunshot at 0.17-2.17, 3.09-5.09, 6.183-8.183", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_490.wav", "onoffCaption": "explosion at 0.558-5.558, 6.884-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_521.wav", "onoffCaption": "door knocking at 0.623-3.193 and door slamming at 6.292-6.792, 8.204-8.704", "frequencyCaption": "door knocking one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_649.wav", "onoffCaption": "door slamming at 3.421-4.594", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_653.wav", "onoffCaption": "explosion at 0.055-5.055, 7.24-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_748.wav", "onoffCaption": "whistling at 0.244-4.728 and explosion at 0.417-3.008 and door slamming at 1.002-2.393, 3.236-4.627", "frequencyCaption": "whistling one times and explosion one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_752.wav", "onoffCaption": "whistling at 1.609-9.994", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_825.wav", "onoffCaption": "thump thud at 3.622-6.122, 7.47-9.97", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_870.wav", "onoffCaption": "explosion at 0.987-4.856, 5.954-8.826", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_924.wav", "onoffCaption": "sneeze at 1.0-3.085, 4.232-6.317", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_971.wav", "onoffCaption": "car horn honking at 0.235-5.142", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1011.wav", "onoffCaption": "cow mooing at 1.805-4.774, 5.439-7.469", "frequencyCaption": "cow mooing two times"} +{"filepath": 
"data/multi_event_train/syn_1110.wav", "onoffCaption": "gunshot at 2.921-4.921, 7.411-9.411", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1262.wav", "onoffCaption": "train horn at 0.21-4.65, 6.639-8.799", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1278.wav", "onoffCaption": "gunshot at 3.554-5.554, 7.567-9.567", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1287.wav", "onoffCaption": "dog barking at 3.277-5.277, 6.629-8.629", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1363.wav", "onoffCaption": "spraying at 0.161-1.065, 3.194-4.369, 6.556-7.064", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1379.wav", "onoffCaption": "cat meowing at 3.426-4.692", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1386.wav", "onoffCaption": "door knocking at 1.508-3.668 and cow mooing at 5.206-8.216", "frequencyCaption": "door knocking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1447.wav", "onoffCaption": "sneeze at 0.818-3.135, 3.983-6.3 and door knocking at 1.957-4.527", "frequencyCaption": "sneeze two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1546.wav", "onoffCaption": "spraying at 1.401-2.252, 3.819-4.67 and whistling at 1.789-6.273, 7.051-10.0", "frequencyCaption": "spraying two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_1634.wav", "onoffCaption": "burping belching at 0.231-2.266 and spraying at 5.82-6.57, 8.991-9.858", "frequencyCaption": "burping belching one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1735.wav", "onoffCaption": "tapping clicking clanking at 2.463-5.903", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1817.wav", "onoffCaption": "sneeze at 
1.669-3.665, 5.139-8.104", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1842.wav", "onoffCaption": "door slamming at 2.786-3.786", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1858.wav", "onoffCaption": "burping belching at 1.238-3.361", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1916.wav", "onoffCaption": "thump thud at 0.025-4.475", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1943.wav", "onoffCaption": "car horn honking at 0.017-3.192, 3.948-7.123, 7.76-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_1959.wav", "onoffCaption": "door slamming at 3.143-5.369, 6.461-7.48", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3026.wav", "onoffCaption": "explosion at 0.682-5.682 and gunshot at 5.466-7.466", "frequencyCaption": "explosion one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3069.wav", "onoffCaption": "car horn honking at 3.111-6.765 and tapping clicking clanking at 4.73-8.17", "frequencyCaption": "car horn honking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3073.wav", "onoffCaption": "train horn at 1.238-5.438", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3096.wav", "onoffCaption": "burping belching at 0.356-3.535", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3127.wav", "onoffCaption": "gunshot at 2.644-5.15, 5.815-7.908", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3168.wav", "onoffCaption": "cow mooing at 3.298-6.28", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3172.wav", "onoffCaption": "sheep goat bleating at 0.162-2.162 and spraying at 4.646-5.497", 
"frequencyCaption": "sheep goat bleating one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3197.wav", "onoffCaption": "cow mooing at 0.258-3.268, 4.919-7.929", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3200.wav", "onoffCaption": "thump thud at 3.356-5.695, 6.815-9.154", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3301.wav", "onoffCaption": "car horn honking at 0.679-3.192 and sheep goat bleating at 0.684-2.684, 5.024-7.024 and gunshot at 0.815-2.815, 3.554-5.554", "frequencyCaption": "car horn honking one times and sheep goat bleating two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_3425.wav", "onoffCaption": "sneeze at 3.563-5.88", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3470.wav", "onoffCaption": "duck quacking at 2.5-4.5, 6.729-8.729", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3524.wav", "onoffCaption": "door knocking at 4.11-8.485", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3571.wav", "onoffCaption": "train horn at 2.494-5.134, 7.013-9.653", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3656.wav", "onoffCaption": "door knocking at 0.171-3.724 and door slamming at 7.536-8.685", "frequencyCaption": "door knocking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3757.wav", "onoffCaption": "woman laughing at 1.808-4.508 and door slamming at 7.615-9.094", "frequencyCaption": "woman laughing one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3820.wav", "onoffCaption": "gunshot at 0.478-2.478, 4.009-6.009", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3875.wav", "onoffCaption": "sheep goat bleating at 1.313-3.313, 5.182-7.182", "frequencyCaption": "sheep 
goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3890.wav", "onoffCaption": "gunshot at 3.266-5.266, 5.827-7.827", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3921.wav", "onoffCaption": "sheep goat bleating at 2.646-4.646", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_50.wav", "onoffCaption": "tapping clicking clanking at 1.23-4.67", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_122.wav", "onoffCaption": "dog barking at 0.436-2.436, 4.526-6.526", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_170.wav", "onoffCaption": "sneeze at 0.528-3.736, 4.834-8.042", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_195.wav", "onoffCaption": "thump thud at 2.18-4.642, 5.753-8.215", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_218.wav", "onoffCaption": "door slamming at 2.198-4.198", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_303.wav", "onoffCaption": "cow mooing at 3.336-8.316 and dog barking at 7.061-9.061", "frequencyCaption": "cow mooing one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_351.wav", "onoffCaption": "gunshot at 0.026-2.026 and whistling at 0.049-2.924 and duck quacking at 2.674-4.674, 6.611-8.611", "frequencyCaption": "gunshot one times and whistling one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_526.wav", "onoffCaption": "duck quacking at 2.365-4.365, 5.347-7.347", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_574.wav", "onoffCaption": "door knocking at 2.41-6.026, 6.947-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_591.wav", "onoffCaption": "dog barking at 0.373-2.373 and door knocking at 
4.578-8.728", "frequencyCaption": "dog barking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_707.wav", "onoffCaption": "sneeze at 0.383-4.912 and duck quacking at 6.614-8.614", "frequencyCaption": "sneeze one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_755.wav", "onoffCaption": "thump thud at 0.247-4.697, 5.344-7.86", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_822.wav", "onoffCaption": "gunshot at 2.509-4.983, 7.46-9.934 and tapping clicking clanking at 3.058-6.498 and spraying at 3.322-4.103, 5.476-6.208, 7.466-8.116", "frequencyCaption": "gunshot two times and tapping clicking clanking one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_895.wav", "onoffCaption": "door slamming at 2.693-4.217, 4.832-6.356 and woman laughing at 3.667-6.752", "frequencyCaption": "door slamming two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_939.wav", "onoffCaption": "tapping clicking clanking at 0.706-4.146 and sheep goat bleating at 2.877-4.877", "frequencyCaption": "tapping clicking clanking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1016.wav", "onoffCaption": "whistling at 0.256-8.641 and woman laughing at 0.274-2.372 and gunshot at 3.743-5.743", "frequencyCaption": "whistling one times and woman laughing one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1044.wav", "onoffCaption": "door knocking at 1.427-6.427 and thump thud at 2.18-6.63 and duck quacking at 4.935-6.935", "frequencyCaption": "door knocking one times and thump thud one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1237.wav", "onoffCaption": "sheep goat bleating at 1.542-3.542, 4.23-6.23 and thump thud at 3.688-6.188", "frequencyCaption": "sheep goat bleating two times and thump thud one times"} +{"filepath": 
"data/multi_event_train/syn_1265.wav", "onoffCaption": "door slamming at 2.016-5.016", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1280.wav", "onoffCaption": "cat meowing at 0.119-1.479, 3.298-4.445, 5.477-6.752", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1412.wav", "onoffCaption": "duck quacking at 3.869-5.869, 7.87-9.87", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1440.wav", "onoffCaption": "whistling at 0.032-2.907, 5.337-8.212", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1509.wav", "onoffCaption": "sheep goat bleating at 2.97-6.89", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1633.wav", "onoffCaption": "door knocking at 0.593-3.13, 4.121-6.748, 7.286-10.0", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1661.wav", "onoffCaption": "door slamming at 0.077-1.096", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1683.wav", "onoffCaption": "door slamming at 0.183-1.574, 2.15-3.001, 4.15-6.068 and explosion at 7.733-10.0", "frequencyCaption": "door slamming three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1684.wav", "onoffCaption": "train horn at 1.734-8.203", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1728.wav", "onoffCaption": "cow mooing at 3.139-6.121, 6.742-9.724", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1798.wav", "onoffCaption": "gunshot at 0.031-2.031, 3.728-5.728, 6.569-8.569", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1944.wav", "onoffCaption": "tapping clicking clanking at 2.026-5.466, 6.277-9.717", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": 
"data/multi_event_train/syn_3074.wav", "onoffCaption": "explosion at 0.526-2.59 and burping belching at 2.463-4.494, 5.45-7.481 and tapping clicking clanking at 4.892-8.332", "frequencyCaption": "explosion one times and burping belching two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3091.wav", "onoffCaption": "cow mooing at 0.669-5.649, 7.382-10.0 and door knocking at 1.429-3.781", "frequencyCaption": "cow mooing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3207.wav", "onoffCaption": "woman laughing at 3.018-6.594, 7.428-9.674", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3252.wav", "onoffCaption": "woman laughing at 2.935-6.987, 7.556-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3255.wav", "onoffCaption": "cow mooing at 2.968-5.978 and spraying at 5.505-6.255, 7.07-7.82, 9.091-9.841", "frequencyCaption": "cow mooing one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_3349.wav", "onoffCaption": "duck quacking at 4.337-6.337", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3422.wav", "onoffCaption": "door knocking at 0.853-4.603, 6.378-8.921", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3495.wav", "onoffCaption": "whistling at 0.504-2.733, 4.408-6.637", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3539.wav", "onoffCaption": "door slamming at 0.962-2.215, 3.921-5.174, 7.427-8.68 and thump thud at 2.092-6.542 and gunshot at 5.373-7.373", "frequencyCaption": "door slamming three times and thump thud one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3603.wav", "onoffCaption": "cat meowing at 1.309-2.88", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3604.wav", 
"onoffCaption": "sneeze at 2.882-5.199, 5.801-8.262", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3651.wav", "onoffCaption": "sheep goat bleating at 2.163-4.163, 5.115-7.115", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3718.wav", "onoffCaption": "door slamming at 0.076-1.591, 2.399-3.25, 3.84-5.131 and duck quacking at 2.94-4.94, 5.948-7.948", "frequencyCaption": "door slamming three times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3926.wav", "onoffCaption": "door slamming at 1.11-2.634, 3.498-5.022, 6.331-7.855 and gunshot at 2.786-4.786, 6.387-8.387 and spraying at 7.305-8.305", "frequencyCaption": "door slamming three times and gunshot two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3974.wav", "onoffCaption": "tapping clicking clanking at 1.133-4.573, 5.176-7.376 and burping belching at 3.032-7.032", "frequencyCaption": "tapping clicking clanking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3991.wav", "onoffCaption": "dog barking at 0.342-2.342 and duck quacking at 6.157-8.157", "frequencyCaption": "dog barking one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_57.wav", "onoffCaption": "gunshot at 1.054-3.054 and door slamming at 5.275-7.471", "frequencyCaption": "gunshot one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_125.wav", "onoffCaption": "duck quacking at 1.445-3.445, 4.236-6.236, 7.055-9.055 and sneeze at 2.323-4.282", "frequencyCaption": "duck quacking three times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_257.wav", "onoffCaption": "car horn honking at 2.061-6.461, 7.737-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_356.wav", "onoffCaption": "gunshot at 0.745-2.745 and cow mooing at 4.159-8.588", "frequencyCaption": "gunshot one 
times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_468.wav", "onoffCaption": "cow mooing at 3.409-6.378", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_472.wav", "onoffCaption": "spraying at 0.583-3.602", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_497.wav", "onoffCaption": "train horn at 0.289-4.409, 6.425-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_569.wav", "onoffCaption": "thump thud at 0.004-2.504, 4.066-6.566", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_573.wav", "onoffCaption": "tapping clicking clanking at 0.237-3.677, 5.378-7.737", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_596.wav", "onoffCaption": "thump thud at 1.237-4.284, 5.393-7.909", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_601.wav", "onoffCaption": "dog barking at 0.134-2.134 and burping belching at 5.233-10.0", "frequencyCaption": "dog barking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_700.wav", "onoffCaption": "dog barking at 1.016-3.016, 5.055-7.055", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_838.wav", "onoffCaption": "car horn honking at 2.969-7.369", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_877.wav", "onoffCaption": "door knocking at 3.48-6.327, 6.917-8.966", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_888.wav", "onoffCaption": "duck quacking at 0.569-2.569, 3.902-5.902, 6.419-8.419", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_892.wav", "onoffCaption": "door slamming at 2.212-5.173, 6.664-7.642", "frequencyCaption": "door slamming two times"} +{"filepath": 
"data/multi_event_train/syn_976.wav", "onoffCaption": "train horn at 0.105-4.173, 5.738-9.806", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_989.wav", "onoffCaption": "cat meowing at 0.055-1.204, 3.495-4.507, 5.761-7.755", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_993.wav", "onoffCaption": "door knocking at 2.388-6.921 and sheep goat bleating at 2.785-6.081, 7.459-10.0", "frequencyCaption": "door knocking one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1043.wav", "onoffCaption": "cat meowing at 0.138-1.709, 2.9-5.761 and door knocking at 4.303-6.683, 7.227-9.607", "frequencyCaption": "cat meowing two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1059.wav", "onoffCaption": "car horn honking at 0.031-4.431, 4.972-6.972, 7.512-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_1142.wav", "onoffCaption": "tapping clicking clanking at 1.363-4.803, 5.875-8.084", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1158.wav", "onoffCaption": "door knocking at 0.327-2.827, 3.933-6.701 and car horn honking at 2.786-7.035", "frequencyCaption": "door knocking two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1230.wav", "onoffCaption": "thump thud at 0.0-3.667, 6.045-9.712", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1331.wav", "onoffCaption": "gunshot at 0.021-2.021 and explosion at 0.033-2.762, 5.092-7.821", "frequencyCaption": "gunshot one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_1415.wav", "onoffCaption": "door slamming at 0.772-1.577, 2.542-5.516 and sneeze at 1.092-2.592", "frequencyCaption": "door slamming two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1514.wav", "onoffCaption": 
"door knocking at 0.852-5.291, 6.978-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1666.wav", "onoffCaption": "tapping clicking clanking at 0.654-4.094", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1699.wav", "onoffCaption": "burping belching at 0.365-2.396, 4.748-7.307", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1767.wav", "onoffCaption": "door slamming at 2.159-3.276, 4.526-7.487", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1782.wav", "onoffCaption": "sheep goat bleating at 0.888-2.888, 4.833-6.833 and train horn at 5.074-7.748", "frequencyCaption": "sheep goat bleating two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1810.wav", "onoffCaption": "sneeze at 0.452-1.698 and duck quacking at 0.778-2.778, 3.694-5.694, 6.436-8.436", "frequencyCaption": "sneeze one times and duck quacking three times"} +{"filepath": "data/multi_event_train/syn_1911.wav", "onoffCaption": "woman laughing at 2.363-5.463, 7.505-9.873", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3021.wav", "onoffCaption": "explosion at 0.157-5.157, 6.381-8.972", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3120.wav", "onoffCaption": "dog barking at 0.042-2.042 and woman laughing at 1.67-4.025", "frequencyCaption": "dog barking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3175.wav", "onoffCaption": "burping belching at 3.834-7.093, 7.803-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3248.wav", "onoffCaption": "explosion at 0.563-3.291", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3353.wav", "onoffCaption": "dog barking at 1.301-4.222, 5.025-7.946 and cat meowing at 
4.497-7.738", "frequencyCaption": "dog barking two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3438.wav", "onoffCaption": "spraying at 1.37-2.151, 3.483-4.052, 4.616-5.467 and door knocking at 7.804-10.0", "frequencyCaption": "spraying three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3477.wav", "onoffCaption": "dog barking at 1.283-3.283, 5.184-7.184", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3488.wav", "onoffCaption": "burping belching at 0.015-3.559, 4.328-7.328, 7.932-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_3492.wav", "onoffCaption": "door knocking at 0.811-5.321 and dog barking at 2.454-4.454 and sneeze at 4.318-6.001", "frequencyCaption": "door knocking one times and dog barking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3576.wav", "onoffCaption": "thump thud at 0.352-4.802 and cat meowing at 7.774-9.722", "frequencyCaption": "thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3589.wav", "onoffCaption": "duck quacking at 0.293-2.293, 3.357-5.357, 6.355-8.355", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3593.wav", "onoffCaption": "dog barking at 0.036-2.036, 3.013-5.013, 5.719-7.719", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_3705.wav", "onoffCaption": "dog barking at 3.735-5.735", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3868.wav", "onoffCaption": "train horn at 0.332-4.772, 6.048-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3872.wav", "onoffCaption": "door slamming at 0.661-1.342, 3.734-4.415, 6.72-7.401", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3897.wav", "onoffCaption": "thump 
thud at 2.086-4.857", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3969.wav", "onoffCaption": "thump thud at 2.95-5.45, 6.105-8.605", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3973.wav", "onoffCaption": "car horn honking at 2.448-4.961, 6.029-8.542", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3996.wav", "onoffCaption": "burping belching at 2.047-4.082, 4.884-6.919, 7.802-9.837 and explosion at 2.284-7.284 and sneeze at 2.504-3.961, 4.923-6.38, 7.442-8.899", "frequencyCaption": "burping belching three times and explosion one times and sneeze three times"} +{"filepath": "data/multi_event_train/syn_18.wav", "onoffCaption": "thump thud at 0.402-2.63, 4.475-6.937", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_69.wav", "onoffCaption": "car horn honking at 1.273-5.114 and cat meowing at 1.976-3.517, 4.196-5.223", "frequencyCaption": "car horn honking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_202.wav", "onoffCaption": "sheep goat bleating at 3.609-5.609, 7.145-9.145", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_273.wav", "onoffCaption": "tapping clicking clanking at 0.069-3.509, 4.304-6.615", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_296.wav", "onoffCaption": "door slamming at 1.008-2.549, 4.839-6.604, 7.937-9.328", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_319.wav", "onoffCaption": "door slamming at 0.803-1.781", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_368.wav", "onoffCaption": "burping belching at 0.832-3.593, 4.807-7.037, 7.772-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_427.wav", "onoffCaption": "cow 
mooing at 0.73-3.699", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_456.wav", "onoffCaption": "sneeze at 0.319-1.864, 3.011-4.556 and train horn at 7.979-10.0", "frequencyCaption": "sneeze two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_625.wav", "onoffCaption": "door slamming at 1.746-2.863, 3.942-5.466", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_654.wav", "onoffCaption": "tapping clicking clanking at 0.273-3.713, 4.322-7.189, 7.877-10.0", "frequencyCaption": "tapping clicking clanking three times"} +{"filepath": "data/multi_event_train/syn_849.wav", "onoffCaption": "cow mooing at 2.184-5.482, 7.665-10.0 and woman laughing at 2.751-5.034", "frequencyCaption": "cow mooing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_923.wav", "onoffCaption": "tapping clicking clanking at 3.813-7.253, 7.927-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_952.wav", "onoffCaption": "gunshot at 0.715-2.885 and whistling at 0.92-9.305", "frequencyCaption": "gunshot one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1098.wav", "onoffCaption": "cow mooing at 0.964-3.933, 4.576-7.273", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1117.wav", "onoffCaption": "sheep goat bleating at 0.773-2.773, 3.983-5.983, 6.891-9.811", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1166.wav", "onoffCaption": "cow mooing at 2.671-5.653, 7.472-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1183.wav", "onoffCaption": "door slamming at 2.346-4.346, 5.646-7.646", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1315.wav", "onoffCaption": "sneeze at 1.326-2.938 and door slamming at 6.273-7.251, 
8.082-9.06", "frequencyCaption": "sneeze one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1364.wav", "onoffCaption": "car horn honking at 1.483-5.883, 7.161-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1381.wav", "onoffCaption": "whistling at 0.727-8.598 and sneeze at 2.712-4.0", "frequencyCaption": "whistling one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1530.wav", "onoffCaption": "tapping clicking clanking at 2.798-6.238 and cow mooing at 2.997-6.007, 7.616-10.0", "frequencyCaption": "tapping clicking clanking one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1541.wav", "onoffCaption": "sheep goat bleating at 0.579-2.579 and cat meowing at 3.673-4.684", "frequencyCaption": "sheep goat bleating one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1580.wav", "onoffCaption": "woman laughing at 2.859-6.435, 7.53-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1629.wav", "onoffCaption": "dog barking at 0.638-2.638", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1658.wav", "onoffCaption": "spraying at 0.41-1.011, 3.235-3.836, 5.746-6.347 and thump thud at 6.685-9.147", "frequencyCaption": "spraying three times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1732.wav", "onoffCaption": "sheep goat bleating at 0.343-2.343, 3.836-5.836", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1743.wav", "onoffCaption": "explosion at 2.23-5.102, 6.265-9.265", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1834.wav", "onoffCaption": "explosion at 3.121-5.993, 6.705-9.577", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1845.wav", "onoffCaption": "cow mooing at 1.29-4.259, 
6.477-8.876", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3104.wav", "onoffCaption": "burping belching at 0.345-4.345 and duck quacking at 1.432-3.432", "frequencyCaption": "burping belching one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3151.wav", "onoffCaption": "cat meowing at 0.059-1.144", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3190.wav", "onoffCaption": "sneeze at 0.959-5.488, 6.743-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3289.wav", "onoffCaption": "cat meowing at 0.933-4.174 and door knocking at 1.443-3.818 and dog barking at 1.738-3.738", "frequencyCaption": "cat meowing one times and door knocking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3306.wav", "onoffCaption": "tapping clicking clanking at 0.901-4.341 and spraying at 2.023-3.718, 4.646-5.513 and gunshot at 7.421-9.421", "frequencyCaption": "tapping clicking clanking one times and spraying two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3377.wav", "onoffCaption": "whistling at 1.018-3.027, 4.596-7.243 and sheep goat bleating at 1.491-3.491, 4.68-6.68", "frequencyCaption": "whistling two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3392.wav", "onoffCaption": "whistling at 2.377-5.352", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3449.wav", "onoffCaption": "car horn honking at 2.799-7.311 and spraying at 7.997-8.624", "frequencyCaption": "car horn honking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3507.wav", "onoffCaption": "door knocking at 1.768-6.143, 7.28-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3523.wav", "onoffCaption": "cat meowing at 2.864-3.874, 4.793-6.329, 7.107-8.107", "frequencyCaption": 
"cat meowing three times"} +{"filepath": "data/multi_event_train/syn_3552.wav", "onoffCaption": "burping belching at 0.815-4.375, 5.148-8.069", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3721.wav", "onoffCaption": "dog barking at 0.194-2.194, 4.11-6.11 and sneeze at 2.888-4.847, 6.691-8.65", "frequencyCaption": "dog barking two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3750.wav", "onoffCaption": "duck quacking at 1.107-3.107, 3.755-5.755, 7.819-9.819", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3827.wav", "onoffCaption": "door slamming at 3.427-4.718, 6.79-8.181 and burping belching at 5.419-8.419", "frequencyCaption": "door slamming two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3856.wav", "onoffCaption": "sneeze at 2.238-4.197 and woman laughing at 6.756-9.361", "frequencyCaption": "sneeze one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_26.wav", "onoffCaption": "whistling at 2.265-9.298", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_154.wav", "onoffCaption": "cow mooing at 0.083-3.065, 4.203-6.641, 7.573-10.0", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/multi_event_train/syn_226.wav", "onoffCaption": "cow mooing at 3.057-6.355", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_327.wav", "onoffCaption": "duck quacking at 2.283-4.283, 6.514-8.514", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_403.wav", "onoffCaption": "sneeze at 3.343-4.577 and thump thud at 7.198-9.969", "frequencyCaption": "sneeze one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_419.wav", "onoffCaption": "explosion at 0.829-3.429 and spraying at 3.018-3.518, 4.092-4.592, 5.533-6.033", "frequencyCaption": "explosion one times and 
spraying three times"} +{"filepath": "data/multi_event_train/syn_502.wav", "onoffCaption": "whistling at 0.199-8.584 and sneeze at 3.066-5.312", "frequencyCaption": "whistling one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_518.wav", "onoffCaption": "duck quacking at 0.924-2.924, 4.185-6.185, 7.586-9.586", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_670.wav", "onoffCaption": "tapping clicking clanking at 0.126-3.566, 4.832-8.272", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_695.wav", "onoffCaption": "thump thud at 0.106-4.024 and door slamming at 0.509-3.29", "frequencyCaption": "thump thud one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_771.wav", "onoffCaption": "whistling at 1.179-9.19", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_794.wav", "onoffCaption": "door slamming at 2.185-5.185", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_806.wav", "onoffCaption": "sheep goat bleating at 0.148-2.148, 3.428-5.428, 6.242-8.242 and dog barking at 1.14-3.14, 5.434-7.434 and spraying at 1.374-2.225", "frequencyCaption": "sheep goat bleating three times and dog barking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_907.wav", "onoffCaption": "door slamming at 0.681-1.486, 2.744-3.549, 4.715-5.52 and car horn honking at 2.057-6.457", "frequencyCaption": "door slamming three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1028.wav", "onoffCaption": "door knocking at 0.558-3.326, 4.653-7.472", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1032.wav", "onoffCaption": "gunshot at 0.97-3.243, 5.737-8.01 and whistling at 1.394-5.878, 6.976-9.839 and dog barking at 6.431-8.431", "frequencyCaption": "gunshot two times and whistling two 
times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1129.wav", "onoffCaption": "duck quacking at 1.938-3.938, 4.687-6.687", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1133.wav", "onoffCaption": "door knocking at 0.162-2.662 and gunshot at 6.396-8.396", "frequencyCaption": "door knocking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1241.wav", "onoffCaption": "door knocking at 0.891-5.593, 6.815-9.127", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1340.wav", "onoffCaption": "tapping clicking clanking at 0.427-3.867 and train horn at 6.281-9.161", "frequencyCaption": "tapping clicking clanking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1464.wav", "onoffCaption": "sneeze at 1.81-4.127, 5.404-7.323 and dog barking at 2.707-4.707, 6.039-8.039", "frequencyCaption": "sneeze two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_1481.wav", "onoffCaption": "gunshot at 2.87-4.87, 6.198-8.198", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1565.wav", "onoffCaption": "explosion at 0.188-2.276, 3.785-5.873 and woman laughing at 1.189-4.577, 5.689-7.781", "frequencyCaption": "explosion two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1617.wav", "onoffCaption": "spraying at 4.123-6.707, 8.0-8.867", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1716.wav", "onoffCaption": "burping belching at 0.962-3.288, 5.002-7.328", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1861.wav", "onoffCaption": "duck quacking at 0.358-2.358, 4.545-6.545 and sneeze at 5.937-7.214", "frequencyCaption": "duck quacking two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1884.wav", "onoffCaption": "train horn at 0.134-2.934 and 
door knocking at 5.52-7.895", "frequencyCaption": "train horn one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1960.wav", "onoffCaption": "whistling at 2.883-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1985.wav", "onoffCaption": "door knocking at 1.766-3.926, 4.914-7.074", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3050.wav", "onoffCaption": "thump thud at 2.535-6.985", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3223.wav", "onoffCaption": "sneeze at 1.918-3.082", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3239.wav", "onoffCaption": "car horn honking at 1.927-6.834, 7.521-10.0 and door knocking at 3.368-8.368", "frequencyCaption": "car horn honking two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3276.wav", "onoffCaption": "thump thud at 0.898-3.398", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3322.wav", "onoffCaption": "door slamming at 3.403-5.403, 7.253-9.253", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3338.wav", "onoffCaption": "door knocking at 0.462-2.65 and car horn honking at 6.561-10.0", "frequencyCaption": "door knocking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3406.wav", "onoffCaption": "sneeze at 0.145-2.064 and thump thud at 4.654-7.116", "frequencyCaption": "sneeze one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3620.wav", "onoffCaption": "train horn at 3.384-6.584", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3675.wav", "onoffCaption": "dog barking at 3.128-5.566, 7.331-9.769", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3690.wav", "onoffCaption": "sneeze at 
0.221-1.515, 2.707-4.001, 4.655-5.949 and woman laughing at 4.131-6.736 and door slamming at 4.556-7.273", "frequencyCaption": "sneeze three times and woman laughing one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3774.wav", "onoffCaption": "woman laughing at 0.469-3.523, 4.471-7.24", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3791.wav", "onoffCaption": "gunshot at 2.794-4.794", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3803.wav", "onoffCaption": "car horn honking at 3.31-5.775", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3819.wav", "onoffCaption": "dog barking at 1.059-3.059, 4.623-7.061", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3902.wav", "onoffCaption": "sneeze at 2.138-3.384, 4.942-6.188, 8.009-9.255", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3918.wav", "onoffCaption": "explosion at 0.429-3.301", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_73.wav", "onoffCaption": "cat meowing at 1.611-6.611", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_96.wav", "onoffCaption": "door knocking at 0.339-4.107, 5.594-8.094", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_101.wav", "onoffCaption": "spraying at 3.192-5.628, 6.912-9.348", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_140.wav", "onoffCaption": "burping belching at 0.155-2.385 and spraying at 1.861-3.594 and cat meowing at 3.753-5.941", "frequencyCaption": "burping belching one times and spraying one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_269.wav", "onoffCaption": "car horn honking at 0.696-5.208, 7.272-9.272", "frequencyCaption": "car horn honking two times"} 
+{"filepath": "data/multi_event_train/syn_372.wav", "onoffCaption": "spraying at 3.375-3.883, 6.226-6.734", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_397.wav", "onoffCaption": "sneeze at 3.122-4.579 and gunshot at 6.347-8.347", "frequencyCaption": "sneeze one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_516.wav", "onoffCaption": "explosion at 2.108-4.636, 5.554-8.082 and cat meowing at 2.607-3.651, 5.572-6.581", "frequencyCaption": "explosion two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_557.wav", "onoffCaption": "cow mooing at 0.457-3.467", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_724.wav", "onoffCaption": "sneeze at 2.444-5.554, 6.077-7.371", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_812.wav", "onoffCaption": "door slamming at 2.125-3.144, 5.228-6.128", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_853.wav", "onoffCaption": "tapping clicking clanking at 0.486-3.926, 5.27-8.038 and spraying at 1.615-2.615, 3.253-4.51, 5.128-7.256", "frequencyCaption": "tapping clicking clanking two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_909.wav", "onoffCaption": "tapping clicking clanking at 0.702-4.142, 5.289-7.949", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_948.wav", "onoffCaption": "explosion at 3.11-6.95", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1067.wav", "onoffCaption": "thump thud at 0.112-3.159 and burping belching at 0.191-2.556, 4.378-6.804", "frequencyCaption": "thump thud one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1082.wav", "onoffCaption": "sneeze at 2.318-4.263, 5.955-7.9", "frequencyCaption": "sneeze two times"} +{"filepath": 
"data/multi_event_train/syn_1199.wav", "onoffCaption": "dog barking at 1.241-6.878", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1214.wav", "onoffCaption": "sheep goat bleating at 2.417-4.417, 5.379-7.613 and cow mooing at 5.246-8.228", "frequencyCaption": "sheep goat bleating two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1431.wav", "onoffCaption": "thump thud at 0.131-3.178, 5.554-8.167", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1470.wav", "onoffCaption": "spraying at 0.07-1.803, 2.542-3.623, 4.2-4.722", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1495.wav", "onoffCaption": "thump thud at 0.204-3.871 and door knocking at 5.99-10.0", "frequencyCaption": "thump thud one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1642.wav", "onoffCaption": "woman laughing at 1.271-8.716", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1759.wav", "onoffCaption": "cat meowing at 3.734-5.044, 5.722-7.032, 7.879-9.189", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1935.wav", "onoffCaption": "door slamming at 0.387-1.068, 3.499-5.499, 6.392-8.392", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1974.wav", "onoffCaption": "train horn at 0.923-3.363, 4.373-6.429 and sneeze at 4.66-6.16", "frequencyCaption": "train horn two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1991.wav", "onoffCaption": "sneeze at 4.039-5.142, 5.76-7.288 and whistling at 5.589-7.598", "frequencyCaption": "sneeze two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3005.wav", "onoffCaption": "door slamming at 2.507-3.831, 4.569-7.052, 8.333-9.233", "frequencyCaption": "door slamming three times"} +{"filepath": 
"data/multi_event_train/syn_3044.wav", "onoffCaption": "door knocking at 0.268-4.778, 5.999-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3293.wav", "onoffCaption": "sneeze at 0.238-3.886 and train horn at 0.981-4.221, 6.222-8.722 and whistling at 1.282-4.257, 4.777-7.669", "frequencyCaption": "sneeze one times and train horn two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_3388.wav", "onoffCaption": "cow mooing at 2.335-5.345, 7.12-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3412.wav", "onoffCaption": "spraying at 0.422-2.55, 3.472-5.6", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3453.wav", "onoffCaption": "dog barking at 0.309-2.309, 2.831-4.831, 5.437-7.437 and tapping clicking clanking at 2.146-5.586, 6.531-9.971", "frequencyCaption": "dog barking three times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3509.wav", "onoffCaption": "tapping clicking clanking at 0.602-4.042, 4.582-6.631 and burping belching at 6.007-8.237 and car horn honking at 6.487-8.952", "frequencyCaption": "tapping clicking clanking two times and burping belching one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3548.wav", "onoffCaption": "burping belching at 1.491-4.75, 5.696-8.955 and gunshot at 1.925-4.018 and duck quacking at 3.075-5.075", "frequencyCaption": "burping belching two times and gunshot one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3916.wav", "onoffCaption": "burping belching at 0.422-2.453, 4.46-7.46 and explosion at 4.672-7.544", "frequencyCaption": "burping belching two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3957.wav", "onoffCaption": "tapping clicking clanking at 0.344-3.784, 5.183-8.623 and dog barking at 3.175-5.175", "frequencyCaption": "tapping clicking 
clanking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_32.wav", "onoffCaption": "whistling at 1.978-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_67.wav", "onoffCaption": "sneeze at 1.634-3.947", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_82.wav", "onoffCaption": "sheep goat bleating at 2.09-4.09, 5.408-7.408 and door knocking at 3.653-6.055", "frequencyCaption": "sheep goat bleating two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_115.wav", "onoffCaption": "thump thud at 1.139-3.639, 4.976-7.438, 7.978-10.0", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_228.wav", "onoffCaption": "burping belching at 1.339-3.37, 4.917-7.12 and dog barking at 5.396-7.396, 7.946-9.946", "frequencyCaption": "burping belching two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_267.wav", "onoffCaption": "explosion at 0.424-2.517 and door knocking at 6.923-9.111", "frequencyCaption": "explosion one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_282.wav", "onoffCaption": "train horn at 2.266-4.746", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_298.wav", "onoffCaption": "door slamming at 0.022-0.703, 1.758-3.011", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_333.wav", "onoffCaption": "dog barking at 0.293-2.293 and door slamming at 4.141-4.946, 5.517-7.88, 8.508-9.189", "frequencyCaption": "dog barking one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_366.wav", "onoffCaption": "spraying at 2.509-3.017 and cat meowing at 6.585-8.169", "frequencyCaption": "spraying one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_383.wav", "onoffCaption": "door knocking at 0.352-3.727, 4.364-7.739", 
"frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_399.wav", "onoffCaption": "cow mooing at 2.658-5.627, 7.837-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_458.wav", "onoffCaption": "whistling at 1.057-9.068 and explosion at 4.286-7.158", "frequencyCaption": "whistling one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_543.wav", "onoffCaption": "cow mooing at 1.056-4.038, 5.843-8.812", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_631.wav", "onoffCaption": "gunshot at 2.915-4.915, 6.448-8.448", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_730.wav", "onoffCaption": "cat meowing at 3.413-4.562, 6.613-7.762", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_765.wav", "onoffCaption": "car horn honking at 0.945-4.786, 5.97-9.811", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_780.wav", "onoffCaption": "whistling at 0.72-8.09", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_847.wav", "onoffCaption": "burping belching at 1.931-9.099", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1026.wav", "onoffCaption": "tapping clicking clanking at 2.614-6.054 and gunshot at 2.933-4.933, 6.04-8.04 and door slamming at 3.285-4.402", "frequencyCaption": "tapping clicking clanking one times and gunshot two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1073.wav", "onoffCaption": "train horn at 2.629-5.949, 6.477-9.797", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1096.wav", "onoffCaption": "door slamming at 2.215-4.215 and whistling at 7.117-10.0", "frequencyCaption": "door slamming one times and whistling one times"} +{"filepath": 
"data/multi_event_train/syn_1168.wav", "onoffCaption": "duck quacking at 0.384-2.384", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1200.wav", "onoffCaption": "tapping clicking clanking at 0.097-3.537, 4.07-6.583 and train horn at 3.145-6.679", "frequencyCaption": "tapping clicking clanking two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1255.wav", "onoffCaption": "woman laughing at 0.763-4.151 and cow mooing at 5.988-8.97", "frequencyCaption": "woman laughing one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1425.wav", "onoffCaption": "gunshot at 0.046-2.046, 2.925-4.925, 5.487-7.487", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1603.wav", "onoffCaption": "sneeze at 0.369-1.533", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_1656.wav", "onoffCaption": "dog barking at 0.624-2.624", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1718.wav", "onoffCaption": "woman laughing at 1.553-3.651", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1921.wav", "onoffCaption": "duck quacking at 0.196-2.196 and tapping clicking clanking at 4.003-7.443", "frequencyCaption": "duck quacking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3011.wav", "onoffCaption": "door knocking at 0.914-3.484 and sheep goat bleating at 2.889-4.889, 6.023-8.023", "frequencyCaption": "door knocking one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3237.wav", "onoffCaption": "door knocking at 2.281-4.53 and cow mooing at 7.956-10.0", "frequencyCaption": "door knocking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3262.wav", "onoffCaption": "cat meowing at 1.237-4.582, 5.9-7.26 and spraying at 2.83-3.562, 4.76-5.492, 7.812-8.544 and 
burping belching at 4.274-7.476", "frequencyCaption": "cat meowing two times and spraying three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3278.wav", "onoffCaption": "duck quacking at 3.644-5.644, 7.565-9.565", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3287.wav", "onoffCaption": "sheep goat bleating at 0.66-2.66, 3.969-5.969", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3363.wav", "onoffCaption": "dog barking at 2.469-4.469, 4.984-6.984 and sneeze at 5.02-6.52", "frequencyCaption": "dog barking two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3379.wav", "onoffCaption": "cow mooing at 2.657-5.667, 6.343-9.006", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3447.wav", "onoffCaption": "car horn honking at 0.089-3.308 and cow mooing at 0.324-4.753, 6.437-9.419", "frequencyCaption": "car horn honking one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3634.wav", "onoffCaption": "cow mooing at 0.997-5.426, 6.53-10.0 and duck quacking at 2.647-4.647, 5.454-7.454", "frequencyCaption": "cow mooing two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3661.wav", "onoffCaption": "dog barking at 1.926-4.326, 6.456-8.456", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3684.wav", "onoffCaption": "duck quacking at 2.617-4.617, 5.935-7.935", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3735.wav", "onoffCaption": "door knocking at 1.259-5.634 and tapping clicking clanking at 3.553-6.993", "frequencyCaption": "door knocking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3858.wav", "onoffCaption": "door knocking at 2.954-5.203, 5.759-8.008", "frequencyCaption": "door knocking two times"} +{"filepath": 
"data/multi_event_train/syn_3943.wav", "onoffCaption": "spraying at 0.374-2.81, 4.757-5.507", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_28.wav", "onoffCaption": "thump thud at 1.425-4.472, 5.233-8.28", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_98.wav", "onoffCaption": "sneeze at 3.369-5.482 and sheep goat bleating at 7.725-9.725", "frequencyCaption": "sneeze one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_232.wav", "onoffCaption": "train horn at 2.722-9.191", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_329.wav", "onoffCaption": "woman laughing at 0.281-2.386, 3.414-5.66 and gunshot at 0.769-2.769", "frequencyCaption": "woman laughing two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_417.wav", "onoffCaption": "tapping clicking clanking at 0.114-3.554 and door slamming at 5.506-7.03, 8.007-9.007", "frequencyCaption": "tapping clicking clanking one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_442.wav", "onoffCaption": "car horn honking at 0.262-3.175 and spraying at 2.001-3.763, 4.393-4.915", "frequencyCaption": "car horn honking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_559.wav", "onoffCaption": "door slamming at 0.382-2.84, 4.65-6.129, 7.21-9.436 and spraying at 1.394-2.144, 3.499-4.249, 5.971-6.721", "frequencyCaption": "door slamming three times and spraying three times"} +{"filepath": "data/multi_event_train/syn_664.wav", "onoffCaption": "car horn honking at 2.537-7.049", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_681.wav", "onoffCaption": "duck quacking at 3.168-5.168, 6.271-8.271", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_808.wav", "onoffCaption": "explosion at 3.702-7.542", "frequencyCaption": "explosion one 
times"} +{"filepath": "data/multi_event_train/syn_913.wav", "onoffCaption": "explosion at 1.558-4.43, 5.523-8.276", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_946.wav", "onoffCaption": "duck quacking at 0.315-2.315, 4.783-6.783 and dog barking at 4.076-6.076", "frequencyCaption": "duck quacking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1069.wav", "onoffCaption": "train horn at 1.208-4.448, 5.077-7.082", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1127.wav", "onoffCaption": "car horn honking at 0.006-2.792 and spraying at 6.323-6.845, 7.424-7.946", "frequencyCaption": "car horn honking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1172.wav", "onoffCaption": "gunshot at 0.688-2.688, 4.069-6.069", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1197.wav", "onoffCaption": "train horn at 0.206-3.006, 4.395-7.195", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1301.wav", "onoffCaption": "cow mooing at 1.996-6.425", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1354.wav", "onoffCaption": "cow mooing at 1.681-4.663", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1524.wav", "onoffCaption": "dog barking at 0.558-2.558, 3.395-5.395, 6.074-8.074 and sheep goat bleating at 1.962-3.962", "frequencyCaption": "dog barking three times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1571.wav", "onoffCaption": "thump thud at 0.702-2.93, 4.42-7.262 and explosion at 1.683-4.555 and burping belching at 2.168-5.427", "frequencyCaption": "thump thud two times and explosion one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1594.wav", "onoffCaption": "thump thud at 0.537-4.987, 6.972-9.172", "frequencyCaption": "thump thud 
two times"} +{"filepath": "data/multi_event_train/syn_1619.wav", "onoffCaption": "dog barking at 2.648-5.086, 5.864-8.302 and woman laughing at 6.145-8.227", "frequencyCaption": "dog barking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1702.wav", "onoffCaption": "cat meowing at 2.721-6.066, 6.608-7.693, 8.471-9.618", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1757.wav", "onoffCaption": "gunshot at 1.51-3.51, 5.146-7.386 and thump thud at 2.06-4.56", "frequencyCaption": "gunshot two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1796.wav", "onoffCaption": "tapping clicking clanking at 0.181-3.621 and duck quacking at 2.364-4.364 and train horn at 7.722-10.0", "frequencyCaption": "tapping clicking clanking one times and duck quacking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1820.wav", "onoffCaption": "cat meowing at 0.661-4.006, 4.781-6.047, 6.6-7.685 and woman laughing at 6.484-9.569", "frequencyCaption": "cat meowing three times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1875.wav", "onoffCaption": "dog barking at 2.881-4.881 and cow mooing at 7.22-10.0", "frequencyCaption": "dog barking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1890.wav", "onoffCaption": "spraying at 0.707-1.648, 3.552-4.493, 6.603-7.544", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3110.wav", "onoffCaption": "cat meowing at 0.442-2.436 and duck quacking at 1.45-3.45, 3.969-5.969, 6.517-8.517", "frequencyCaption": "cat meowing one times and duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3145.wav", "onoffCaption": "train horn at 0.327-6.796 and door knocking at 6.15-8.525", "frequencyCaption": "train horn one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3336.wav", "onoffCaption": "thump 
thud at 2.128-4.628, 5.384-8.155", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3347.wav", "onoffCaption": "explosion at 3.829-7.669", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3386.wav", "onoffCaption": "door knocking at 2.413-6.029, 6.842-9.217", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3408.wav", "onoffCaption": "explosion at 3.363-8.363", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3513.wav", "onoffCaption": "woman laughing at 1.221-5.273, 6.213-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3546.wav", "onoffCaption": "gunshot at 2.824-4.824", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3711.wav", "onoffCaption": "spraying at 0.063-0.585, 1.438-1.96, 3.397-3.919 and cow mooing at 1.311-4.293, 6.776-9.397", "frequencyCaption": "spraying three times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3760.wav", "onoffCaption": "duck quacking at 0.042-2.042, 3.248-5.248, 6.411-8.411", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3785.wav", "onoffCaption": "tapping clicking clanking at 0.561-4.001, 6.477-9.917", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3817.wav", "onoffCaption": "spraying at 0.006-0.61", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_3842.wav", "onoffCaption": "sneeze at 0.021-1.521, 2.134-3.634", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3959.wav", "onoffCaption": "burping belching at 1.098-4.098, 5.432-7.873", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_59.wav", "onoffCaption": "train horn at 1.154-5.484 and cat meowing at 8.009-9.021", 
"frequencyCaption": "train horn one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_164.wav", "onoffCaption": "burping belching at 0.151-4.151, 6.609-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_181.wav", "onoffCaption": "sneeze at 1.324-4.972, 6.303-9.951", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_216.wav", "onoffCaption": "thump thud at 0.386-2.886, 4.342-6.391 and door slamming at 0.64-1.779", "frequencyCaption": "thump thud two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_243.wav", "onoffCaption": "gunshot at 0.792-2.792, 4.535-6.535, 7.287-9.287", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_358.wav", "onoffCaption": "door knocking at 2.31-6.843", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_429.wav", "onoffCaption": "dog barking at 0.504-2.504, 3.032-5.032 and sheep goat bleating at 7.803-9.803", "frequencyCaption": "dog barking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_433.wav", "onoffCaption": "duck quacking at 0.816-2.816 and door knocking at 5.455-9.205", "frequencyCaption": "duck quacking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_466.wav", "onoffCaption": "duck quacking at 0.463-2.463, 3.794-5.794, 7.209-9.209", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_483.wav", "onoffCaption": "whistling at 3.824-6.799, 7.476-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_528.wav", "onoffCaption": "tapping clicking clanking at 2.682-6.122, 7.378-9.544", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_598.wav", "onoffCaption": "whistling at 0.363-2.592 and woman laughing at 6.028-8.311", 
"frequencyCaption": "whistling one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_615.wav", "onoffCaption": "whistling at 0.199-2.428", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_640.wav", "onoffCaption": "thump thud at 0.872-4.79, 5.882-9.8", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_836.wav", "onoffCaption": "whistling at 1.112-6.226, 7.367-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_879.wav", "onoffCaption": "door knocking at 2.646-5.702, 7.465-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_937.wav", "onoffCaption": "dog barking at 4.0-6.0, 7.139-9.139", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_962.wav", "onoffCaption": "cat meowing at 0.143-5.143, 6.242-10.0 and car horn honking at 1.548-3.895", "frequencyCaption": "cat meowing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_987.wav", "onoffCaption": "car horn honking at 0.823-4.477, 6.669-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1018.wav", "onoffCaption": "car horn honking at 0.255-4.767, 5.748-8.386", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1103.wav", "onoffCaption": "car horn honking at 3.214-7.536", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1156.wav", "onoffCaption": "whistling at 3.274-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1325.wav", "onoffCaption": "sneeze at 2.663-5.124, 6.447-8.908 and gunshot at 3.303-5.303, 6.37-8.37", "frequencyCaption": "sneeze two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1370.wav", "onoffCaption": "cow mooing at 0.829-5.258, 6.489-9.114", 
"frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1395.wav", "onoffCaption": "duck quacking at 2.874-4.874, 7.229-9.229", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1500.wav", "onoffCaption": "thump thud at 1.523-4.294 and burping belching at 3.024-6.283", "frequencyCaption": "thump thud one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1555.wav", "onoffCaption": "whistling at 0.923-3.798, 5.507-7.736 and gunshot at 2.133-4.133", "frequencyCaption": "whistling two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1668.wav", "onoffCaption": "burping belching at 0.939-4.141 and car horn honking at 2.97-5.788, 7.057-9.875", "frequencyCaption": "burping belching one times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1726.wav", "onoffCaption": "explosion at 2.333-7.333", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1773.wav", "onoffCaption": "woman laughing at 2.701-10.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1804.wav", "onoffCaption": "explosion at 0.379-4.379, 6.146-8.899 and cat meowing at 0.504-1.516 and whistling at 3.877-9.052", "frequencyCaption": "explosion two times and cat meowing one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1851.wav", "onoffCaption": "car horn honking at 3.222-6.008, 7.231-9.239", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1950.wav", "onoffCaption": "whistling at 3.046-8.221 and sheep goat bleating at 7.003-9.003", "frequencyCaption": "whistling one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3060.wav", "onoffCaption": "door knocking at 1.078-5.911, 7.79-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3134.wav", 
"onoffCaption": "thump thud at 2.316-4.655, 6.441-8.89", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3161.wav", "onoffCaption": "thump thud at 1.467-4.514, 5.451-8.498", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3184.wav", "onoffCaption": "thump thud at 0.155-2.617, 4.858-7.467 and dog barking at 6.665-8.665", "frequencyCaption": "thump thud two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3209.wav", "onoffCaption": "car horn honking at 0.261-4.102, 5.871-9.712", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3312.wav", "onoffCaption": "door knocking at 0.796-3.108 and thump thud at 6.247-10.0", "frequencyCaption": "door knocking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3436.wav", "onoffCaption": "door knocking at 2.451-6.219, 6.734-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3479.wav", "onoffCaption": "car horn honking at 0.4-2.865", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3537.wav", "onoffCaption": "duck quacking at 0.27-2.27 and car horn honking at 6.138-10.0", "frequencyCaption": "duck quacking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3562.wav", "onoffCaption": "sneeze at 2.315-3.592, 5.084-6.372, 7.543-10.0", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3587.wav", "onoffCaption": "car horn honking at 1.174-3.687", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3744.wav", "onoffCaption": "tapping clicking clanking at 1.635-5.075, 5.921-9.361", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3829.wav", "onoffCaption": "duck quacking at 0.57-2.57, 3.69-5.69", "frequencyCaption": "duck 
quacking two times"} +{"filepath": "data/multi_event_train/syn_3833.wav", "onoffCaption": "duck quacking at 2.933-4.933, 6.082-8.082", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3866.wav", "onoffCaption": "cow mooing at 0.249-3.259 and sheep goat bleating at 2.129-4.129, 4.999-6.999", "frequencyCaption": "cow mooing one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3883.wav", "onoffCaption": "whistling at 0.366-8.377", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3928.wav", "onoffCaption": "gunshot at 2.58-4.58, 6.96-8.96 and door knocking at 3.521-5.898", "frequencyCaption": "gunshot two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3998.wav", "onoffCaption": "car horn honking at 1.23-4.817, 7.133-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_16.wav", "onoffCaption": "woman laughing at 1.302-3.894, 4.898-7.49", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_43.wav", "onoffCaption": "sheep goat bleating at 1.381-3.381, 4.024-6.942, 7.88-9.88", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_131.wav", "onoffCaption": "train horn at 2.615-5.975", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_259.wav", "onoffCaption": "dog barking at 1.087-3.087, 5.583-7.583", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_317.wav", "onoffCaption": "door slamming at 0.245-2.01, 3.859-5.624 and explosion at 1.731-6.731", "frequencyCaption": "door slamming two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_342.wav", "onoffCaption": "woman laughing at 0.341-2.907, 4.104-6.67, 7.981-10.0 and train horn at 3.333-5.973", "frequencyCaption": "woman laughing three times and train horn one 
times"} +{"filepath": "data/multi_event_train/syn_499.wav", "onoffCaption": "woman laughing at 0.549-3.634, 4.963-7.2", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_532.wav", "onoffCaption": "woman laughing at 2.264-4.856, 5.681-7.7", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_567.wav", "onoffCaption": "whistling at 0.302-8.052", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_582.wav", "onoffCaption": "sheep goat bleating at 0.147-2.147, 4.118-6.118", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_714.wav", "onoffCaption": "door slamming at 1.142-3.142, 5.231-7.231", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_741.wav", "onoffCaption": "explosion at 2.483-5.483, 5.99-8.99", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_863.wav", "onoffCaption": "spraying at 2.737-4.432, 5.492-7.187, 7.813-9.508", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_886.wav", "onoffCaption": "dog barking at 0.055-2.055, 3.364-5.364, 6.632-8.632 and sneeze at 2.661-4.978", "frequencyCaption": "dog barking three times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_978.wav", "onoffCaption": "duck quacking at 3.758-5.758, 6.667-8.667", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1002.wav", "onoffCaption": "cat meowing at 2.9-5.804, 8.273-9.583", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1057.wav", "onoffCaption": "train horn at 0.616-3.056 and woman laughing at 2.221-5.502", "frequencyCaption": "train horn one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1119.wav", "onoffCaption": "gunshot at 0.328-2.328, 3.264-5.264, 6.146-8.146", "frequencyCaption": 
"gunshot three times"} +{"filepath": "data/multi_event_train/syn_1224.wav", "onoffCaption": "sheep goat bleating at 1.04-3.04", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1271.wav", "onoffCaption": "woman laughing at 3.563-6.658, 7.737-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1293.wav", "onoffCaption": "burping belching at 1.074-4.276, 5.175-7.481", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1294.wav", "onoffCaption": "dog barking at 0.663-2.663, 3.258-5.258 and explosion at 0.759-5.759 and sheep goat bleating at 1.279-3.279, 5.169-7.169", "frequencyCaption": "dog barking two times and explosion one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1388.wav", "onoffCaption": "whistling at 0.114-8.125 and gunshot at 1.695-3.695, 4.819-6.819 and explosion at 4.022-7.022", "frequencyCaption": "whistling one times and gunshot two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1401.wav", "onoffCaption": "thump thud at 0.379-2.841, 3.369-5.708, 6.811-9.748", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_1454.wav", "onoffCaption": "door knocking at 2.104-4.941 and explosion at 5.967-10.0", "frequencyCaption": "door knocking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1627.wav", "onoffCaption": "thump thud at 0.558-3.058, 4.154-6.977", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1672.wav", "onoffCaption": "cat meowing at 2.314-7.314 and spraying at 5.588-8.048", "frequencyCaption": "cat meowing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1697.wav", "onoffCaption": "woman laughing at 0.865-3.634, 4.773-7.73", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1769.wav", 
"onoffCaption": "tapping clicking clanking at 0.115-3.555 and duck quacking at 5.928-7.928 and dog barking at 6.357-8.795", "frequencyCaption": "tapping clicking clanking one times and duck quacking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1905.wav", "onoffCaption": "thump thud at 2.283-4.783, 5.933-8.433 and sneeze at 7.176-9.095", "frequencyCaption": "thump thud two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3035.wav", "onoffCaption": "whistling at 1.36-6.535", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3085.wav", "onoffCaption": "tapping clicking clanking at 2.948-6.388, 7.912-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3213.wav", "onoffCaption": "door slamming at 0.257-1.736 and whistling at 0.27-2.279, 3.595-6.47", "frequencyCaption": "door slamming one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_3214.wav", "onoffCaption": "thump thud at 2.382-4.721, 5.845-8.607", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3246.wav", "onoffCaption": "tapping clicking clanking at 0.331-3.771 and cow mooing at 6.725-10.0", "frequencyCaption": "tapping clicking clanking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3308.wav", "onoffCaption": "woman laughing at 0.073-5.112 and duck quacking at 0.649-2.649", "frequencyCaption": "woman laughing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3463.wav", "onoffCaption": "dog barking at 0.095-2.095, 3.294-5.294, 6.96-8.96", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_3486.wav", "onoffCaption": "sheep goat bleating at 0.432-3.432, 4.38-6.38", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3578.wav", "onoffCaption": "dog barking at 
0.095-2.495, 3.497-5.935, 7.9-9.9", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_3610.wav", "onoffCaption": "explosion at 0.757-3.757, 5.449-8.449", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3642.wav", "onoffCaption": "door knocking at 0.246-2.746, 5.236-7.736", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3645.wav", "onoffCaption": "tapping clicking clanking at 1.919-5.359", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3759.wav", "onoffCaption": "door knocking at 2.919-5.649, 6.984-9.048", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3899.wav", "onoffCaption": "woman laughing at 2.366-5.135, 6.523-9.277", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3932.wav", "onoffCaption": "woman laughing at 0.413-2.696, 3.687-6.456, 7.42-9.645", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_3967.wav", "onoffCaption": "woman laughing at 0.22-2.312, 3.688-5.78", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3982.wav", "onoffCaption": "whistling at 0.296-5.796, 6.441-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_11.wav", "onoffCaption": "spraying at 0.048-0.675, 2.079-3.163 and explosion at 6.554-10.0", "frequencyCaption": "spraying two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_44.wav", "onoffCaption": "sheep goat bleating at 0.24-2.24, 3.277-5.277, 7.394-9.394", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_163.wav", "onoffCaption": "duck quacking at 0.386-2.386, 3.148-5.148 and sheep goat bleating at 3.949-5.949", "frequencyCaption": "duck quacking two times and sheep goat bleating one 
times"} +{"filepath": "data/multi_event_train/syn_186.wav", "onoffCaption": "door slamming at 0.044-1.161 and burping belching at 1.036-4.316", "frequencyCaption": "door slamming one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_310.wav", "onoffCaption": "sneeze at 1.983-4.386, 5.995-7.495", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_345.wav", "onoffCaption": "spraying at 1.07-3.462 and explosion at 4.971-9.971", "frequencyCaption": "spraying one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_535.wav", "onoffCaption": "woman laughing at 2.988-5.343, 6.095-8.681", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_608.wav", "onoffCaption": "explosion at 2.457-5.329", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_713.wav", "onoffCaption": "duck quacking at 0.237-2.237, 3.979-5.979, 7.934-9.934", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_746.wav", "onoffCaption": "train horn at 0.123-4.453, 5.099-9.429", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_831.wav", "onoffCaption": "dog barking at 0.341-2.341, 2.889-4.889, 6.503-8.503", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_881.wav", "onoffCaption": "tapping clicking clanking at 0.683-4.123 and thump thud at 7.869-10.0", "frequencyCaption": "tapping clicking clanking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1005.wav", "onoffCaption": "door knocking at 1.384-7.444 and explosion at 5.109-8.109", "frequencyCaption": "door knocking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1050.wav", "onoffCaption": "explosion at 2.575-4.639, 5.936-8.753", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1223.wav", 
"onoffCaption": "sheep goat bleating at 1.401-3.401, 5.028-7.028", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1276.wav", "onoffCaption": "gunshot at 2.945-4.945, 6.205-8.706", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1338.wav", "onoffCaption": "tapping clicking clanking at 0.123-3.563, 4.576-8.016 and sheep goat bleating at 6.188-8.188", "frequencyCaption": "tapping clicking clanking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1453.wav", "onoffCaption": "train horn at 2.239-5.119, 5.847-8.727", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1548.wav", "onoffCaption": "thump thud at 0.102-3.149, 3.878-6.469", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1620.wav", "onoffCaption": "gunshot at 0.078-2.552 and thump thud at 0.946-4.864", "frequencyCaption": "gunshot one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1675.wav", "onoffCaption": "door knocking at 2.094-4.469, 5.66-8.035", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1690.wav", "onoffCaption": "door slamming at 1.034-3.517 and spraying at 7.495-8.099", "frequencyCaption": "door slamming one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1957.wav", "onoffCaption": "sneeze at 1.883-3.59, 4.837-6.544, 8.083-9.79", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3067.wav", "onoffCaption": "thump thud at 0.31-2.81 and burping belching at 6.33-9.091", "frequencyCaption": "thump thud one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3082.wav", "onoffCaption": "cow mooing at 0.949-3.959, 5.766-7.948", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3199.wav", "onoffCaption": "thump thud at 
1.786-4.833, 6.026-9.073", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3241.wav", "onoffCaption": "sheep goat bleating at 2.492-4.492, 5.259-7.259, 7.956-9.956", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3431.wav", "onoffCaption": "woman laughing at 0.558-3.258, 3.954-6.039, 6.909-9.007 and whistling at 2.627-7.111", "frequencyCaption": "woman laughing three times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3617.wav", "onoffCaption": "train horn at 0.479-6.948, 7.832-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3935.wav", "onoffCaption": "sneeze at 1.497-3.958, 4.871-6.197", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_136.wav", "onoffCaption": "thump thud at 0.761-3.532 and whistling at 5.796-10.0", "frequencyCaption": "thump thud one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_244.wav", "onoffCaption": "spraying at 0.842-1.709 and door slamming at 2.747-3.247", "frequencyCaption": "spraying one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_461.wav", "onoffCaption": "cat meowing at 2.228-3.978 and explosion at 3.017-5.024, 6.181-8.188 and cow mooing at 5.927-8.909", "frequencyCaption": "cat meowing one times and explosion two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_484.wav", "onoffCaption": "car horn honking at 1.94-5.781", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_560.wav", "onoffCaption": "dog barking at 2.223-4.223, 5.485-7.485", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_585.wav", "onoffCaption": "spraying at 3.005-3.609, 4.518-6.646, 7.52-9.215", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_612.wav", "onoffCaption": "dog barking 
at 0.012-2.012, 4.228-6.228, 6.945-8.945", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_864.wav", "onoffCaption": "explosion at 1.183-4.183 and woman laughing at 7.821-10.0", "frequencyCaption": "explosion one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_965.wav", "onoffCaption": "dog barking at 2.009-4.009, 5.762-7.762 and explosion at 2.108-4.861, 5.845-8.598", "frequencyCaption": "dog barking two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_980.wav", "onoffCaption": "cat meowing at 0.306-1.446, 2.819-3.959", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1151.wav", "onoffCaption": "sneeze at 0.567-3.181 and explosion at 1.075-6.075", "frequencyCaption": "sneeze one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1406.wav", "onoffCaption": "sheep goat bleating at 1.684-3.684, 4.252-6.252, 7.758-9.758", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1507.wav", "onoffCaption": "cat meowing at 0.787-2.328, 4.605-5.915 and burping belching at 1.025-6.149, 7.732-9.857", "frequencyCaption": "cat meowing two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1791.wav", "onoffCaption": "duck quacking at 0.18-2.18 and cat meowing at 0.29-3.531, 4.935-7.682", "frequencyCaption": "duck quacking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1803.wav", "onoffCaption": "whistling at 2.821-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1819.wav", "onoffCaption": "sneeze at 0.143-2.389, 2.913-4.832, 5.475-7.588", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_1902.wav", "onoffCaption": "dog barking at 2.405-4.405, 6.495-8.495", "frequencyCaption": "dog barking two times"} +{"filepath": 
"data/multi_event_train/syn_1918.wav", "onoffCaption": "burping belching at 3.973-6.973", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3028.wav", "onoffCaption": "sneeze at 0.602-1.879", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3032.wav", "onoffCaption": "woman laughing at 1.827-5.215, 6.908-9.263", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3129.wav", "onoffCaption": "spraying at 1.764-2.828, 4.27-5.334 and door knocking at 6.965-9.502", "frequencyCaption": "spraying two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3133.wav", "onoffCaption": "woman laughing at 1.628-3.836, 5.97-8.178", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3340.wav", "onoffCaption": "door slamming at 0.33-2.526 and cow mooing at 6.04-9.022", "frequencyCaption": "door slamming one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3464.wav", "onoffCaption": "dog barking at 3.184-5.184, 6.269-8.269", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3481.wav", "onoffCaption": "explosion at 1.66-4.528, 6.089-8.957", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3565.wav", "onoffCaption": "cow mooing at 0.862-3.872 and spraying at 7.02-7.624, 9.258-9.862", "frequencyCaption": "cow mooing one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3580.wav", "onoffCaption": "whistling at 0.212-7.611", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3716.wav", "onoffCaption": "thump thud at 3.076-7.526", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3861.wav", "onoffCaption": "whistling at 0.139-7.889", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3884.wav", 
"onoffCaption": "cat meowing at 3.525-8.525", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3960.wav", "onoffCaption": "cow mooing at 3.13-6.099, 6.661-9.63", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3985.wav", "onoffCaption": "thump thud at 1.04-4.958", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_108.wav", "onoffCaption": "cow mooing at 0.556-4.985 and train horn at 0.71-8.91 and woman laughing at 2.098-4.466, 5.962-8.33", "frequencyCaption": "cow mooing one times and train horn one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_179.wav", "onoffCaption": "gunshot at 2.115-4.115, 5.617-7.617", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_211.wav", "onoffCaption": "burping belching at 0.658-6.602, 7.664-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_434.wav", "onoffCaption": "explosion at 2.431-5.16 and train horn at 7.14-10.0", "frequencyCaption": "explosion one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_647.wav", "onoffCaption": "sheep goat bleating at 1.751-3.751", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_709.wav", "onoffCaption": "cow mooing at 0.149-3.447, 4.752-7.721", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_930.wav", "onoffCaption": "duck quacking at 2.399-4.399, 5.505-7.505 and woman laughing at 3.444-5.526, 6.964-9.448 and dog barking at 4.535-6.535", "frequencyCaption": "duck quacking two times and woman laughing two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_941.wav", "onoffCaption": "cow mooing at 0.379-5.359, 7.153-9.591", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1104.wav", "onoffCaption": "door 
slamming at 0.251-2.016, 2.613-4.378, 5.27-7.035 and gunshot at 2.093-4.093, 5.224-7.243 and sheep goat bleating at 2.203-4.203, 6.075-8.075", "frequencyCaption": "door slamming three times and gunshot two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1190.wav", "onoffCaption": "woman laughing at 0.835-3.035 and spraying at 6.648-7.223", "frequencyCaption": "woman laughing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1239.wav", "onoffCaption": "woman laughing at 0.599-3.185, 4.736-7.341", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1289.wav", "onoffCaption": "car horn honking at 3.261-6.047, 7.282-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1322.wav", "onoffCaption": "duck quacking at 3.022-5.022, 6.303-8.303", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1377.wav", "onoffCaption": "sheep goat bleating at 0.256-2.256 and sneeze at 4.422-6.105, 7.36-9.043", "frequencyCaption": "sheep goat bleating one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_1392.wav", "onoffCaption": "car horn honking at 0.604-3.117, 5.468-7.981", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1449.wav", "onoffCaption": "explosion at 2.052-4.313, 6.672-8.933", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1552.wav", "onoffCaption": "burping belching at 0.716-3.716 and duck quacking at 5.025-7.025, 7.592-9.592", "frequencyCaption": "burping belching one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1721.wav", "onoffCaption": "cow mooing at 1.991-4.973, 5.974-8.943", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1774.wav", "onoffCaption": "door slamming at 1.63-2.863, 4.075-6.075, 6.681-9.11", 
"frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1827.wav", "onoffCaption": "woman laughing at 3.327-5.919", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1856.wav", "onoffCaption": "sneeze at 2.355-4.969", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3098.wav", "onoffCaption": "car horn honking at 1.715-6.037", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3117.wav", "onoffCaption": "sneeze at 0.541-2.604, 4.626-6.689 and sheep goat bleating at 2.158-4.158, 5.39-7.39", "frequencyCaption": "sneeze two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3166.wav", "onoffCaption": "spraying at 2.954-3.805", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_3183.wav", "onoffCaption": "sheep goat bleating at 0.991-2.991, 3.74-5.74, 6.273-8.273", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3315.wav", "onoffCaption": "duck quacking at 0.019-2.019, 3.177-5.177, 6.468-8.468", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3530.wav", "onoffCaption": "woman laughing at 2.851-5.443", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3541.wav", "onoffCaption": "spraying at 0.143-0.793, 1.737-2.588, 3.538-4.27 and train horn at 5.735-10.0", "frequencyCaption": "spraying three times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3658.wav", "onoffCaption": "cat meowing at 0.634-2.529, 3.868-5.763 and explosion at 3.879-6.879, 7.905-10.0", "frequencyCaption": "cat meowing two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_3743.wav", "onoffCaption": "thump thud at 2.308-6.758", "frequencyCaption": "thump thud one times"} +{"filepath": 
"data/multi_event_train/syn_3834.wav", "onoffCaption": "sneeze at 1.737-4.397 and woman laughing at 6.52-9.106", "frequencyCaption": "sneeze one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3845.wav", "onoffCaption": "tapping clicking clanking at 3.386-6.826 and door slamming at 6.172-8.63", "frequencyCaption": "tapping clicking clanking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_260.wav", "onoffCaption": "car horn honking at 0.679-3.497, 4.414-6.414, 7.4-9.747 and thump thud at 0.765-3.104", "frequencyCaption": "car horn honking three times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_285.wav", "onoffCaption": "cat meowing at 2.662-3.674", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_410.wav", "onoffCaption": "sneeze at 2.922-4.918", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_445.wav", "onoffCaption": "burping belching at 2.349-4.443 and sheep goat bleating at 7.468-9.468", "frequencyCaption": "burping belching one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_636.wav", "onoffCaption": "thump thud at 0.085-3.752, 4.485-8.152", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_778.wav", "onoffCaption": "cat meowing at 0.073-1.968, 4.277-5.463", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_914.wav", "onoffCaption": "cat meowing at 2.239-3.266, 3.834-4.861, 5.407-6.434", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1120.wav", "onoffCaption": "sheep goat bleating at 3.176-5.176, 6.418-8.418", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1175.wav", "onoffCaption": "spraying at 0.156-2.284", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1248.wav", 
"onoffCaption": "woman laughing at 0.652-3.447, 4.195-6.99 and burping belching at 2.568-6.591, 7.652-10.0", "frequencyCaption": "woman laughing two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1306.wav", "onoffCaption": "woman laughing at 0.786-3.154, 5.194-7.562 and spraying at 1.837-2.412, 4.389-7.386 and explosion at 5.217-8.211", "frequencyCaption": "woman laughing two times and spraying two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1353.wav", "onoffCaption": "woman laughing at 2.632-4.832 and tapping clicking clanking at 7.798-10.0", "frequencyCaption": "woman laughing one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1438.wav", "onoffCaption": "dog barking at 0.127-5.764 and tapping clicking clanking at 7.741-10.0", "frequencyCaption": "dog barking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1488.wav", "onoffCaption": "woman laughing at 0.084-3.365, 4.111-7.392", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1523.wav", "onoffCaption": "cat meowing at 3.057-4.628, 5.333-6.904, 8.188-9.759", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1576.wav", "onoffCaption": "woman laughing at 0.377-2.602", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1593.wav", "onoffCaption": "duck quacking at 2.419-4.419, 5.123-7.123, 7.708-9.708", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_1705.wav", "onoffCaption": "gunshot at 0.188-2.188, 3.006-5.006, 6.0-8.0 and cow mooing at 2.515-5.813, 6.954-9.655", "frequencyCaption": "gunshot three times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1750.wav", "onoffCaption": "thump thud at 0.121-4.039, 5.53-8.03", "frequencyCaption": "thump thud two times"} +{"filepath": 
"data/multi_event_train/syn_1872.wav", "onoffCaption": "cat meowing at 0.257-1.267 and train horn at 5.374-7.814", "frequencyCaption": "cat meowing one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1897.wav", "onoffCaption": "cow mooing at 3.107-8.087", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1969.wav", "onoffCaption": "duck quacking at 0.218-2.218", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3059.wav", "onoffCaption": "burping belching at 0.083-2.286", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3142.wav", "onoffCaption": "woman laughing at 0.185-2.885, 4.611-7.311 and burping belching at 2.382-5.822 and duck quacking at 5.807-7.807", "frequencyCaption": "woman laughing two times and burping belching one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3364.wav", "onoffCaption": "door slamming at 1.137-3.333, 4.292-6.488, 7.604-9.8", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3381.wav", "onoffCaption": "duck quacking at 2.558-4.558 and dog barking at 3.089-5.089, 6.993-8.993", "frequencyCaption": "duck quacking one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3514.wav", "onoffCaption": "sneeze at 1.47-6.47, 7.74-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3629.wav", "onoffCaption": "duck quacking at 2.888-4.888, 7.107-9.107", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3699.wav", "onoffCaption": "woman laughing at 0.287-3.568, 4.142-6.944, 7.835-9.884", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_3732.wav", "onoffCaption": "cow mooing at 0.364-3.662, 4.801-7.605", "frequencyCaption": "cow mooing two times"} +{"filepath": 
"data/multi_event_train/syn_3782.wav", "onoffCaption": "spraying at 0.427-2.887, 4.194-5.451", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3810.wav", "onoffCaption": "thump thud at 2.015-5.682, 7.857-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_35.wav", "onoffCaption": "thump thud at 3.337-5.676, 7.377-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_60.wav", "onoffCaption": "cat meowing at 0.035-1.619 and explosion at 0.384-3.256, 3.9-6.772 and spraying at 3.01-5.47", "frequencyCaption": "cat meowing one times and explosion two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_147.wav", "onoffCaption": "gunshot at 0.792-2.792, 3.335-5.335", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_235.wav", "onoffCaption": "cat meowing at 0.778-1.787, 2.553-3.565 and gunshot at 2.067-4.067, 6.151-8.151", "frequencyCaption": "cat meowing two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_334.wav", "onoffCaption": "duck quacking at 3.078-5.078", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_361.wav", "onoffCaption": "car horn honking at 2.942-5.289, 5.9-8.413", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_384.wav", "onoffCaption": "duck quacking at 1.589-3.589, 4.206-6.206", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_511.wav", "onoffCaption": "woman laughing at 0.627-2.91 and thump thud at 6.446-9.217", "frequencyCaption": "woman laughing one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_663.wav", "onoffCaption": "whistling at 0.402-4.886 and dog barking at 2.751-4.751, 6.775-8.775 and door slamming at 8.642-9.142", "frequencyCaption": "whistling one times and dog barking two times and door slamming one 
times"} +{"filepath": "data/multi_event_train/syn_679.wav", "onoffCaption": "sneeze at 0.263-2.326, 3.042-5.105, 6.243-8.306", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_686.wav", "onoffCaption": "cat meowing at 0.012-3.357, 5.746-7.478", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_737.wav", "onoffCaption": "thump thud at 3.081-7.531", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_762.wav", "onoffCaption": "car horn honking at 3.016-7.528", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_787.wav", "onoffCaption": "sneeze at 0.267-2.506, 3.642-5.881 and cat meowing at 1.741-6.101", "frequencyCaption": "sneeze two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_815.wav", "onoffCaption": "dog barking at 0.838-2.838, 4.348-6.348 and woman laughing at 5.774-7.982", "frequencyCaption": "dog barking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1021.wav", "onoffCaption": "explosion at 2.342-5.342, 5.92-8.92 and burping belching at 2.438-5.438", "frequencyCaption": "explosion two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1074.wav", "onoffCaption": "burping belching at 2.126-4.233, 5.913-8.02", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1207.wav", "onoffCaption": "sheep goat bleating at 0.427-2.427, 3.863-5.863, 6.741-8.741", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1252.wav", "onoffCaption": "thump thud at 2.779-5.55, 6.397-8.569", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1349.wav", "onoffCaption": "burping belching at 3.22-5.251, 5.977-8.943", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1477.wav", 
"onoffCaption": "door knocking at 0.762-5.058", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1492.wav", "onoffCaption": "train horn at 0.546-3.706", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1539.wav", "onoffCaption": "whistling at 0.69-5.174, 5.721-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1589.wav", "onoffCaption": "cow mooing at 1.966-4.935, 6.103-9.072", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1604.wav", "onoffCaption": "sneeze at 0.952-5.481, 6.461-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1651.wav", "onoffCaption": "thump thud at 0.031-4.481, 5.06-7.831 and cow mooing at 1.949-6.929 and gunshot at 2.458-4.458, 5.594-7.594", "frequencyCaption": "thump thud two times and cow mooing one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1868.wav", "onoffCaption": "dog barking at 2.736-4.736, 5.262-7.262", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1973.wav", "onoffCaption": "sheep goat bleating at 0.478-2.478", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1996.wav", "onoffCaption": "sheep goat bleating at 0.337-2.337, 4.071-6.071", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3043.wav", "onoffCaption": "duck quacking at 1.343-3.343 and door slamming at 6.658-8.576", "frequencyCaption": "duck quacking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3158.wav", "onoffCaption": "spraying at 2.079-3.841, 4.379-6.141, 7.114-8.876", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3230.wav", "onoffCaption": "car horn honking at 0.711-5.111 and burping belching at 7.655-10.0", "frequencyCaption": "car horn 
honking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3265.wav", "onoffCaption": "gunshot at 0.842-2.861, 3.712-5.731, 6.886-8.905", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3280.wav", "onoffCaption": "thump thud at 0.098-4.548 and train horn at 1.037-4.807, 5.772-8.572", "frequencyCaption": "thump thud one times and train horn two times"} +{"filepath": "data/multi_event_train/syn_3331.wav", "onoffCaption": "explosion at 0.213-3.213, 4.115-6.28 and gunshot at 7.473-9.473", "frequencyCaption": "explosion two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3415.wav", "onoffCaption": "tapping clicking clanking at 1.663-5.103 and thump thud at 1.668-4.168, 5.582-8.082", "frequencyCaption": "tapping clicking clanking one times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_3633.wav", "onoffCaption": "cow mooing at 0.348-4.777", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3666.wav", "onoffCaption": "whistling at 3.006-7.49", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3683.wav", "onoffCaption": "tapping clicking clanking at 0.355-3.795, 6.028-8.443", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3728.wav", "onoffCaption": "door knocking at 0.613-2.993 and sheep goat bleating at 1.374-3.374, 4.594-6.594", "frequencyCaption": "door knocking one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3767.wav", "onoffCaption": "cow mooing at 1.682-4.651, 6.711-9.68", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3798.wav", "onoffCaption": "sneeze at 2.406-6.054, 7.169-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3911.wav", "onoffCaption": "explosion at 3.713-6.713, 7.614-10.0", "frequencyCaption": 
"explosion two times"} +{"filepath": "data/multi_event_train/syn_5000.wav", "onoffCaption": "woman laughing at 2.643-4.868, 7.346-9.571", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_85.wav", "onoffCaption": "thump thud at 0.092-4.542, 5.186-9.636", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_112.wav", "onoffCaption": "door slamming at 2.357-3.474, 4.082-6.0", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_175.wav", "onoffCaption": "whistling at 0.475-8.713", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_190.wav", "onoffCaption": "burping belching at 2.79-5.969", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_248.wav", "onoffCaption": "whistling at 0.668-3.643, 6.008-8.983", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_306.wav", "onoffCaption": "whistling at 2.191-9.535", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_353.wav", "onoffCaption": "explosion at 1.105-6.105, 7.952-9.971", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_438.wav", "onoffCaption": "sheep goat bleating at 1.945-3.945", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_523.wav", "onoffCaption": "explosion at 2.55-5.141, 5.651-8.242", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_544.wav", "onoffCaption": "gunshot at 1.072-3.242, 3.961-6.002, 7.293-9.293", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_705.wav", "onoffCaption": "train horn at 0.471-5.329", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_750.wav", "onoffCaption": "cow mooing at 2.139-5.149, 5.874-8.836", "frequencyCaption": "cow mooing two times"} 
+{"filepath": "data/multi_event_train/syn_827.wav", "onoffCaption": "tapping clicking clanking at 0.111-3.551, 5.819-8.222 and whistling at 3.405-8.58 and explosion at 5.426-8.154", "frequencyCaption": "tapping clicking clanking two times and whistling one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_840.wav", "onoffCaption": "gunshot at 0.056-2.056 and whistling at 4.934-10.0", "frequencyCaption": "gunshot one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1013.wav", "onoffCaption": "door slamming at 0.266-0.766, 1.741-3.659, 4.964-6.964 and explosion at 1.158-6.158", "frequencyCaption": "door slamming three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1091.wav", "onoffCaption": "explosion at 0.702-3.302", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1108.wav", "onoffCaption": "door knocking at 0.744-3.047, 4.096-6.399", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1235.wav", "onoffCaption": "explosion at 0.214-2.967", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1260.wav", "onoffCaption": "train horn at 1.145-3.945 and explosion at 6.009-8.609", "frequencyCaption": "train horn one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1285.wav", "onoffCaption": "thump thud at 0.028-2.799 and gunshot at 0.633-2.633, 4.968-6.968", "frequencyCaption": "thump thud one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1422.wav", "onoffCaption": "whistling at 1.541-6.716", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1445.wav", "onoffCaption": "gunshot at 2.192-4.192, 6.123-8.123", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1636.wav", "onoffCaption": "cow mooing at 0.692-3.674, 4.245-7.07 and explosion at 1.708-4.576, 6.62-9.614 and door 
slamming at 7.173-8.312", "frequencyCaption": "cow mooing two times and explosion two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1663.wav", "onoffCaption": "thump thud at 1.73-3.958, 5.73-8.425", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1686.wav", "onoffCaption": "spraying at 1.212-3.648, 4.274-6.71, 7.857-10.0", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1778.wav", "onoffCaption": "dog barking at 2.24-4.24 and explosion at 6.124-10.0", "frequencyCaption": "dog barking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1926.wav", "onoffCaption": "whistling at 3.395-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1941.wav", "onoffCaption": "whistling at 0.213-7.868 and gunshot at 3.4-5.4", "frequencyCaption": "whistling one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3016.wav", "onoffCaption": "cow mooing at 2.11-5.408", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3071.wav", "onoffCaption": "car horn honking at 0.372-2.837, 5.051-7.551", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3094.wav", "onoffCaption": "gunshot at 2.875-4.875", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3202.wav", "onoffCaption": "duck quacking at 0.628-2.628, 3.29-5.29, 6.192-8.192", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3257.wav", "onoffCaption": "car horn honking at 0.974-3.321, 5.028-7.375 and train horn at 3.119-5.759, 6.861-9.328", "frequencyCaption": "car horn honking two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_3319.wav", "onoffCaption": "sneeze at 3.802-5.036, 6.027-8.488", "frequencyCaption": "sneeze two times"} +{"filepath": 
"data/multi_event_train/syn_3427.wav", "onoffCaption": "spraying at 0.506-2.966, 4.117-5.85, 7.285-9.895 and woman laughing at 1.012-4.588", "frequencyCaption": "spraying three times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3440.wav", "onoffCaption": "car horn honking at 1.313-5.713, 7.865-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3497.wav", "onoffCaption": "duck quacking at 1.589-3.589, 4.21-6.21, 7.047-9.047", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3601.wav", "onoffCaption": "spraying at 0.049-1.133 and burping belching at 5.237-8.237", "frequencyCaption": "spraying one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3654.wav", "onoffCaption": "sneeze at 2.713-4.325", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3838.wav", "onoffCaption": "train horn at 2.023-7.738", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3923.wav", "onoffCaption": "gunshot at 0.039-2.209, 3.821-5.821", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3944.wav", "onoffCaption": "sheep goat bleating at 0.704-2.704, 4.835-6.835 and cat meowing at 1.039-2.252, 3.829-5.245", "frequencyCaption": "sheep goat bleating two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_48.wav", "onoffCaption": "door knocking at 1.798-3.925, 6.24-8.864", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_52.wav", "onoffCaption": "cat meowing at 0.243-5.243 and whistling at 1.921-4.15, 5.629-8.103", "frequencyCaption": "cat meowing one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_120.wav", "onoffCaption": "duck quacking at 2.114-4.114, 4.766-6.766, 7.77-9.77", "frequencyCaption": "duck quacking three times"} +{"filepath": 
"data/multi_event_train/syn_252.wav", "onoffCaption": "gunshot at 0.236-2.236, 4.669-6.71", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_349.wav", "onoffCaption": "gunshot at 2.895-4.895, 7.074-9.074 and duck quacking at 3.758-5.758", "frequencyCaption": "gunshot two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_422.wav", "onoffCaption": "spraying at 2.662-5.054, 5.777-8.169", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_477.wav", "onoffCaption": "dog barking at 2.616-4.616, 5.85-7.85", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_488.wav", "onoffCaption": "thump thud at 0.294-4.744 and explosion at 2.961-5.833", "frequencyCaption": "thump thud one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_492.wav", "onoffCaption": "burping belching at 3.015-6.515, 7.914-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_539.wav", "onoffCaption": "duck quacking at 0.626-2.626 and gunshot at 0.647-2.647 and sheep goat bleating at 6.015-8.015", "frequencyCaption": "duck quacking one times and gunshot one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_576.wav", "onoffCaption": "cow mooing at 0.017-2.986", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_589.wav", "onoffCaption": "sneeze at 0.227-2.34", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_593.wav", "onoffCaption": "spraying at 0.736-1.386, 2.43-4.89 and burping belching at 2.475-5.734 and car horn honking at 3.11-7.51", "frequencyCaption": "spraying two times and burping belching one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_604.wav", "onoffCaption": "sneeze at 0.064-2.381, 4.864-7.181 and woman laughing at 1.889-4.658, 5.873-8.11", "frequencyCaption": 
"sneeze two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_868.wav", "onoffCaption": "duck quacking at 1.107-3.107, 5.382-7.382", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_872.wav", "onoffCaption": "sheep goat bleating at 1.92-4.92", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_897.wav", "onoffCaption": "woman laughing at 2.093-7.132, 7.959-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_969.wav", "onoffCaption": "thump thud at 0.204-3.871, 4.373-6.774 and duck quacking at 0.295-2.295, 4.429-6.429", "frequencyCaption": "thump thud two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_973.wav", "onoffCaption": "woman laughing at 2.152-4.636, 7.017-9.501", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_996.wav", "onoffCaption": "door knocking at 1.037-3.127 and train horn at 4.574-7.054", "frequencyCaption": "door knocking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1009.wav", "onoffCaption": "explosion at 0.047-5.047, 6.829-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1046.wav", "onoffCaption": "thump thud at 2.605-7.055", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1112.wav", "onoffCaption": "dog barking at 0.813-3.251", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1147.wav", "onoffCaption": "door slamming at 0.008-0.986, 2.768-3.746 and burping belching at 6.192-9.192", "frequencyCaption": "door slamming two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1334.wav", "onoffCaption": "burping belching at 2.724-9.892", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1410.wav", 
"onoffCaption": "spraying at 2.671-3.298, 4.313-4.94, 5.78-6.407", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1511.wav", "onoffCaption": "cow mooing at 2.906-5.875, 6.784-9.753", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1544.wav", "onoffCaption": "tapping clicking clanking at 0.031-3.471, 4.249-7.055", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1679.wav", "onoffCaption": "whistling at 0.412-5.912", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1762.wav", "onoffCaption": "sneeze at 0.149-5.149", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_1787.wav", "onoffCaption": "tapping clicking clanking at 3.695-7.135", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1815.wav", "onoffCaption": "cat meowing at 2.792-3.978, 4.638-6.388", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1840.wav", "onoffCaption": "whistling at 0.127-7.877 and duck quacking at 0.626-2.626, 4.514-6.514", "frequencyCaption": "whistling one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1914.wav", "onoffCaption": "duck quacking at 0.36-2.36 and spraying at 5.948-6.68, 7.206-7.938, 9.061-9.793", "frequencyCaption": "duck quacking one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_3024.wav", "onoffCaption": "whistling at 1.51-4.385, 4.941-7.136 and sheep goat bleating at 4.617-6.617", "frequencyCaption": "whistling two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3125.wav", "onoffCaption": "cow mooing at 0.525-3.535 and sneeze at 7.273-10.0", "frequencyCaption": "cow mooing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3170.wav", "onoffCaption": "thump thud at 2.4-6.775 
and cow mooing at 3.142-7.571", "frequencyCaption": "thump thud one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3195.wav", "onoffCaption": "duck quacking at 3.632-5.632, 6.157-8.157", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3356.wav", "onoffCaption": "dog barking at 0.004-2.004 and sneeze at 0.554-2.867", "frequencyCaption": "dog barking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3468.wav", "onoffCaption": "tapping clicking clanking at 1.654-5.094, 6.11-9.55", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3472.wav", "onoffCaption": "thump thud at 0.086-3.133, 4.191-7.238 and cow mooing at 2.851-6.149, 7.782-10.0", "frequencyCaption": "thump thud two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3526.wav", "onoffCaption": "burping belching at 2.329-5.329, 5.967-8.967 and spraying at 3.37-5.498", "frequencyCaption": "burping belching two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3569.wav", "onoffCaption": "woman laughing at 3.645-6.717", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3573.wav", "onoffCaption": "tapping clicking clanking at 0.078-3.518 and woman laughing at 2.07-4.438", "frequencyCaption": "tapping clicking clanking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3596.wav", "onoffCaption": "gunshot at 0.42-2.42, 3.855-5.855 and whistling at 0.821-3.796, 5.538-7.962", "frequencyCaption": "gunshot two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_3700.wav", "onoffCaption": "thump thud at 2.566-6.484", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3822.wav", "onoffCaption": "tapping clicking clanking at 0.026-3.466, 4.038-7.478 and sneeze at 0.281-2.594 and whistling at 0.577-8.588", 
"frequencyCaption": "tapping clicking clanking two times and sneeze one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3877.wav", "onoffCaption": "sheep goat bleating at 2.689-5.985, 7.249-10.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3888.wav", "onoffCaption": "gunshot at 0.011-2.011, 4.076-6.076 and woman laughing at 2.145-4.353, 5.515-7.798 and burping belching at 3.798-6.321, 7.51-9.581", "frequencyCaption": "gunshot two times and woman laughing two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_3892.wav", "onoffCaption": "whistling at 0.003-5.178, 6.434-10.0 and gunshot at 0.281-2.281, 4.664-6.664", "frequencyCaption": "whistling two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_3939.wav", "onoffCaption": "door knocking at 2.256-5.103, 6.83-8.99", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3976.wav", "onoffCaption": "sheep goat bleating at 4.316-6.316, 7.624-9.624", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3989.wav", "onoffCaption": "duck quacking at 0.753-2.753, 4.201-6.201", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3993.wav", "onoffCaption": "sheep goat bleating at 0.539-3.619, 5.059-8.139", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_39.wav", "onoffCaption": "explosion at 0.263-2.327, 4.015-6.079", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_89.wav", "onoffCaption": "sheep goat bleating at 1.399-3.399, 4.145-6.145, 6.832-9.296", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_207.wav", "onoffCaption": "train horn at 0.007-4.188 and cat meowing at 0.737-2.714", "frequencyCaption": "train horn one times and cat meowing one times"} 
+{"filepath": "data/multi_event_train/syn_276.wav", "onoffCaption": "dog barking at 0.676-2.676 and sheep goat bleating at 1.893-3.893, 4.654-7.441", "frequencyCaption": "dog barking one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_293.wav", "onoffCaption": "spraying at 3.538-4.795", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_338.wav", "onoffCaption": "tapping clicking clanking at 3.456-6.896", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_388.wav", "onoffCaption": "car horn honking at 0.262-4.662, 6.687-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_406.wav", "onoffCaption": "sheep goat bleating at 1.038-3.038, 4.725-6.725, 7.491-9.491", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_453.wav", "onoffCaption": "tapping clicking clanking at 0.529-3.969, 5.003-7.612", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_548.wav", "onoffCaption": "burping belching at 3.305-8.305", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_620.wav", "onoffCaption": "spraying at 1.449-1.971, 3.051-3.573, 4.44-4.962", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_651.wav", "onoffCaption": "explosion at 0.475-5.396, 6.197-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_819.wav", "onoffCaption": "cow mooing at 0.503-3.485, 4.402-7.384 and dog barking at 3.328-5.328, 6.489-8.489", "frequencyCaption": "cow mooing two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_902.wav", "onoffCaption": "explosion at 0.945-5.945, 7.669-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_926.wav", "onoffCaption": "woman laughing 
at 1.915-5.196, 6.014-9.295", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_957.wav", "onoffCaption": "spraying at 3.156-3.783, 4.327-4.896, 5.659-6.743", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1078.wav", "onoffCaption": "dog barking at 2.572-4.572", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1136.wav", "onoffCaption": "sneeze at 1.816-3.761 and woman laughing at 6.146-9.722", "frequencyCaption": "sneeze one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1163.wav", "onoffCaption": "thump thud at 0.616-3.663, 4.265-6.765 and cat meowing at 1.658-6.658, 7.892-8.904", "frequencyCaption": "thump thud two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1186.wav", "onoffCaption": "tapping clicking clanking at 2.584-6.024, 7.048-9.723", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1310.wav", "onoffCaption": "cat meowing at 0.11-1.122, 1.672-3.404, 4.651-7.141", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1345.wav", "onoffCaption": "tapping clicking clanking at 0.045-3.485, 4.395-7.835", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1361.wav", "onoffCaption": "whistling at 2.651-8.484", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1384.wav", "onoffCaption": "burping belching at 1.138-3.959 and explosion at 7.473-10.0", "frequencyCaption": "burping belching one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1535.wav", "onoffCaption": "cat meowing at 0.403-1.616, 2.396-3.951", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1560.wav", "onoffCaption": "dog barking at 0.055-2.055, 2.803-4.803, 6.123-8.123", "frequencyCaption": 
"dog barking three times"} +{"filepath": "data/multi_event_train/syn_1585.wav", "onoffCaption": "car horn honking at 1.4-5.649", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1608.wav", "onoffCaption": "tapping clicking clanking at 0.833-4.273 and sneeze at 1.635-2.738", "frequencyCaption": "tapping clicking clanking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1713.wav", "onoffCaption": "burping belching at 0.208-3.708 and train horn at 5.339-8.219", "frequencyCaption": "burping belching one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1737.wav", "onoffCaption": "woman laughing at 2.723-4.948, 6.995-9.6", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1746.wav", "onoffCaption": "cat meowing at 2.19-4.167, 5.515-7.651", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1831.wav", "onoffCaption": "gunshot at 2.959-4.959, 6.448-8.448", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1864.wav", "onoffCaption": "duck quacking at 2.461-4.461, 5.492-7.492 and woman laughing at 3.466-6.261", "frequencyCaption": "duck quacking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1881.wav", "onoffCaption": "sheep goat bleating at 2.941-4.941, 5.649-7.649", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3101.wav", "onoffCaption": "door slamming at 0.408-2.628, 4.108-6.328 and woman laughing at 2.274-4.474 and door knocking at 3.199-6.752, 7.928-10.0", "frequencyCaption": "door slamming two times and woman laughing one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3154.wav", "onoffCaption": "gunshot at 2.635-4.635", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3218.wav", "onoffCaption": "gunshot at 0.374-2.374 and 
sneeze at 5.184-7.798", "frequencyCaption": "gunshot one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3269.wav", "onoffCaption": "woman laughing at 2.804-5.573", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3303.wav", "onoffCaption": "train horn at 0.168-3.408, 4.333-7.573", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3372.wav", "onoffCaption": "whistling at 2.433-5.408, 7.427-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3397.wav", "onoffCaption": "train horn at 3.047-8.791 and spraying at 3.625-4.229, 6.445-7.445", "frequencyCaption": "train horn one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3419.wav", "onoffCaption": "duck quacking at 0.225-2.225 and dog barking at 1.184-3.184, 4.339-6.339", "frequencyCaption": "duck quacking one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3502.wav", "onoffCaption": "sneeze at 0.887-3.501, 4.361-6.975 and door slamming at 2.409-3.214, 5.124-7.905", "frequencyCaption": "sneeze two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_3557.wav", "onoffCaption": "thump thud at 1.144-5.519, 6.289-8.744 and sheep goat bleating at 2.984-6.904, 7.408-9.604", "frequencyCaption": "thump thud two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3724.wav", "onoffCaption": "duck quacking at 0.417-2.417, 3.645-5.645", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3755.wav", "onoffCaption": "dog barking at 1.109-3.109, 4.896-6.896, 7.76-9.76 and duck quacking at 3.606-5.606", "frequencyCaption": "dog barking three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3794.wav", "onoffCaption": "whistling at 0.449-3.324, 4.92-7.795", "frequencyCaption": "whistling two times"} +{"filepath": 
"data/multi_event_train/syn_3806.wav", "onoffCaption": "car horn honking at 2.923-5.27, 6.695-9.042", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3853.wav", "onoffCaption": "car horn honking at 2.237-6.749, 7.707-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3948.wav", "onoffCaption": "cow mooing at 2.363-7.343 and gunshot at 6.261-8.501", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_23.wav", "onoffCaption": "sheep goat bleating at 2.024-4.024, 4.681-6.681, 7.699-9.699", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_76.wav", "onoffCaption": "cow mooing at 0.517-5.497", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_151.wav", "onoffCaption": "burping belching at 0.578-4.447 and door slamming at 2.144-3.468", "frequencyCaption": "burping belching one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_223.wav", "onoffCaption": "thump thud at 0.521-3.292 and sneeze at 2.031-3.285", "frequencyCaption": "thump thud one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_239.wav", "onoffCaption": "sheep goat bleating at 0.693-2.693, 4.197-6.197", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_289.wav", "onoffCaption": "cat meowing at 0.229-1.25, 2.111-3.132, 4.459-5.48 and woman laughing at 2.584-5.684, 7.136-9.504", "frequencyCaption": "cat meowing three times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_322.wav", "onoffCaption": "door slamming at 1.672-4.551, 7.01-8.401 and thump thud at 2.44-4.779", "frequencyCaption": "door slamming two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_377.wav", "onoffCaption": "whistling at 2.592-5.567, 6.838-9.813", "frequencyCaption": 
"whistling two times"} +{"filepath": "data/multi_event_train/syn_392.wav", "onoffCaption": "door slamming at 0.562-2.69, 3.661-5.789, 7.08-9.208", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_507.wav", "onoffCaption": "duck quacking at 2.858-4.858", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_675.wav", "onoffCaption": "woman laughing at 2.855-5.063, 5.692-8.111", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_690.wav", "onoffCaption": "thump thud at 2.745-4.973, 7.024-9.486", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_721.wav", "onoffCaption": "car horn honking at 0.361-4.761, 7.058-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_774.wav", "onoffCaption": "burping belching at 2.738-4.832 and cow mooing at 7.019-10.0", "frequencyCaption": "burping belching one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_791.wav", "onoffCaption": "train horn at 0.204-2.671, 3.684-6.181 and whistling at 0.286-8.671 and burping belching at 5.965-8.195", "frequencyCaption": "train horn two times and whistling one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_803.wav", "onoffCaption": "dog barking at 1.213-3.213, 5.389-7.389 and explosion at 3.633-6.505, 7.382-10.0", "frequencyCaption": "dog barking two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_918.wav", "onoffCaption": "door knocking at 1.957-4.084, 6.555-8.682", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1037.wav", "onoffCaption": "cow mooing at 0.323-3.292, 4.556-7.542", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1211.wav", "onoffCaption": "train horn at 0.755-4.515, 5.812-9.572", "frequencyCaption": "train horn two times"} 
+{"filepath": "data/multi_event_train/syn_1244.wav", "onoffCaption": "gunshot at 0.224-2.698 and woman laughing at 6.812-9.912", "frequencyCaption": "gunshot one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1461.wav", "onoffCaption": "duck quacking at 0.523-2.523, 4.907-6.907 and woman laughing at 1.227-3.421", "frequencyCaption": "duck quacking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1484.wav", "onoffCaption": "burping belching at 2.002-8.979 and tapping clicking clanking at 3.197-6.637", "frequencyCaption": "burping belching one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1612.wav", "onoffCaption": "whistling at 2.42-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1647.wav", "onoffCaption": "gunshot at 0.059-2.059, 2.889-4.889, 5.928-7.928 and sneeze at 0.303-1.986, 2.839-4.133, 5.618-8.191", "frequencyCaption": "gunshot three times and sneeze three times"} +{"filepath": "data/multi_event_train/syn_1709.wav", "onoffCaption": "dog barking at 3.101-5.101", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1965.wav", "onoffCaption": "spraying at 3.478-4.329 and thump thud at 6.947-10.0", "frequencyCaption": "spraying one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1980.wav", "onoffCaption": "car horn honking at 3.67-6.135, 7.16-9.625", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3055.wav", "onoffCaption": "thump thud at 0.803-3.85", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3226.wav", "onoffCaption": "whistling at 2.624-7.108, 7.973-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3273.wav", "onoffCaption": "cow mooing at 3.464-6.446", "frequencyCaption": "cow mooing one times"} +{"filepath": 
"data/multi_event_train/syn_3296.wav", "onoffCaption": "sheep goat bleating at 2.22-4.22, 5.571-7.571", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3327.wav", "onoffCaption": "sheep goat bleating at 0.979-2.979, 5.085-7.085", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3368.wav", "onoffCaption": "car horn honking at 2.877-6.531, 7.446-9.989", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3403.wav", "onoffCaption": "door knocking at 2.848-5.472, 7.49-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3518.wav", "onoffCaption": "cat meowing at 0.029-1.924, 3.136-5.031", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3625.wav", "onoffCaption": "whistling at 1.721-4.696, 5.425-8.4", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3670.wav", "onoffCaption": "whistling at 1.871-9.882", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3695.wav", "onoffCaption": "explosion at 2.44-5.993 and sheep goat bleating at 4.285-6.285", "frequencyCaption": "explosion one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3771.wav", "onoffCaption": "explosion at 2.574-5.574, 6.762-9.762", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3907.wav", "onoffCaption": "train horn at 0.688-2.825 and door slamming at 1.492-2.47 and sneeze at 5.398-7.01, 7.662-9.274", "frequencyCaption": "train horn one times and door slamming one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_24.wav", "onoffCaption": "door slamming at 0.185-2.098, 3.22-5.133, 7.597-9.51", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_93.wav", "onoffCaption": "woman laughing at 
3.523-6.804", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_104.wav", "onoffCaption": "car horn honking at 1.439-3.786, 5.888-8.792", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_156.wav", "onoffCaption": "cow mooing at 0.403-3.413 and cat meowing at 1.774-6.774", "frequencyCaption": "cow mooing one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_449.wav", "onoffCaption": "cat meowing at 0.961-2.377 and car horn honking at 1.892-5.067", "frequencyCaption": "cat meowing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_500.wav", "onoffCaption": "train horn at 0.129-3.489, 5.32-7.76", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_552.wav", "onoffCaption": "duck quacking at 1.62-3.62, 4.619-6.619 and cow mooing at 1.955-4.965, 5.474-8.484", "frequencyCaption": "duck quacking two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_668.wav", "onoffCaption": "door knocking at 0.117-3.617", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_773.wav", "onoffCaption": "woman laughing at 2.284-6.336", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_796.wav", "onoffCaption": "door slamming at 0.302-1.475, 2.628-3.801, 5.758-6.931 and car horn honking at 2.659-7.566", "frequencyCaption": "door slamming three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_804.wav", "onoffCaption": "sneeze at 1.835-4.91, 5.489-8.564", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_856.wav", "onoffCaption": "cow mooing at 0.081-5.061 and explosion at 1.242-3.833, 4.45-6.514, 7.335-10.0", "frequencyCaption": "cow mooing one times and explosion three times"} +{"filepath": "data/multi_event_train/syn_1030.wav", "onoffCaption": "cat meowing 
at 0.829-1.84, 3.044-4.055, 4.829-5.84 and gunshot at 2.193-4.193, 4.87-6.87, 7.81-9.81", "frequencyCaption": "cat meowing three times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_1062.wav", "onoffCaption": "whistling at 0.568-8.579 and train horn at 1.664-6.566, 7.957-10.0", "frequencyCaption": "whistling one times and train horn two times"} +{"filepath": "data/multi_event_train/syn_1087.wav", "onoffCaption": "train horn at 2.12-5.52, 6.427-9.827 and thump thud at 2.993-6.911", "frequencyCaption": "train horn two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1179.wav", "onoffCaption": "thump thud at 0.225-2.725, 4.517-7.017 and train horn at 2.293-5.653, 6.302-9.662", "frequencyCaption": "thump thud two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_1434.wav", "onoffCaption": "cat meowing at 2.858-7.858", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1466.wav", "onoffCaption": "train horn at 0.155-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1483.wav", "onoffCaption": "sneeze at 1.223-2.906 and sheep goat bleating at 1.495-3.495 and dog barking at 2.465-4.465", "frequencyCaption": "sneeze one times and sheep goat bleating one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1598.wav", "onoffCaption": "door slamming at 0.59-1.707, 3.721-5.045", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1879.wav", "onoffCaption": "woman laughing at 0.565-8.01 and gunshot at 2.025-4.025", "frequencyCaption": "woman laughing one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1930.wav", "onoffCaption": "train horn at 0.044-4.484 and thump thud at 0.511-2.85, 4.648-6.987 and sneeze at 3.344-7.4", "frequencyCaption": "train horn one times and thump thud two times and sneeze one times"} +{"filepath": 
"data/multi_event_train/syn_1962.wav", "onoffCaption": "explosion at 2.685-5.685, 6.915-9.787", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1987.wav", "onoffCaption": "burping belching at 0.826-6.427, 6.931-9.038 and dog barking at 1.463-3.463, 4.507-6.507 and thump thud at 4.019-6.519", "frequencyCaption": "burping belching two times and dog barking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3000.wav", "onoffCaption": "sneeze at 2.175-4.421, 6.906-9.152", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3052.wav", "onoffCaption": "duck quacking at 3.237-5.237", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3149.wav", "onoffCaption": "whistling at 0.327-8.712", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3221.wav", "onoffCaption": "whistling at 0.692-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3404.wav", "onoffCaption": "door knocking at 1.875-6.314", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3456.wav", "onoffCaption": "explosion at 0.557-3.557, 4.707-7.707 and gunshot at 0.902-2.902, 3.819-6.059, 7.052-9.052", "frequencyCaption": "explosion two times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_3677.wav", "onoffCaption": "woman laughing at 2.965-5.063", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3849.wav", "onoffCaption": "car horn honking at 0.921-5.828, 6.938-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3900.wav", "onoffCaption": "gunshot at 0.379-2.379, 3.498-5.498, 6.33-8.33", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3952.wav", "onoffCaption": "tapping clicking clanking at 0.053-3.493, 5.295-8.735 and door 
slamming at 3.514-4.687, 5.589-6.762, 7.398-8.571", "frequencyCaption": "tapping clicking clanking two times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_71.wav", "onoffCaption": "car horn honking at 0.054-2.054, 2.567-5.48, 6.163-8.547 and dog barking at 1.396-3.396", "frequencyCaption": "car horn honking three times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_94.wav", "onoffCaption": "whistling at 0.664-2.673 and dog barking at 5.547-7.547", "frequencyCaption": "whistling one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_103.wav", "onoffCaption": "train horn at 3.383-6.917, 7.644-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_119.wav", "onoffCaption": "explosion at 1.17-6.17", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_224.wav", "onoffCaption": "explosion at 0.69-3.443, 4.949-7.678", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_271.wav", "onoffCaption": "sheep goat bleating at 0.252-2.252, 3.168-5.863, 7.483-9.483", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_294.wav", "onoffCaption": "dog barking at 4.009-6.009, 6.713-8.713", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_325.wav", "onoffCaption": "train horn at 2.454-4.934", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_370.wav", "onoffCaption": "woman laughing at 0.539-3.611", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_395.wav", "onoffCaption": "explosion at 0.154-5.154, 6.077-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_454.wav", "onoffCaption": "cat meowing at 0.659-1.744, 2.58-3.665 and car horn honking at 6.169-10.0", "frequencyCaption": "cat meowing two times and car horn 
honking one times"} +{"filepath": "data/multi_event_train/syn_555.wav", "onoffCaption": "train horn at 2.939-6.139, 7.128-9.199", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_627.wav", "onoffCaption": "sneeze at 0.592-2.299, 3.698-5.783", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_726.wav", "onoffCaption": "sneeze at 1.533-3.061, 4.94-6.468", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_769.wav", "onoffCaption": "burping belching at 3.011-6.19, 7.094-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_851.wav", "onoffCaption": "duck quacking at 0.562-2.562, 4.942-6.942", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_950.wav", "onoffCaption": "sheep goat bleating at 0.278-2.278, 3.82-6.577", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1065.wav", "onoffCaption": "burping belching at 0.199-2.293, 3.025-5.119, 6.426-8.52", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_1080.wav", "onoffCaption": "woman laughing at 2.234-8.968", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1164.wav", "onoffCaption": "door knocking at 2.967-5.319, 6.283-8.635", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1181.wav", "onoffCaption": "cat meowing at 0.605-2.793, 3.637-5.597, 7.0-8.011", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1216.wav", "onoffCaption": "whistling at 0.333-7.988", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1243.wav", "onoffCaption": "cat meowing at 0.456-1.596 and sheep goat bleating at 0.866-2.866 and sneeze at 1.641-3.186, 5.174-7.413, 8.442-9.534", "frequencyCaption": "cat meowing one times and 
sheep goat bleating one times and sneeze three times"} +{"filepath": "data/multi_event_train/syn_1259.wav", "onoffCaption": "explosion at 0.006-2.094, 3.253-5.341", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1317.wav", "onoffCaption": "door knocking at 0.43-3.539 and duck quacking at 2.429-4.429", "frequencyCaption": "door knocking one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1342.wav", "onoffCaption": "train horn at 2.787-6.147, 7.196-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1358.wav", "onoffCaption": "burping belching at 1.915-3.95, 4.703-6.738", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1429.wav", "onoffCaption": "sneeze at 2.999-4.091, 5.749-7.988 and door slamming at 5.615-8.332", "frequencyCaption": "sneeze two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1433.wav", "onoffCaption": "thump thud at 2.715-6.382, 7.011-9.511", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1528.wav", "onoffCaption": "car horn honking at 3.34-5.84, 6.745-9.245", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1532.wav", "onoffCaption": "door slamming at 0.251-1.089, 2.484-3.322, 5.322-6.16", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1615.wav", "onoffCaption": "car horn honking at 3.507-7.907", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1640.wav", "onoffCaption": "thump thud at 2.133-4.633", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1714.wav", "onoffCaption": "cow mooing at 2.052-7.032 and cat meowing at 3.252-6.493 and burping belching at 3.449-5.48, 6.955-9.955", "frequencyCaption": "cow mooing one times and cat meowing one times and burping 
belching two times"} +{"filepath": "data/multi_event_train/syn_1741.wav", "onoffCaption": "cat meowing at 3.699-4.708, 7.085-8.094, 8.677-9.686", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1836.wav", "onoffCaption": "whistling at 1.123-3.998, 6.213-9.088", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1937.wav", "onoffCaption": "explosion at 0.473-5.473, 7.065-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3007.wav", "onoffCaption": "spraying at 3.438-4.379, 5.961-8.545", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3106.wav", "onoffCaption": "thump thud at 0.143-4.518 and burping belching at 7.45-9.485", "frequencyCaption": "thump thud one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3274.wav", "onoffCaption": "sheep goat bleating at 2.531-4.531, 5.953-8.314", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3291.wav", "onoffCaption": "cat meowing at 2.873-4.768, 5.368-7.308, 8.311-9.866", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_3375.wav", "onoffCaption": "gunshot at 1.097-3.097, 3.963-5.963, 7.832-9.832 and sneeze at 3.728-5.016", "frequencyCaption": "gunshot three times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3390.wav", "onoffCaption": "train horn at 1.323-4.643", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3451.wav", "onoffCaption": "door knocking at 1.209-3.299 and spraying at 3.065-3.916, 5.992-6.896", "frequencyCaption": "door knocking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3550.wav", "onoffCaption": "car horn honking at 1.828-4.341", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3622.wav", "onoffCaption": "door 
knocking at 3.698-8.137", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3638.wav", "onoffCaption": "dog barking at 3.178-5.178, 7.122-9.122", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3688.wav", "onoffCaption": "explosion at 0.51-2.512, 3.826-6.694, 7.756-10.0 and dog barking at 1.669-3.669, 6.087-8.087 and tapping clicking clanking at 4.081-7.521", "frequencyCaption": "explosion three times and dog barking two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3692.wav", "onoffCaption": "cat meowing at 0.796-2.691, 3.33-5.225", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3723.wav", "onoffCaption": "door knocking at 0.865-4.633, 5.792-9.56", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3739.wav", "onoffCaption": "explosion at 1.129-4.969, 5.71-8.318 and sneeze at 5.171-7.488, 8.553-9.656", "frequencyCaption": "explosion two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3789.wav", "onoffCaption": "train horn at 1.777-5.977, 7.19-10.0 and whistling at 3.008-8.183", "frequencyCaption": "train horn two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3793.wav", "onoffCaption": "woman laughing at 0.114-2.397, 4.314-6.412", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3854.wav", "onoffCaption": "sheep goat bleating at 0.091-2.091", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3955.wav", "onoffCaption": "burping belching at 0.1-2.303 and sneeze at 0.938-2.226, 2.785-4.073, 5.144-6.432 and spraying at 2.713-3.235", "frequencyCaption": "burping belching one times and sneeze three times and spraying one times"} +{"filepath": "data/multi_event_train/syn_255.wav", "onoffCaption": "duck quacking at 2.246-4.246 and car horn 
honking at 6.104-8.104", "frequencyCaption": "duck quacking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_401.wav", "onoffCaption": "explosion at 1.188-4.188, 6.166-8.234 and door slamming at 6.603-7.284", "frequencyCaption": "explosion two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_495.wav", "onoffCaption": "tapping clicking clanking at 1.277-4.717 and spraying at 6.204-7.268, 9.239-9.814", "frequencyCaption": "tapping clicking clanking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_603.wav", "onoffCaption": "door slamming at 0.31-1.21", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_672.wav", "onoffCaption": "sheep goat bleating at 2.385-5.705, 7.194-9.194", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_697.wav", "onoffCaption": "car horn honking at 2.257-5.17, 6.998-9.911", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_718.wav", "onoffCaption": "explosion at 0.272-3.272, 3.864-6.864", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_905.wav", "onoffCaption": "dog barking at 0.109-2.109, 2.928-4.928, 6.585-8.585", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_974.wav", "onoffCaption": "cow mooing at 0.23-3.24, 3.796-6.806", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1131.wav", "onoffCaption": "thump thud at 3.383-7.301", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1228.wav", "onoffCaption": "spraying at 2.431-3.181, 4.888-5.829, 7.392-9.784", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1333.wav", "onoffCaption": "tapping clicking clanking at 0.156-3.596", "frequencyCaption": "tapping clicking clanking one times"} 
+{"filepath": "data/multi_event_train/syn_1499.wav", "onoffCaption": "door knocking at 0.679-2.9, 4.216-6.437, 7.091-9.312", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1567.wav", "onoffCaption": "spraying at 1.396-2.653", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1582.wav", "onoffCaption": "tapping clicking clanking at 0.881-4.321, 5.587-9.027 and cow mooing at 0.881-5.861", "frequencyCaption": "tapping clicking clanking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1765.wav", "onoffCaption": "gunshot at 3.529-5.529, 7.826-9.826", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1780.wav", "onoffCaption": "whistling at 0.548-3.523", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1863.wav", "onoffCaption": "dog barking at 0.423-2.423 and whistling at 6.542-10.0", "frequencyCaption": "dog barking one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1886.wav", "onoffCaption": "duck quacking at 3.169-5.169 and train horn at 7.932-10.0", "frequencyCaption": "duck quacking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1909.wav", "onoffCaption": "gunshot at 3.317-5.317, 7.169-9.169", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1978.wav", "onoffCaption": "door knocking at 1.1-5.61 and gunshot at 3.2-5.2, 6.765-9.271", "frequencyCaption": "door knocking one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_3039.wav", "onoffCaption": "thump thud at 0.21-2.71, 3.351-5.851, 6.52-9.02", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_3048.wav", "onoffCaption": "tapping clicking clanking at 2.359-5.799, 7.591-9.639", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3122.wav", 
"onoffCaption": "door knocking at 0.12-3.24 and duck quacking at 0.249-2.249, 2.872-4.872 and cow mooing at 0.975-3.944", "frequencyCaption": "door knocking one times and duck quacking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3153.wav", "onoffCaption": "spraying at 2.568-3.3, 4.971-5.703", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3320.wav", "onoffCaption": "door slamming at 0.339-3.12, 5.228-6.603 and car horn honking at 1.221-6.128", "frequencyCaption": "door slamming two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3351.wav", "onoffCaption": "whistling at 0.082-2.091, 2.634-4.643, 5.173-7.182", "frequencyCaption": "whistling three times"} +{"filepath": "data/multi_event_train/syn_3505.wav", "onoffCaption": "gunshot at 0.828-2.828, 3.51-5.551, 6.26-8.26", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3574.wav", "onoffCaption": "dog barking at 0.732-2.732, 3.583-5.583, 6.4-8.4", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_3707.wav", "onoffCaption": "thump thud at 2.729-5.776, 7.199-9.97", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3776.wav", "onoffCaption": "sneeze at 0.514-3.174, 3.942-6.345, 7.027-8.527 and duck quacking at 3.844-5.844, 6.531-8.531", "frequencyCaption": "sneeze three times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3801.wav", "onoffCaption": "woman laughing at 0.433-3.72, 4.347-7.634", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3895.wav", "onoffCaption": "tapping clicking clanking at 0.957-4.397, 6.045-8.62", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_168.wav", "onoffCaption": "gunshot at 0.302-2.302, 3.007-5.007, 5.65-7.65 and sneeze at 1.729-4.804, 5.308-7.304", 
"frequencyCaption": "gunshot three times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_200.wav", "onoffCaption": "thump thud at 0.027-4.402, 5.833-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_425.wav", "onoffCaption": "whistling at 0.103-7.853", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_470.wav", "onoffCaption": "dog barking at 0.337-2.337 and cat meowing at 1.04-2.051, 2.899-4.443, 4.964-6.15 and gunshot at 2.741-4.741", "frequencyCaption": "dog barking one times and cat meowing three times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_656.wav", "onoffCaption": "explosion at 2.865-7.865", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_820.wav", "onoffCaption": "burping belching at 2.113-7.714", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_921.wav", "onoffCaption": "tapping clicking clanking at 3.542-6.982", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_991.wav", "onoffCaption": "gunshot at 3.601-5.642, 7.694-9.735", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1115.wav", "onoffCaption": "tapping clicking clanking at 1.078-4.518 and cat meowing at 2.764-4.381, 6.623-8.24", "frequencyCaption": "tapping clicking clanking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1140.wav", "onoffCaption": "car horn honking at 0.279-3.192, 5.549-8.462", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1298.wav", "onoffCaption": "cat meowing at 1.184-2.459, 3.186-4.461", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1366.wav", "onoffCaption": "door slamming at 0.202-1.717, 2.995-4.51, 5.792-7.307 and spraying at 1.267-2.524 and woman laughing at 1.332-3.526, 
5.309-7.503", "frequencyCaption": "door slamming three times and spraying one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1383.wav", "onoffCaption": "cow mooing at 2.663-5.961", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1458.wav", "onoffCaption": "door knocking at 2.411-6.85", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1516.wav", "onoffCaption": "sneeze at 2.129-4.442, 6.085-8.893", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1543.wav", "onoffCaption": "car horn honking at 1.994-4.341, 6.759-9.106", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1730.wav", "onoffCaption": "door slamming at 0.66-1.913, 3.512-5.512 and spraying at 7.065-7.666", "frequencyCaption": "door slamming two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1812.wav", "onoffCaption": "sneeze at 0.01-2.134, 4.32-6.444", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1847.wav", "onoffCaption": "train horn at 2.659-6.419", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3076.wav", "onoffCaption": "cow mooing at 0.452-5.432, 7.721-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3089.wav", "onoffCaption": "explosion at 2.8-5.4, 7.474-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3177.wav", "onoffCaption": "thump thud at 1.086-4.133", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3192.wav", "onoffCaption": "woman laughing at 3.884-6.239, 6.838-9.193", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3304.wav", "onoffCaption": "train horn at 2.616-6.816, 7.46-9.94", "frequencyCaption": "train horn two times"} +{"filepath": 
"data/multi_event_train/syn_3420.wav", "onoffCaption": "door knocking at 0.306-3.153, 5.11-7.174 and car horn honking at 4.113-7.608", "frequencyCaption": "door knocking two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3521.wav", "onoffCaption": "tapping clicking clanking at 1.082-4.522, 5.042-7.335", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3591.wav", "onoffCaption": "door knocking at 2.49-5.546 and car horn honking at 7.149-10.0", "frequencyCaption": "door knocking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3649.wav", "onoffCaption": "dog barking at 0.503-3.823, 4.345-7.665", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3752.wav", "onoffCaption": "sheep goat bleating at 1.849-3.849, 6.339-8.339 and duck quacking at 2.367-4.367, 5.96-7.96", "frequencyCaption": "sheep goat bleating two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3825.wav", "onoffCaption": "tapping clicking clanking at 0.088-3.528, 4.234-7.674 and sneeze at 0.459-2.455, 3.231-5.593, 6.22-7.394", "frequencyCaption": "tapping clicking clanking two times and sneeze three times"} +{"filepath": "data/multi_event_train/syn_3870.wav", "onoffCaption": "dog barking at 0.159-2.159 and burping belching at 3.641-10.0", "frequencyCaption": "dog barking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_55.wav", "onoffCaption": "car horn honking at 0.516-3.302, 4.306-7.092", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_127.wav", "onoffCaption": "explosion at 0.637-2.701, 3.39-5.454, 6.295-8.359", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_172.wav", "onoffCaption": "woman laughing at 0.259-2.678, 3.762-6.181, 7.293-9.712", "frequencyCaption": "woman laughing three times"} 
+{"filepath": "data/multi_event_train/syn_197.wav", "onoffCaption": "sheep goat bleating at 1.284-3.284, 3.836-5.836, 7.193-9.193", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_301.wav", "onoffCaption": "door knocking at 3.503-6.57", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_354.wav", "onoffCaption": "explosion at 2.583-6.136, 6.737-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_524.wav", "onoffCaption": "duck quacking at 0.427-2.427, 3.177-5.177, 6.113-8.113 and burping belching at 2.667-5.667, 6.53-9.53", "frequencyCaption": "duck quacking three times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_571.wav", "onoffCaption": "cow mooing at 0.128-3.138, 4.344-7.354 and dog barking at 4.593-6.593", "frequencyCaption": "cow mooing two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_594.wav", "onoffCaption": "cat meowing at 2.648-3.66, 4.641-5.653", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_619.wav", "onoffCaption": "whistling at 0.822-7.241", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_702.wav", "onoffCaption": "duck quacking at 0.54-2.54", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_757.wav", "onoffCaption": "cow mooing at 0.723-3.705, 4.306-6.958 and train horn at 1.18-7.649", "frequencyCaption": "cow mooing two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_875.wav", "onoffCaption": "duck quacking at 1.336-3.336, 5.54-7.54 and car horn honking at 1.58-6.487, 7.594-9.594 and gunshot at 2.079-4.079, 5.251-7.251", "frequencyCaption": "duck quacking two times and car horn honking two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_890.wav", "onoffCaption": "sneeze at 3.586-5.505, 6.712-8.631", 
"frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1014.wav", "onoffCaption": "train horn at 0.202-2.357 and spraying at 6.382-6.882", "frequencyCaption": "train horn one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1041.wav", "onoffCaption": "cat meowing at 0.344-1.353 and sheep goat bleating at 4.842-6.842, 7.989-9.989", "frequencyCaption": "cat meowing one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1232.wav", "onoffCaption": "explosion at 0.237-5.237", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1267.wav", "onoffCaption": "car horn honking at 0.041-3.882, 5.425-7.938 and spraying at 2.028-2.895", "frequencyCaption": "car horn honking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1282.wav", "onoffCaption": "gunshot at 1.822-3.822 and cow mooing at 2.568-7.548", "frequencyCaption": "gunshot one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1329.wav", "onoffCaption": "door knocking at 0.595-3.651, 4.593-6.72", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1399.wav", "onoffCaption": "sneeze at 0.117-3.765, 4.445-5.537 and explosion at 0.46-3.46 and car horn honking at 1.044-3.044", "frequencyCaption": "sneeze two times and explosion one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1417.wav", "onoffCaption": "sheep goat bleating at 1.462-3.462, 4.727-6.727 and tapping clicking clanking at 2.217-5.657, 7.32-10.0", "frequencyCaption": "sheep goat bleating two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1442.wav", "onoffCaption": "tapping clicking clanking at 0.277-3.717, 5.707-9.147", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1559.wav", "onoffCaption": "thump thud at 0.397-4.064, 
5.612-9.279", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1631.wav", "onoffCaption": "car horn honking at 0.189-2.702", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1664.wav", "onoffCaption": "train horn at 0.002-4.07", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1808.wav", "onoffCaption": "woman laughing at 2.822-5.876, 6.644-9.698", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1913.wav", "onoffCaption": "sneeze at 1.993-3.239, 4.458-6.571, 7.242-9.645", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_1946.wav", "onoffCaption": "burping belching at 2.326-6.662 and door slamming at 4.842-5.82", "frequencyCaption": "burping belching one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3023.wav", "onoffCaption": "door slamming at 0.184-2.312, 3.655-5.783, 6.912-9.04", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3093.wav", "onoffCaption": "woman laughing at 0.977-3.569", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3138.wav", "onoffCaption": "train horn at 2.088-7.165 and explosion at 2.61-5.61 and door slamming at 3.402-4.655", "frequencyCaption": "train horn one times and explosion one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3188.wav", "onoffCaption": "sneeze at 0.033-1.321, 2.162-4.776, 5.79-7.402", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3205.wav", "onoffCaption": "thump thud at 1.166-4.213, 4.916-7.242 and gunshot at 6.167-8.167", "frequencyCaption": "thump thud two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3250.wav", "onoffCaption": "duck quacking at 0.323-2.323, 4.398-6.398", "frequencyCaption": "duck quacking two times"} 
+{"filepath": "data/multi_event_train/syn_3475.wav", "onoffCaption": "woman laughing at 0.214-2.806, 3.565-5.773", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3490.wav", "onoffCaption": "spraying at 0.003-0.784, 2.076-4.204, 5.949-6.457", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3606.wav", "onoffCaption": "cow mooing at 0.694-3.676, 5.839-8.821", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3653.wav", "onoffCaption": "thump thud at 0.056-3.974, 5.313-9.231", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3748.wav", "onoffCaption": "door slamming at 0.261-1.74", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_3924.wav", "onoffCaption": "woman laughing at 0.478-3.532, 5.351-8.05", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3971.wav", "onoffCaption": "sheep goat bleating at 1.131-3.131 and thump thud at 4.915-9.365 and dog barking at 6.582-8.582", "frequencyCaption": "sheep goat bleating one times and thump thud one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3994.wav", "onoffCaption": "burping belching at 1.116-3.739, 5.597-8.418", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_14.wav", "onoffCaption": "gunshot at 1.876-3.876, 5.278-7.278 and explosion at 2.328-5.057", "frequencyCaption": "gunshot two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_133.wav", "onoffCaption": "cow mooing at 1.851-6.831", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_166.wav", "onoffCaption": "car horn honking at 1.533-3.88, 6.373-8.72 and tapping clicking clanking at 3.047-6.487", "frequencyCaption": "car horn honking two times and tapping clicking clanking one times"} +{"filepath": 
"data/multi_event_train/syn_183.wav", "onoffCaption": "spraying at 0.373-1.314, 2.792-3.733 and cat meowing at 2.599-3.611", "frequencyCaption": "spraying two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_315.wav", "onoffCaption": "door slamming at 0.256-1.489, 2.931-4.164", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_340.wav", "onoffCaption": "door slamming at 2.894-5.323 and cow mooing at 7.457-10.0", "frequencyCaption": "door slamming one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_530.wav", "onoffCaption": "whistling at 0.753-9.138", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_565.wav", "onoffCaption": "cow mooing at 0.66-3.629 and woman laughing at 7.388-10.0", "frequencyCaption": "cow mooing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_580.wav", "onoffCaption": "door slamming at 3.578-6.578, 7.636-9.832", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_658.wav", "onoffCaption": "explosion at 2.357-7.357", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_716.wav", "onoffCaption": "dog barking at 0.108-2.108, 2.668-4.668, 5.421-7.421 and gunshot at 1.186-3.186, 5.192-7.192 and door knocking at 4.129-6.193", "frequencyCaption": "dog barking three times and gunshot two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_743.wav", "onoffCaption": "cow mooing at 0.853-3.835, 5.646-8.205", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_834.wav", "onoffCaption": "duck quacking at 2.186-4.186, 5.978-7.978", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_861.wav", "onoffCaption": "thump thud at 2.521-6.439, 7.515-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": 
"data/multi_event_train/syn_1000.wav", "onoffCaption": "gunshot at 2.613-4.613, 6.416-8.416 and door knocking at 3.178-5.399, 6.317-8.686", "frequencyCaption": "gunshot two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1055.wav", "onoffCaption": "door slamming at 1.103-2.22, 4.078-5.195, 7.409-8.526", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1226.wav", "onoffCaption": "dog barking at 1.16-3.16", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1273.wav", "onoffCaption": "whistling at 0.636-2.865", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1296.wav", "onoffCaption": "whistling at 0.45-2.679", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1368.wav", "onoffCaption": "door knocking at 2.254-4.656, 6.714-8.841", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1456.wav", "onoffCaption": "tapping clicking clanking at 1.369-4.809", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1518.wav", "onoffCaption": "explosion at 0.098-5.098 and tapping clicking clanking at 0.832-4.272", "frequencyCaption": "explosion one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1625.wav", "onoffCaption": "cat meowing at 0.696-2.446, 3.021-4.771, 5.412-7.162", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1670.wav", "onoffCaption": "burping belching at 2.796-4.919, 5.517-7.64", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1681.wav", "onoffCaption": "cow mooing at 1.387-4.356, 6.831-9.813", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1695.wav", "onoffCaption": "gunshot at 0.724-2.724, 4.717-6.717 and duck quacking at 1.185-3.185, 
4.117-6.117, 7.028-9.028", "frequencyCaption": "gunshot two times and duck quacking three times"} +{"filepath": "data/multi_event_train/syn_1849.wav", "onoffCaption": "thump thud at 0.642-5.092, 5.855-7.909", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1907.wav", "onoffCaption": "explosion at 1.151-6.151", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1952.wav", "onoffCaption": "door knocking at 0.422-3.975 and duck quacking at 1.2-3.2 and car horn honking at 6.424-9.242", "frequencyCaption": "door knocking one times and duck quacking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3037.wav", "onoffCaption": "duck quacking at 0.241-2.241, 3.858-5.858 and thump thud at 4.667-6.895, 7.566-10.0", "frequencyCaption": "duck quacking two times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_3062.wav", "onoffCaption": "woman laughing at 2.849-5.049", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3087.wav", "onoffCaption": "duck quacking at 0.031-2.031, 3.164-5.164 and burping belching at 7.688-10.0", "frequencyCaption": "duck quacking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3179.wav", "onoffCaption": "gunshot at 1.377-3.377, 4.054-6.054", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3211.wav", "onoffCaption": "sheep goat bleating at 0.633-4.553, 5.305-7.305 and tapping clicking clanking at 1.74-5.18, 7.248-10.0", "frequencyCaption": "sheep goat bleating two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3244.wav", "onoffCaption": "cow mooing at 2.773-6.071, 7.377-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3434.wav", "onoffCaption": "duck quacking at 3.054-5.054, 7.036-9.036", "frequencyCaption": "duck quacking two 
times"} +{"filepath": "data/multi_event_train/syn_3461.wav", "onoffCaption": "door slamming at 2.061-4.842, 6.857-9.638", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3484.wav", "onoffCaption": "duck quacking at 0.197-2.197, 2.792-4.792, 5.774-7.774 and thump thud at 3.437-6.484", "frequencyCaption": "duck quacking three times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3612.wav", "onoffCaption": "whistling at 0.85-5.334, 7.349-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3647.wav", "onoffCaption": "dog barking at 0.531-3.851, 5.617-7.617 and train horn at 0.823-3.303, 4.388-6.934 and tapping clicking clanking at 2.164-5.604", "frequencyCaption": "dog barking two times and train horn two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3709.wav", "onoffCaption": "train horn at 0.105-2.242, 2.805-4.942, 5.66-7.797", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_train/syn_3930.wav", "onoffCaption": "door knocking at 0.868-4.421", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3965.wav", "onoffCaption": "woman laughing at 0.226-3.614, 4.453-7.841 and cow mooing at 3.081-6.05 and door slamming at 3.829-5.829", "frequencyCaption": "woman laughing two times and cow mooing one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3980.wav", "onoffCaption": "door slamming at 0.164-3.138 and sheep goat bleating at 0.911-2.911, 5.018-7.018 and car horn honking at 2.298-7.205", "frequencyCaption": "door slamming one times and sheep goat bleating two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_41.wav", "onoffCaption": "tapping clicking clanking at 2.541-5.981", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_129.wav", "onoffCaption": "dog 
barking at 1.053-3.053, 4.927-6.927 and cat meowing at 4.681-9.041 and sneeze at 6.661-8.273", "frequencyCaption": "dog barking two times and cat meowing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_199.wav", "onoffCaption": "woman laughing at 0.853-7.587 and door knocking at 2.12-4.21 and door slamming at 6.068-7.185, 7.926-9.043", "frequencyCaption": "woman laughing one times and door knocking one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_214.wav", "onoffCaption": "sneeze at 2.226-3.838, 4.85-6.395", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_241.wav", "onoffCaption": "explosion at 0.978-5.978", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_431.wav", "onoffCaption": "spraying at 2.906-3.847, 4.661-5.602, 6.824-7.765", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_464.wav", "onoffCaption": "door knocking at 0.881-3.728, 4.455-7.302 and door slamming at 1.699-4.578, 5.851-6.656 and explosion at 2.481-6.481", "frequencyCaption": "door knocking two times and door slamming two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_481.wav", "onoffCaption": "gunshot at 2.466-4.466, 5.127-7.127 and door knocking at 3.505-8.207 and train horn at 3.535-6.935", "frequencyCaption": "gunshot two times and door knocking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_617.wav", "onoffCaption": "spraying at 2.92-3.57, 5.893-6.543, 8.392-9.042", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_642.wav", "onoffCaption": "gunshot at 2.277-4.277, 5.537-7.537 and cat meowing at 6.26-7.26", "frequencyCaption": "gunshot two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_759.wav", "onoffCaption": "door slamming at 3.234-5.46 and gunshot at 5.625-7.625", "frequencyCaption": "door slamming one 
times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_884.wav", "onoffCaption": "spraying at 0.199-0.699, 1.763-2.263, 2.935-3.435 and cat meowing at 0.762-2.333, 2.93-4.501", "frequencyCaption": "spraying three times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_935.wav", "onoffCaption": "sheep goat bleating at 2.538-4.538, 6.466-8.466", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_960.wav", "onoffCaption": "sneeze at 0.1-3.21, 4.615-6.7", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_985.wav", "onoffCaption": "burping belching at 3.179-6.358, 7.834-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1101.wav", "onoffCaption": "sneeze at 0.201-2.44, 3.115-5.028, 5.81-7.895 and gunshot at 1.528-3.528", "frequencyCaption": "sneeze three times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1154.wav", "onoffCaption": "spraying at 0.02-1.104 and whistling at 0.446-5.946", "frequencyCaption": "spraying one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1170.wav", "onoffCaption": "spraying at 2.656-4.418, 6.913-8.17", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1269.wav", "onoffCaption": "duck quacking at 0.627-2.627, 3.358-5.358", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1327.wav", "onoffCaption": "burping belching at 0.01-2.135, 3.537-5.662, 6.649-8.774 and door knocking at 1.426-3.586", "frequencyCaption": "burping belching three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1372.wav", "onoffCaption": "dog barking at 2.736-4.736, 6.873-8.873", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1397.wav", "onoffCaption": "cat meowing at 0.314-1.898, 2.63-4.214", "frequencyCaption": "cat meowing two 
times"} +{"filepath": "data/multi_event_train/syn_1403.wav", "onoffCaption": "explosion at 1.981-4.734, 6.313-9.313", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1419.wav", "onoffCaption": "train horn at 0.012-4.012 and spraying at 6.3-7.381", "frequencyCaption": "train horn one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1502.wav", "onoffCaption": "duck quacking at 0.082-2.082 and car horn honking at 0.559-4.054 and spraying at 5.939-8.523", "frequencyCaption": "duck quacking one times and car horn honking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1526.wav", "onoffCaption": "thump thud at 0.761-2.989, 4.754-6.982", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1557.wav", "onoffCaption": "train horn at 0.528-8.728", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1724.wav", "onoffCaption": "door slamming at 0.416-1.267, 2.1-2.951, 4.414-5.265", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1771.wav", "onoffCaption": "thump thud at 0.394-2.622, 3.385-5.613, 6.502-8.73", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_1794.wav", "onoffCaption": "door knocking at 0.846-3.576, 5.727-8.351", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1806.wav", "onoffCaption": "explosion at 1.47-6.47 and door slamming at 1.716-4.497, 5.429-6.908 and burping belching at 3.429-7.429", "frequencyCaption": "explosion one times and door slamming two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1853.wav", "onoffCaption": "cow mooing at 0.434-3.416, 3.954-6.789, 7.523-9.86", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/multi_event_train/syn_1948.wav", "onoffCaption": "train horn at 1.774-6.851 and tapping clicking clanking at 
3.442-6.882", "frequencyCaption": "train horn one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3078.wav", "onoffCaption": "sneeze at 0.026-1.483 and thump thud at 3.614-6.114, 7.183-10.0", "frequencyCaption": "sneeze one times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_3136.wav", "onoffCaption": "train horn at 0.866-5.768, 7.973-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3163.wav", "onoffCaption": "car horn honking at 0.131-2.478", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3186.wav", "onoffCaption": "spraying at 1.654-3.416, 4.687-7.123 and cat meowing at 2.384-5.625, 7.118-10.0", "frequencyCaption": "spraying two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3310.wav", "onoffCaption": "train horn at 1.864-5.398 and door knocking at 4.175-6.239, 6.937-9.001", "frequencyCaption": "train horn one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3345.wav", "onoffCaption": "door knocking at 3.904-8.176", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3535.wav", "onoffCaption": "gunshot at 1.061-3.061, 4.558-6.558 and duck quacking at 3.044-5.044", "frequencyCaption": "gunshot two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3560.wav", "onoffCaption": "car horn honking at 0.198-4.71, 6.293-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3585.wav", "onoffCaption": "cat meowing at 2.877-4.017", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3608.wav", "onoffCaption": "cow mooing at 2.655-5.953 and sneeze at 7.997-10.0", "frequencyCaption": "cow mooing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3713.wav", "onoffCaption": "whistling at 0.531-5.015, 
7.0-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3746.wav", "onoffCaption": "car horn honking at 0.427-3.213", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3831.wav", "onoffCaption": "sheep goat bleating at 2.297-4.297, 6.101-8.101", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3864.wav", "onoffCaption": "sneeze at 0.203-1.703", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3881.wav", "onoffCaption": "sneeze at 0.434-2.117, 2.96-4.643, 5.251-6.934", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_158.wav", "onoffCaption": "sheep goat bleating at 0.22-2.22, 4.379-6.379, 7.198-9.198", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_230.wav", "onoffCaption": "thump thud at 0.17-2.509, 4.2-6.539", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_265.wav", "onoffCaption": "burping belching at 0.549-2.58, 3.304-5.335 and cow mooing at 1.658-4.64, 5.717-8.699", "frequencyCaption": "burping belching two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_280.wav", "onoffCaption": "sheep goat bleating at 3.352-5.352, 6.27-9.27", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_331.wav", "onoffCaption": "woman laughing at 0.703-2.986, 4.385-6.668 and burping belching at 3.892-6.892", "frequencyCaption": "woman laughing two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_415.wav", "onoffCaption": "gunshot at 0.476-2.476 and cat meowing at 0.509-2.503, 3.064-4.648, 5.727-7.962", "frequencyCaption": "gunshot one times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_440.wav", "onoffCaption": "duck quacking at 0.184-2.184, 3.112-5.112", "frequencyCaption": 
"duck quacking two times"} +{"filepath": "data/multi_event_train/syn_633.wav", "onoffCaption": "cat meowing at 0.287-1.434, 2.463-3.61", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_666.wav", "onoffCaption": "cow mooing at 2.642-5.94", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_683.wav", "onoffCaption": "door slamming at 2.21-3.601", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_699.wav", "onoffCaption": "door slamming at 2.043-4.269, 4.82-6.211, 8.048-9.572", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_728.wav", "onoffCaption": "train horn at 2.321-6.389, 7.291-9.903", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_767.wav", "onoffCaption": "duck quacking at 0.926-2.926, 4.515-6.515 and cat meowing at 1.965-3.925", "frequencyCaption": "duck quacking two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_798.wav", "onoffCaption": "whistling at 0.489-6.899", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_911.wav", "onoffCaption": "cat meowing at 0.48-1.666 and whistling at 0.61-10.0", "frequencyCaption": "cat meowing one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_944.wav", "onoffCaption": "burping belching at 1.37-5.393, 6.183-9.183", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1125.wav", "onoffCaption": "spraying at 2.52-3.371, 4.122-6.582, 7.329-7.956", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1195.wav", "onoffCaption": "door knocking at 0.96-3.2", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1218.wav", "onoffCaption": "tapping clicking clanking at 1.668-5.108", "frequencyCaption": "tapping clicking clanking one times"} 
+{"filepath": "data/multi_event_train/syn_1257.wav", "onoffCaption": "sneeze at 1.556-2.844, 3.917-6.002 and duck quacking at 2.435-4.435, 5.674-7.674 and spraying at 3.075-3.659, 5.829-6.413", "frequencyCaption": "sneeze two times and duck quacking two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1303.wav", "onoffCaption": "car horn honking at 1.45-3.797 and spraying at 2.235-3.086 and dog barking at 6.931-9.852", "frequencyCaption": "car horn honking one times and spraying one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1356.wav", "onoffCaption": "door knocking at 3.378-6.931", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1468.wav", "onoffCaption": "cat meowing at 0.276-1.378, 2.979-4.245 and gunshot at 7.143-9.143", "frequencyCaption": "cat meowing two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1573.wav", "onoffCaption": "dog barking at 0.216-2.216, 3.355-5.355, 6.148-8.148 and spraying at 0.276-0.845 and tapping clicking clanking at 5.304-8.744", "frequencyCaption": "dog barking three times and spraying one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1596.wav", "onoffCaption": "cat meowing at 2.368-3.554, 4.623-7.014", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1601.wav", "onoffCaption": "door knocking at 0.127-2.149 and cow mooing at 3.366-6.376", "frequencyCaption": "door knocking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1700.wav", "onoffCaption": "whistling at 1.146-8.764", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1755.wav", "onoffCaption": "sneeze at 0.305-1.892, 4.262-6.375", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1822.wav", "onoffCaption": "train horn at 1.537-4.417, 6.155-9.035", "frequencyCaption": "train horn 
two times"} +{"filepath": "data/multi_event_train/syn_1877.wav", "onoffCaption": "gunshot at 2.109-4.109 and whistling at 7.924-10.0", "frequencyCaption": "gunshot one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1892.wav", "onoffCaption": "explosion at 0.321-3.377", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1939.wav", "onoffCaption": "door knocking at 0.279-3.126, 3.663-6.51", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1989.wav", "onoffCaption": "sheep goat bleating at 0.042-2.042 and woman laughing at 4.477-7.069", "frequencyCaption": "sheep goat bleating one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3009.wav", "onoffCaption": "sheep goat bleating at 0.509-2.509", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3112.wav", "onoffCaption": "woman laughing at 3.607-6.212 and duck quacking at 5.845-7.845", "frequencyCaption": "woman laughing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3147.wav", "onoffCaption": "whistling at 2.305-6.789, 7.817-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3334.wav", "onoffCaption": "spraying at 0.947-1.697, 3.096-3.846, 4.38-5.13 and whistling at 7.494-10.0", "frequencyCaption": "spraying three times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3361.wav", "onoffCaption": "dog barking at 0.659-2.659, 4.075-6.075 and cow mooing at 2.011-6.991, 7.547-10.0", "frequencyCaption": "dog barking two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3384.wav", "onoffCaption": "gunshot at 0.991-2.991 and spraying at 3.058-4.753", "frequencyCaption": "gunshot one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3511.wav", "onoffCaption": "cat meowing at 0.223-1.232, 1.827-2.836 and whistling at 
0.758-8.413", "frequencyCaption": "cat meowing two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3544.wav", "onoffCaption": "tapping clicking clanking at 1.273-4.713, 5.387-8.827 and car horn honking at 1.592-3.939 and sheep goat bleating at 1.733-3.733", "frequencyCaption": "tapping clicking clanking two times and car horn honking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3679.wav", "onoffCaption": "spraying at 0.11-0.737, 1.279-1.787, 3.007-5.467 and door knocking at 7.869-10.0", "frequencyCaption": "spraying three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3686.wav", "onoffCaption": "explosion at 1.044-6.044, 6.567-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3737.wav", "onoffCaption": "sheep goat bleating at 0.425-2.425, 4.108-6.108 and door slamming at 7.848-8.748", "frequencyCaption": "sheep goat bleating two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3762.wav", "onoffCaption": "duck quacking at 2.729-4.729", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3787.wav", "onoffCaption": "burping belching at 2.836-6.095, 7.143-9.467", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3815.wav", "onoffCaption": "cow mooing at 2.141-5.151 and explosion at 2.372-7.372", "frequencyCaption": "cow mooing one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3840.wav", "onoffCaption": "duck quacking at 1.918-3.918, 5.313-7.313", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_30.wav", "onoffCaption": "duck quacking at 1.581-3.581, 5.379-7.379", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_65.wav", "onoffCaption": "gunshot at 0.859-2.859", "frequencyCaption": "gunshot one times"} +{"filepath": 
"data/multi_event_train/syn_80.wav", "onoffCaption": "sneeze at 0.482-2.478, 3.427-5.423, 7.453-9.449 and explosion at 0.882-3.754, 4.584-6.904", "frequencyCaption": "sneeze three times and explosion two times"} +{"filepath": "data/multi_event_train/syn_117.wav", "onoffCaption": "explosion at 1.659-4.715, 6.7-9.756", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_142.wav", "onoffCaption": "spraying at 0.225-0.733, 1.499-2.007 and dog barking at 0.699-4.019, 4.823-8.143 and door knocking at 0.984-4.734, 5.73-7.951", "frequencyCaption": "spraying two times and dog barking two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_364.wav", "onoffCaption": "car horn honking at 0.766-3.692, 5.467-8.228", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_381.wav", "onoffCaption": "thump thud at 0.034-3.952, 6.217-8.717 and gunshot at 1.044-3.085", "frequencyCaption": "thump thud two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_514.wav", "onoffCaption": "cow mooing at 0.534-3.516 and door slamming at 3.555-4.574, 6.074-7.093", "frequencyCaption": "cow mooing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_541.wav", "onoffCaption": "car horn honking at 2.908-6.749", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_629.wav", "onoffCaption": "burping belching at 1.02-6.144, 6.845-9.4", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_732.wav", "onoffCaption": "explosion at 0.038-2.126 and thump thud at 0.211-2.982, 4.408-6.482", "frequencyCaption": "explosion one times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_782.wav", "onoffCaption": "thump thud at 1.036-3.375, 4.184-6.955 and cat meowing at 2.618-5.859, 7.36-10.0", "frequencyCaption": "thump thud two times and cat meowing two times"} +{"filepath": 
"data/multi_event_train/syn_810.wav", "onoffCaption": "dog barking at 0.258-2.258, 4.132-6.132, 7.99-9.99", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_845.wav", "onoffCaption": "thump thud at 2.152-6.07", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1024.wav", "onoffCaption": "woman laughing at 0.196-2.277, 4.543-6.624", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1071.wav", "onoffCaption": "door slamming at 0.205-2.205, 3.528-5.528, 6.22-7.473 and duck quacking at 2.613-4.613", "frequencyCaption": "door slamming three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1094.wav", "onoffCaption": "sheep goat bleating at 2.335-4.335, 5.743-7.743", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1202.wav", "onoffCaption": "cat meowing at 0.067-1.076", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1319.wav", "onoffCaption": "train horn at 0.993-3.873, 4.733-7.613", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1472.wav", "onoffCaption": "thump thud at 0.435-4.353 and sheep goat bleating at 1.679-3.679, 5.085-7.085", "frequencyCaption": "thump thud one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1497.wav", "onoffCaption": "door slamming at 0.326-2.326, 4.646-6.646", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1569.wav", "onoffCaption": "cow mooing at 0.08-3.09, 4.676-7.686 and door slamming at 7.197-9.197", "frequencyCaption": "cow mooing two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1654.wav", "onoffCaption": "tapping clicking clanking at 3.172-6.612", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1838.wav", 
"onoffCaption": "sheep goat bleating at 1.288-4.608, 6.25-9.57 and duck quacking at 2.365-4.365", "frequencyCaption": "sheep goat bleating two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1888.wav", "onoffCaption": "tapping clicking clanking at 0.995-4.435 and cow mooing at 2.244-5.226, 6.414-9.383", "frequencyCaption": "tapping clicking clanking one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1923.wav", "onoffCaption": "gunshot at 1.011-3.011, 4.239-6.239, 7.057-9.057", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1976.wav", "onoffCaption": "cat meowing at 2.801-4.795, 5.356-7.544, 8.821-9.961", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1993.wav", "onoffCaption": "woman laughing at 2.007-4.089, 6.48-8.562", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3013.wav", "onoffCaption": "sheep goat bleating at 2.166-4.166 and cow mooing at 4.473-7.442 and gunshot at 6.86-8.86", "frequencyCaption": "sheep goat bleating one times and cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3046.wav", "onoffCaption": "sheep goat bleating at 0.979-2.979 and door slamming at 5.08-6.604, 7.268-9.697", "frequencyCaption": "sheep goat bleating one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_3108.wav", "onoffCaption": "spraying at 0.384-2.512, 4.662-7.003 and sneeze at 0.575-2.821", "frequencyCaption": "spraying two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3235.wav", "onoffCaption": "cat meowing at 0.814-1.825", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3260.wav", "onoffCaption": "cat meowing at 0.62-2.56, 3.135-5.075, 6.248-8.188", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_3285.wav", "onoffCaption": 
"gunshot at 0.533-2.533", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3410.wav", "onoffCaption": "duck quacking at 0.698-2.698, 4.798-6.798", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3445.wav", "onoffCaption": "cow mooing at 1.175-4.185, 4.703-7.672 and door knocking at 6.169-8.549", "frequencyCaption": "cow mooing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3636.wav", "onoffCaption": "train horn at 1.218-3.873, 5.846-8.552", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3663.wav", "onoffCaption": "explosion at 2.042-4.049", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3778.wav", "onoffCaption": "train horn at 2.917-6.237, 7.741-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3914.wav", "onoffCaption": "thump thud at 0.368-2.868, 3.643-6.143, 7.238-9.738", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_3941.wav", "onoffCaption": "car horn honking at 0.289-5.196, 7.372-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_37.wav", "onoffCaption": "burping belching at 0.202-5.202, 6.756-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_62.wav", "onoffCaption": "door slamming at 0.077-0.758, 1.309-3.309 and car horn honking at 7.208-10.0", "frequencyCaption": "door slamming two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_87.wav", "onoffCaption": "cat meowing at 3.989-5.929, 7.639-9.389", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_110.wav", "onoffCaption": "woman laughing at 1.572-3.654, 4.928-7.01", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_145.wav", 
"onoffCaption": "cat meowing at 2.595-3.808", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_336.wav", "onoffCaption": "spraying at 2.725-3.725", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_408.wav", "onoffCaption": "spraying at 0.198-0.979, 2.793-3.42, 4.839-6.572", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_513.wav", "onoffCaption": "thump thud at 0.607-4.525, 5.536-9.454 and woman laughing at 3.351-7.403", "frequencyCaption": "thump thud two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_546.wav", "onoffCaption": "burping belching at 0.499-2.597", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_760.wav", "onoffCaption": "car horn honking at 2.233-6.555, 7.446-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_785.wav", "onoffCaption": "sheep goat bleating at 0.214-2.214 and tapping clicking clanking at 4.133-7.573", "frequencyCaption": "sheep goat bleating one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_817.wav", "onoffCaption": "burping belching at 0.67-2.768, 5.205-7.33", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_842.wav", "onoffCaption": "explosion at 3.225-6.225", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_959.wav", "onoffCaption": "door knocking at 0.278-2.581, 4.559-6.961, 7.965-10.0", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1023.wav", "onoffCaption": "spraying at 0.154-1.005, 3.277-4.972", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1076.wav", "onoffCaption": "woman laughing at 0.237-7.249, 7.811-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": 
"data/multi_event_train/syn_1093.wav", "onoffCaption": "spraying at 0.32-1.052, 3.287-5.415 and burping belching at 6.898-10.0", "frequencyCaption": "spraying two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1138.wav", "onoffCaption": "car horn honking at 0.532-2.532, 3.585-6.576, 7.794-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_1188.wav", "onoffCaption": "cow mooing at 3.604-8.033", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1250.wav", "onoffCaption": "sheep goat bleating at 3.127-5.127, 5.942-7.942", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1420.wav", "onoffCaption": "door slamming at 3.092-4.07, 5.862-6.713 and thump thud at 5.014-7.514", "frequencyCaption": "door slamming two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1427.wav", "onoffCaption": "tapping clicking clanking at 3.687-7.127", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1475.wav", "onoffCaption": "dog barking at 2.524-4.524 and duck quacking at 2.891-4.891, 5.41-7.41", "frequencyCaption": "dog barking one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1490.wav", "onoffCaption": "woman laughing at 0.474-3.269", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1606.wav", "onoffCaption": "train horn at 3.052-6.372", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1924.wav", "onoffCaption": "woman laughing at 0.787-3.353, 4.339-6.905 and door knocking at 2.189-5.957", "frequencyCaption": "woman laughing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1971.wav", "onoffCaption": "duck quacking at 0.795-2.795, 4.693-6.693", "frequencyCaption": "duck quacking two times"} +{"filepath": 
"data/multi_event_train/syn_1994.wav", "onoffCaption": "burping belching at 0.079-2.11", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3014.wav", "onoffCaption": "duck quacking at 2.193-4.193, 4.922-6.922, 7.799-9.799", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3041.wav", "onoffCaption": "door slamming at 1.161-2.926 and whistling at 7.103-10.0", "frequencyCaption": "door slamming one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3232.wav", "onoffCaption": "cat meowing at 2.11-4.05, 5.92-7.86", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3267.wav", "onoffCaption": "thump thud at 0.009-4.459, 6.04-8.228", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3329.wav", "onoffCaption": "thump thud at 0.967-4.014, 6.031-9.078", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3417.wav", "onoffCaption": "spraying at 0.093-1.034", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_3442.wav", "onoffCaption": "gunshot at 0.47-2.47, 4.029-6.029 and cow mooing at 4.86-7.842", "frequencyCaption": "gunshot two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3559.wav", "onoffCaption": "burping belching at 0.082-2.176, 4.473-7.294 and woman laughing at 0.393-2.676", "frequencyCaption": "burping belching two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3631.wav", "onoffCaption": "sneeze at 3.473-4.761, 6.665-9.279", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3664.wav", "onoffCaption": "tapping clicking clanking at 2.176-5.616, 6.881-9.797", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3681.wav", "onoffCaption": "tapping clicking clanking at 0.021-3.461", 
"frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3808.wav", "onoffCaption": "car horn honking at 0.792-3.139 and woman laughing at 1.428-3.783, 4.399-7.212", "frequencyCaption": "car horn honking one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3913.wav", "onoffCaption": "train horn at 0.685-4.045", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3946.wav", "onoffCaption": "car horn honking at 0.998-3.911 and thump thud at 2.478-6.396", "frequencyCaption": "car horn honking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_78.wav", "onoffCaption": "door slamming at 2.514-3.494, 4.288-5.268, 6.573-7.553 and thump thud at 3.927-6.698", "frequencyCaption": "door slamming three times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_237.wav", "onoffCaption": "dog barking at 1.736-3.736, 5.817-7.817 and door slamming at 3.506-4.006, 4.74-5.24", "frequencyCaption": "dog barking two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_262.wav", "onoffCaption": "explosion at 0.161-2.879", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_278.wav", "onoffCaption": "whistling at 0.888-3.863, 5.403-8.378 and spraying at 1.588-3.98", "frequencyCaption": "whistling two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_287.wav", "onoffCaption": "train horn at 2.767-6.887", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_363.wav", "onoffCaption": "car horn honking at 0.079-2.897, 4.356-7.174 and thump thud at 5.249-7.588", "frequencyCaption": "car horn honking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_379.wav", "onoffCaption": "cat meowing at 0.541-3.445 and sheep goat bleating at 3.471-5.471, 7.835-9.835 and burping belching at 5.151-8.651", 
"frequencyCaption": "cat meowing one times and sheep goat bleating two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_386.wav", "onoffCaption": "tapping clicking clanking at 3.089-6.529", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_447.wav", "onoffCaption": "door knocking at 1.369-7.429 and cat meowing at 1.777-3.725, 5.36-7.308", "frequencyCaption": "door knocking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_634.wav", "onoffCaption": "sheep goat bleating at 3.048-5.048, 6.235-8.355 and woman laughing at 4.232-6.348, 7.658-9.852", "frequencyCaption": "sheep goat bleating two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_661.wav", "onoffCaption": "car horn honking at 0.639-5.039, 6.08-8.118", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_684.wav", "onoffCaption": "whistling at 1.082-8.737", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_735.wav", "onoffCaption": "thump thud at 2.444-4.672, 5.658-7.997 and gunshot at 2.509-4.528", "frequencyCaption": "thump thud two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_858.wav", "onoffCaption": "woman laughing at 1.337-3.62 and spraying at 6.255-7.339", "frequencyCaption": "woman laughing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_916.wav", "onoffCaption": "duck quacking at 0.154-2.154, 3.43-5.43", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_943.wav", "onoffCaption": "thump thud at 0.617-4.992", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1089.wav", "onoffCaption": "duck quacking at 0.838-2.838, 4.658-6.658 and sheep goat bleating at 3.276-5.276", "frequencyCaption": "duck quacking two times and sheep goat bleating one times"} +{"filepath": 
"data/multi_event_train/syn_1177.wav", "onoffCaption": "door slamming at 0.485-2.398, 4.742-7.703", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1192.wav", "onoffCaption": "burping belching at 3.097-7.097", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1205.wav", "onoffCaption": "door slamming at 1.534-2.434, 3.424-4.324 and burping belching at 6.945-9.945", "frequencyCaption": "door slamming two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1304.wav", "onoffCaption": "sneeze at 0.136-2.382, 3.132-5.378, 6.294-8.54 and cat meowing at 3.431-5.391, 6.289-8.237", "frequencyCaption": "sneeze three times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1351.wav", "onoffCaption": "door slamming at 0.002-1.002, 2.305-4.305", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1521.wav", "onoffCaption": "sheep goat bleating at 0.099-2.099 and whistling at 0.517-8.267", "frequencyCaption": "sheep goat bleating one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1591.wav", "onoffCaption": "cow mooing at 0.046-3.015, 4.496-7.465, 7.986-10.0 and door slamming at 0.486-1.719", "frequencyCaption": "cow mooing three times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1649.wav", "onoffCaption": "sheep goat bleating at 1.382-3.382 and duck quacking at 6.325-8.325", "frequencyCaption": "sheep goat bleating one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1653.wav", "onoffCaption": "duck quacking at 2.236-4.236", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1707.wav", "onoffCaption": "train horn at 0.201-3.001 and car horn honking at 0.44-2.44", "frequencyCaption": "train horn one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1748.wav", 
"onoffCaption": "car horn honking at 1.705-5.292 and cow mooing at 2.18-7.16", "frequencyCaption": "car horn honking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1752.wav", "onoffCaption": "burping belching at 1.373-5.242", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1825.wav", "onoffCaption": "door knocking at 2.788-5.037, 6.324-8.552", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1870.wav", "onoffCaption": "duck quacking at 2.886-4.886, 7.079-9.079", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3115.wav", "onoffCaption": "train horn at 1.259-4.499, 5.745-8.4", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3140.wav", "onoffCaption": "whistling at 0.617-2.626, 3.818-6.71 and door knocking at 2.508-5.132, 6.443-9.067", "frequencyCaption": "whistling two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3228.wav", "onoffCaption": "duck quacking at 1.877-3.877", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3282.wav", "onoffCaption": "cat meowing at 0.19-1.745, 3.887-5.442 and woman laughing at 7.712-9.81", "frequencyCaption": "cat meowing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3298.wav", "onoffCaption": "door slamming at 3.537-6.511", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_3333.wav", "onoffCaption": "door slamming at 0.342-1.515, 2.303-3.452, 4.241-6.437", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3366.wav", "onoffCaption": "car horn honking at 3.033-7.282", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3383.wav", "onoffCaption": "dog barking at 0.353-5.99 and door slamming at 5.231-7.594", 
"frequencyCaption": "dog barking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3399.wav", "onoffCaption": "woman laughing at 1.997-5.284 and car horn honking at 4.179-8.02", "frequencyCaption": "woman laughing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3458.wav", "onoffCaption": "gunshot at 2.555-4.555, 6.02-8.02", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3516.wav", "onoffCaption": "thump thud at 1.147-3.918 and sneeze at 5.679-9.327", "frequencyCaption": "thump thud one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3543.wav", "onoffCaption": "car horn honking at 1.374-5.028, 5.948-8.323", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3730.wav", "onoffCaption": "duck quacking at 2.802-4.802, 5.763-7.763", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3765.wav", "onoffCaption": "cow mooing at 2.874-5.843 and cat meowing at 6.157-7.184", "frequencyCaption": "cow mooing one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3780.wav", "onoffCaption": "sheep goat bleating at 1.923-5.563, 7.028-10.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3847.wav", "onoffCaption": "train horn at 2.831-7.733 and car horn honking at 3.775-8.024", "frequencyCaption": "train horn one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_213.wav", "onoffCaption": "sheep goat bleating at 4.077-6.077 and whistling at 4.832-7.807 and woman laughing at 5.713-8.132", "frequencyCaption": "sheep goat bleating one times and whistling one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_246.wav", "onoffCaption": "gunshot at 1.206-3.206", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_308.wav", 
"onoffCaption": "thump thud at 2.127-4.898, 7.007-9.507", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_412.wav", "onoffCaption": "cat meowing at 0.117-1.129 and woman laughing at 2.319-4.435", "frequencyCaption": "cat meowing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_463.wav", "onoffCaption": "sneeze at 1.924-3.027, 3.851-4.954 and explosion at 7.605-10.0", "frequencyCaption": "sneeze two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_486.wav", "onoffCaption": "tapping clicking clanking at 2.505-5.945, 7.953-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_509.wav", "onoffCaption": "thump thud at 1.212-4.879, 5.485-8.191", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_578.wav", "onoffCaption": "explosion at 1.569-4.569 and door slamming at 6.423-8.852", "frequencyCaption": "explosion one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_610.wav", "onoffCaption": "tapping clicking clanking at 1.008-4.448, 6.821-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_645.wav", "onoffCaption": "gunshot at 2.581-4.581, 5.606-7.606", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_899.wav", "onoffCaption": "door slamming at 1.574-2.965, 4.028-5.419, 6.017-7.408", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_967.wav", "onoffCaption": "explosion at 2.942-7.863", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_982.wav", "onoffCaption": "tapping clicking clanking at 3.045-6.485", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1039.wav", "onoffCaption": "gunshot at 1.37-3.37, 4.286-6.286, 7.048-9.048", "frequencyCaption": 
"gunshot three times"} +{"filepath": "data/multi_event_train/syn_1048.wav", "onoffCaption": "woman laughing at 0.025-3.11", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1122.wav", "onoffCaption": "thump thud at 3.834-8.284", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1153.wav", "onoffCaption": "sneeze at 0.825-2.437, 4.345-5.957 and dog barking at 2.113-4.513", "frequencyCaption": "sneeze two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1320.wav", "onoffCaption": "cow mooing at 0.004-3.302", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1375.wav", "onoffCaption": "explosion at 3.847-6.375, 7.628-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1390.wav", "onoffCaption": "sheep goat bleating at 0.762-2.762, 4.683-6.683", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1505.wav", "onoffCaption": "tapping clicking clanking at 0.221-3.661, 5.651-8.267", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1574.wav", "onoffCaption": "cow mooing at 0.572-3.582, 5.412-8.311", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1638.wav", "onoffCaption": "cow mooing at 1.479-4.777, 5.348-8.33 and duck quacking at 4.231-6.231, 7.487-9.487", "frequencyCaption": "cow mooing two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1688.wav", "onoffCaption": "cat meowing at 0.14-1.184 and burping belching at 4.809-6.916", "frequencyCaption": "cat meowing one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1723.wav", "onoffCaption": "cow mooing at 0.814-3.824, 5.162-8.172", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1776.wav", "onoffCaption": "train horn 
at 2.256-6.324 and tapping clicking clanking at 2.356-5.796, 6.811-9.098", "frequencyCaption": "train horn one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1793.wav", "onoffCaption": "sheep goat bleating at 0.474-3.77, 4.683-7.979", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1801.wav", "onoffCaption": "thump thud at 1.867-4.329, 5.915-8.377", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1895.wav", "onoffCaption": "thump thud at 3.141-6.188", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3131.wav", "onoffCaption": "sheep goat bleating at 0.574-2.574, 5.046-7.046 and burping belching at 0.963-4.523", "frequencyCaption": "sheep goat bleating two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3259.wav", "onoffCaption": "cat meowing at 1.523-4.553", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3317.wav", "onoffCaption": "tapping clicking clanking at 2.77-6.21", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3342.wav", "onoffCaption": "thump thud at 0.556-3.603, 4.335-7.382", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3499.wav", "onoffCaption": "thump thud at 0.149-3.816", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3567.wav", "onoffCaption": "woman laughing at 1.141-3.246", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3582.wav", "onoffCaption": "train horn at 2.172-4.639, 6.039-8.506 and thump thud at 7.093-9.555", "frequencyCaption": "train horn two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3714.wav", "onoffCaption": "spraying at 1.752-2.356, 3.004-3.945, 4.761-7.197", "frequencyCaption": "spraying three 
times"} +{"filepath": "data/multi_event_train/syn_3741.wav", "onoffCaption": "thump thud at 2.691-6.609", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3812.wav", "onoffCaption": "woman laughing at 1.805-4.005, 5.087-7.287", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3863.wav", "onoffCaption": "sneeze at 0.002-1.328, 2.29-5.202 and gunshot at 7.546-9.546", "frequencyCaption": "sneeze two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3886.wav", "onoffCaption": "whistling at 0.26-6.542, 7.991-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3909.wav", "onoffCaption": "woman laughing at 1.825-3.923, 4.569-6.667, 7.773-9.871", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_3978.wav", "onoffCaption": "cat meowing at 0.014-1.598, 3.728-5.312", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_13.wav", "onoffCaption": "train horn at 1.267-6.125", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_161.wav", "onoffCaption": "duck quacking at 2.251-4.251, 5.882-7.882", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_184.wav", "onoffCaption": "door slamming at 1.129-4.09, 5.351-8.312", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_209.wav", "onoffCaption": "cow mooing at 0.235-3.217, 4.196-7.178 and duck quacking at 0.314-2.314, 3.528-5.528, 6.184-8.184", "frequencyCaption": "cow mooing two times and duck quacking three times"} +{"filepath": "data/multi_event_train/syn_312.wav", "onoffCaption": "door slamming at 2.09-4.548, 5.452-7.91", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_436.wav", "onoffCaption": "sneeze at 0.157-1.84, 2.888-4.801, 5.695-6.859 and explosion at 
1.858-6.452", "frequencyCaption": "sneeze three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_537.wav", "onoffCaption": "thump thud at 0.307-4.682, 5.581-9.956", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_744.wav", "onoffCaption": "car horn honking at 0.921-3.834, 6.306-9.219", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_829.wav", "onoffCaption": "thump thud at 1.369-5.036, 5.978-8.234", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_833.wav", "onoffCaption": "thump thud at 0.767-3.106, 4.114-6.453, 7.09-9.429", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_866.wav", "onoffCaption": "sheep goat bleating at 0.53-4.45", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_928.wav", "onoffCaption": "tapping clicking clanking at 1.238-4.678, 6.197-8.498", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_932.wav", "onoffCaption": "woman laughing at 1.002-4.074, 6.289-9.361", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1007.wav", "onoffCaption": "door slamming at 1.093-3.093, 4.313-6.313", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1106.wav", "onoffCaption": "car horn honking at 3.019-6.194, 7.499-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1274.wav", "onoffCaption": "duck quacking at 1.809-3.809 and whistling at 5.361-10.0", "frequencyCaption": "duck quacking one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1291.wav", "onoffCaption": "train horn at 0.326-3.726, 4.95-7.162", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1451.wav", "onoffCaption": "thump thud at 
0.034-3.952, 4.674-7.174 and spraying at 3.911-5.644, 6.476-8.912", "frequencyCaption": "thump thud two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1550.wav", "onoffCaption": "cow mooing at 2.579-5.589, 7.075-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1622.wav", "onoffCaption": "door slamming at 0.324-2.782, 3.662-6.12, 7.194-9.652", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1739.wav", "onoffCaption": "train horn at 0.383-10.0 and sneeze at 3.175-4.421, 5.109-6.355, 7.415-8.661", "frequencyCaption": "train horn one times and sneeze three times"} +{"filepath": "data/multi_event_train/syn_1854.wav", "onoffCaption": "spraying at 0.11-0.685, 1.718-2.622, 3.534-4.266", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1955.wav", "onoffCaption": "spraying at 2.243-2.818 and explosion at 2.78-5.78", "frequencyCaption": "spraying one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3030.wav", "onoffCaption": "door knocking at 0.001-2.848, 4.27-7.117", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3065.wav", "onoffCaption": "car horn honking at 2.783-6.624, 7.492-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3080.wav", "onoffCaption": "sheep goat bleating at 0.185-3.825", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3164.wav", "onoffCaption": "door slamming at 3.034-5.492, 6.137-7.678", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3181.wav", "onoffCaption": "thump thud at 0.103-4.553 and cat meowing at 1.125-3.102", "frequencyCaption": "thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3216.wav", "onoffCaption": "whistling at 3.804-10.0", "frequencyCaption": 
"whistling one times"} +{"filepath": "data/multi_event_train/syn_3429.wav", "onoffCaption": "door knocking at 0.197-3.317, 5.04-7.55", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3433.wav", "onoffCaption": "sheep goat bleating at 1.432-3.432, 5.775-7.775", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3466.wav", "onoffCaption": "tapping clicking clanking at 0.407-3.847, 5.443-8.883", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3528.wav", "onoffCaption": "tapping clicking clanking at 3.215-6.655, 7.623-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3532.wav", "onoffCaption": "gunshot at 1.231-3.231, 4.12-6.12 and explosion at 7.291-10.0", "frequencyCaption": "gunshot two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3640.wav", "onoffCaption": "sneeze at 0.411-2.524 and dog barking at 0.973-2.973 and spraying at 5.929-6.556, 7.556-8.183, 9.228-9.855", "frequencyCaption": "sneeze one times and dog barking one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_3836.wav", "onoffCaption": "cat meowing at 0.111-1.666, 2.677-4.232, 6.241-7.796", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_3937.wav", "onoffCaption": "car horn honking at 0.116-3.029, 3.973-6.427, 7.604-9.604", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_46.wav", "onoffCaption": "duck quacking at 1.528-3.528", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_134.wav", "onoffCaption": "sheep goat bleating at 3.009-7.729", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_222.wav", "onoffCaption": "spraying at 0.061-0.569, 1.14-2.081", "frequencyCaption": "spraying two 
times"} +{"filepath": "data/multi_event_train/syn_339.wav", "onoffCaption": "car horn honking at 1.248-3.248, 3.785-5.785 and spraying at 1.875-2.939 and gunshot at 3.353-5.353", "frequencyCaption": "car horn honking two times and spraying one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_347.wav", "onoffCaption": "door knocking at 2.775-5.895, 6.592-9.712", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_407.wav", "onoffCaption": "duck quacking at 1.025-3.025, 4.357-6.357, 7.309-9.309", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_479.wav", "onoffCaption": "car horn honking at 2.446-6.033", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_562.wav", "onoffCaption": "door knocking at 2.335-4.462", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_587.wav", "onoffCaption": "duck quacking at 2.07-4.07, 5.856-7.856", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_674.wav", "onoffCaption": "burping belching at 0.475-2.573, 3.598-6.116, 6.89-9.015", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_691.wav", "onoffCaption": "explosion at 0.294-4.888, 6.62-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_711.wav", "onoffCaption": "woman laughing at 2.479-4.716, 6.449-8.729 and car horn honking at 3.55-6.063", "frequencyCaption": "woman laughing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_883.wav", "onoffCaption": "woman laughing at 1.491-3.975, 4.989-7.473", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_998.wav", "onoffCaption": "tapping clicking clanking at 0.324-3.764 and car horn honking at 6.065-8.851", "frequencyCaption": "tapping clicking clanking one times and car 
horn honking one times"} +{"filepath": "data/multi_event_train/syn_1052.wav", "onoffCaption": "woman laughing at 1.457-8.902", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1149.wav", "onoffCaption": "car horn honking at 0.002-4.909, 5.614-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1221.wav", "onoffCaption": "sneeze at 0.416-4.916 and cat meowing at 6.14-8.328", "frequencyCaption": "sneeze one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1344.wav", "onoffCaption": "cow mooing at 0.37-3.352 and train horn at 6.332-9.866", "frequencyCaption": "cow mooing one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1404.wav", "onoffCaption": "cow mooing at 0.174-3.156, 3.958-6.94, 7.572-10.0", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/multi_event_train/syn_1609.wav", "onoffCaption": "tapping clicking clanking at 3.452-6.892", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1677.wav", "onoffCaption": "tapping clicking clanking at 0.138-3.578 and gunshot at 1.678-3.678, 4.883-7.389", "frequencyCaption": "tapping clicking clanking one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1692.wav", "onoffCaption": "sneeze at 2.938-4.169 and thump thud at 6.382-9.153", "frequencyCaption": "sneeze one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1712.wav", "onoffCaption": "explosion at 4.023-7.023", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1789.wav", "onoffCaption": "cat meowing at 2.803-4.387, 5.33-6.516 and cow mooing at 4.765-7.775", "frequencyCaption": "cat meowing two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1880.wav", "onoffCaption": "spraying at 1.107-3.543", "frequencyCaption": "spraying one times"} +{"filepath": 
"data/multi_event_train/syn_1900.wav", "onoffCaption": "cat meowing at 0.685-2.256 and duck quacking at 5.166-7.166", "frequencyCaption": "cat meowing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3243.wav", "onoffCaption": "dog barking at 2.647-4.647", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3326.wav", "onoffCaption": "door knocking at 1.874-6.17 and dog barking at 2.187-4.187", "frequencyCaption": "door knocking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3358.wav", "onoffCaption": "thump thud at 0.013-2.241", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3483.wav", "onoffCaption": "sneeze at 1.519-3.98, 5.122-7.583 and door slamming at 5.232-7.595, 8.514-9.747", "frequencyCaption": "sneeze two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_3598.wav", "onoffCaption": "train horn at 3.652-7.652", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3615.wav", "onoffCaption": "cat meowing at 2.269-3.84, 5.6-7.171, 7.761-9.332", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_3770.wav", "onoffCaption": "spraying at 2.216-3.463, 4.491-7.075", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3795.wav", "onoffCaption": "whistling at 0.521-9.132", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3807.wav", "onoffCaption": "door knocking at 0.858-2.948 and gunshot at 1.961-4.131", "frequencyCaption": "door knocking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3879.wav", "onoffCaption": "duck quacking at 0.504-2.504, 3.187-5.187", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3962.wav", "onoffCaption": "door slamming at 0.085-0.936 and dog barking at 0.218-2.218", 
"frequencyCaption": "door slamming one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3987.wav", "onoffCaption": "woman laughing at 0.024-2.616, 4.505-6.534 and cow mooing at 1.114-4.124, 5.734-8.744 and explosion at 1.707-4.575", "frequencyCaption": "woman laughing two times and cow mooing two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_38.wav", "onoffCaption": "whistling at 2.329-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_77.wav", "onoffCaption": "whistling at 0.632-8.382", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_88.wav", "onoffCaption": "explosion at 0.507-5.507, 6.588-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_105.wav", "onoffCaption": "sheep goat bleating at 2.711-4.711, 5.831-7.831", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_277.wav", "onoffCaption": "sheep goat bleating at 3.395-5.395, 6.002-8.002 and door knocking at 4.127-8.277", "frequencyCaption": "sheep goat bleating two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_292.wav", "onoffCaption": "woman laughing at 2.321-4.437, 6.796-8.912", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_389.wav", "onoffCaption": "woman laughing at 2.815-5.234", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_448.wav", "onoffCaption": "burping belching at 2.701-5.701, 6.788-9.788", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_452.wav", "onoffCaption": "cow mooing at 1.714-5.012, 7.457-9.998 and train horn at 1.894-4.374", "frequencyCaption": "cow mooing two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_549.wav", "onoffCaption": "train horn at 2.855-7.932 and dog barking at 
5.735-7.735", "frequencyCaption": "train horn one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_553.wav", "onoffCaption": "door knocking at 0.301-2.489, 3.653-5.841", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_621.wav", "onoffCaption": "thump thud at 3.315-6.086, 7.481-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_818.wav", "onoffCaption": "door knocking at 0.557-2.684 and spraying at 5.889-7.064, 8.823-9.998", "frequencyCaption": "door knocking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_903.wav", "onoffCaption": "car horn honking at 0.806-4.393", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_956.wav", "onoffCaption": "door slamming at 0.088-1.066, 1.694-2.532, 3.141-5.141", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1063.wav", "onoffCaption": "woman laughing at 2.638-5.733, 6.69-8.916", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1079.wav", "onoffCaption": "cat meowing at 0.552-2.688, 5.012-6.159, 7.267-10.0", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1137.wav", "onoffCaption": "whistling at 2.697-7.181, 7.71-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1162.wav", "onoffCaption": "door slamming at 0.678-3.678, 4.346-5.495 and train horn at 7.049-10.0", "frequencyCaption": "door slamming two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1178.wav", "onoffCaption": "thump thud at 3.23-7.148", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1187.wav", "onoffCaption": "dog barking at 1.943-3.943, 4.593-6.593, 7.488-9.497", "frequencyCaption": "dog barking three times"} +{"filepath": 
"data/multi_event_train/syn_1311.wav", "onoffCaption": "spraying at 0.006-1.768, 3.268-5.03 and door knocking at 0.879-3.342, 4.739-7.202 and door slamming at 0.879-2.403, 2.917-4.441", "frequencyCaption": "spraying two times and door knocking two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1435.wav", "onoffCaption": "burping belching at 1.147-3.473 and cow mooing at 1.605-4.574", "frequencyCaption": "burping belching one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1534.wav", "onoffCaption": "tapping clicking clanking at 2.592-6.032, 7.042-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1561.wav", "onoffCaption": "sheep goat bleating at 0.655-2.655, 4.69-6.69", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1584.wav", "onoffCaption": "woman laughing at 0.521-3.575 and tapping clicking clanking at 2.057-5.497, 7.773-10.0", "frequencyCaption": "woman laughing one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1747.wav", "onoffCaption": "door knocking at 0.364-3.201", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1830.wav", "onoffCaption": "whistling at 1.244-4.119", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1865.wav", "onoffCaption": "door slamming at 3.362-4.841", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_3001.wav", "onoffCaption": "door slamming at 0.226-1.399, 2.054-4.054", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3100.wav", "onoffCaption": "woman laughing at 0.453-2.937 and dog barking at 1.239-3.239 and cat meowing at 5.498-7.438", "frequencyCaption": "woman laughing one times and dog barking one times and cat meowing one times"} +{"filepath": 
"data/multi_event_train/syn_3155.wav", "onoffCaption": "tapping clicking clanking at 1.428-4.868, 5.411-8.851 and dog barking at 2.667-4.667, 5.798-7.798", "frequencyCaption": "tapping clicking clanking two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3268.wav", "onoffCaption": "door slamming at 0.163-2.944, 4.553-5.531", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3373.wav", "onoffCaption": "door knocking at 0.756-5.458, 6.45-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3396.wav", "onoffCaption": "spraying at 0.33-2.79, 3.695-6.155 and whistling at 0.506-8.345", "frequencyCaption": "spraying two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3418.wav", "onoffCaption": "tapping clicking clanking at 2.902-6.342, 7.418-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3503.wav", "onoffCaption": "door knocking at 0.979-3.603, 5.118-7.245", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3556.wav", "onoffCaption": "whistling at 1.284-6.459, 7.504-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3725.wav", "onoffCaption": "sheep goat bleating at 0.533-2.533, 4.782-7.45 and spraying at 3.404-4.308", "frequencyCaption": "sheep goat bleating two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3848.wav", "onoffCaption": "door slamming at 0.005-2.133 and duck quacking at 0.54-2.54, 3.892-5.892", "frequencyCaption": "door slamming one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3852.wav", "onoffCaption": "spraying at 0.81-1.31 and duck quacking at 3.213-5.213", "frequencyCaption": "spraying one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3949.wav", "onoffCaption": "door slamming at 2.945-5.726, 
6.358-9.139", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3953.wav", "onoffCaption": "door slamming at 1.582-3.778 and gunshot at 1.671-3.671, 4.86-6.86, 7.927-9.927 and explosion at 1.95-5.077, 5.781-8.781", "frequencyCaption": "door slamming one times and gunshot three times and explosion two times"} +{"filepath": "data/multi_event_train/syn_22.wav", "onoffCaption": "gunshot at 0.208-2.208, 3.513-5.513", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_92.wav", "onoffCaption": "train horn at 1.487-4.287 and car horn honking at 6.554-10.0", "frequencyCaption": "train horn one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_121.wav", "onoffCaption": "dog barking at 0.074-2.074, 4.268-6.268", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_150.wav", "onoffCaption": "cat meowing at 3.961-5.236, 7.028-8.612 and sheep goat bleating at 5.34-7.34", "frequencyCaption": "cat meowing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_238.wav", "onoffCaption": "cat meowing at 1.32-3.26, 5.223-6.489", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_288.wav", "onoffCaption": "whistling at 2.216-6.7", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_323.wav", "onoffCaption": "woman laughing at 2.454-5.842", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_376.wav", "onoffCaption": "tapping clicking clanking at 0.29-3.73, 6.02-9.46", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_393.wav", "onoffCaption": "burping belching at 2.451-4.777, 5.569-7.604 and cow mooing at 2.659-5.957, 6.533-9.831", "frequencyCaption": "burping belching two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_506.wav", 
"onoffCaption": "train horn at 0.082-2.756", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_577.wav", "onoffCaption": "spraying at 3.475-4.05, 6.365-7.429", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_720.wav", "onoffCaption": "train horn at 0.504-3.904, 6.135-8.272 and thump thud at 3.277-6.048, 6.725-9.496", "frequencyCaption": "train horn two times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_775.wav", "onoffCaption": "cat meowing at 2.974-3.985, 4.638-5.649, 6.693-7.704", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_790.wav", "onoffCaption": "explosion at 0.948-2.95, 3.893-6.893", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_802.wav", "onoffCaption": "woman laughing at 0.011-4.063", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_857.wav", "onoffCaption": "woman laughing at 1.419-4.214", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_896.wav", "onoffCaption": "door slamming at 2.591-5.565 and cat meowing at 8.569-9.879", "frequencyCaption": "door slamming one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_919.wav", "onoffCaption": "explosion at 0.047-3.047 and train horn at 0.453-3.853, 4.62-6.757 and sheep goat bleating at 5.955-7.955", "frequencyCaption": "explosion one times and train horn two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1036.wav", "onoffCaption": "thump thud at 0.983-5.433, 7.61-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1086.wav", "onoffCaption": "door slamming at 0.124-1.273 and explosion at 0.735-2.823, 4.138-6.226, 6.896-8.984", "frequencyCaption": "door slamming one times and explosion three times"} +{"filepath": "data/multi_event_train/syn_1210.wav", 
"onoffCaption": "spraying at 0.162-1.246, 1.853-2.937, 3.553-4.637 and burping belching at 0.281-7.449", "frequencyCaption": "spraying three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1245.wav", "onoffCaption": "whistling at 2.827-5.056, 6.187-8.416", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1411.wav", "onoffCaption": "sneeze at 2.848-5.087", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_1460.wav", "onoffCaption": "thump thud at 0.766-3.537, 4.514-6.887", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1485.wav", "onoffCaption": "train horn at 0.635-3.115, 4.967-7.447 and whistling at 2.075-4.95 and dog barking at 2.827-4.827, 5.893-7.893", "frequencyCaption": "train horn two times and whistling one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_1613.wav", "onoffCaption": "thump thud at 1.417-3.917, 6.277-9.048", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1646.wav", "onoffCaption": "burping belching at 0.959-4.239, 6.326-8.357", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1708.wav", "onoffCaption": "gunshot at 3.501-5.501, 6.563-8.563", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1931.wav", "onoffCaption": "tapping clicking clanking at 2.988-6.428 and whistling at 3.277-5.286, 5.868-8.31", "frequencyCaption": "tapping clicking clanking one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_1964.wav", "onoffCaption": "cat meowing at 2.184-3.916 and spraying at 5.222-6.126, 7.621-8.525", "frequencyCaption": "cat meowing one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1981.wav", "onoffCaption": "sneeze at 2.341-4.955 and spraying at 3.88-4.784, 6.693-9.277", "frequencyCaption": "sneeze one times and spraying 
two times"} +{"filepath": "data/multi_event_train/syn_3054.wav", "onoffCaption": "whistling at 3.252-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3227.wav", "onoffCaption": "burping belching at 1.558-5.894", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3272.wav", "onoffCaption": "burping belching at 0.208-2.534, 3.348-5.674", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3297.wav", "onoffCaption": "train horn at 0.472-3.792 and duck quacking at 5.278-7.278, 7.887-9.887", "frequencyCaption": "train horn one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3369.wav", "onoffCaption": "spraying at 0.138-0.989, 2.815-5.251 and cat meowing at 0.541-5.541, 6.668-8.204 and car horn honking at 5.496-7.961", "frequencyCaption": "spraying two times and cat meowing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3402.wav", "onoffCaption": "dog barking at 0.304-2.304, 3.917-5.917 and spraying at 3.578-4.228, 4.758-5.408 and duck quacking at 4.149-6.149", "frequencyCaption": "dog barking two times and spraying two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3457.wav", "onoffCaption": "car horn honking at 0.049-4.449 and door slamming at 2.436-4.436, 6.293-7.466", "frequencyCaption": "car horn honking one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_3519.wav", "onoffCaption": "woman laughing at 0.968-4.356, 6.084-8.682", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3624.wav", "onoffCaption": "door knocking at 1.229-4.845 and cat meowing at 1.724-2.999 and spraying at 7.504-8.679", "frequencyCaption": "door knocking one times and cat meowing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3671.wav", "onoffCaption": "door knocking at 
3.586-5.898, 6.835-9.147", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3694.wav", "onoffCaption": "dog barking at 1.084-3.084", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3906.wav", "onoffCaption": "gunshot at 1.193-3.193, 5.106-7.106", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3977.wav", "onoffCaption": "dog barking at 0.384-2.822 and train horn at 5.374-7.854", "frequencyCaption": "dog barking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_53.wav", "onoffCaption": "gunshot at 1.059-3.059", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_174.wav", "onoffCaption": "car horn honking at 2.435-5.93, 6.754-9.219", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_191.wav", "onoffCaption": "dog barking at 1.037-3.037, 4.049-6.049, 7.259-9.259", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_206.wav", "onoffCaption": "door slamming at 0.219-1.368, 2.751-3.9 and sheep goat bleating at 2.449-7.329", "frequencyCaption": "door slamming two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_249.wav", "onoffCaption": "spraying at 1.489-3.949, 5.232-5.74, 6.275-7.025 and explosion at 3.381-8.302", "frequencyCaption": "spraying three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_307.wav", "onoffCaption": "gunshot at 0.873-2.873, 4.967-6.967 and duck quacking at 4.095-6.095, 7.153-9.153", "frequencyCaption": "gunshot two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_352.wav", "onoffCaption": "door knocking at 0.553-2.774, 3.711-5.932", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_439.wav", "onoffCaption": "car horn honking at 3.541-7.195", "frequencyCaption": "car horn 
honking one times"} +{"filepath": "data/multi_event_train/syn_489.wav", "onoffCaption": "burping belching at 0.236-3.236, 3.906-6.906, 7.783-10.0 and sheep goat bleating at 0.829-4.149", "frequencyCaption": "burping belching three times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_522.wav", "onoffCaption": "sneeze at 1.585-3.824, 4.766-7.005, 7.533-9.772", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_592.wav", "onoffCaption": "car horn honking at 3.597-6.523", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_650.wav", "onoffCaption": "burping belching at 0.679-3.679, 4.496-7.496", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_704.wav", "onoffCaption": "spraying at 0.353-0.853, 2.125-2.625, 4.729-5.229 and train horn at 2.949-7.149", "frequencyCaption": "spraying three times and train horn one times"} +{"filepath": "data/multi_event_train/syn_751.wav", "onoffCaption": "dog barking at 0.236-2.236, 2.934-4.934, 6.462-8.462 and cat meowing at 0.323-1.598", "frequencyCaption": "dog barking three times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_826.wav", "onoffCaption": "cat meowing at 1.916-3.46 and tapping clicking clanking at 5.548-8.988", "frequencyCaption": "cat meowing one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_873.wav", "onoffCaption": "burping belching at 0.036-3.905, 5.109-7.574 and whistling at 0.109-2.118, 2.672-4.681", "frequencyCaption": "burping belching two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_968.wav", "onoffCaption": "burping belching at 2.876-5.876, 6.506-8.876 and door slamming at 8.343-9.343", "frequencyCaption": "burping belching two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1012.wav", "onoffCaption": "cow mooing at 2.858-7.838", 
"frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1047.wav", "onoffCaption": "door knocking at 0.765-3.495 and cat meowing at 6.322-8.217", "frequencyCaption": "door knocking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1109.wav", "onoffCaption": "door knocking at 0.19-2.317, 3.942-6.775", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1234.wav", "onoffCaption": "woman laughing at 0.092-2.792, 4.844-7.544", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1261.wav", "onoffCaption": "dog barking at 0.85-2.85, 3.551-5.551 and door knocking at 1.585-4.694, 5.258-8.367 and spraying at 2.752-4.88", "frequencyCaption": "dog barking two times and door knocking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1284.wav", "onoffCaption": "dog barking at 0.114-2.114, 3.412-5.412 and cat meowing at 2.154-4.342 and gunshot at 2.395-4.395, 6.662-8.662", "frequencyCaption": "dog barking two times and cat meowing one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1360.wav", "onoffCaption": "door knocking at 2.493-6.255, 7.002-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1444.wav", "onoffCaption": "gunshot at 0.506-2.506 and burping belching at 0.555-5.555", "frequencyCaption": "gunshot one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1637.wav", "onoffCaption": "tapping clicking clanking at 4.251-7.691", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1662.wav", "onoffCaption": "tapping clicking clanking at 0.128-3.568, 4.812-7.009, 7.776-9.841 and door slamming at 1.132-3.59, 5.164-7.622", "frequencyCaption": "tapping clicking clanking three times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1687.wav", 
"onoffCaption": "woman laughing at 0.062-7.507", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1736.wav", "onoffCaption": "train horn at 0.102-3.502, 4.222-6.522", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1779.wav", "onoffCaption": "cat meowing at 0.502-3.532, 5.329-8.359", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1915.wav", "onoffCaption": "train horn at 0.937-3.577, 5.909-8.278", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1940.wav", "onoffCaption": "cat meowing at 3.102-4.251 and sheep goat bleating at 7.5-9.5", "frequencyCaption": "cat meowing one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3025.wav", "onoffCaption": "train horn at 0.202-3.002, 5.381-8.181", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3070.wav", "onoffCaption": "spraying at 2.319-2.946, 5.07-5.697", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3095.wav", "onoffCaption": "door knocking at 1.645-4.182, 6.269-9.116", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3203.wav", "onoffCaption": "train horn at 2.208-5.978 and door slamming at 7.84-9.231", "frequencyCaption": "train horn one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3219.wav", "onoffCaption": "gunshot at 2.327-4.327, 6.446-8.446", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3256.wav", "onoffCaption": "cat meowing at 2.573-4.144, 6.409-7.98 and sneeze at 3.459-4.713", "frequencyCaption": "cat meowing two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3318.wav", "onoffCaption": "car horn honking at 0.37-4.77 and sneeze at 5.499-6.663, 7.188-8.352", "frequencyCaption": "car horn honking one times and sneeze 
two times"} +{"filepath": "data/multi_event_train/syn_3426.wav", "onoffCaption": "spraying at 2.304-4.888", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_3473.wav", "onoffCaption": "train horn at 3.563-9.623", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3496.wav", "onoffCaption": "door knocking at 1.616-6.318", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3568.wav", "onoffCaption": "spraying at 2.545-3.545, 4.18-5.18, 6.037-7.037", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3600.wav", "onoffCaption": "sneeze at 1.585-3.498, 4.742-7.001", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3655.wav", "onoffCaption": "gunshot at 1.525-3.655, 4.353-6.353", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3839.wav", "onoffCaption": "sheep goat bleating at 1.587-4.883, 6.121-8.121 and door knocking at 2.008-4.383, 5.019-7.248 and duck quacking at 5.02-7.02", "frequencyCaption": "sheep goat bleating two times and door knocking two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3889.wav", "onoffCaption": "sheep goat bleating at 0.727-4.647, 5.179-7.179", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3922.wav", "onoffCaption": "tapping clicking clanking at 2.51-5.95, 6.743-9.167", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3992.wav", "onoffCaption": "car horn honking at 0.694-4.943, 6.084-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_49.wav", "onoffCaption": "cat meowing at 2.928-4.545 and tapping clicking clanking at 6.788-10.0", "frequencyCaption": "cat meowing one times and tapping clicking clanking one times"} +{"filepath": 
"data/multi_event_train/syn_169.wav", "onoffCaption": "car horn honking at 3.743-8.65", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_253.wav", "onoffCaption": "car horn honking at 0.476-4.063, 6.176-9.023 and door slamming at 3.689-5.885", "frequencyCaption": "car horn honking two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_348.wav", "onoffCaption": "duck quacking at 1.696-3.696 and car horn honking at 6.827-10.0", "frequencyCaption": "duck quacking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_423.wav", "onoffCaption": "sneeze at 0.039-1.984, 3.045-5.358 and duck quacking at 0.331-2.331, 4.696-6.696 and cat meowing at 4.868-6.758, 7.943-9.833", "frequencyCaption": "sneeze two times and duck quacking two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_424.wav", "onoffCaption": "dog barking at 2.088-4.088, 5.769-7.769", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_476.wav", "onoffCaption": "cow mooing at 2.637-5.619, 6.636-9.618", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_493.wav", "onoffCaption": "train horn at 2.842-6.082 and thump thud at 4.328-6.79", "frequencyCaption": "train horn one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_538.wav", "onoffCaption": "door slamming at 3.054-4.357 and sneeze at 7.84-9.785", "frequencyCaption": "door slamming one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_588.wav", "onoffCaption": "sneeze at 0.202-3.85, 5.004-6.096", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_605.wav", "onoffCaption": "woman laughing at 3.674-6.26", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_869.wav", "onoffCaption": "gunshot at 3.09-5.09, 7.337-9.337", "frequencyCaption": "gunshot two 
times"} +{"filepath": "data/multi_event_train/syn_920.wav", "onoffCaption": "tapping clicking clanking at 0.304-3.744, 5.854-9.294", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_927.wav", "onoffCaption": "whistling at 2.837-7.321", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_972.wav", "onoffCaption": "whistling at 0.085-9.75 and cow mooing at 0.316-4.745, 6.432-8.861 and spraying at 0.327-1.059", "frequencyCaption": "whistling one times and cow mooing two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_997.wav", "onoffCaption": "cat meowing at 0.354-1.456, 2.312-3.414 and cow mooing at 1.639-6.068, 7.489-10.0 and duck quacking at 2.356-4.356", "frequencyCaption": "cat meowing two times and cow mooing two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1008.wav", "onoffCaption": "gunshot at 2.084-4.324 and sneeze at 2.735-4.974", "frequencyCaption": "gunshot one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1113.wav", "onoffCaption": "explosion at 3.046-7.046 and thump thud at 4.891-8.558", "frequencyCaption": "explosion one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1114.wav", "onoffCaption": "car horn honking at 2.766-6.261, 7.379-9.379", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1146.wav", "onoffCaption": "sneeze at 3.579-5.896, 6.434-9.281", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1335.wav", "onoffCaption": "explosion at 1.32-4.873 and sheep goat bleating at 1.995-3.995", "frequencyCaption": "explosion one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1385.wav", "onoffCaption": "cat meowing at 0.38-1.924, 3.984-5.528, 6.641-8.185", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1459.wav", 
"onoffCaption": "whistling at 0.017-8.361", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1510.wav", "onoffCaption": "tapping clicking clanking at 1.596-5.036", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1542.wav", "onoffCaption": "car horn honking at 2.759-7.159, 7.739-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1545.wav", "onoffCaption": "thump thud at 1.587-4.358 and car horn honking at 7.312-9.777", "frequencyCaption": "thump thud one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1678.wav", "onoffCaption": "explosion at 0.267-3.82, 5.934-8.806", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1763.wav", "onoffCaption": "whistling at 2.661-5.536, 6.975-9.85", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1786.wav", "onoffCaption": "cow mooing at 0.537-3.835 and thump thud at 5.986-8.757", "frequencyCaption": "cow mooing one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1814.wav", "onoffCaption": "woman laughing at 0.504-2.741, 3.614-5.818, 7.061-9.416", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_1841.wav", "onoffCaption": "burping belching at 0.979-5.002", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1846.wav", "onoffCaption": "door knocking at 3.081-5.171, 6.785-8.875", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3088.wav", "onoffCaption": "woman laughing at 1.437-8.882", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3124.wav", "onoffCaption": "sheep goat bleating at 1.512-3.512, 4.025-6.025", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": 
"data/multi_event_train/syn_3171.wav", "onoffCaption": "whistling at 0.029-5.427, 6.409-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3176.wav", "onoffCaption": "woman laughing at 2.821-5.104 and spraying at 5.22-6.301, 7.497-8.578", "frequencyCaption": "woman laughing one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3193.wav", "onoffCaption": "thump thud at 0.666-3.166 and sneeze at 1.27-5.326, 6.655-8.041", "frequencyCaption": "thump thud one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3194.wav", "onoffCaption": "woman laughing at 2.365-5.437, 6.472-8.84 and spraying at 3.387-4.468, 6.773-7.854", "frequencyCaption": "woman laughing two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3302.wav", "onoffCaption": "burping belching at 2.505-4.612, 5.841-7.872", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3357.wav", "onoffCaption": "cow mooing at 0.093-3.075 and explosion at 7.184-10.0", "frequencyCaption": "cow mooing one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3469.wav", "onoffCaption": "sheep goat bleating at 3.297-5.297, 7.545-9.545", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3520.wav", "onoffCaption": "cow mooing at 0.447-3.745", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3527.wav", "onoffCaption": "cow mooing at 0.083-3.381", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3572.wav", "onoffCaption": "door knocking at 2.958-7.397 and door slamming at 4.054-4.994", "frequencyCaption": "door knocking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3597.wav", "onoffCaption": "door slamming at 0.711-2.839, 4.986-6.377", "frequencyCaption": "door slamming two times"} +{"filepath": 
"data/multi_event_train/syn_3701.wav", "onoffCaption": "explosion at 1.073-3.393, 4.548-6.868, 7.945-10.0", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_3754.wav", "onoffCaption": "car horn honking at 0.048-4.448, 5.584-7.585 and spraying at 2.765-3.849", "frequencyCaption": "car horn honking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3823.wav", "onoffCaption": "door slamming at 2.956-5.917 and dog barking at 4.289-6.289", "frequencyCaption": "door slamming one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3824.wav", "onoffCaption": "explosion at 0.787-3.787, 4.651-7.651 and sheep goat bleating at 3.192-5.192, 5.983-8.134", "frequencyCaption": "explosion two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3876.wav", "onoffCaption": "burping belching at 0.142-2.507, 3.011-5.376", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3893.wav", "onoffCaption": "duck quacking at 0.014-2.014, 3.379-5.379, 7.391-9.391", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3938.wav", "onoffCaption": "spraying at 1.836-4.228, 5.119-6.183, 6.916-9.044", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3988.wav", "onoffCaption": "door slamming at 2.053-4.416 and burping belching at 7.773-10.0", "frequencyCaption": "door slamming one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_201.wav", "onoffCaption": "duck quacking at 0.022-2.022, 3.195-5.195, 5.934-7.934 and door slamming at 0.033-1.15", "frequencyCaption": "duck quacking three times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_471.wav", "onoffCaption": "sneeze at 0.419-1.583 and explosion at 3.872-5.965, 7.719-10.0", "frequencyCaption": "sneeze one times and explosion two times"} +{"filepath": 
"data/multi_event_train/syn_494.wav", "onoffCaption": "cat meowing at 1.089-2.644, 5.032-6.587 and thump thud at 4.146-6.485 and sheep goat bleating at 5.667-7.667", "frequencyCaption": "cat meowing two times and thump thud one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_657.wav", "onoffCaption": "cow mooing at 0.315-3.613, 5.99-8.959", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_719.wav", "onoffCaption": "cat meowing at 0.265-1.531, 3.646-5.378", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_975.wav", "onoffCaption": "car horn honking at 0.708-5.615", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_990.wav", "onoffCaption": "spraying at 0.108-0.84, 1.923-3.685, 4.653-5.9", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1141.wav", "onoffCaption": "sneeze at 0.18-3.388, 4.291-5.585", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1229.wav", "onoffCaption": "dog barking at 2.048-5.368, 7.558-10.0", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1299.wav", "onoffCaption": "cat meowing at 0.318-4.678, 5.599-7.154 and burping belching at 0.472-4.472", "frequencyCaption": "cat meowing two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1332.wav", "onoffCaption": "dog barking at 0.323-2.323, 3.124-5.124, 7.606-9.606", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_1367.wav", "onoffCaption": "whistling at 1.395-3.624 and duck quacking at 2.012-4.012 and woman laughing at 6.398-8.753", "frequencyCaption": "whistling one times and duck quacking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1382.wav", "onoffCaption": "door knocking at 1.252-3.627, 4.328-6.703, 7.558-9.933", 
"frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1517.wav", "onoffCaption": "explosion at 0.423-3.423 and spraying at 1.514-2.141, 4.439-5.008", "frequencyCaption": "explosion one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1731.wav", "onoffCaption": "thump thud at 0.204-4.654 and door slamming at 0.552-1.691", "frequencyCaption": "thump thud one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1764.wav", "onoffCaption": "door knocking at 2.123-5.873", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1781.wav", "onoffCaption": "dog barking at 1.246-3.246, 5.194-7.194", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1813.wav", "onoffCaption": "spraying at 0.171-2.631, 3.957-4.861, 6.029-8.421", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1908.wav", "onoffCaption": "tapping clicking clanking at 1.365-4.805", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3038.wav", "onoffCaption": "cow mooing at 1.901-4.87", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3123.wav", "onoffCaption": "spraying at 1.022-3.414 and burping belching at 5.862-9.862", "frequencyCaption": "spraying one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3305.wav", "onoffCaption": "cow mooing at 0.655-5.084, 5.786-8.564", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3350.wav", "onoffCaption": "sheep goat bleating at 0.712-2.712, 3.787-6.394 and duck quacking at 6.274-8.274", "frequencyCaption": "sheep goat bleating two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3575.wav", "onoffCaption": "tapping clicking clanking at 0.812-4.252, 5.887-8.473", "frequencyCaption": "tapping clicking 
clanking two times"} +{"filepath": "data/multi_event_train/syn_3590.wav", "onoffCaption": "whistling at 0.635-6.135, 7.948-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3648.wav", "onoffCaption": "spraying at 3.426-4.076 and sheep goat bleating at 7.566-9.566", "frequencyCaption": "spraying one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3706.wav", "onoffCaption": "whistling at 0.945-8.6", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3753.wav", "onoffCaption": "tapping clicking clanking at 3.307-6.747", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3871.wav", "onoffCaption": "explosion at 3.077-5.83, 6.473-9.226", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3894.wav", "onoffCaption": "sheep goat bleating at 0.201-2.201, 3.431-5.431", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_54.wav", "onoffCaption": "door slamming at 3.055-4.446, 5.885-8.248", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_126.wav", "onoffCaption": "dog barking at 0.327-2.327", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_254.wav", "onoffCaption": "gunshot at 0.466-2.466, 3.589-5.589, 7.552-9.552", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_355.wav", "onoffCaption": "duck quacking at 2.57-4.57, 5.358-7.358 and sheep goat bleating at 7.642-9.642", "frequencyCaption": "duck quacking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_570.wav", "onoffCaption": "train horn at 2.982-5.782, 6.951-9.751", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_595.wav", "onoffCaption": "sneeze at 3.065-7.594", "frequencyCaption": "sneeze one 
times"} +{"filepath": "data/multi_event_train/syn_602.wav", "onoffCaption": "duck quacking at 0.964-2.964, 5.143-7.143", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_618.wav", "onoffCaption": "door slamming at 2.714-3.853, 4.445-5.584, 6.529-7.668", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_703.wav", "onoffCaption": "tapping clicking clanking at 1.009-4.449, 6.831-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_874.wav", "onoffCaption": "thump thud at 0.65-5.1, 6.105-8.333", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_891.wav", "onoffCaption": "thump thud at 2.63-5.13 and cat meowing at 3.305-5.037, 5.676-7.408", "frequencyCaption": "thump thud one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1040.wav", "onoffCaption": "sheep goat bleating at 1.747-3.747", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1233.wav", "onoffCaption": "gunshot at 3.194-5.194", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_1328.wav", "onoffCaption": "gunshot at 0.77-2.77, 4.869-7.375, 7.885-9.885", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1416.wav", "onoffCaption": "door slamming at 0.159-0.84 and whistling at 0.385-4.869", "frequencyCaption": "door slamming one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1665.wav", "onoffCaption": "duck quacking at 1.743-3.743, 4.623-6.623, 7.499-9.499", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_1680.wav", "onoffCaption": "train horn at 0.177-2.577, 3.945-6.558 and explosion at 0.391-3.263, 3.995-6.867, 7.672-10.0 and cat meowing at 1.708-6.068", "frequencyCaption": "train horn two times and explosion three times and cat meowing 
one times"} +{"filepath": "data/multi_event_train/syn_1809.wav", "onoffCaption": "tapping clicking clanking at 0.425-3.865, 4.963-7.504", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1912.wav", "onoffCaption": "tapping clicking clanking at 2.261-5.701", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3022.wav", "onoffCaption": "sheep goat bleating at 0.296-2.296 and train horn at 4.943-8.423", "frequencyCaption": "sheep goat bleating one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3139.wav", "onoffCaption": "sheep goat bleating at 2.639-4.639", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3204.wav", "onoffCaption": "duck quacking at 0.444-2.444 and thump thud at 5.921-8.968", "frequencyCaption": "duck quacking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3251.wav", "onoffCaption": "tapping clicking clanking at 0.42-3.86 and spraying at 2.566-3.141", "frequencyCaption": "tapping clicking clanking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3474.wav", "onoffCaption": "tapping clicking clanking at 0.664-4.104, 6.255-8.849", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3491.wav", "onoffCaption": "cat meowing at 0.031-1.391 and door knocking at 4.522-9.224", "frequencyCaption": "cat meowing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3607.wav", "onoffCaption": "thump thud at 0.385-4.303, 5.233-7.577 and duck quacking at 5.528-7.528", "frequencyCaption": "thump thud two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3652.wav", "onoffCaption": "gunshot at 0.653-2.653", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3749.wav", "onoffCaption": "explosion at 
0.571-5.165", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3970.wav", "onoffCaption": "dog barking at 3.47-5.47, 6.969-8.969", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3995.wav", "onoffCaption": "dog barking at 0.185-2.185, 2.955-4.955", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_70.wav", "onoffCaption": "dog barking at 1.993-3.993, 4.548-6.548", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_95.wav", "onoffCaption": "thump thud at 0.47-3.517, 4.762-7.809", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_102.wav", "onoffCaption": "door knocking at 2.705-5.825, 7.509-9.76", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_173.wav", "onoffCaption": "tapping clicking clanking at 0.086-3.526 and whistling at 0.707-9.805 and explosion at 2.13-4.137, 5.322-7.329", "frequencyCaption": "tapping clicking clanking one times and whistling one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_196.wav", "onoffCaption": "explosion at 0.777-2.779", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_300.wav", "onoffCaption": "thump thud at 1.121-5.571", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_371.wav", "onoffCaption": "duck quacking at 1.22-3.22, 5.626-7.626", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_394.wav", "onoffCaption": "woman laughing at 2.892-5.138", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_525.wav", "onoffCaption": "explosion at 0.454-2.715, 3.764-6.025 and whistling at 1.919-7.094", "frequencyCaption": "explosion two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_554.wav", "onoffCaption": "cat meowing at 
1.653-2.928, 4.67-5.945", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_669.wav", "onoffCaption": "train horn at 2.326-6.326, 7.15-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_727.wav", "onoffCaption": "whistling at 0.48-10.0 and door knocking at 1.258-6.091", "frequencyCaption": "whistling one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_756.wav", "onoffCaption": "duck quacking at 2.343-4.343, 4.875-6.875, 7.865-9.865", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_772.wav", "onoffCaption": "explosion at 2.289-5.161, 5.982-8.854", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_797.wav", "onoffCaption": "sheep goat bleating at 1.447-3.447, 5.844-7.844 and dog barking at 2.75-4.75", "frequencyCaption": "sheep goat bleating two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_821.wav", "onoffCaption": "gunshot at 0.209-2.209, 3.627-5.627 and tapping clicking clanking at 0.683-4.123", "frequencyCaption": "gunshot two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_850.wav", "onoffCaption": "train horn at 0.525-4.295 and door knocking at 0.696-3.233 and cat meowing at 2.715-5.619, 6.598-8.169", "frequencyCaption": "train horn one times and door knocking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1015.wav", "onoffCaption": "car horn honking at 0.191-3.117 and spraying at 2.06-2.568, 3.885-4.393, 5.536-6.044 and door slamming at 4.087-5.027, 6.379-7.319", "frequencyCaption": "car horn honking one times and spraying three times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1064.wav", "onoffCaption": "door slamming at 1.414-4.293", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1081.wav", 
"onoffCaption": "door slamming at 1.436-4.153, 4.833-5.852", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1217.wav", "onoffCaption": "explosion at 0.391-3.391 and dog barking at 2.243-4.243", "frequencyCaption": "explosion one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1242.wav", "onoffCaption": "spraying at 0.567-1.067, 2.157-2.938, 4.063-5.32", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1266.wav", "onoffCaption": "woman laughing at 0.647-2.93, 5.093-7.293 and dog barking at 4.855-6.855, 7.951-9.951", "frequencyCaption": "woman laughing two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_1283.wav", "onoffCaption": "whistling at 1.596-9.981", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1359.wav", "onoffCaption": "train horn at 0.245-3.645, 4.367-7.767 and woman laughing at 3.448-6.148", "frequencyCaption": "train horn two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1398.wav", "onoffCaption": "woman laughing at 0.118-4.17, 5.424-7.707", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1432.wav", "onoffCaption": "door slamming at 3.069-4.61", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1443.wav", "onoffCaption": "cow mooing at 2.99-7.97", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1529.wav", "onoffCaption": "spraying at 0.045-3.064, 4.628-5.136, 5.647-6.498 and whistling at 0.17-7.92", "frequencyCaption": "spraying three times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1558.wav", "onoffCaption": "spraying at 0.1-0.675", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1614.wav", "onoffCaption": "train horn at 2.717-10.0", "frequencyCaption": "train horn one 
times"} +{"filepath": "data/multi_event_train/syn_1630.wav", "onoffCaption": "spraying at 3.453-3.975, 6.279-6.787, 7.637-8.264", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1641.wav", "onoffCaption": "tapping clicking clanking at 0.201-3.641, 4.988-8.428", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1936.wav", "onoffCaption": "woman laughing at 0.152-2.852, 4.099-6.799", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1947.wav", "onoffCaption": "door knocking at 3.182-5.912, 7.132-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3006.wav", "onoffCaption": "door knocking at 3.963-7.516", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3077.wav", "onoffCaption": "whistling at 1.496-5.98", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3092.wav", "onoffCaption": "thump thud at 1.644-6.019, 7.447-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3189.wav", "onoffCaption": "explosion at 0.54-5.54, 6.412-10.0 and door knocking at 2.556-5.286, 6.463-8.926", "frequencyCaption": "explosion two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3220.wav", "onoffCaption": "sneeze at 3.349-4.513, 5.349-7.752", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3275.wav", "onoffCaption": "duck quacking at 0.417-2.417, 3.876-5.876", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3290.wav", "onoffCaption": "tapping clicking clanking at 0.227-3.667, 4.679-6.849 and duck quacking at 3.852-5.852, 7.038-9.038 and explosion at 5.023-9.617", "frequencyCaption": "tapping clicking clanking two times and duck quacking two times and explosion one times"} +{"filepath": 
"data/multi_event_train/syn_3421.wav", "onoffCaption": "cat meowing at 0.013-1.584, 2.383-4.331, 5.483-8.387 and door knocking at 0.75-5.283, 7.441-10.0", "frequencyCaption": "cat meowing three times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3450.wav", "onoffCaption": "duck quacking at 2.7-4.7, 5.509-7.509", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3623.wav", "onoffCaption": "car horn honking at 3.793-6.579", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3676.wav", "onoffCaption": "burping belching at 1.812-3.843, 6.336-8.367", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3693.wav", "onoffCaption": "sheep goat bleating at 2.944-4.944", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3738.wav", "onoffCaption": "thump thud at 0.424-4.799", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3788.wav", "onoffCaption": "sheep goat bleating at 0.787-5.507, 6.694-8.694", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3925.wav", "onoffCaption": "woman laughing at 0.292-7.737", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3954.wav", "onoffCaption": "sneeze at 0.104-2.023, 2.539-4.484, 6.855-8.101", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_25.wav", "onoffCaption": "gunshot at 1.625-3.625, 5.899-8.172 and whistling at 3.782-8.957", "frequencyCaption": "gunshot two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_157.wav", "onoffCaption": "train horn at 0.233-2.37, 3.224-5.601", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_225.wav", "onoffCaption": "tapping clicking clanking at 2.105-5.545, 7.367-10.0", "frequencyCaption": 
"tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_324.wav", "onoffCaption": "spraying at 0.009-0.509, 2.341-3.425", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_400.wav", "onoffCaption": "train horn at 2.694-5.574", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_501.wav", "onoffCaption": "cow mooing at 0.234-3.532 and whistling at 1.104-6.737 and explosion at 2.284-7.284, 7.816-10.0", "frequencyCaption": "cow mooing one times and whistling one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_673.wav", "onoffCaption": "woman laughing at 0.788-3.583 and door knocking at 2.854-5.701", "frequencyCaption": "woman laughing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_805.wav", "onoffCaption": "spraying at 1.964-4.548", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_904.wav", "onoffCaption": "car horn honking at 0.948-4.535, 6.325-8.809", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1031.wav", "onoffCaption": "explosion at 0.294-2.468", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1130.wav", "onoffCaption": "woman laughing at 1.1-3.455, 4.433-6.788 and cat meowing at 1.581-3.575", "frequencyCaption": "woman laughing two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1467.wav", "onoffCaption": "door knocking at 1.655-5.405, 7.308-9.808", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1482.wav", "onoffCaption": "tapping clicking clanking at 0.246-3.686, 4.457-7.897 and door slamming at 2.901-3.739", "frequencyCaption": "tapping clicking clanking two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1498.wav", "onoffCaption": "whistling at 0.046-8.057 and sneeze at 2.099-3.644, 
5.951-8.197", "frequencyCaption": "whistling one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_1566.wav", "onoffCaption": "train horn at 2.16-7.904", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1583.wav", "onoffCaption": "door slamming at 0.07-2.29, 3.986-6.206", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1599.wav", "onoffCaption": "whistling at 0.128-9.793", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1862.wav", "onoffCaption": "gunshot at 0.694-2.694, 3.323-5.323 and burping belching at 2.026-6.026, 6.855-10.0", "frequencyCaption": "gunshot two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1878.wav", "onoffCaption": "whistling at 0.389-8.4", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1887.wav", "onoffCaption": "cow mooing at 0.512-4.941 and thump thud at 7.964-10.0", "frequencyCaption": "cow mooing one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1963.wav", "onoffCaption": "woman laughing at 1.24-3.595 and tapping clicking clanking at 1.503-4.943", "frequencyCaption": "woman laughing one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1979.wav", "onoffCaption": "door slamming at 0.075-1.214, 3.128-5.909", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1986.wav", "onoffCaption": "door slamming at 1.025-3.806, 5.303-7.303", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3049.wav", "onoffCaption": "woman laughing at 2.442-7.481", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3053.wav", "onoffCaption": "woman laughing at 2.766-5.861, 7.24-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3148.wav", 
"onoffCaption": "tapping clicking clanking at 0.276-3.716", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3152.wav", "onoffCaption": "sheep goat bleating at 0.454-2.454, 3.049-5.049, 6.197-8.197", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3405.wav", "onoffCaption": "whistling at 0.099-8.11", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3504.wav", "onoffCaption": "door knocking at 0.793-5.495 and thump thud at 3.052-5.391", "frequencyCaption": "door knocking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3800.wav", "onoffCaption": "tapping clicking clanking at 0.979-4.419, 5.28-7.969 and explosion at 2.588-5.588 and sneeze at 7.902-9.196", "frequencyCaption": "tapping clicking clanking two times and explosion one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3901.wav", "onoffCaption": "sheep goat bleating at 0.683-2.683, 4.233-6.233", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_118.wav", "onoffCaption": "train horn at 0.018-2.818, 3.936-6.736, 7.383-10.0", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_train/syn_270.wav", "onoffCaption": "duck quacking at 0.329-2.329, 3.818-5.818 and sheep goat bleating at 2.212-7.092", "frequencyCaption": "duck quacking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_295.wav", "onoffCaption": "burping belching at 0.305-2.336, 4.218-6.249, 6.891-8.922", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_455.wav", "onoffCaption": "thump thud at 2.92-7.37 and sneeze at 4.19-5.424, 6.465-7.922", "frequencyCaption": "thump thud one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_626.wav", "onoffCaption": "dog barking at 1.493-3.493", 
"frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_696.wav", "onoffCaption": "gunshot at 1.907-3.907, 4.559-6.559, 7.45-9.45", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_768.wav", "onoffCaption": "duck quacking at 3.151-5.151, 6.78-8.78", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_951.wav", "onoffCaption": "cow mooing at 2.278-5.26, 6.421-9.403", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1165.wav", "onoffCaption": "spraying at 2.992-3.992, 4.604-5.208", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1180.wav", "onoffCaption": "duck quacking at 0.539-2.539, 3.257-5.257, 6.102-8.102", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_1258.wav", "onoffCaption": "whistling at 0.723-6.223 and woman laughing at 1.377-8.111 and sheep goat bleating at 6.748-8.748", "frequencyCaption": "whistling one times and woman laughing one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1316.wav", "onoffCaption": "car horn honking at 2.275-5.862", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1343.wav", "onoffCaption": "duck quacking at 1.608-3.608, 4.465-6.465, 7.576-9.576", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_1428.wav", "onoffCaption": "train horn at 0.016-2.153, 3.202-5.339, 5.864-8.001", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_train/syn_1533.wav", "onoffCaption": "cow mooing at 2.782-5.764, 6.836-9.177", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1715.wav", "onoffCaption": "thump thud at 1.218-3.718, 4.628-7.128", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1740.wav", "onoffCaption": 
"train horn at 1.767-5.127, 6.095-9.455", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1837.wav", "onoffCaption": "sheep goat bleating at 2.059-6.779 and whistling at 2.884-8.059 and woman laughing at 4.959-7.378", "frequencyCaption": "sheep goat bleating one times and whistling one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3107.wav", "onoffCaption": "door slamming at 0.022-0.873 and cat meowing at 1.093-2.403 and train horn at 5.959-10.0", "frequencyCaption": "door slamming one times and cat meowing one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3321.wav", "onoffCaption": "cat meowing at 3.21-4.21, 5.7-6.712", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3374.wav", "onoffCaption": "dog barking at 0.078-2.078, 3.208-5.208, 5.9-7.9", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_3391.wav", "onoffCaption": "explosion at 0.269-3.141 and duck quacking at 0.455-2.455 and door knocking at 5.976-8.328", "frequencyCaption": "explosion one times and duck quacking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3551.wav", "onoffCaption": "door knocking at 2.84-7.673", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3639.wav", "onoffCaption": "woman laughing at 0.177-2.377, 3.151-5.351, 6.045-8.245", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_3689.wav", "onoffCaption": "sheep goat bleating at 2.212-4.212, 5.084-7.569", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3722.wav", "onoffCaption": "cat meowing at 0.333-2.327, 3.269-4.805", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3777.wav", "onoffCaption": "duck quacking at 2.801-4.801 and spraying at 3.013-5.141, 
6.062-6.663", "frequencyCaption": "duck quacking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3792.wav", "onoffCaption": "explosion at 2.566-4.659, 7.128-9.212", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3855.wav", "onoffCaption": "duck quacking at 1.625-3.625 and door knocking at 7.025-9.525", "frequencyCaption": "duck quacking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_159.wav", "onoffCaption": "sneeze at 0.134-3.209, 4.258-7.138, 7.933-9.852", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_231.wav", "onoffCaption": "duck quacking at 0.318-2.318, 2.949-4.949, 6.008-8.008", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_264.wav", "onoffCaption": "dog barking at 0.128-2.128 and gunshot at 1.2-3.2, 5.28-7.28", "frequencyCaption": "dog barking one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_414.wav", "onoffCaption": "door knocking at 2.845-5.469, 6.563-9.4", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_632.wav", "onoffCaption": "explosion at 2.325-4.389, 4.942-7.006 and tapping clicking clanking at 3.383-6.823", "frequencyCaption": "explosion two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_667.wav", "onoffCaption": "thump thud at 2.149-6.599, 7.743-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_682.wav", "onoffCaption": "cow mooing at 0.843-5.272, 7.321-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_729.wav", "onoffCaption": "door knocking at 0.601-4.369, 5.79-8.142", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_799.wav", "onoffCaption": "sheep goat bleating at 0.047-2.047 and whistling at 1.195-5.679, 7.057-10.0", 
"frequencyCaption": "sheep goat bleating one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_910.wav", "onoffCaption": "thump thud at 0.422-3.193, 3.817-6.156, 7.264-9.455", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_1124.wav", "onoffCaption": "woman laughing at 2.461-4.829 and explosion at 2.722-4.81, 5.629-7.693", "frequencyCaption": "woman laughing one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_1302.wav", "onoffCaption": "explosion at 3.167-5.428, 6.839-9.1", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1357.wav", "onoffCaption": "explosion at 0.493-3.246, 4.246-6.607 and door slamming at 1.075-2.554", "frequencyCaption": "explosion two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1469.wav", "onoffCaption": "duck quacking at 0.592-2.592, 3.215-5.215, 5.836-7.836 and train horn at 1.241-4.601, 5.638-8.998 and door slamming at 5.79-6.768", "frequencyCaption": "duck quacking three times and train horn two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1572.wav", "onoffCaption": "sneeze at 0.074-1.987, 3.317-4.42, 5.2-7.661 and door knocking at 1.161-5.671, 7.944-10.0", "frequencyCaption": "sneeze three times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1597.wav", "onoffCaption": "cat meowing at 0.941-2.496 and dog barking at 1.668-3.668 and explosion at 6.21-10.0", "frequencyCaption": "cat meowing one times and dog barking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1701.wav", "onoffCaption": "spraying at 2.038-2.642", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1754.wav", "onoffCaption": "car horn honking at 0.031-2.817, 4.405-6.905", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1876.wav", "onoffCaption": "woman 
laughing at 0.697-3.181, 3.691-5.891", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1893.wav", "onoffCaption": "door slamming at 0.078-1.217 and dog barking at 3.861-5.861, 6.925-8.925 and thump thud at 4.033-8.483", "frequencyCaption": "door slamming one times and dog barking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1988.wav", "onoffCaption": "door knocking at 0.078-2.168, 2.716-4.806, 5.45-7.54 and cat meowing at 1.282-2.866, 3.698-5.282, 5.908-7.492", "frequencyCaption": "door knocking three times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_3146.wav", "onoffCaption": "thump thud at 0.966-3.737, 4.659-7.43 and sneeze at 1.51-2.764, 3.704-4.95, 5.513-7.729 and cow mooing at 1.812-6.792", "frequencyCaption": "thump thud two times and sneeze three times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3335.wav", "onoffCaption": "whistling at 3.008-8.508", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3385.wav", "onoffCaption": "sneeze at 3.023-7.523", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3510.wav", "onoffCaption": "dog barking at 2.689-4.689, 6.283-8.283", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3678.wav", "onoffCaption": "gunshot at 2.519-4.519 and whistling at 2.939-8.114", "frequencyCaption": "gunshot one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3763.wav", "onoffCaption": "explosion at 2.855-5.573, 6.2-8.918", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3786.wav", "onoffCaption": "door slamming at 2.563-3.854, 5.558-6.363", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3814.wav", "onoffCaption": "whistling at 1.399-9.41", "frequencyCaption": "whistling one times"} +{"filepath": 
"data/multi_event_train/syn_64.wav", "onoffCaption": "dog barking at 0.582-2.582, 4.995-6.995 and spraying at 0.846-1.596 and door knocking at 4.54-8.29", "frequencyCaption": "dog barking two times and spraying one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_81.wav", "onoffCaption": "thump thud at 1.031-5.481", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_281.wav", "onoffCaption": "cow mooing at 0.963-3.973, 4.899-7.683 and train horn at 3.96-7.96", "frequencyCaption": "cow mooing two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_441.wav", "onoffCaption": "gunshot at 0.774-2.774, 3.707-5.707, 7.02-9.02", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_540.wav", "onoffCaption": "thump thud at 2.697-6.364, 7.409-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_844.wav", "onoffCaption": "tapping clicking clanking at 0.698-4.138, 5.254-8.184", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_945.wav", "onoffCaption": "burping belching at 2.96-7.296", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1070.wav", "onoffCaption": "gunshot at 0.081-2.081, 3.247-5.247, 6.513-8.513", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1095.wav", "onoffCaption": "spraying at 0.659-2.145 and gunshot at 5.491-7.491", "frequencyCaption": "spraying one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1171.wav", "onoffCaption": "dog barking at 0.481-2.481, 3.695-5.695", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1194.wav", "onoffCaption": "sneeze at 1.52-4.134, 5.429-7.668 and door slamming at 2.583-3.886", "frequencyCaption": "sneeze two times and door slamming one times"} +{"filepath": 
"data/multi_event_train/syn_1219.wav", "onoffCaption": "sheep goat bleating at 2.316-4.316 and train horn at 7.392-10.0", "frequencyCaption": "sheep goat bleating one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1426.wav", "onoffCaption": "sheep goat bleating at 2.896-4.896 and door slamming at 4.863-5.701, 7.543-9.067", "frequencyCaption": "sheep goat bleating one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1527.wav", "onoffCaption": "spraying at 0.031-1.115, 2.086-3.17 and car horn honking at 2.198-5.417", "frequencyCaption": "spraying two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1823.wav", "onoffCaption": "tapping clicking clanking at 0.895-4.335, 5.021-8.461 and cow mooing at 2.805-5.787", "frequencyCaption": "tapping clicking clanking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1839.wav", "onoffCaption": "cat meowing at 2.895-4.205, 5.243-6.553, 7.151-8.461", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1922.wav", "onoffCaption": "burping belching at 0.283-3.789, 4.889-8.395", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1938.wav", "onoffCaption": "cat meowing at 2.138-3.15, 4.397-5.409 and sneeze at 7.522-8.625", "frequencyCaption": "cat meowing two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3008.wav", "onoffCaption": "gunshot at 0.446-2.616 and burping belching at 4.722-6.753, 7.735-10.0", "frequencyCaption": "gunshot one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_3012.wav", "onoffCaption": "train horn at 2.914-7.816", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3109.wav", "onoffCaption": "whistling at 0.123-8.134 and dog barking at 2.714-4.714, 6.881-8.881", "frequencyCaption": "whistling one times and dog barking two times"} 
+{"filepath": "data/multi_event_train/syn_3113.wav", "onoffCaption": "thump thud at 0.383-2.611", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3360.wav", "onoffCaption": "dog barking at 0.244-2.244, 4.643-6.643 and sneeze at 1.545-3.228, 4.283-6.202 and woman laughing at 2.999-5.282, 7.507-9.943", "frequencyCaption": "dog barking two times and sneeze two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3444.wav", "onoffCaption": "door knocking at 0.208-3.958, 4.484-6.611, 7.771-10.0", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_3545.wav", "onoffCaption": "door slamming at 2.58-4.58, 5.255-7.255 and car horn honking at 5.744-7.744", "frequencyCaption": "door slamming two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3736.wav", "onoffCaption": "sheep goat bleating at 0.976-2.976 and gunshot at 1.201-3.201, 4.086-6.086, 7.889-9.889", "frequencyCaption": "sheep goat bleating one times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_3841.wav", "onoffCaption": "thump thud at 3.493-7.868 and door knocking at 6.458-8.77", "frequencyCaption": "thump thud one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3940.wav", "onoffCaption": "burping belching at 3.845-6.075 and thump thud at 6.247-8.586", "frequencyCaption": "burping belching one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_31.wav", "onoffCaption": "explosion at 0.114-5.035, 6.617-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_40.wav", "onoffCaption": "cow mooing at 0.732-5.712, 6.321-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_116.wav", "onoffCaption": "burping belching at 2.633-6.656, 7.887-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": 
"data/multi_event_train/syn_143.wav", "onoffCaption": "burping belching at 1.128-3.651 and spraying at 1.561-2.561, 3.832-4.916, 5.585-7.851", "frequencyCaption": "burping belching one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_330.wav", "onoffCaption": "sneeze at 0.333-3.443, 4.453-7.563", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_365.wav", "onoffCaption": "duck quacking at 0.019-2.019 and sheep goat bleating at 0.592-2.592 and explosion at 5.364-10.0", "frequencyCaption": "duck quacking one times and sheep goat bleating one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_380.wav", "onoffCaption": "car horn honking at 2.009-6.916 and tapping clicking clanking at 2.997-6.437", "frequencyCaption": "car horn honking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_515.wav", "onoffCaption": "cow mooing at 0.715-3.684 and thump thud at 1.196-3.424 and cat meowing at 8.242-9.253", "frequencyCaption": "cow mooing one times and thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_581.wav", "onoffCaption": "dog barking at 0.183-2.183", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_628.wav", "onoffCaption": "dog barking at 2.434-4.434", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_698.wav", "onoffCaption": "cat meowing at 1.384-4.414", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_733.wav", "onoffCaption": "duck quacking at 3.485-5.485, 7.687-9.687", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_766.wav", "onoffCaption": "cat meowing at 0.072-1.967, 2.942-4.936 and sneeze at 0.947-2.178, 4.106-6.423", "frequencyCaption": "cat meowing two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_783.wav", "onoffCaption": 
"explosion at 0.107-3.101, 5.265-8.259", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_811.wav", "onoffCaption": "thump thud at 0.206-2.668, 4.309-6.537", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_860.wav", "onoffCaption": "car horn honking at 1.847-5.434 and burping belching at 2.504-5.504, 6.182-9.182", "frequencyCaption": "car horn honking one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_885.wav", "onoffCaption": "door knocking at 3.613-8.613", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1025.wav", "onoffCaption": "tapping clicking clanking at 0.035-3.475, 5.44-7.618 and whistling at 0.845-2.854 and cow mooing at 2.891-7.871", "frequencyCaption": "tapping clicking clanking two times and whistling one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1054.wav", "onoffCaption": "explosion at 2.388-7.388", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1203.wav", "onoffCaption": "sneeze at 0.363-2.359, 3.331-5.327, 6.245-8.241 and whistling at 1.162-5.646 and burping belching at 4.98-7.21", "frequencyCaption": "sneeze three times and whistling one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1256.wav", "onoffCaption": "sheep goat bleating at 1.756-3.756, 6.06-8.06", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1318.wav", "onoffCaption": "dog barking at 2.585-4.585, 5.452-7.452", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1402.wav", "onoffCaption": "car horn honking at 0.149-2.614, 3.555-6.02, 6.806-9.271", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_1473.wav", "onoffCaption": "spraying at 0.374-1.106, 2.858-3.939 and burping belching at 2.746-5.746, 7.61-10.0", 
"frequencyCaption": "spraying two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1496.wav", "onoffCaption": "burping belching at 1.063-5.063, 6.187-10.0 and train horn at 1.338-3.738", "frequencyCaption": "burping belching two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1519.wav", "onoffCaption": "explosion at 0.451-3.578, 4.185-7.312 and car horn honking at 4.164-7.383", "frequencyCaption": "explosion two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1568.wav", "onoffCaption": "sneeze at 0.072-4.572 and car horn honking at 2.743-5.208", "frequencyCaption": "sneeze one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1600.wav", "onoffCaption": "tapping clicking clanking at 2.86-6.3, 7.003-9.229 and gunshot at 3.295-5.425", "frequencyCaption": "tapping clicking clanking two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1655.wav", "onoffCaption": "woman laughing at 0.265-2.684 and train horn at 0.391-4.459", "frequencyCaption": "woman laughing one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1889.wav", "onoffCaption": "door knocking at 3.623-6.086, 7.931-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1906.wav", "onoffCaption": "door slamming at 2.942-3.747, 5.972-6.777", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1977.wav", "onoffCaption": "gunshot at 3.187-5.187 and thump thud at 7.862-10.0", "frequencyCaption": "gunshot one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1992.wav", "onoffCaption": "cat meowing at 0.094-3.335", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3036.wav", "onoffCaption": "cat meowing at 2.02-3.38, 4.785-5.812 and woman laughing at 2.276-5.557, 7.167-10.0 and dog barking at 3.171-5.171, 
6.364-8.364", "frequencyCaption": "cat meowing two times and woman laughing two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3047.wav", "onoffCaption": "car horn honking at 0.568-3.481, 4.098-6.975", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3234.wav", "onoffCaption": "cow mooing at 0.597-5.026, 6.061-8.703", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3261.wav", "onoffCaption": "explosion at 3.738-7.578", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3284.wav", "onoffCaption": "thump thud at 0.415-4.865, 6.186-8.495", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3411.wav", "onoffCaption": "door knocking at 0.393-2.642 and whistling at 5.46-9.944", "frequencyCaption": "door knocking one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3460.wav", "onoffCaption": "explosion at 4.039-6.792", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3485.wav", "onoffCaption": "whistling at 1.733-6.217 and cow mooing at 2.774-6.072, 7.142-10.0", "frequencyCaption": "whistling one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3637.wav", "onoffCaption": "sheep goat bleating at 0.468-2.468, 3.357-5.579, 6.358-8.358", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3662.wav", "onoffCaption": "spraying at 0.628-2.114, 3.145-5.537, 6.065-8.117", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3687.wav", "onoffCaption": "cat meowing at 3.218-5.108, 5.993-6.993", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3779.wav", "onoffCaption": "cow mooing at 0.863-3.832", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3915.wav", "onoffCaption": 
"spraying at 0.661-1.918, 2.505-4.633", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3981.wav", "onoffCaption": "dog barking at 1.467-3.467, 5.657-7.657", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_15.wav", "onoffCaption": "duck quacking at 0.518-2.518, 3.085-5.085, 6.404-8.404 and cow mooing at 2.413-6.842, 7.53-10.0 and car horn honking at 3.791-7.632", "frequencyCaption": "duck quacking three times and cow mooing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_132.wav", "onoffCaption": "whistling at 2.824-5.053, 5.615-7.844", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_167.wav", "onoffCaption": "explosion at 2.758-5.511, 7.256-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_182.wav", "onoffCaption": "door slamming at 2.185-3.664, 5.817-7.12 and car horn honking at 2.817-7.724", "frequencyCaption": "door slamming two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_314.wav", "onoffCaption": "tapping clicking clanking at 0.093-3.533, 4.054-7.494", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_341.wav", "onoffCaption": "car horn honking at 0.634-5.146, 6.739-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_531.wav", "onoffCaption": "explosion at 0.061-5.061, 7.036-9.994", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_564.wav", "onoffCaption": "cow mooing at 3.722-6.704", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_643.wav", "onoffCaption": "spraying at 2.746-3.83, 5.719-6.803", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_659.wav", "onoffCaption": "door slamming at 1.024-1.829, 2.415-3.22, 5.384-6.189", 
"frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_717.wav", "onoffCaption": "sheep goat bleating at 1.082-3.082, 3.853-5.853 and thump thud at 7.758-10.0", "frequencyCaption": "sheep goat bleating two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_742.wav", "onoffCaption": "burping belching at 3.38-6.639", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_758.wav", "onoffCaption": "train horn at 1.569-6.646, 7.351-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_835.wav", "onoffCaption": "sheep goat bleating at 1.813-3.813, 5.405-7.405 and burping belching at 2.387-5.931", "frequencyCaption": "sheep goat bleating two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1001.wav", "onoffCaption": "sneeze at 0.148-4.204, 5.432-9.488", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1227.wav", "onoffCaption": "tapping clicking clanking at 0.311-3.751", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1268.wav", "onoffCaption": "spraying at 0.814-1.383 and sheep goat bleating at 0.996-2.996 and train horn at 6.867-10.0", "frequencyCaption": "spraying one times and sheep goat bleating one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1272.wav", "onoffCaption": "door slamming at 1.698-4.659, 6.438-8.801 and cat meowing at 2.838-3.85", "frequencyCaption": "door slamming two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1297.wav", "onoffCaption": "door slamming at 1.376-3.376 and burping belching at 6.395-8.625", "frequencyCaption": "door slamming one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1369.wav", "onoffCaption": "woman laughing at 0.167-3.239, 4.685-7.757", "frequencyCaption": "woman laughing two times"} 
+{"filepath": "data/multi_event_train/syn_1373.wav", "onoffCaption": "explosion at 0.042-2.91 and sneeze at 1.567-3.68, 5.675-7.829 and dog barking at 6.99-8.99", "frequencyCaption": "explosion one times and sneeze two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1396.wav", "onoffCaption": "sheep goat bleating at 1.953-6.833, 7.799-9.799", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1457.wav", "onoffCaption": "spraying at 2.206-2.728 and sheep goat bleating at 2.967-4.967", "frequencyCaption": "spraying one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1624.wav", "onoffCaption": "train horn at 0.453-2.893", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1671.wav", "onoffCaption": "sneeze at 0.545-1.648", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_1694.wav", "onoffCaption": "sneeze at 0.873-3.983", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_1725.wav", "onoffCaption": "train horn at 0.363-4.133 and whistling at 0.652-6.152, 6.997-10.0", "frequencyCaption": "train horn one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_1848.wav", "onoffCaption": "sneeze at 0.755-2.049, 2.763-4.057, 4.699-5.993 and cow mooing at 0.817-3.786", "frequencyCaption": "sneeze three times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1953.wav", "onoffCaption": "whistling at 0.392-9.463 and car horn honking at 2.538-4.538", "frequencyCaption": "whistling one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3063.wav", "onoffCaption": "tapping clicking clanking at 1.047-4.487 and sheep goat bleating at 2.67-4.67", "frequencyCaption": "tapping clicking clanking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3086.wav", "onoffCaption": "train 
horn at 1.792-4.259, 5.145-7.569", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3178.wav", "onoffCaption": "burping belching at 0.665-4.209, 6.437-9.981", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3210.wav", "onoffCaption": "tapping clicking clanking at 2.508-5.948, 7.415-9.678", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3245.wav", "onoffCaption": "tapping clicking clanking at 1.293-4.733, 5.996-9.436 and train horn at 3.296-5.951, 6.594-9.249", "frequencyCaption": "tapping clicking clanking two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_3311.wav", "onoffCaption": "sneeze at 1.807-3.193, 4.448-5.834, 6.424-7.81", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3435.wav", "onoffCaption": "thump thud at 2.366-4.828, 6.309-8.771", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3613.wav", "onoffCaption": "tapping clicking clanking at 0.319-3.759 and dog barking at 5.607-7.607", "frequencyCaption": "tapping clicking clanking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3646.wav", "onoffCaption": "tapping clicking clanking at 1.605-5.045, 6.511-8.868", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3708.wav", "onoffCaption": "dog barking at 0.956-2.956 and sneeze at 1.521-3.466", "frequencyCaption": "dog barking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3747.wav", "onoffCaption": "door slamming at 3.944-5.083, 5.988-7.127", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3931.wav", "onoffCaption": "explosion at 0.693-3.422, 4.387-7.116", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3964.wav", "onoffCaption": "sheep goat 
bleating at 2.099-4.099, 6.494-8.494", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_128.wav", "onoffCaption": "train horn at 0.374-5.276, 6.495-9.026", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_198.wav", "onoffCaption": "gunshot at 0.784-2.784, 3.801-5.801, 6.312-8.312", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_215.wav", "onoffCaption": "whistling at 0.949-8.699", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_240.wav", "onoffCaption": "door slamming at 1.471-2.371, 3.25-4.15, 5.324-6.224", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_430.wav", "onoffCaption": "spraying at 1.877-2.461, 3.411-3.995, 4.65-5.234", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_465.wav", "onoffCaption": "dog barking at 0.878-6.515, 7.656-10.0 and tapping clicking clanking at 3.261-6.701", "frequencyCaption": "dog barking two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_616.wav", "onoffCaption": "sheep goat bleating at 2.186-4.186, 4.941-6.941", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_934.wav", "onoffCaption": "spraying at 1.534-2.709, 5.115-5.69, 7.889-10.0", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_984.wav", "onoffCaption": "door knocking at 3.227-7.737", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1100.wav", "onoffCaption": "sneeze at 2.114-5.189, 6.823-9.898", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1155.wav", "onoffCaption": "explosion at 0.63-3.383, 4.501-7.194", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1326.wav", "onoffCaption": "tapping clicking 
clanking at 2.131-5.571, 7.638-10.0 and explosion at 2.898-7.898", "frequencyCaption": "tapping clicking clanking two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1418.wav", "onoffCaption": "woman laughing at 1.728-3.928, 4.951-7.688", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1503.wav", "onoffCaption": "train horn at 1.911-4.711, 5.396-7.503", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1556.wav", "onoffCaption": "explosion at 1.122-3.875 and dog barking at 7.283-9.283", "frequencyCaption": "explosion one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1770.wav", "onoffCaption": "sneeze at 1.912-4.987, 5.626-7.238", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1795.wav", "onoffCaption": "thump thud at 0.158-3.205, 5.267-7.767", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1852.wav", "onoffCaption": "woman laughing at 0.084-2.2, 2.942-5.058, 5.698-7.814 and duck quacking at 6.149-8.149", "frequencyCaption": "woman laughing three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1949.wav", "onoffCaption": "cat meowing at 2.368-5.272, 6.225-9.129 and sheep goat bleating at 7.594-9.594", "frequencyCaption": "cat meowing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3079.wav", "onoffCaption": "burping belching at 2.466-4.696", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3137.wav", "onoffCaption": "sneeze at 2.975-4.069", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3162.wav", "onoffCaption": "duck quacking at 2.2-4.2 and train horn at 2.651-5.811", "frequencyCaption": "duck quacking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3187.wav", "onoffCaption": "cow 
mooing at 0.219-4.648, 6.771-9.484", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3344.wav", "onoffCaption": "dog barking at 0.909-2.909", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3534.wav", "onoffCaption": "cat meowing at 3.579-6.609", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3584.wav", "onoffCaption": "explosion at 2.028-7.028, 7.591-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3609.wav", "onoffCaption": "cat meowing at 0.249-1.793, 3.33-4.874, 6.573-8.117 and thump thud at 2.851-5.079", "frequencyCaption": "cat meowing three times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3712.wav", "onoffCaption": "tapping clicking clanking at 0.164-3.604 and cat meowing at 1.057-2.332 and explosion at 7.493-10.0", "frequencyCaption": "tapping clicking clanking one times and cat meowing one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3830.wav", "onoffCaption": "sneeze at 2.907-4.435, 6.484-8.887", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3865.wav", "onoffCaption": "door knocking at 0.025-2.337 and dog barking at 0.545-2.545 and woman laughing at 6.061-8.166", "frequencyCaption": "door knocking one times and dog barking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_212.wav", "onoffCaption": "woman laughing at 3.453-5.736, 6.978-9.178", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_309.wav", "onoffCaption": "sheep goat bleating at 0.718-2.718, 4.398-6.398, 7.843-9.843", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_437.wav", "onoffCaption": "car horn honking at 2.062-6.311, 6.944-9.159", "frequencyCaption": "car horn honking two times"} +{"filepath": 
"data/multi_event_train/syn_462.wav", "onoffCaption": "duck quacking at 0.153-2.153, 2.773-4.773, 5.307-7.307", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_480.wav", "onoffCaption": "explosion at 1.567-3.631, 4.557-6.621, 7.919-9.983", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_579.wav", "onoffCaption": "cat meowing at 0.489-1.849, 2.754-3.763, 5.08-6.83 and car horn honking at 4.296-6.809 and spraying at 7.112-7.963", "frequencyCaption": "cat meowing three times and car horn honking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_644.wav", "onoffCaption": "duck quacking at 0.586-2.586", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_828.wav", "onoffCaption": "spraying at 2.197-4.781", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_933.wav", "onoffCaption": "spraying at 0.268-0.843, 1.489-2.064, 3.367-3.942 and duck quacking at 1.62-3.62", "frequencyCaption": "spraying three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_961.wav", "onoffCaption": "cat meowing at 2.641-3.653, 4.491-5.502", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_983.wav", "onoffCaption": "dog barking at 2.408-4.408, 5.296-7.296", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1107.wav", "onoffCaption": "dog barking at 0.471-2.471, 4.869-6.869, 7.846-9.846", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_1374.wav", "onoffCaption": "car horn honking at 2.04-5.215, 6.66-9.835", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1391.wav", "onoffCaption": "cat meowing at 0.847-2.579", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1551.wav", "onoffCaption": "cat meowing 
at 2.852-5.04, 7.233-8.508", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1639.wav", "onoffCaption": "whistling at 1.187-10.0 and thump thud at 3.478-7.928", "frequencyCaption": "whistling one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1722.wav", "onoffCaption": "woman laughing at 0.231-7.676 and duck quacking at 3.115-5.115, 6.127-8.127", "frequencyCaption": "woman laughing one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1807.wav", "onoffCaption": "burping belching at 0.067-2.888, 4.14-6.961", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1855.wav", "onoffCaption": "thump thud at 3.516-5.744, 6.49-8.718", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3165.wav", "onoffCaption": "thump thud at 0.594-5.044 and whistling at 7.097-10.0", "frequencyCaption": "thump thud one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3180.wav", "onoffCaption": "woman laughing at 3.115-5.72", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3316.wav", "onoffCaption": "sneeze at 0.781-2.488, 3.315-4.489 and thump thud at 7.487-10.0", "frequencyCaption": "sneeze two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3428.wav", "onoffCaption": "spraying at 3.09-6.109, 7.503-9.198", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3533.wav", "onoffCaption": "spraying at 2.095-3.176, 3.935-5.668 and door knocking at 3.221-5.951, 7.684-10.0", "frequencyCaption": "spraying two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3561.wav", "onoffCaption": "tapping clicking clanking at 2.666-6.106, 7.024-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3583.wav", "onoffCaption": "cow mooing at 
1.846-6.275, 7.561-9.783", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3740.wav", "onoffCaption": "duck quacking at 0.909-2.909, 3.443-5.443, 6.475-8.475 and spraying at 3.475-4.079, 4.593-5.197, 6.294-6.898 and sneeze at 4.982-7.443", "frequencyCaption": "duck quacking three times and spraying three times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3837.wav", "onoffCaption": "explosion at 0.018-2.02, 2.817-4.905, 6.975-9.847", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_3862.wav", "onoffCaption": "duck quacking at 0.105-2.105, 2.639-4.639, 5.211-7.211 and sheep goat bleating at 0.824-2.824, 3.832-5.832, 7.049-9.049 and train horn at 1.048-7.108", "frequencyCaption": "duck quacking three times and sheep goat bleating three times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3880.wav", "onoffCaption": "explosion at 2.536-5.592, 6.352-9.352", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3979.wav", "onoffCaption": "thump thud at 1.813-4.584 and sheep goat bleating at 2.77-4.77 and cat meowing at 7.863-9.407", "frequencyCaption": "thump thud one times and sheep goat bleating one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_247.wav", "onoffCaption": "car horn honking at 1.058-5.458 and spraying at 8.191-8.775", "frequencyCaption": "car horn honking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_346.wav", "onoffCaption": "tapping clicking clanking at 3.424-6.864 and gunshot at 4.765-6.765 and sneeze at 4.781-7.094", "frequencyCaption": "tapping clicking clanking one times and gunshot one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_487.wav", "onoffCaption": "gunshot at 2.351-4.521 and door slamming at 6.019-7.252", "frequencyCaption": "gunshot one times and door slamming one times"} +{"filepath": 
"data/multi_event_train/syn_611.wav", "onoffCaption": "train horn at 0.168-10.0 and dog barking at 0.823-2.823, 5.236-7.236", "frequencyCaption": "train horn one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_710.wav", "onoffCaption": "burping belching at 2.117-5.661, 6.597-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_898.wav", "onoffCaption": "duck quacking at 3.594-5.594, 7.372-9.372", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_966.wav", "onoffCaption": "train horn at 3.481-6.801", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1049.wav", "onoffCaption": "woman laughing at 2.805-5.03", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1152.wav", "onoffCaption": "dog barking at 3.6-6.521", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1220.wav", "onoffCaption": "gunshot at 0.582-2.582, 3.6-5.6, 7.272-9.778 and cat meowing at 3.225-4.372", "frequencyCaption": "gunshot three times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1321.wav", "onoffCaption": "cow mooing at 0.732-3.701 and gunshot at 2.699-4.699, 5.95-7.95", "frequencyCaption": "cow mooing one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1504.wav", "onoffCaption": "gunshot at 2.245-4.245, 4.846-6.846", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1676.wav", "onoffCaption": "thump thud at 0.322-4.772, 6.46-9.046 and gunshot at 1.752-3.752", "frequencyCaption": "thump thud two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1689.wav", "onoffCaption": "tapping clicking clanking at 0.371-3.811, 5.342-8.311", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1693.wav", "onoffCaption": "sneeze at 
2.165-3.872, 4.533-6.24 and door knocking at 2.828-5.068", "frequencyCaption": "sneeze two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1777.wav", "onoffCaption": "whistling at 3.115-8.29", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1788.wav", "onoffCaption": "door knocking at 0.823-3.932 and train horn at 6.027-9.797", "frequencyCaption": "door knocking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1792.wav", "onoffCaption": "train horn at 1.612-6.514, 7.852-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1800.wav", "onoffCaption": "duck quacking at 0.833-2.833 and dog barking at 4.411-6.411, 7.889-9.889", "frequencyCaption": "duck quacking one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3130.wav", "onoffCaption": "gunshot at 3.509-6.015", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3242.wav", "onoffCaption": "whistling at 0.169-3.144, 4.486-7.391", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3258.wav", "onoffCaption": "gunshot at 1.913-3.913 and tapping clicking clanking at 7.042-10.0", "frequencyCaption": "gunshot one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3343.wav", "onoffCaption": "woman laughing at 0.244-2.527", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3359.wav", "onoffCaption": "door slamming at 0.337-2.465", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_3498.wav", "onoffCaption": "door slamming at 0.132-2.59, 4.435-7.409 and tapping clicking clanking at 0.201-3.641, 5.333-8.773", "frequencyCaption": "door slamming two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3566.wav", "onoffCaption": "gunshot at 0.056-2.056, 
3.398-5.398 and spraying at 2.521-3.029, 4.355-4.855, 5.358-7.942", "frequencyCaption": "gunshot two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_3614.wav", "onoffCaption": "spraying at 3.09-3.871, 4.998-5.779", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3715.wav", "onoffCaption": "whistling at 2.984-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3887.wav", "onoffCaption": "dog barking at 1.311-3.311, 4.547-7.468", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_47.wav", "onoffCaption": "sneeze at 2.538-3.702, 5.546-6.8 and duck quacking at 3.634-5.634, 7.978-9.978", "frequencyCaption": "sneeze two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_135.wav", "onoffCaption": "sneeze at 0.624-1.716, 2.751-5.365, 5.998-8.974", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_208.wav", "onoffCaption": "car horn honking at 0.093-4.493, 6.927-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_313.wav", "onoffCaption": "car horn honking at 0.15-2.15 and door knocking at 4.327-6.576, 7.544-10.0", "frequencyCaption": "car horn honking one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_478.wav", "onoffCaption": "burping belching at 0.132-2.362, 3.987-6.313 and woman laughing at 3.778-7.166", "frequencyCaption": "burping belching two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_563.wav", "onoffCaption": "sneeze at 0.216-1.51, 3.715-4.809", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_586.wav", "onoffCaption": "cat meowing at 0.42-2.17 and door knocking at 2.95-6.712 and door slamming at 6.165-7.145", "frequencyCaption": "cat meowing one times and door knocking one times and door slamming one times"} +{"filepath": 
"data/multi_event_train/syn_745.wav", "onoffCaption": "whistling at 1.282-9.032 and sneeze at 3.16-5.156, 7.293-9.289", "frequencyCaption": "whistling one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_867.wav", "onoffCaption": "tapping clicking clanking at 0.487-3.927, 6.34-9.002", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_882.wav", "onoffCaption": "gunshot at 0.137-2.137, 4.515-6.515", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_999.wav", "onoffCaption": "thump thud at 0.977-4.024, 5.736-8.075", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1053.wav", "onoffCaption": "sheep goat bleating at 2.992-4.992, 5.618-7.618", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1148.wav", "onoffCaption": "whistling at 1.666-7.607 and cat meowing at 7.548-9.084", "frequencyCaption": "whistling one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1275.wav", "onoffCaption": "duck quacking at 1.585-3.585, 4.114-6.114", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1290.wav", "onoffCaption": "whistling at 1.248-9.259 and door knocking at 1.729-4.576, 5.851-8.698", "frequencyCaption": "whistling one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1405.wav", "onoffCaption": "explosion at 0.883-5.883 and spraying at 6.432-6.94, 8.695-9.203", "frequencyCaption": "explosion one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1623.wav", "onoffCaption": "door knocking at 1.398-7.458", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1738.wav", "onoffCaption": "sneeze at 2.589-3.692, 4.688-6.684, 7.629-9.174", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_1901.wav", "onoffCaption": 
"tapping clicking clanking at 1.129-4.569", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3031.wav", "onoffCaption": "whistling at 3.449-8.949", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3217.wav", "onoffCaption": "door slamming at 1.776-3.167, 5.234-6.134", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3467.wav", "onoffCaption": "cat meowing at 3.211-4.755, 5.561-7.538", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3482.wav", "onoffCaption": "sheep goat bleating at 3.012-5.012, 6.095-8.095", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3599.wav", "onoffCaption": "burping belching at 0.315-3.605, 4.439-6.669 and dog barking at 4.742-6.742 and train horn at 5.884-8.364", "frequencyCaption": "burping belching two times and dog barking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3641.wav", "onoffCaption": "car horn honking at 0.367-3.542, 4.043-7.218 and cow mooing at 0.835-5.815, 7.094-9.789", "frequencyCaption": "car horn honking two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3878.wav", "onoffCaption": "cow mooing at 2.415-7.395 and car horn honking at 2.709-7.031", "frequencyCaption": "cow mooing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3963.wav", "onoffCaption": "tapping clicking clanking at 0.761-4.201, 6.468-9.015", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3986.wav", "onoffCaption": "tapping clicking clanking at 1.468-4.908, 6.936-9.688", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_12.wav", "onoffCaption": "car horn honking at 0.54-5.447 and duck quacking at 3.391-5.391, 7.677-9.677", "frequencyCaption": "car 
horn honking one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_63.wav", "onoffCaption": "woman laughing at 1.87-4.436, 6.27-8.553", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_86.wav", "onoffCaption": "train horn at 3.875-7.875", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_111.wav", "onoffCaption": "car horn honking at 0.756-2.756, 4.181-6.708", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_160.wav", "onoffCaption": "cow mooing at 0.675-5.655", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_185.wav", "onoffCaption": "cow mooing at 1.172-4.182 and door knocking at 2.643-6.915", "frequencyCaption": "cow mooing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_279.wav", "onoffCaption": "cat meowing at 2.652-4.84, 5.951-8.087", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_337.wav", "onoffCaption": "spraying at 3.394-3.963", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_362.wav", "onoffCaption": "gunshot at 3.922-5.922", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_387.wav", "onoffCaption": "thump thud at 0.64-2.979, 3.98-6.751", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_536.wav", "onoffCaption": "thump thud at 1.602-6.052 and burping belching at 2.209-4.316, 5.552-7.659", "frequencyCaption": "thump thud one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_547.wav", "onoffCaption": "tapping clicking clanking at 1.737-5.177, 6.879-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_734.wav", "onoffCaption": "sneeze at 1.264-3.51, 5.768-8.014", "frequencyCaption": "sneeze two times"} 
+{"filepath": "data/multi_event_train/syn_832.wav", "onoffCaption": "door knocking at 0.635-2.699", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_843.wav", "onoffCaption": "dog barking at 0.476-2.476, 3.128-5.128 and duck quacking at 7.098-9.098", "frequencyCaption": "dog barking two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_929.wav", "onoffCaption": "burping belching at 1.548-7.492 and cat meowing at 2.021-3.065, 4.919-5.963", "frequencyCaption": "burping belching one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_958.wav", "onoffCaption": "burping belching at 3.236-6.236", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1006.wav", "onoffCaption": "tapping clicking clanking at 0.14-3.58, 5.135-8.575", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1077.wav", "onoffCaption": "tapping clicking clanking at 0.177-3.617, 5.908-7.999 and cat meowing at 0.395-1.581 and whistling at 0.574-8.959", "frequencyCaption": "tapping clicking clanking two times and cat meowing one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1092.wav", "onoffCaption": "burping belching at 0.135-4.004, 5.916-8.783", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1189.wav", "onoffCaption": "whistling at 3.358-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1204.wav", "onoffCaption": "sheep goat bleating at 0.234-2.234, 2.756-4.756, 5.332-7.332", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1421.wav", "onoffCaption": "cat meowing at 0.461-1.563 and burping belching at 0.476-5.476, 7.964-10.0", "frequencyCaption": "cat meowing one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1450.wav", 
"onoffCaption": "gunshot at 0.897-2.897 and cat meowing at 5.661-7.197", "frequencyCaption": "gunshot one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1652.wav", "onoffCaption": "door slamming at 0.328-1.179 and cat meowing at 4.049-5.198, 6.096-7.245 and spraying at 5.556-6.497, 8.78-9.955", "frequencyCaption": "door slamming one times and cat meowing two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1749.wav", "onoffCaption": "gunshot at 1.998-3.998 and cow mooing at 5.91-8.879", "frequencyCaption": "gunshot one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1925.wav", "onoffCaption": "explosion at 1.732-4.604", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1954.wav", "onoffCaption": "duck quacking at 1.19-3.19, 5.027-7.027", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3015.wav", "onoffCaption": "explosion at 0.061-5.061 and door knocking at 0.093-3.855", "frequencyCaption": "explosion one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3064.wav", "onoffCaption": "door knocking at 0.013-2.365, 3.513-5.865", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3081.wav", "onoffCaption": "woman laughing at 1.463-4.548, 5.645-7.891", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3266.wav", "onoffCaption": "burping belching at 0.198-3.4 and gunshot at 7.023-9.524", "frequencyCaption": "burping belching one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3283.wav", "onoffCaption": "cow mooing at 3.078-8.058", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3398.wav", "onoffCaption": "sheep goat bleating at 1.518-3.518 and sneeze at 6.834-8.16", "frequencyCaption": "sheep goat bleating one times and sneeze one times"} +{"filepath": 
"data/multi_event_train/syn_3432.wav", "onoffCaption": "sneeze at 2.248-3.482, 5.54-6.774", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3443.wav", "onoffCaption": "whistling at 0.159-5.334", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3529.wav", "onoffCaption": "door knocking at 1.623-4.46", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3558.wav", "onoffCaption": "spraying at 1.0-2.486 and cow mooing at 5.754-8.723", "frequencyCaption": "spraying one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3630.wav", "onoffCaption": "train horn at 0.886-4.286, 6.072-8.872 and tapping clicking clanking at 2.266-5.706", "frequencyCaption": "train horn two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3936.wav", "onoffCaption": "spraying at 0.586-2.281, 4.305-6.0, 6.736-8.431 and sheep goat bleating at 3.311-7.231, 7.902-9.902", "frequencyCaption": "spraying three times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3947.wav", "onoffCaption": "train horn at 0.112-2.912 and whistling at 1.057-4.032 and cow mooing at 5.466-8.476", "frequencyCaption": "train horn one times and whistling one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_36.wav", "onoffCaption": "sheep goat bleating at 2.518-5.518, 6.541-9.541", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_144.wav", "onoffCaption": "cat meowing at 0.305-5.305, 6.539-10.0", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_409.wav", "onoffCaption": "explosion at 1.848-4.848, 5.553-8.553", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_413.wav", "onoffCaption": "door slamming at 1.337-1.837, 2.978-3.478", "frequencyCaption": "door slamming two times"} 
+{"filepath": "data/multi_event_train/syn_508.wav", "onoffCaption": "gunshot at 2.17-4.17, 5.36-7.36", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_512.wav", "onoffCaption": "dog barking at 0.622-2.622", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_660.wav", "onoffCaption": "whistling at 2.177-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_685.wav", "onoffCaption": "cow mooing at 0.06-3.029, 4.064-6.951, 7.727-9.748", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/multi_event_train/syn_761.wav", "onoffCaption": "thump thud at 0.843-5.293", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_784.wav", "onoffCaption": "cow mooing at 0.475-3.444, 5.211-7.742 and tapping clicking clanking at 3.309-6.749", "frequencyCaption": "cow mooing two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_816.wav", "onoffCaption": "explosion at 1.724-3.726, 5.689-7.691", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_917.wav", "onoffCaption": "explosion at 1.982-6.982 and tapping clicking clanking at 3.567-7.007", "frequencyCaption": "explosion one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1022.wav", "onoffCaption": "whistling at 0.122-8.133 and door slamming at 0.182-1.321, 2.275-2.775, 4.526-6.526", "frequencyCaption": "whistling one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_1038.wav", "onoffCaption": "train horn at 0.264-2.938, 5.056-7.536 and tapping clicking clanking at 0.704-4.144 and door knocking at 2.107-4.944, 5.54-8.377", "frequencyCaption": "train horn two times and tapping clicking clanking one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1123.wav", "onoffCaption": "tapping clicking clanking at 
0.144-3.584, 5.286-7.478", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1139.wav", "onoffCaption": "woman laughing at 0.626-2.82, 5.16-7.354", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1251.wav", "onoffCaption": "thump thud at 2.421-6.796", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1350.wav", "onoffCaption": "door knocking at 0.09-2.927, 5.354-7.487 and woman laughing at 1.506-3.7, 4.763-6.851", "frequencyCaption": "door knocking two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1474.wav", "onoffCaption": "gunshot at 2.464-4.464", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_1491.wav", "onoffCaption": "tapping clicking clanking at 0.394-3.834, 5.02-8.46", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1575.wav", "onoffCaption": "tapping clicking clanking at 0.154-3.594, 4.396-6.695 and burping belching at 0.413-2.507, 3.041-5.148 and door slamming at 0.419-3.38, 4.399-7.36", "frequencyCaption": "tapping clicking clanking two times and burping belching two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1590.wav", "onoffCaption": "tapping clicking clanking at 2.991-6.431 and sneeze at 4.173-6.132", "frequencyCaption": "tapping clicking clanking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1607.wav", "onoffCaption": "duck quacking at 2.35-4.35, 5.08-7.08", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1706.wav", "onoffCaption": "train horn at 2.838-7.019", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1871.wav", "onoffCaption": "woman laughing at 0.217-2.855", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1894.wav", 
"onoffCaption": "gunshot at 0.413-2.919, 4.301-6.807", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1970.wav", "onoffCaption": "tapping clicking clanking at 0.321-3.761, 5.461-8.901", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1995.wav", "onoffCaption": "tapping clicking clanking at 0.432-3.872, 4.555-7.09", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3040.wav", "onoffCaption": "duck quacking at 2.579-4.579, 6.775-8.775", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3141.wav", "onoffCaption": "cat meowing at 0.69-1.83, 2.623-3.763, 4.769-5.909 and car horn honking at 7.826-10.0", "frequencyCaption": "cat meowing three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3229.wav", "onoffCaption": "woman laughing at 3.984-10.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3233.wav", "onoffCaption": "thump thud at 2.174-4.674, 6.749-9.249 and door knocking at 2.65-6.266", "frequencyCaption": "thump thud two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3328.wav", "onoffCaption": "whistling at 0.727-7.976 and thump thud at 1.013-5.463", "frequencyCaption": "whistling one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3332.wav", "onoffCaption": "whistling at 0.582-7.158", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3416.wav", "onoffCaption": "cow mooing at 0.067-3.365, 3.932-6.914 and train horn at 2.227-6.667", "frequencyCaption": "cow mooing two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3517.wav", "onoffCaption": "gunshot at 1.138-3.138", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3665.wav", "onoffCaption": "car horn honking at 
0.511-4.833 and whistling at 3.066-5.295, 6.173-8.456", "frequencyCaption": "car horn honking one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_3680.wav", "onoffCaption": "sneeze at 0.858-2.386", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3764.wav", "onoffCaption": "tapping clicking clanking at 1.057-4.497, 6.107-8.928 and whistling at 1.373-3.382, 4.394-7.369", "frequencyCaption": "tapping clicking clanking two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_3781.wav", "onoffCaption": "car horn honking at 2.047-5.634 and door slamming at 8.135-8.94", "frequencyCaption": "car horn honking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3809.wav", "onoffCaption": "thump thud at 3.075-5.846, 6.39-9.161", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3813.wav", "onoffCaption": "train horn at 1.522-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3908.wav", "onoffCaption": "spraying at 0.562-2.295 and gunshot at 5.254-7.384", "frequencyCaption": "spraying one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3912.wav", "onoffCaption": "gunshot at 2.345-4.345, 5.839-7.839", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_79.wav", "onoffCaption": "door knocking at 1.555-4.675, 6.777-9.897", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_189.wav", "onoffCaption": "dog barking at 2.828-4.828, 6.53-8.53", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_204.wav", "onoffCaption": "car horn honking at 2.9-5.365, 7.793-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_236.wav", "onoffCaption": "car horn honking at 2.477-4.977, 5.601-8.002", "frequencyCaption": "car horn honking two 
times"} +{"filepath": "data/multi_event_train/syn_263.wav", "onoffCaption": "cow mooing at 0.559-3.857, 5.176-8.474", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_286.wav", "onoffCaption": "spraying at 3.75-6.186, 7.898-10.0", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_378.wav", "onoffCaption": "woman laughing at 0.099-2.894, 4.837-7.297 and explosion at 0.157-3.157", "frequencyCaption": "woman laughing two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_421.wav", "onoffCaption": "tapping clicking clanking at 2.375-5.815 and sheep goat bleating at 4.545-6.545", "frequencyCaption": "tapping clicking clanking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_446.wav", "onoffCaption": "cat meowing at 0.124-1.337, 2.502-3.715, 4.842-6.055", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_635.wav", "onoffCaption": "sheep goat bleating at 0.123-2.123, 2.76-4.76, 5.892-7.892", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_652.wav", "onoffCaption": "cow mooing at 3.561-6.571, 7.654-10.0 and duck quacking at 4.115-6.115", "frequencyCaption": "cow mooing two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_859.wav", "onoffCaption": "duck quacking at 0.31-2.31, 4.52-6.52 and thump thud at 4.117-8.035", "frequencyCaption": "duck quacking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_925.wav", "onoffCaption": "whistling at 1.132-8.882", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_942.wav", "onoffCaption": "door slamming at 2.154-3.054", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1088.wav", "onoffCaption": "cow mooing at 0.067-3.365 and explosion at 6.581-10.0", "frequencyCaption": "cow mooing one 
times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1111.wav", "onoffCaption": "whistling at 2.179-9.929", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1176.wav", "onoffCaption": "spraying at 0.051-0.801, 2.592-3.342, 5.836-6.586 and dog barking at 1.423-3.423", "frequencyCaption": "spraying three times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1193.wav", "onoffCaption": "cow mooing at 0.157-3.455, 4.239-7.537 and door slamming at 1.499-2.479, 3.672-4.652", "frequencyCaption": "cow mooing two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1279.wav", "onoffCaption": "tapping clicking clanking at 0.933-4.373, 4.929-7.481", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1305.wav", "onoffCaption": "woman laughing at 3.907-6.19 and duck quacking at 7.724-9.724", "frequencyCaption": "woman laughing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1362.wav", "onoffCaption": "sheep goat bleating at 2.838-4.838", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1387.wav", "onoffCaption": "dog barking at 1.304-3.304, 3.955-5.955, 6.664-8.664", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_1520.wav", "onoffCaption": "tapping clicking clanking at 2.832-6.272, 6.792-9.216", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1547.wav", "onoffCaption": "dog barking at 0.286-2.286, 4.345-6.345", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1648.wav", "onoffCaption": "gunshot at 0.398-2.398 and cat meowing at 2.098-3.11, 5.228-6.24", "frequencyCaption": "gunshot one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1734.wav", "onoffCaption": "explosion at 2.33-5.33, 
6.621-8.676 and duck quacking at 3.013-5.013, 5.937-7.937", "frequencyCaption": "explosion two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1753.wav", "onoffCaption": "thump thud at 1.028-4.695, 5.376-9.043", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1824.wav", "onoffCaption": "dog barking at 0.275-2.275, 4.236-6.636", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1843.wav", "onoffCaption": "train horn at 2.459-5.779 and cow mooing at 2.492-5.502", "frequencyCaption": "train horn one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1958.wav", "onoffCaption": "sheep goat bleating at 2.891-4.891 and spraying at 8.283-9.347", "frequencyCaption": "sheep goat bleating one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3068.wav", "onoffCaption": "door knocking at 2.829-6.204, 7.014-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3114.wav", "onoffCaption": "cow mooing at 2.794-5.776, 6.601-9.583", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3173.wav", "onoffCaption": "woman laughing at 3.122-6.222", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3196.wav", "onoffCaption": "duck quacking at 0.461-2.461, 2.976-4.976, 5.619-7.619", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3299.wav", "onoffCaption": "whistling at 0.056-8.441", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3300.wav", "onoffCaption": "spraying at 0.055-1.055 and woman laughing at 0.776-4.057, 4.642-7.923", "frequencyCaption": "spraying one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3367.wav", "onoffCaption": "cow mooing at 0.244-4.673 and thump thud at 2.481-5.528, 7.722-9.95", 
"frequencyCaption": "cow mooing one times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_3382.wav", "onoffCaption": "cat meowing at 1.522-6.522, 8.431-9.443", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3459.wav", "onoffCaption": "train horn at 2.054-6.912 and door slamming at 4.29-6.486, 7.105-9.233", "frequencyCaption": "train horn one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_3525.wav", "onoffCaption": "dog barking at 2.799-4.799, 6.347-8.347", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3542.wav", "onoffCaption": "car horn honking at 0.751-3.664 and duck quacking at 6.723-8.723", "frequencyCaption": "car horn honking one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3731.wav", "onoffCaption": "sheep goat bleating at 0.142-2.142, 3.083-5.083 and burping belching at 7.866-9.897", "frequencyCaption": "sheep goat bleating two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3756.wav", "onoffCaption": "cow mooing at 1.082-6.062, 7.102-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3821.wav", "onoffCaption": "sneeze at 1.225-2.611, 3.422-4.808, 5.872-7.258 and cat meowing at 2.894-4.465, 6.911-7.922", "frequencyCaption": "sneeze three times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3846.wav", "onoffCaption": "car horn honking at 0.208-4.53, 6.828-9.725 and whistling at 1.886-7.888", "frequencyCaption": "car horn honking two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_51.wav", "onoffCaption": "cat meowing at 0.538-5.538, 6.28-8.719 and door knocking at 5.551-7.678", "frequencyCaption": "cat meowing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_139.wav", "onoffCaption": "car horn honking at 0.702-3.628, 4.583-7.509", 
"frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_251.wav", "onoffCaption": "whistling at 1.799-6.974, 7.85-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_350.wav", "onoffCaption": "train horn at 0.188-3.668, 5.14-8.62 and gunshot at 2.187-4.187, 5.945-7.945 and door knocking at 2.794-5.914, 6.611-8.799", "frequencyCaption": "train horn two times and gunshot two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_474.wav", "onoffCaption": "dog barking at 0.186-3.107, 4.014-6.014, 6.644-8.644 and duck quacking at 0.882-2.882, 5.239-7.239", "frequencyCaption": "dog barking three times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_491.wav", "onoffCaption": "duck quacking at 3.523-5.523", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_590.wav", "onoffCaption": "cat meowing at 2.657-7.017, 7.526-10.0", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_607.wav", "onoffCaption": "whistling at 1.666-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_706.wav", "onoffCaption": "sneeze at 0.5-2.183, 4.167-5.85", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_749.wav", "onoffCaption": "thump thud at 0.647-3.147, 4.243-6.582 and dog barking at 4.148-6.148", "frequencyCaption": "thump thud two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_871.wav", "onoffCaption": "train horn at 1.148-3.822, 5.745-8.419 and sheep goat bleating at 2.434-4.434", "frequencyCaption": "train horn two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_894.wav", "onoffCaption": "cow mooing at 2.365-6.794, 7.82-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_970.wav", "onoffCaption": "door knocking at 
2.028-4.796, 6.209-8.977 and whistling at 2.521-5.496", "frequencyCaption": "door knocking two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_995.wav", "onoffCaption": "woman laughing at 3.336-6.131", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1045.wav", "onoffCaption": "woman laughing at 3.529-10.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1144.wav", "onoffCaption": "thump thud at 0.479-4.929 and tapping clicking clanking at 3.187-6.627", "frequencyCaption": "thump thud one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1236.wav", "onoffCaption": "cow mooing at 2.627-5.596, 6.917-9.886", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1337.wav", "onoffCaption": "burping belching at 2.433-4.636, 5.194-7.367", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1409.wav", "onoffCaption": "sneeze at 3.14-5.601, 7.313-9.789 and thump thud at 3.253-6.3", "frequencyCaption": "sneeze two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1413.wav", "onoffCaption": "sneeze at 0.398-2.105 and spraying at 0.939-3.067", "frequencyCaption": "sneeze one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1508.wav", "onoffCaption": "explosion at 0.103-3.29", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1512.wav", "onoffCaption": "door slamming at 0.683-2.596 and car horn honking at 5.318-9.159", "frequencyCaption": "door slamming one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1660.wav", "onoffCaption": "cow mooing at 0.418-3.428, 3.973-6.583", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1685.wav", "onoffCaption": "sheep goat bleating at 0.076-2.076, 2.698-4.698, 5.861-7.861", 
"frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1761.wav", "onoffCaption": "thump thud at 0.738-5.188, 7.319-9.819", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1784.wav", "onoffCaption": "cow mooing at 0.738-5.167, 5.903-8.899", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1816.wav", "onoffCaption": "cat meowing at 0.631-4.764, 6.378-7.527", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1917.wav", "onoffCaption": "burping belching at 0.12-3.664, 4.722-8.266", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3027.wav", "onoffCaption": "explosion at 1.301-6.301, 7.603-9.923 and gunshot at 2.288-4.762, 5.447-7.921", "frequencyCaption": "explosion two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_3126.wav", "onoffCaption": "spraying at 0.002-0.906, 1.602-3.335 and gunshot at 6.789-8.789", "frequencyCaption": "spraying two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3254.wav", "onoffCaption": "burping belching at 1.066-4.346 and sheep goat bleating at 2.429-4.429, 5.198-7.198", "frequencyCaption": "burping belching one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3355.wav", "onoffCaption": "cat meowing at 0.682-1.709, 2.888-3.915, 4.78-5.807 and burping belching at 0.736-3.359", "frequencyCaption": "cat meowing three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3471.wav", "onoffCaption": "spraying at 1.916-3.0", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_3494.wav", "onoffCaption": "explosion at 2.713-5.441, 7.005-9.733", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3570.wav", "onoffCaption": "gunshot at 1.296-3.296, 4.812-6.812", "frequencyCaption": 
"gunshot two times"} +{"filepath": "data/multi_event_train/syn_3595.wav", "onoffCaption": "cat meowing at 2.868-7.228, 8.142-9.291", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3602.wav", "onoffCaption": "explosion at 0.527-3.527 and train horn at 1.368-3.808, 5.59-8.03", "frequencyCaption": "explosion one times and train horn two times"} +{"filepath": "data/multi_event_train/syn_3618.wav", "onoffCaption": "sheep goat bleating at 3.257-5.257", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3703.wav", "onoffCaption": "explosion at 0.155-5.155 and sheep goat bleating at 0.686-3.686, 4.666-7.666 and duck quacking at 6.741-8.741", "frequencyCaption": "explosion one times and sheep goat bleating two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3719.wav", "onoffCaption": "sheep goat bleating at 0.275-2.275, 3.223-5.223, 5.77-7.77", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3874.wav", "onoffCaption": "duck quacking at 3.559-5.559, 6.561-8.561", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3891.wav", "onoffCaption": "whistling at 0.043-2.918 and dog barking at 0.052-2.052", "frequencyCaption": "whistling one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3990.wav", "onoffCaption": "tapping clicking clanking at 0.705-4.145, 5.601-7.818", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_90.wav", "onoffCaption": "woman laughing at 0.803-3.408, 3.992-6.787", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_123.wav", "onoffCaption": "sheep goat bleating at 0.202-2.202, 3.375-5.375 and sneeze at 3.476-5.472, 6.037-8.033 and cow mooing at 5.23-8.24", "frequencyCaption": "sheep goat bleating two times and sneeze two times and cow mooing one 
times"} +{"filepath": "data/multi_event_train/syn_176.wav", "onoffCaption": "cow mooing at 3.536-6.834, 7.601-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_193.wav", "onoffCaption": "explosion at 0.339-3.339, 5.515-7.877", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_305.wav", "onoffCaption": "train horn at 3.158-5.625, 6.413-9.115 and cow mooing at 5.053-8.022", "frequencyCaption": "train horn two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_374.wav", "onoffCaption": "dog barking at 0.106-2.106, 3.22-5.22", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_391.wav", "onoffCaption": "explosion at 0.435-3.164, 4.541-6.834 and spraying at 3.273-6.292", "frequencyCaption": "explosion two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_520.wav", "onoffCaption": "thump thud at 1.037-3.376 and gunshot at 5.909-7.909", "frequencyCaption": "thump thud one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_575.wav", "onoffCaption": "door knocking at 2.665-5.433 and spraying at 7.735-8.339, 8.877-9.481", "frequencyCaption": "door knocking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_639.wav", "onoffCaption": "dog barking at 0.024-2.024, 2.943-4.943, 5.802-7.802", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_648.wav", "onoffCaption": "car horn honking at 0.672-4.326 and tapping clicking clanking at 6.904-10.0", "frequencyCaption": "car horn honking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_722.wav", "onoffCaption": "whistling at 0.563-8.574 and tapping clicking clanking at 1.14-4.58", "frequencyCaption": "whistling one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_753.wav", "onoffCaption": "explosion at 
0.834-3.834, 5.832-8.832", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_824.wav", "onoffCaption": "burping belching at 2.968-5.198, 6.24-9.24", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_855.wav", "onoffCaption": "sneeze at 1.041-3.655, 5.037-7.651", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1010.wav", "onoffCaption": "cow mooing at 2.924-5.893", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1084.wav", "onoffCaption": "explosion at 2.02-7.02", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1212.wav", "onoffCaption": "cow mooing at 0.686-3.696 and door knocking at 7.844-10.0", "frequencyCaption": "cow mooing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1263.wav", "onoffCaption": "whistling at 2.648-5.523, 6.602-9.287", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1286.wav", "onoffCaption": "sneeze at 2.554-4.513 and thump thud at 6.691-10.0", "frequencyCaption": "sneeze one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1309.wav", "onoffCaption": "cow mooing at 3.532-7.961 and spraying at 4.142-5.389, 6.187-7.128", "frequencyCaption": "cow mooing one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1378.wav", "onoffCaption": "door slamming at 0.234-2.234 and explosion at 5.159-7.877 and cat meowing at 5.508-7.502", "frequencyCaption": "door slamming one times and explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1437.wav", "onoffCaption": "car horn honking at 0.038-2.964, 4.164-6.34 and dog barking at 0.425-2.425", "frequencyCaption": "car horn honking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1446.wav", "onoffCaption": "cow mooing at 3.706-6.688, 7.502-10.0", 
"frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1635.wav", "onoffCaption": "car horn honking at 3.734-6.081, 6.76-8.924", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1644.wav", "onoffCaption": "woman laughing at 4.06-6.479, 7.285-9.704", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1828.wav", "onoffCaption": "dog barking at 0.029-2.029", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1859.wav", "onoffCaption": "sneeze at 0.081-3.156 and door slamming at 1.281-1.962", "frequencyCaption": "sneeze one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1933.wav", "onoffCaption": "spraying at 0.054-0.835, 1.62-2.189, 3.452-5.185 and sheep goat bleating at 1.264-3.264, 3.814-5.814, 7.277-9.277", "frequencyCaption": "spraying three times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1942.wav", "onoffCaption": "sheep goat bleating at 1.739-3.739, 6.232-8.232", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3003.wav", "onoffCaption": "whistling at 1.472-9.857 and burping belching at 1.552-4.552", "frequencyCaption": "whistling one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3072.wav", "onoffCaption": "gunshot at 0.892-2.892, 4.482-6.983", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3097.wav", "onoffCaption": "tapping clicking clanking at 0.349-3.789, 4.563-8.003", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3118.wav", "onoffCaption": "door knocking at 1.689-5.457", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3169.wav", "onoffCaption": "train horn at 3.286-8.188", "frequencyCaption": "train horn one times"} +{"filepath": 
"data/multi_event_train/syn_3201.wav", "onoffCaption": "woman laughing at 0.017-6.751 and sheep goat bleating at 4.882-6.882", "frequencyCaption": "woman laughing one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3270.wav", "onoffCaption": "spraying at 0.023-1.718, 3.241-4.936 and burping belching at 2.094-5.374", "frequencyCaption": "spraying two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3295.wav", "onoffCaption": "gunshot at 0.894-2.894 and tapping clicking clanking at 1.696-5.136, 6.245-8.873 and cat meowing at 3.659-4.744", "frequencyCaption": "gunshot one times and tapping clicking clanking two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3424.wav", "onoffCaption": "door slamming at 2.693-3.832, 5.0-6.139, 6.66-7.799", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3455.wav", "onoffCaption": "whistling at 0.61-9.665", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3626.wav", "onoffCaption": "tapping clicking clanking at 3.218-6.658", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3657.wav", "onoffCaption": "gunshot at 0.034-2.034, 3.662-5.662, 7.081-9.081", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3920.wav", "onoffCaption": "door slamming at 2.209-5.17", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_3975.wav", "onoffCaption": "whistling at 1.833-7.333 and thump thud at 3.092-7.467 and cat meowing at 4.531-6.115", "frequencyCaption": "whistling one times and thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_20.wav", "onoffCaption": "thump thud at 0.02-4.47", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_75.wav", "onoffCaption": "burping belching at 
0.181-2.704, 4.987-7.081 and duck quacking at 1.551-3.551, 5.137-7.137", "frequencyCaption": "burping belching two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_107.wav", "onoffCaption": "cow mooing at 2.043-7.023", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_148.wav", "onoffCaption": "door knocking at 2.621-4.809 and explosion at 6.949-9.821", "frequencyCaption": "door knocking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_152.wav", "onoffCaption": "burping belching at 0.519-3.778 and tapping clicking clanking at 6.986-10.0", "frequencyCaption": "burping belching one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_321.wav", "onoffCaption": "woman laughing at 0.731-3.336, 4.07-6.675, 7.954-10.0", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_405.wav", "onoffCaption": "cow mooing at 0.652-5.632, 6.45-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_504.wav", "onoffCaption": "door knocking at 0.327-5.16, 5.971-8.528 and dog barking at 2.262-4.262 and cat meowing at 3.064-7.424", "frequencyCaption": "door knocking two times and dog barking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_551.wav", "onoffCaption": "cow mooing at 0.409-3.378, 3.897-6.651", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_689.wav", "onoffCaption": "gunshot at 1.993-3.993 and tapping clicking clanking at 6.029-9.469", "frequencyCaption": "gunshot one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_777.wav", "onoffCaption": "burping belching at 0.758-2.961", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_792.wav", "onoffCaption": "dog barking at 0.781-2.781", "frequencyCaption": "dog barking one times"} 
+{"filepath": "data/multi_event_train/syn_800.wav", "onoffCaption": "spraying at 0.055-0.563, 1.558-2.558, 3.071-3.922 and woman laughing at 5.64-7.84", "frequencyCaption": "spraying three times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_901.wav", "onoffCaption": "door slamming at 0.191-1.715, 3.34-4.359 and sneeze at 7.275-10.0", "frequencyCaption": "door slamming two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1034.wav", "onoffCaption": "whistling at 2.152-9.807", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1061.wav", "onoffCaption": "train horn at 0.129-2.266 and sheep goat bleating at 1.564-4.86, 5.722-7.722 and spraying at 6.161-7.408", "frequencyCaption": "train horn one times and sheep goat bleating two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1135.wav", "onoffCaption": "cat meowing at 0.983-2.123, 2.981-5.67 and thump thud at 1.87-6.32", "frequencyCaption": "cat meowing two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1247.wav", "onoffCaption": "burping belching at 3.068-6.612, 7.278-9.991", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1462.wav", "onoffCaption": "explosion at 0.227-3.095, 4.092-6.774, 7.66-9.67", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_1478.wav", "onoffCaption": "explosion at 0.04-2.912, 4.447-7.319", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1487.wav", "onoffCaption": "thump thud at 2.941-7.391", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1563.wav", "onoffCaption": "duck quacking at 2.444-4.444 and woman laughing at 7.55-10.0", "frequencyCaption": "duck quacking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1579.wav", "onoffCaption": "door knocking at 0.166-3.928, 
4.859-8.621 and door slamming at 6.402-9.183", "frequencyCaption": "door knocking two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1586.wav", "onoffCaption": "door knocking at 1.791-4.031, 5.253-7.493 and train horn at 2.702-5.902", "frequencyCaption": "door knocking two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1611.wav", "onoffCaption": "thump thud at 0.696-4.363 and sheep goat bleating at 2.839-5.919, 7.879-9.879", "frequencyCaption": "thump thud one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1867.wav", "onoffCaption": "cow mooing at 2.159-5.141, 5.718-8.591", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1882.wav", "onoffCaption": "gunshot at 0.885-3.125, 5.094-7.334 and sheep goat bleating at 2.826-4.826, 6.678-8.678", "frequencyCaption": "gunshot two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1898.wav", "onoffCaption": "duck quacking at 3.245-5.245, 6.684-8.684", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1966.wav", "onoffCaption": "explosion at 0.09-2.097, 4.572-7.1", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1983.wav", "onoffCaption": "dog barking at 0.369-2.369, 4.251-6.251", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1999.wav", "onoffCaption": "cat meowing at 2.229-3.24", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3056.wav", "onoffCaption": "train horn at 1.134-3.774", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3157.wav", "onoffCaption": "train horn at 0.176-4.296 and cat meowing at 7.151-8.706", "frequencyCaption": "train horn one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3225.wav", "onoffCaption": "car horn honking at 
2.801-5.727, 6.289-8.754 and duck quacking at 4.606-6.606", "frequencyCaption": "car horn honking two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3400.wav", "onoffCaption": "explosion at 1.732-4.726, 7.146-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3501.wav", "onoffCaption": "duck quacking at 0.252-2.252", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3673.wav", "onoffCaption": "spraying at 0.097-1.161, 2.725-4.853 and gunshot at 0.222-2.222, 4.038-6.038, 7.509-9.509", "frequencyCaption": "spraying two times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_3696.wav", "onoffCaption": "train horn at 1.872-5.992", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3768.wav", "onoffCaption": "burping belching at 0.085-4.085, 5.224-9.224 and train horn at 5.33-9.53", "frequencyCaption": "burping belching two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3805.wav", "onoffCaption": "explosion at 0.285-2.813, 5.27-8.27 and duck quacking at 1.387-3.387", "frequencyCaption": "explosion two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3904.wav", "onoffCaption": "tapping clicking clanking at 2.524-5.964, 7.219-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3951.wav", "onoffCaption": "burping belching at 2.919-6.121, 6.638-9.84", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_220.wav", "onoffCaption": "cow mooing at 1.917-6.346, 7.969-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_227.wav", "onoffCaption": "cat meowing at 3.979-5.006 and cow mooing at 7.653-10.0", "frequencyCaption": "cat meowing one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_275.wav", 
"onoffCaption": "tapping clicking clanking at 0.18-3.62", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_290.wav", "onoffCaption": "train horn at 0.463-3.703, 5.072-7.818 and whistling at 5.549-7.778", "frequencyCaption": "train horn two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_402.wav", "onoffCaption": "woman laughing at 1.858-4.496, 5.165-7.649 and burping belching at 2.466-5.466", "frequencyCaption": "woman laughing two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_450.wav", "onoffCaption": "tapping clicking clanking at 3.642-7.082", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_519.wav", "onoffCaption": "tapping clicking clanking at 2.995-6.435", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_623.wav", "onoffCaption": "dog barking at 0.836-2.836, 3.847-5.847, 6.983-8.983", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_671.wav", "onoffCaption": "explosion at 2.427-4.52, 5.452-8.143", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_676.wav", "onoffCaption": "spraying at 2.443-3.443 and train horn at 2.832-6.192", "frequencyCaption": "spraying one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_693.wav", "onoffCaption": "burping belching at 1.052-6.653, 7.502-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_694.wav", "onoffCaption": "tapping clicking clanking at 0.286-3.726, 4.727-8.167", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_738.wav", "onoffCaption": "burping belching at 2.978-9.955", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_788.wav", "onoffCaption": "woman laughing at 
2.872-4.964, 6.418-8.711", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_906.wav", "onoffCaption": "duck quacking at 1.199-3.199, 5.451-7.451 and spraying at 1.857-2.798", "frequencyCaption": "duck quacking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_954.wav", "onoffCaption": "woman laughing at 1.448-3.685, 5.85-8.087", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1029.wav", "onoffCaption": "woman laughing at 0.266-2.852 and spraying at 1.809-2.393, 4.645-5.229", "frequencyCaption": "woman laughing one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1132.wav", "onoffCaption": "train horn at 0.148-3.508, 4.725-8.085", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1160.wav", "onoffCaption": "sheep goat bleating at 0.318-2.318, 3.094-5.094, 7.441-9.441", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1185.wav", "onoffCaption": "thump thud at 0.834-3.605 and duck quacking at 1.364-3.364, 5.81-7.81", "frequencyCaption": "thump thud one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1208.wav", "onoffCaption": "spraying at 1.244-1.752, 4.209-4.959 and sneeze at 7.711-9.037", "frequencyCaption": "spraying two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1313.wav", "onoffCaption": "cow mooing at 0.194-3.163, 5.187-7.601 and burping belching at 0.273-2.304 and woman laughing at 3.885-6.585", "frequencyCaption": "cow mooing two times and burping belching one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1341.wav", "onoffCaption": "cow mooing at 2.719-5.701, 6.832-9.814", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1346.wav", "onoffCaption": "spraying at 0.168-1.019", "frequencyCaption": "spraying one times"} 
+{"filepath": "data/multi_event_train/syn_1536.wav", "onoffCaption": "car horn honking at 1.992-5.833, 6.819-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1564.wav", "onoffCaption": "cat meowing at 0.177-2.072, 4.308-6.203 and duck quacking at 0.678-2.678, 4.915-6.915", "frequencyCaption": "cat meowing two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1581.wav", "onoffCaption": "train horn at 2.551-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1710.wav", "onoffCaption": "cat meowing at 0.096-1.198, 2.271-3.373, 4.359-5.461 and explosion at 0.76-2.824, 3.965-6.029 and sheep goat bleating at 4.978-6.978", "frequencyCaption": "cat meowing three times and explosion two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1717.wav", "onoffCaption": "explosion at 0.595-3.595 and woman laughing at 6.279-8.634", "frequencyCaption": "explosion one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1745.wav", "onoffCaption": "burping belching at 0.264-3.264, 4.921-7.921", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1832.wav", "onoffCaption": "train horn at 2.432-5.072 and cow mooing at 2.509-5.478, 6.707-9.676", "frequencyCaption": "train horn one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1860.wav", "onoffCaption": "cat meowing at 0.869-2.009, 4.238-5.265, 6.706-8.666", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1885.wav", "onoffCaption": "thump thud at 0.058-2.286", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1929.wav", "onoffCaption": "train horn at 1.408-3.563, 4.706-6.861", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3019.wav", "onoffCaption": "tapping clicking clanking at 
1.778-5.218, 6.534-9.974", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3102.wav", "onoffCaption": "dog barking at 2.444-4.444 and tapping clicking clanking at 5.38-8.82 and thump thud at 6.34-8.568", "frequencyCaption": "dog barking one times and tapping clicking clanking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3150.wav", "onoffCaption": "door knocking at 0.459-2.922 and sneeze at 7.356-9.419", "frequencyCaption": "door knocking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3238.wav", "onoffCaption": "whistling at 0.15-9.815 and sheep goat bleating at 1.787-3.787, 4.729-6.729", "frequencyCaption": "whistling one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3323.wav", "onoffCaption": "spraying at 0.067-0.817", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_3324.wav", "onoffCaption": "sneeze at 2.984-7.04", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3371.wav", "onoffCaption": "duck quacking at 2.226-4.226, 5.912-7.912", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3394.wav", "onoffCaption": "explosion at 2.848-6.401, 7.091-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3506.wav", "onoffCaption": "whistling at 0.097-7.847 and door knocking at 0.498-3.266, 4.128-6.508, 7.796-10.0", "frequencyCaption": "whistling one times and door knocking three times"} +{"filepath": "data/multi_event_train/syn_3554.wav", "onoffCaption": "train horn at 0.045-4.113, 4.781-7.661", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3669.wav", "onoffCaption": "burping belching at 0.131-2.892, 4.13-6.891, 7.797-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_3727.wav", 
"onoffCaption": "sneeze at 1.669-3.665", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3772.wav", "onoffCaption": "door knocking at 2.434-5.501, 6.209-9.276", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3775.wav", "onoffCaption": "car horn honking at 0.698-3.516 and sheep goat bleating at 4.98-6.98", "frequencyCaption": "car horn honking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3790.wav", "onoffCaption": "cow mooing at 3.114-6.083, 7.337-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3797.wav", "onoffCaption": "duck quacking at 0.574-2.574, 3.148-5.148, 5.713-7.713 and dog barking at 2.459-4.459, 6.087-8.087", "frequencyCaption": "duck quacking three times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3802.wav", "onoffCaption": "whistling at 0.586-8.597 and sheep goat bleating at 1.539-3.539, 5.134-7.134", "frequencyCaption": "whistling one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3850.wav", "onoffCaption": "thump thud at 0.06-4.51, 6.619-8.847", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3919.wav", "onoffCaption": "sheep goat bleating at 0.38-2.38", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_68.wav", "onoffCaption": "spraying at 1.145-1.72, 2.499-3.074, 3.755-4.33 and tapping clicking clanking at 1.944-5.384, 6.002-8.278", "frequencyCaption": "spraying three times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_72.wav", "onoffCaption": "spraying at 2.92-3.547, 4.121-5.378, 6.063-8.191", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_97.wav", "onoffCaption": "thump thud at 0.697-4.364, 5.526-7.754", "frequencyCaption": "thump thud two times"} +{"filepath": 
"data/multi_event_train/syn_100.wav", "onoffCaption": "door knocking at 2.269-5.389 and explosion at 7.884-10.0", "frequencyCaption": "door knocking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_272.wav", "onoffCaption": "burping belching at 0.523-4.029, 6.193-9.699", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_297.wav", "onoffCaption": "tapping clicking clanking at 1.345-4.785, 6.911-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_369.wav", "onoffCaption": "burping belching at 0.269-2.367", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_396.wav", "onoffCaption": "cow mooing at 0.073-3.055, 5.173-8.042", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_457.wav", "onoffCaption": "explosion at 1.716-4.903 and door knocking at 3.096-5.16, 6.335-8.399", "frequencyCaption": "explosion one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_556.wav", "onoffCaption": "sneeze at 0.228-3.436, 4.412-5.658 and door slamming at 1.53-4.504, 6.22-9.194", "frequencyCaption": "sneeze two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_624.wav", "onoffCaption": "sneeze at 0.49-2.449 and thump thud at 5.007-9.457", "frequencyCaption": "sneeze one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_848.wav", "onoffCaption": "thump thud at 3.74-6.24, 7.599-9.797", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_852.wav", "onoffCaption": "tapping clicking clanking at 1.814-5.254, 6.511-9.021", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_949.wav", "onoffCaption": "spraying at 2.482-4.215, 6.71-7.794, 8.844-9.445", "frequencyCaption": "spraying three times"} +{"filepath": 
"data/multi_event_train/syn_953.wav", "onoffCaption": "car horn honking at 3.959-6.872", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1066.wav", "onoffCaption": "explosion at 2.375-4.695, 5.778-8.098 and woman laughing at 4.233-6.349, 7.227-9.343", "frequencyCaption": "explosion two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1083.wav", "onoffCaption": "woman laughing at 2.835-5.043, 7.399-9.607", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1099.wav", "onoffCaption": "burping belching at 3.336-6.515", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1167.wav", "onoffCaption": "burping belching at 0.344-3.344, 5.134-7.499", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1182.wav", "onoffCaption": "spraying at 1.885-4.277, 5.008-5.508, 7.252-9.014", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1198.wav", "onoffCaption": "door knocking at 1.186-6.019, 7.123-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1215.wav", "onoffCaption": "tapping clicking clanking at 0.347-3.787, 5.034-8.474", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1314.wav", "onoffCaption": "thump thud at 3.539-7.989", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1430.wav", "onoffCaption": "tapping clicking clanking at 2.991-6.431, 7.373-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1531.wav", "onoffCaption": "cat meowing at 0.302-2.296, 3.126-5.12 and car horn honking at 1.986-5.481", "frequencyCaption": "cat meowing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1643.wav", "onoffCaption": "thump thud at 
2.695-7.07", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1659.wav", "onoffCaption": "explosion at 3.679-6.551, 7.448-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1742.wav", "onoffCaption": "tapping clicking clanking at 0.157-3.597, 5.964-8.192 and gunshot at 2.244-4.374, 4.978-6.978, 7.529-9.699", "frequencyCaption": "tapping clicking clanking two times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_1758.wav", "onoffCaption": "burping belching at 0.668-3.191, 4.416-7.208 and car horn honking at 1.278-4.191", "frequencyCaption": "burping belching two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1835.wav", "onoffCaption": "woman laughing at 2.231-4.869, 6.638-9.276", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1934.wav", "onoffCaption": "thump thud at 0.624-4.291 and explosion at 7.861-10.0", "frequencyCaption": "thump thud one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3004.wav", "onoffCaption": "duck quacking at 0.12-2.12, 3.932-5.932 and door slamming at 8.291-9.191", "frequencyCaption": "duck quacking two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3105.wav", "onoffCaption": "thump thud at 2.398-4.898, 6.624-9.124", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3277.wav", "onoffCaption": "woman laughing at 0.86-3.143, 4.265-6.548, 7.463-9.746", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_3288.wav", "onoffCaption": "sneeze at 0.323-1.415, 2.705-3.797, 5.231-6.323 and train horn at 2.628-5.068", "frequencyCaption": "sneeze three times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3292.wav", "onoffCaption": "burping belching at 1.879-5.879", "frequencyCaption": "burping belching one times"} +{"filepath": 
"data/multi_event_train/syn_3376.wav", "onoffCaption": "train horn at 0.694-4.094, 5.406-8.061", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3389.wav", "onoffCaption": "thump thud at 0.609-5.059", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3393.wav", "onoffCaption": "door slamming at 0.336-1.509", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_3448.wav", "onoffCaption": "gunshot at 2.818-4.818 and door knocking at 4.83-6.99, 7.814-9.974", "frequencyCaption": "gunshot one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3452.wav", "onoffCaption": "spraying at 3.48-4.002, 5.912-6.434", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3549.wav", "onoffCaption": "cow mooing at 1.512-4.522 and sheep goat bleating at 6.085-9.381", "frequencyCaption": "cow mooing one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3553.wav", "onoffCaption": "explosion at 0.046-3.102, 4.365-7.421 and cow mooing at 1.437-6.417", "frequencyCaption": "explosion two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3621.wav", "onoffCaption": "duck quacking at 0.251-2.251, 3.603-5.603 and door slamming at 3.149-4.914, 5.503-6.503", "frequencyCaption": "duck quacking two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_3720.wav", "onoffCaption": "gunshot at 0.119-2.119, 3.389-5.389, 6.323-8.323", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3857.wav", "onoffCaption": "car horn honking at 1.094-4.02, 4.596-7.522", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3956.wav", "onoffCaption": "car horn honking at 3.505-8.412", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_27.wav", "onoffCaption": 
"sheep goat bleating at 3.479-5.479, 6.791-8.791 and cow mooing at 3.583-8.012", "frequencyCaption": "sheep goat bleating two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_56.wav", "onoffCaption": "duck quacking at 0.787-2.787 and cat meowing at 1.168-2.18, 3.484-4.496", "frequencyCaption": "duck quacking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_124.wav", "onoffCaption": "explosion at 0.32-2.413 and dog barking at 4.449-6.449, 7.727-9.727 and tapping clicking clanking at 4.578-8.018", "frequencyCaption": "explosion one times and dog barking two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_155.wav", "onoffCaption": "sheep goat bleating at 3.874-6.954", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_268.wav", "onoffCaption": "burping belching at 2.076-7.076, 7.833-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_326.wav", "onoffCaption": "sneeze at 0.217-1.32 and duck quacking at 3.736-5.736", "frequencyCaption": "sneeze one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_373.wav", "onoffCaption": "door knocking at 2.926-5.694, 6.774-9.493", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_418.wav", "onoffCaption": "train horn at 0.085-3.619", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_469.wav", "onoffCaption": "train horn at 2.048-5.808, 6.803-9.524", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_503.wav", "onoffCaption": "door slamming at 2.492-4.975 and explosion at 2.856-5.117, 6.1-8.361", "frequencyCaption": "door slamming one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_597.wav", "onoffCaption": "dog barking at 2.154-4.154 and car horn honking at 6.717-9.643", 
"frequencyCaption": "dog barking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_725.wav", "onoffCaption": "car horn honking at 0.486-3.661, 4.479-6.592", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_770.wav", "onoffCaption": "explosion at 0.494-2.668, 3.329-5.503 and gunshot at 2.729-4.859 and spraying at 9.35-10.0", "frequencyCaption": "explosion two times and gunshot one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_795.wav", "onoffCaption": "woman laughing at 0.279-2.36, 4.524-6.605, 7.54-9.621", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_807.wav", "onoffCaption": "dog barking at 0.412-2.412 and sneeze at 3.819-5.347, 6.581-7.967", "frequencyCaption": "dog barking one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_876.wav", "onoffCaption": "whistling at 2.475-4.704, 5.69-7.699 and train horn at 6.152-8.952", "frequencyCaption": "whistling two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_893.wav", "onoffCaption": "explosion at 0.254-3.254 and sneeze at 5.264-7.327", "frequencyCaption": "explosion one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_988.wav", "onoffCaption": "sheep goat bleating at 1.533-3.533 and cat meowing at 6.128-7.488", "frequencyCaption": "sheep goat bleating one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1033.wav", "onoffCaption": "gunshot at 1.931-4.437", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_1042.wav", "onoffCaption": "whistling at 1.292-9.042", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1128.wav", "onoffCaption": "sheep goat bleating at 1.761-3.761, 5.687-7.687", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1159.wav", "onoffCaption": 
"burping belching at 0.102-2.517 and door slamming at 0.169-3.048", "frequencyCaption": "burping belching one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1240.wav", "onoffCaption": "car horn honking at 3.031-6.618 and spraying at 7.178-8.435", "frequencyCaption": "car horn honking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1414.wav", "onoffCaption": "car horn honking at 2.864-5.79, 7.277-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1465.wav", "onoffCaption": "door knocking at 0.143-3.21, 4.674-7.054", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1480.wav", "onoffCaption": "train horn at 3.285-6.445", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1616.wav", "onoffCaption": "sheep goat bleating at 1.724-3.724, 4.694-6.694, 7.757-9.757", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1910.wav", "onoffCaption": "gunshot at 3.62-5.62, 7.01-9.01", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1961.wav", "onoffCaption": "explosion at 0.83-5.83 and car horn honking at 2.125-6.525, 7.843-10.0", "frequencyCaption": "explosion one times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1984.wav", "onoffCaption": "explosion at 0.271-3.271 and spraying at 0.606-3.042, 4.649-7.085 and gunshot at 4.573-6.592", "frequencyCaption": "explosion one times and spraying two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3020.wav", "onoffCaption": "thump thud at 0.274-2.613, 4.736-7.075 and door slamming at 1.616-2.421, 4.032-5.335 and cat meowing at 4.18-7.421", "frequencyCaption": "thump thud two times and door slamming two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3051.wav", "onoffCaption": "explosion at 
0.583-5.583, 7.425-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3222.wav", "onoffCaption": "whistling at 2.234-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3339.wav", "onoffCaption": "sneeze at 3.7-5.659 and car horn honking at 7.905-10.0", "frequencyCaption": "sneeze one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3407.wav", "onoffCaption": "cat meowing at 0.704-1.716, 3.124-5.064 and spraying at 1.93-4.322 and woman laughing at 7.89-10.0", "frequencyCaption": "cat meowing two times and spraying one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3476.wav", "onoffCaption": "sneeze at 0.2-1.294, 2.325-4.449", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3493.wav", "onoffCaption": "explosion at 2.754-5.015", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3588.wav", "onoffCaption": "door knocking at 3.157-5.727, 7.114-9.684", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3674.wav", "onoffCaption": "burping belching at 0.178-3.738, 4.276-7.836 and duck quacking at 7.421-9.421", "frequencyCaption": "burping belching two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3691.wav", "onoffCaption": "train horn at 2.007-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3818.wav", "onoffCaption": "train horn at 0.086-2.966, 5.129-7.598", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3869.wav", "onoffCaption": "spraying at 0.068-1.152, 3.159-3.734", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3903.wav", "onoffCaption": "gunshot at 0.325-2.325, 3.275-5.275, 6.343-8.343", "frequencyCaption": "gunshot three times"} +{"filepath": 
"data/multi_event_train/syn_3972.wav", "onoffCaption": "explosion at 2.923-4.93 and cat meowing at 7.772-9.308", "frequencyCaption": "explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3997.wav", "onoffCaption": "spraying at 0.219-1.086, 2.175-3.042 and gunshot at 1.073-3.073, 4.828-6.828", "frequencyCaption": "spraying two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_171.wav", "onoffCaption": "sheep goat bleating at 0.622-2.622, 3.342-5.342 and explosion at 0.656-5.577, 6.689-10.0", "frequencyCaption": "sheep goat bleating two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_194.wav", "onoffCaption": "cow mooing at 4.267-9.247", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_203.wav", "onoffCaption": "spraying at 0.497-2.192, 2.836-3.777, 4.996-5.847", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_219.wav", "onoffCaption": "burping belching at 0.188-3.188 and door knocking at 0.709-4.471", "frequencyCaption": "burping belching one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_302.wav", "onoffCaption": "spraying at 2.0-2.75", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_318.wav", "onoffCaption": "door knocking at 3.232-6.079, 7.926-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_357.wav", "onoffCaption": "car horn honking at 0.444-3.939", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_527.wav", "onoffCaption": "explosion at 0.505-3.373 and tapping clicking clanking at 6.208-9.648", "frequencyCaption": "explosion one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_572.wav", "onoffCaption": "whistling at 1.684-3.693, 5.499-7.606", "frequencyCaption": "whistling two times"} +{"filepath": 
"data/multi_event_train/syn_701.wav", "onoffCaption": "door slamming at 0.027-0.967, 2.312-3.252 and sneeze at 6.756-8.256", "frequencyCaption": "door slamming two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_754.wav", "onoffCaption": "dog barking at 2.725-4.725, 5.904-7.904", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_823.wav", "onoffCaption": "whistling at 0.976-6.151, 7.109-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_938.wav", "onoffCaption": "cat meowing at 0.419-3.323, 4.149-6.126, 7.37-9.364", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1017.wav", "onoffCaption": "sneeze at 1.526-3.765 and dog barking at 3.672-5.672, 6.695-8.695", "frequencyCaption": "sneeze one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_1231.wav", "onoffCaption": "door knocking at 0.655-3.03, 4.154-6.529, 7.337-9.712", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1264.wav", "onoffCaption": "sneeze at 2.384-3.912, 4.476-5.802, 6.605-9.066", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_1281.wav", "onoffCaption": "burping belching at 0.058-5.058, 7.551-9.892 and door slamming at 1.441-3.637", "frequencyCaption": "burping belching two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1365.wav", "onoffCaption": "train horn at 3.067-9.536", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1380.wav", "onoffCaption": "thump thud at 1.007-3.507, 5.962-8.301", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1441.wav", "onoffCaption": "thump thud at 1.625-4.125, 5.821-7.956", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1628.wav", "onoffCaption": "spraying at 1.654-3.387", 
"frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1632.wav", "onoffCaption": "explosion at 0.414-3.142", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1667.wav", "onoffCaption": "train horn at 0.181-5.258, 7.479-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1682.wav", "onoffCaption": "train horn at 3.047-5.202, 5.893-8.048", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1729.wav", "onoffCaption": "burping belching at 2.765-4.968, 6.119-8.322", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1733.wav", "onoffCaption": "spraying at 2.841-4.098, 6.577-8.339", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1799.wav", "onoffCaption": "cat meowing at 0.443-2.027, 4.267-5.851", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1945.wav", "onoffCaption": "dog barking at 2.352-4.352 and gunshot at 7.525-9.525", "frequencyCaption": "dog barking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3075.wav", "onoffCaption": "gunshot at 0.101-2.101", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3090.wav", "onoffCaption": "sheep goat bleating at 0.67-2.67, 3.618-5.618 and gunshot at 2.733-4.752, 6.302-8.321", "frequencyCaption": "sheep goat bleating two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_3206.wav", "onoffCaption": "car horn honking at 0.644-3.109, 5.341-7.806", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3253.wav", "onoffCaption": "cat meowing at 0.657-2.017", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3307.wav", "onoffCaption": "burping belching at 2.496-6.04, 6.739-9.063 and woman laughing at 3.176-5.768", 
"frequencyCaption": "burping belching two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3348.wav", "onoffCaption": "sheep goat bleating at 1.975-3.975 and burping belching at 2.183-4.944, 5.965-8.088", "frequencyCaption": "sheep goat bleating one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_3423.wav", "onoffCaption": "thump thud at 2.28-4.508, 6.92-9.148", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3538.wav", "onoffCaption": "duck quacking at 2.278-4.278, 5.652-7.652", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3605.wav", "onoffCaption": "dog barking at 0.121-2.121", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3650.wav", "onoffCaption": "whistling at 0.406-2.415, 3.048-5.057, 6.24-8.249 and spraying at 4.176-4.826, 5.812-6.876", "frequencyCaption": "whistling three times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3751.wav", "onoffCaption": "gunshot at 0.272-2.272, 4.238-6.331, 7.459-9.459", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3927.wav", "onoffCaption": "door slamming at 0.028-2.028", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_19.wav", "onoffCaption": "gunshot at 2.917-4.917", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_256.wav", "onoffCaption": "sheep goat bleating at 1.495-3.495, 4.567-6.567, 7.123-9.123", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_426.wav", "onoffCaption": "woman laughing at 2.649-4.932, 5.692-7.975", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_432.wav", "onoffCaption": "door knocking at 0.055-3.43 and spraying at 7.599-8.2", "frequencyCaption": "door knocking one times and spraying one times"} 
+{"filepath": "data/multi_event_train/syn_473.wav", "onoffCaption": "gunshot at 0.698-2.698, 3.947-5.947, 6.742-8.742 and explosion at 4.807-7.863", "frequencyCaption": "gunshot three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_496.wav", "onoffCaption": "car horn honking at 2.094-5.313, 5.949-9.168", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_529.wav", "onoffCaption": "door knocking at 3.432-5.592", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_568.wav", "onoffCaption": "dog barking at 0.403-2.403, 4.637-6.637", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_600.wav", "onoffCaption": "train horn at 2.315-8.03", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_641.wav", "onoffCaption": "cow mooing at 0.533-3.515, 4.376-7.358 and duck quacking at 3.39-5.39 and whistling at 5.257-8.232", "frequencyCaption": "cow mooing two times and duck quacking one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_655.wav", "onoffCaption": "spraying at 0.001-0.852", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_839.wav", "onoffCaption": "door slamming at 2.325-3.442, 4.75-5.867, 6.927-8.044 and sneeze at 3.071-4.359, 4.959-6.247, 7.169-8.457", "frequencyCaption": "door slamming three times and sneeze three times"} +{"filepath": "data/multi_event_train/syn_889.wav", "onoffCaption": "sheep goat bleating at 3.745-5.745, 7.04-9.04", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_922.wav", "onoffCaption": "door knocking at 2.749-5.596, 6.697-9.593", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_936.wav", "onoffCaption": "gunshot at 2.615-4.656, 5.659-7.7", "frequencyCaption": "gunshot two times"} +{"filepath": 
"data/multi_event_train/syn_977.wav", "onoffCaption": "car horn honking at 0.024-3.199, 4.429-7.604 and door slamming at 1.132-1.983, 2.772-3.623, 5.279-6.13 and spraying at 3.137-3.918, 5.76-6.541", "frequencyCaption": "car horn honking two times and door slamming three times and spraying two times"} +{"filepath": "data/multi_event_train/syn_992.wav", "onoffCaption": "train horn at 0.88-6.595, 7.622-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1019.wav", "onoffCaption": "door slamming at 0.69-3.664 and duck quacking at 7.922-9.922", "frequencyCaption": "door slamming one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1058.wav", "onoffCaption": "door slamming at 0.388-2.388, 3.626-5.626 and explosion at 2.533-5.286, 6.989-9.497", "frequencyCaption": "door slamming two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_1102.wav", "onoffCaption": "gunshot at 3.478-5.608, 6.53-8.77", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1116.wav", "onoffCaption": "door slamming at 2.112-4.24, 5.163-7.291", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1143.wav", "onoffCaption": "sneeze at 2.226-4.185, 5.315-7.274, 7.89-9.849", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_1330.wav", "onoffCaption": "burping belching at 3.154-6.154", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1515.wav", "onoffCaption": "spraying at 0.068-0.669 and woman laughing at 2.812-4.904, 6.245-8.327", "frequencyCaption": "spraying one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1540.wav", "onoffCaption": "sheep goat bleating at 1.643-3.643, 4.39-6.39, 7.124-9.124", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1554.wav", "onoffCaption": "cat meowing at 
2.76-3.781 and door slamming at 5.94-7.079", "frequencyCaption": "cat meowing one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1698.wav", "onoffCaption": "spraying at 3.088-5.216", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1766.wav", "onoffCaption": "dog barking at 2.815-4.815, 5.782-8.22", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1783.wav", "onoffCaption": "door knocking at 2.091-5.644, 6.754-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1811.wav", "onoffCaption": "duck quacking at 0.315-2.315, 3.397-5.397 and dog barking at 0.403-2.403, 3.261-5.261, 6.58-8.58", "frequencyCaption": "duck quacking two times and dog barking three times"} +{"filepath": "data/multi_event_train/syn_1844.wav", "onoffCaption": "train horn at 2.463-6.903", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1850.wav", "onoffCaption": "sheep goat bleating at 0.308-2.308, 3.201-5.201, 7.564-10.0", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3121.wav", "onoffCaption": "burping belching at 0.675-4.698, 6.615-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3160.wav", "onoffCaption": "burping belching at 0.221-2.344, 4.641-6.672, 7.976-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_3174.wav", "onoffCaption": "cow mooing at 0.128-3.138, 4.765-7.734 and gunshot at 0.227-2.227, 2.757-4.85, 6.127-8.127 and tapping clicking clanking at 5.439-8.879", "frequencyCaption": "cow mooing two times and gunshot three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3185.wav", "onoffCaption": "gunshot at 1.368-3.368, 4.358-6.358", "frequencyCaption": "gunshot two times"} +{"filepath": 
"data/multi_event_train/syn_3191.wav", "onoffCaption": "explosion at 3.122-6.122", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3249.wav", "onoffCaption": "car horn honking at 0.422-2.887, 3.961-6.426", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3352.wav", "onoffCaption": "dog barking at 0.73-4.05, 6.487-9.807", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3439.wav", "onoffCaption": "sneeze at 0.638-5.638, 6.306-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3489.wav", "onoffCaption": "burping belching at 0.191-2.952 and car horn honking at 6.918-10.0", "frequencyCaption": "burping belching one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3522.wav", "onoffCaption": "tapping clicking clanking at 2.936-6.376 and dog barking at 3.613-5.613 and spraying at 4.705-6.467, 7.598-9.293", "frequencyCaption": "tapping clicking clanking one times and dog barking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3536.wav", "onoffCaption": "gunshot at 1.616-3.616, 5.17-7.17", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3577.wav", "onoffCaption": "car horn honking at 0.724-5.631, 6.312-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3592.wav", "onoffCaption": "sheep goat bleating at 0.138-3.218, 3.865-5.865 and car horn honking at 2.443-6.765", "frequencyCaption": "sheep goat bleating two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3704.wav", "onoffCaption": "sneeze at 1.924-3.027, 4.925-6.028 and cow mooing at 7.95-10.0", "frequencyCaption": "sneeze two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3826.wav", "onoffCaption": "spraying at 0.306-2.001 and cat meowing at 6.478-10.0", 
"frequencyCaption": "spraying one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3832.wav", "onoffCaption": "burping belching at 0.032-3.032 and explosion at 5.43-10.0", "frequencyCaption": "burping belching one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3873.wav", "onoffCaption": "thump thud at 0.405-4.855, 5.664-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3896.wav", "onoffCaption": "gunshot at 0.419-2.419, 3.141-5.141, 6.145-8.145 and burping belching at 1.956-5.979", "frequencyCaption": "gunshot three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3929.wav", "onoffCaption": "burping belching at 2.342-4.377, 5.498-7.533", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3968.wav", "onoffCaption": "thump thud at 1.08-5.53, 7.974-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_58.wav", "onoffCaption": "tapping clicking clanking at 2.24-5.68, 6.942-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_217.wav", "onoffCaption": "duck quacking at 1.061-3.061, 4.757-6.757, 7.312-9.312", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_242.wav", "onoffCaption": "woman laughing at 0.008-2.233, 3.414-5.519, 7.665-10.0", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_258.wav", "onoffCaption": "door knocking at 2.89-6.506, 7.322-9.482", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_343.wav", "onoffCaption": "duck quacking at 3.426-5.426, 6.592-8.592", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_359.wav", "onoffCaption": "cow mooing at 0.104-3.086 and woman laughing at 5.605-10.0", "frequencyCaption": "cow mooing one times 
and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_467.wav", "onoffCaption": "thump thud at 1.125-3.896, 5.7-8.471", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_482.wav", "onoffCaption": "door knocking at 2.099-4.723, 6.801-9.425", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_566.wav", "onoffCaption": "dog barking at 2.388-4.388, 6.567-8.567", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_599.wav", "onoffCaption": "whistling at 0.26-5.435, 6.002-8.011", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_614.wav", "onoffCaption": "woman laughing at 0.863-2.968", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_715.wav", "onoffCaption": "woman laughing at 2.516-6.568", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_878.wav", "onoffCaption": "spraying at 0.47-0.992 and whistling at 2.956-5.831", "frequencyCaption": "spraying one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_963.wav", "onoffCaption": "cow mooing at 0.407-3.705, 4.406-7.704", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_986.wav", "onoffCaption": "duck quacking at 2.219-4.219, 5.442-7.442", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1157.wav", "onoffCaption": "car horn honking at 1.378-4.553", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1225.wav", "onoffCaption": "train horn at 2.208-5.978", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1324.wav", "onoffCaption": "duck quacking at 0.456-2.456, 3.38-5.38, 6.356-8.356 and thump thud at 2.341-4.68, 5.337-7.676", "frequencyCaption": "duck quacking three times and thump thud two times"} 
+{"filepath": "data/multi_event_train/syn_1371.wav", "onoffCaption": "gunshot at 0.281-2.411 and whistling at 4.833-9.317", "frequencyCaption": "gunshot one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1394.wav", "onoffCaption": "sheep goat bleating at 0.453-2.453, 3.709-5.709", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1501.wav", "onoffCaption": "door slamming at 3.171-5.629", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1669.wav", "onoffCaption": "door slamming at 0.076-0.881, 1.714-2.519 and car horn honking at 5.815-8.28", "frequencyCaption": "door slamming two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1673.wav", "onoffCaption": "train horn at 2.681-5.881, 7.622-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1696.wav", "onoffCaption": "woman laughing at 2.735-9.469", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1727.wav", "onoffCaption": "woman laughing at 2.101-4.295, 5.621-7.848 and whistling at 2.263-5.138", "frequencyCaption": "woman laughing two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1768.wav", "onoffCaption": "train horn at 0.389-2.789", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1772.wav", "onoffCaption": "cow mooing at 3.636-6.934", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1797.wav", "onoffCaption": "door slamming at 0.328-2.456, 3.815-5.943", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1805.wav", "onoffCaption": "gunshot at 3.296-5.296", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3135.wav", "onoffCaption": "car horn honking at 2.114-6.436, 7.636-10.0", "frequencyCaption": "car horn honking two 
times"} +{"filepath": "data/multi_event_train/syn_3208.wav", "onoffCaption": "door slamming at 3.143-4.121, 5.763-6.741, 7.247-8.225", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3247.wav", "onoffCaption": "spraying at 1.987-2.487, 4.039-4.539 and door slamming at 7.66-9.425", "frequencyCaption": "spraying two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3313.wav", "onoffCaption": "cow mooing at 0.136-4.565, 6.674-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3346.wav", "onoffCaption": "tapping clicking clanking at 2.996-6.436, 7.718-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3478.wav", "onoffCaption": "door slamming at 2.783-5.757, 6.792-8.988", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3563.wav", "onoffCaption": "explosion at 0.035-2.763, 3.269-5.997", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3586.wav", "onoffCaption": "dog barking at 2.001-4.001, 6.436-8.436", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3611.wav", "onoffCaption": "burping belching at 0.545-3.545", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3710.wav", "onoffCaption": "sheep goat bleating at 0.418-2.418, 3.248-5.248, 6.327-8.327", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3745.wav", "onoffCaption": "whistling at 2.291-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3867.wav", "onoffCaption": "burping belching at 1.62-4.381, 6.604-9.365", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3882.wav", "onoffCaption": "woman laughing at 0.645-3.699 and sneeze at 6.075-7.167, 7.708-8.8", 
"frequencyCaption": "woman laughing one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3966.wav", "onoffCaption": "thump thud at 0.699-3.746, 6.205-9.122", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3999.wav", "onoffCaption": "tapping clicking clanking at 2.904-6.344, 7.265-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_17.wav", "onoffCaption": "cow mooing at 0.467-5.447, 6.568-8.92", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_42.wav", "onoffCaption": "sheep goat bleating at 3.727-5.727", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_130.wav", "onoffCaption": "thump thud at 0.503-3.274, 5.351-8.122 and cow mooing at 2.094-6.523", "frequencyCaption": "thump thud two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_165.wav", "onoffCaption": "explosion at 0.033-3.033, 4.03-6.902", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_180.wav", "onoffCaption": "door knocking at 1.887-4.199, 5.442-7.754", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_316.wav", "onoffCaption": "cow mooing at 0.186-4.615 and gunshot at 6.965-8.965", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_498.wav", "onoffCaption": "woman laughing at 0.389-3.184 and spraying at 0.479-1.736 and sheep goat bleating at 6.814-8.814", "frequencyCaption": "woman laughing one times and spraying one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_583.wav", "onoffCaption": "cat meowing at 2.992-4.609, 5.325-6.942, 8.008-9.625", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_740.wav", "onoffCaption": "woman laughing at 0.233-2.933, 4.001-6.701, 7.57-10.0", 
"frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_837.wav", "onoffCaption": "tapping clicking clanking at 1.154-4.594", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_862.wav", "onoffCaption": "cow mooing at 0.176-3.158", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_887.wav", "onoffCaption": "gunshot at 1.975-3.975 and explosion at 6.851-8.853", "frequencyCaption": "gunshot one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_979.wav", "onoffCaption": "cow mooing at 0.418-5.398, 6.857-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1003.wav", "onoffCaption": "door knocking at 2.873-7.575", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1056.wav", "onoffCaption": "spraying at 0.772-3.791, 4.499-5.126, 6.351-7.432 and door knocking at 2.666-6.816", "frequencyCaption": "spraying three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1118.wav", "onoffCaption": "sneeze at 1.98-4.219, 5.391-8.105", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1270.wav", "onoffCaption": "explosion at 0.304-3.057, 5.349-7.61", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1295.wav", "onoffCaption": "tapping clicking clanking at 0.66-4.1, 5.72-8.425", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1400.wav", "onoffCaption": "cat meowing at 0.025-1.046, 1.59-4.35", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1455.wav", "onoffCaption": "tapping clicking clanking at 0.635-4.075, 5.126-8.566 and car horn honking at 1.938-6.338", "frequencyCaption": "tapping clicking clanking two times and car horn honking one times"} +{"filepath": 
"data/multi_event_train/syn_1626.wav", "onoffCaption": "tapping clicking clanking at 2.667-6.107, 7.915-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1904.wav", "onoffCaption": "gunshot at 0.344-2.344, 3.876-5.876", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1951.wav", "onoffCaption": "thump thud at 0.001-2.501, 3.395-5.623", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3034.wav", "onoffCaption": "thump thud at 2.608-5.108, 6.01-8.51", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3061.wav", "onoffCaption": "cow mooing at 1.872-4.841", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3084.wav", "onoffCaption": "whistling at 0.882-3.757, 4.534-6.763 and cat meowing at 1.107-2.651, 3.757-5.023", "frequencyCaption": "whistling two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3212.wav", "onoffCaption": "explosion at 0.067-2.131", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3309.wav", "onoffCaption": "train horn at 0.142-4.142 and duck quacking at 1.183-3.183, 4.552-6.552 and door knocking at 2.065-4.377, 5.166-7.478", "frequencyCaption": "train horn one times and duck quacking two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3437.wav", "onoffCaption": "train horn at 3.389-7.457", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3462.wav", "onoffCaption": "cat meowing at 1.262-4.503, 6.9-10.0 and spraying at 1.386-3.846 and duck quacking at 2.352-4.352", "frequencyCaption": "cat meowing two times and spraying one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3487.wav", "onoffCaption": "cow mooing at 0.381-3.391, 4.246-7.074 and car horn honking at 2.377-6.889", "frequencyCaption": 
"cow mooing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3579.wav", "onoffCaption": "door knocking at 0.011-4.544, 5.239-9.772", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3644.wav", "onoffCaption": "dog barking at 1.946-3.946, 4.723-6.723 and sneeze at 4.503-6.906, 7.706-10.0", "frequencyCaption": "dog barking two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3898.wav", "onoffCaption": "explosion at 0.238-3.791", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3933.wav", "onoffCaption": "door slamming at 0.57-3.449, 4.57-6.085, 6.734-7.572", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3983.wav", "onoffCaption": "sneeze at 1.528-3.652, 4.23-5.775, 6.779-9.439", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_66.wav", "onoffCaption": "sheep goat bleating at 3.755-5.755, 6.54-8.54", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_83.wav", "onoffCaption": "burping belching at 1.755-4.957, 6.169-8.93", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_114.wav", "onoffCaption": "thump thud at 0.273-3.32, 4.785-7.832 and whistling at 2.289-7.464", "frequencyCaption": "thump thud two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_299.wav", "onoffCaption": "tapping clicking clanking at 2.854-6.294", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_367.wav", "onoffCaption": "burping belching at 2.702-5.992", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_382.wav", "onoffCaption": "tapping clicking clanking at 3.22-6.66, 7.835-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": 
"data/multi_event_train/syn_428.wav", "onoffCaption": "door knocking at 3.238-5.426, 6.557-8.745", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_459.wav", "onoffCaption": "door slamming at 0.017-1.27 and train horn at 0.31-4.31, 5.293-7.405 and duck quacking at 2.271-4.271, 5.79-7.79", "frequencyCaption": "door slamming one times and train horn two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_533.wav", "onoffCaption": "train horn at 2.06-4.527, 5.17-7.637", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_542.wav", "onoffCaption": "door knocking at 2.58-4.932, 5.435-8.165", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_731.wav", "onoffCaption": "cow mooing at 1.42-4.718", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_781.wav", "onoffCaption": "cat meowing at 2.711-3.924, 4.973-6.186, 7.019-8.232", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_846.wav", "onoffCaption": "cow mooing at 2.238-6.667, 7.302-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1072.wav", "onoffCaption": "train horn at 1.178-7.238", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1097.wav", "onoffCaption": "whistling at 0.339-4.823, 6.015-8.024", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1169.wav", "onoffCaption": "sneeze at 0.092-1.186, 1.806-2.9, 3.554-4.648 and cat meowing at 6.817-8.765", "frequencyCaption": "sneeze three times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1201.wav", "onoffCaption": "train horn at 2.979-5.116, 5.771-7.908", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1254.wav", "onoffCaption": "sheep goat bleating at 0.196-2.196, 3.225-5.225 and cow 
mooing at 7.435-10.0", "frequencyCaption": "sheep goat bleating two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1424.wav", "onoffCaption": "gunshot at 0.785-2.785, 3.691-5.691, 6.961-8.961", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1602.wav", "onoffCaption": "explosion at 2.752-4.926, 6.126-8.3", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1657.wav", "onoffCaption": "burping belching at 1.032-4.211 and spraying at 7.591-9.077", "frequencyCaption": "burping belching one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1719.wav", "onoffCaption": "car horn honking at 2.991-5.917, 7.165-9.757", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1920.wav", "onoffCaption": "cat meowing at 2.121-5.151, 6.69-8.226", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3010.wav", "onoffCaption": "car horn honking at 0.242-3.896, 5.279-8.933", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3236.wav", "onoffCaption": "burping belching at 1.875-5.875", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3263.wav", "onoffCaption": "dog barking at 3.569-5.569, 6.486-8.486 and thump thud at 4.01-7.928", "frequencyCaption": "dog barking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3286.wav", "onoffCaption": "gunshot at 3.647-5.887, 7.673-9.913", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3378.wav", "onoffCaption": "door knocking at 0.404-3.172, 5.052-7.899 and tapping clicking clanking at 3.784-7.224 and car horn honking at 6.433-8.433", "frequencyCaption": "door knocking two times and tapping clicking clanking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3446.wav", 
"onoffCaption": "sheep goat bleating at 0.347-3.987, 5.155-8.795", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3635.wav", "onoffCaption": "cat meowing at 0.466-2.01 and duck quacking at 5.427-7.427", "frequencyCaption": "cat meowing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3660.wav", "onoffCaption": "tapping clicking clanking at 2.607-6.047, 6.693-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3685.wav", "onoffCaption": "gunshot at 3.213-5.719", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3828.wav", "onoffCaption": "dog barking at 3.288-5.288 and burping belching at 3.369-5.572, 7.868-10.0", "frequencyCaption": "dog barking one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_3859.wav", "onoffCaption": "spraying at 0.065-1.065, 2.246-3.246, 5.682-6.682", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3942.wav", "onoffCaption": "door knocking at 0.673-2.922, 4.302-6.551 and cow mooing at 3.207-6.189", "frequencyCaption": "door knocking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_29.wav", "onoffCaption": "tapping clicking clanking at 2.876-6.316, 7.077-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_33.wav", "onoffCaption": "sheep goat bleating at 0.232-2.232, 3.068-5.068, 5.691-7.691", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_141.wav", "onoffCaption": "dog barking at 2.679-4.679, 5.988-7.988", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_229.wav", "onoffCaption": "cow mooing at 0.504-3.802 and sheep goat bleating at 6.873-10.0", "frequencyCaption": "cow mooing one times and sheep goat bleating one times"} +{"filepath": 
"data/multi_event_train/syn_332.wav", "onoffCaption": "explosion at 2.929-5.658, 6.447-9.447", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_416.wav", "onoffCaption": "cat meowing at 1.978-3.873, 4.787-6.682", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_517.wav", "onoffCaption": "woman laughing at 0.043-7.055 and sheep goat bleating at 1.034-3.034, 4.335-6.335", "frequencyCaption": "woman laughing one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_665.wav", "onoffCaption": "thump thud at 0.333-3.104, 4.26-6.811", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_764.wav", "onoffCaption": "explosion at 2.603-5.332", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_809.wav", "onoffCaption": "tapping clicking clanking at 0.597-4.037, 5.572-8.29", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_813.wav", "onoffCaption": "sneeze at 0.338-1.664, 3.859-6.726, 7.402-8.902", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_908.wav", "onoffCaption": "duck quacking at 2.231-4.231, 5.703-7.703", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_912.wav", "onoffCaption": "whistling at 0.37-5.87", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1027.wav", "onoffCaption": "burping belching at 0.405-2.928 and spraying at 7.113-8.808", "frequencyCaption": "burping belching one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1126.wav", "onoffCaption": "cat meowing at 2.066-3.798, 4.339-6.071, 6.707-8.439", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1471.wav", "onoffCaption": "whistling at 2.484-5.459, 7.427-9.449", "frequencyCaption": "whistling two times"} 
+{"filepath": "data/multi_event_train/syn_1494.wav", "onoffCaption": "whistling at 0.825-3.7, 4.527-7.523", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1570.wav", "onoffCaption": "cow mooing at 0.068-3.078 and dog barking at 5.42-7.858", "frequencyCaption": "cow mooing one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1595.wav", "onoffCaption": "door slamming at 1.444-4.444, 6.63-9.63", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1874.wav", "onoffCaption": "gunshot at 0.023-2.023", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_1891.wav", "onoffCaption": "burping belching at 0.954-3.157 and tapping clicking clanking at 1.277-4.717, 6.507-9.947", "frequencyCaption": "burping belching one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1975.wav", "onoffCaption": "explosion at 1.831-4.831, 5.888-8.888 and spraying at 2.557-3.307, 4.647-5.514 and tapping clicking clanking at 4.125-7.565", "frequencyCaption": "explosion two times and spraying two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1990.wav", "onoffCaption": "door slamming at 0.013-1.186, 1.716-2.889, 3.808-4.981", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3045.wav", "onoffCaption": "explosion at 2.17-4.49 and train horn at 6.537-10.0", "frequencyCaption": "explosion one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3144.wav", "onoffCaption": "gunshot at 0.205-2.205, 4.257-6.257", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3409.wav", "onoffCaption": "door knocking at 0.161-2.382, 3.18-5.984", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3413.wav", "onoffCaption": "explosion at 0.773-3.773, 4.786-7.786 and duck 
quacking at 1.458-3.458, 5.083-7.083", "frequencyCaption": "explosion two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3508.wav", "onoffCaption": "cow mooing at 2.491-5.789, 6.803-8.835", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3512.wav", "onoffCaption": "explosion at 2.04-7.04 and thump thud at 2.667-7.042, 7.744-10.0", "frequencyCaption": "explosion one times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_3816.wav", "onoffCaption": "train horn at 3.343-6.143, 7.821-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3917.wav", "onoffCaption": "sneeze at 2.112-4.573", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_99.wav", "onoffCaption": "cow mooing at 0.562-3.86, 4.984-8.282", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_233.wav", "onoffCaption": "dog barking at 0.177-2.177 and woman laughing at 0.336-3.036, 4.006-6.644, 7.419-9.665", "frequencyCaption": "dog barking one times and woman laughing three times"} +{"filepath": "data/multi_event_train/syn_266.wav", "onoffCaption": "cow mooing at 1.671-4.681, 6.283-9.293 and woman laughing at 2.644-5.236, 5.829-7.91", "frequencyCaption": "cow mooing two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_283.wav", "onoffCaption": "whistling at 0.42-3.295, 3.993-6.222, 7.259-9.502", "frequencyCaption": "whistling three times"} +{"filepath": "data/multi_event_train/syn_328.wav", "onoffCaption": "whistling at 1.879-7.054 and tapping clicking clanking at 3.661-7.101, 7.666-9.803", "frequencyCaption": "whistling one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_398.wav", "onoffCaption": "sneeze at 2.719-4.832, 6.205-8.318", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_443.wav", "onoffCaption": 
"explosion at 0.593-5.593, 7.99-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_558.wav", "onoffCaption": "sheep goat bleating at 2.197-4.197, 4.74-6.74, 7.47-10.0 and door knocking at 2.662-6.215", "frequencyCaption": "sheep goat bleating three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_630.wav", "onoffCaption": "thump thud at 2.609-5.109, 6.484-8.984", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_662.wav", "onoffCaption": "gunshot at 1.884-3.884, 6.184-8.184", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_680.wav", "onoffCaption": "thump thud at 1.832-5.499, 6.258-9.925", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_779.wav", "onoffCaption": "thump thud at 2.493-4.832, 7.066-9.405", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_947.wav", "onoffCaption": "train horn at 0.341-2.821, 3.65-6.05", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1068.wav", "onoffCaption": "car horn honking at 0.181-4.43, 6.693-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1173.wav", "onoffCaption": "explosion at 0.221-2.314, 3.781-5.899", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1196.wav", "onoffCaption": "sheep goat bleating at 1.568-3.568, 4.446-6.446", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1300.wav", "onoffCaption": "sneeze at 0.212-1.458, 3.742-4.988 and dog barking at 1.234-3.234", "frequencyCaption": "sneeze two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1355.wav", "onoffCaption": "cat meowing at 2.938-4.886, 6.198-8.146", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1525.wav", 
"onoffCaption": "train horn at 0.469-3.109, 4.904-7.77", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1618.wav", "onoffCaption": "cat meowing at 3.514-4.535, 6.027-7.24, 8.265-9.575", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1703.wav", "onoffCaption": "burping belching at 2.136-7.737 and sheep goat bleating at 4.673-6.673, 7.794-9.794", "frequencyCaption": "burping belching one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1756.wav", "onoffCaption": "car horn honking at 1.746-4.211, 5.004-7.53", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1821.wav", "onoffCaption": "door knocking at 0.277-4.979, 6.493-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3111.wav", "onoffCaption": "thump thud at 1.355-5.805 and door slamming at 1.839-3.839, 4.949-6.862, 8.179-9.328", "frequencyCaption": "thump thud one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_3279.wav", "onoffCaption": "dog barking at 0.184-3.105 and door knocking at 0.409-2.63 and cat meowing at 5.215-6.236", "frequencyCaption": "dog barking one times and door knocking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3337.wav", "onoffCaption": "dog barking at 0.337-2.337", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3362.wav", "onoffCaption": "train horn at 0.443-2.58, 3.154-5.291, 7.352-9.489", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_train/syn_3387.wav", "onoffCaption": "sheep goat bleating at 1.837-3.837, 4.859-6.859, 7.452-9.452 and car horn honking at 3.359-6.578", "frequencyCaption": "sheep goat bleating three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3547.wav", "onoffCaption": "gunshot at 3.96-6.053, 
7.128-9.128", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3734.wav", "onoffCaption": "train horn at 2.954-5.434, 7.906-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3761.wav", "onoffCaption": "tapping clicking clanking at 1.41-4.85, 6.821-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3784.wav", "onoffCaption": "sneeze at 1.346-3.47", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3843.wav", "onoffCaption": "woman laughing at 0.101-3.173 and spraying at 2.896-3.747", "frequencyCaption": "woman laughing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3958.wav", "onoffCaption": "burping belching at 0.069-7.237", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_109.wav", "onoffCaption": "spraying at 3.713-4.297", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_113.wav", "onoffCaption": "thump thud at 2.654-6.572", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_234.wav", "onoffCaption": "explosion at 3.255-6.255", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_261.wav", "onoffCaption": "car horn honking at 0.263-2.776, 3.88-6.374, 7.098-9.563 and explosion at 0.45-2.452", "frequencyCaption": "car horn honking three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_284.wav", "onoffCaption": "woman laughing at 2.194-4.894, 6.241-8.941 and explosion at 2.707-7.707 and tapping clicking clanking at 3.378-6.818", "frequencyCaption": "woman laughing two times and explosion one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_411.wav", "onoffCaption": "door slamming at 2.769-4.002, 5.099-6.332", "frequencyCaption": "door slamming two times"} 
+{"filepath": "data/multi_event_train/syn_444.wav", "onoffCaption": "duck quacking at 0.65-2.65, 3.391-5.391, 6.599-8.599", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_545.wav", "onoffCaption": "whistling at 1.186-4.161, 5.356-8.315", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_637.wav", "onoffCaption": "door slamming at 2.817-4.817, 6.014-7.389", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_687.wav", "onoffCaption": "duck quacking at 0.504-2.504, 4.873-6.873 and sheep goat bleating at 5.253-7.253", "frequencyCaption": "duck quacking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_915.wav", "onoffCaption": "cat meowing at 2.392-5.296, 6.243-9.147", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_940.wav", "onoffCaption": "cat meowing at 0.108-1.418, 3.144-4.715 and explosion at 2.253-4.427, 6.188-8.362", "frequencyCaption": "cat meowing two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_1075.wav", "onoffCaption": "door slamming at 3.377-4.668, 7.131-8.27", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1121.wav", "onoffCaption": "tapping clicking clanking at 0.164-3.604, 4.325-6.915", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1174.wav", "onoffCaption": "burping belching at 2.91-9.59", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1191.wav", "onoffCaption": "dog barking at 0.073-2.073, 2.957-4.957", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1249.wav", "onoffCaption": "thump thud at 0.104-4.479, 5.297-8.068", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1307.wav", "onoffCaption": "whistling at 3.185-6.16, 
7.3-9.569", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1352.wav", "onoffCaption": "sheep goat bleating at 0.42-2.42, 3.209-5.209, 5.845-7.845 and gunshot at 3.536-5.536", "frequencyCaption": "sheep goat bleating three times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1423.wav", "onoffCaption": "door slamming at 2.046-4.409 and cow mooing at 7.179-10.0", "frequencyCaption": "door slamming one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1439.wav", "onoffCaption": "thump thud at 1.856-6.306", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1489.wav", "onoffCaption": "door knocking at 0.208-3.976, 5.205-8.973", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1522.wav", "onoffCaption": "burping belching at 0.811-3.811, 5.905-8.428 and cat meowing at 0.94-4.285", "frequencyCaption": "burping belching two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1577.wav", "onoffCaption": "dog barking at 2.246-4.246", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1592.wav", "onoffCaption": "tapping clicking clanking at 1.029-4.469 and woman laughing at 6.167-9.448", "frequencyCaption": "tapping clicking clanking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1704.wav", "onoffCaption": "door slamming at 3.323-4.576, 5.335-6.588 and cow mooing at 3.682-6.651", "frequencyCaption": "door slamming two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1751.wav", "onoffCaption": "spraying at 0.112-1.196, 1.83-2.914 and duck quacking at 1.325-3.325 and train horn at 1.764-4.438", "frequencyCaption": "spraying two times and duck quacking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1826.wav", "onoffCaption": "explosion at 2.068-5.068 and cow mooing at 
7.619-10.0", "frequencyCaption": "explosion one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1873.wav", "onoffCaption": "spraying at 0.0-0.867, 1.694-2.561", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1896.wav", "onoffCaption": "cat meowing at 0.271-1.826 and duck quacking at 5.576-7.576", "frequencyCaption": "cat meowing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1968.wav", "onoffCaption": "duck quacking at 0.499-2.499, 4.923-6.923", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3058.wav", "onoffCaption": "woman laughing at 0.114-6.848, 7.984-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3116.wav", "onoffCaption": "cat meowing at 0.504-1.516, 3.439-4.451, 5.303-6.315 and train horn at 0.902-4.102 and duck quacking at 2.916-4.916", "frequencyCaption": "cat meowing three times and train horn one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3143.wav", "onoffCaption": "duck quacking at 1.039-3.039, 4.428-6.428 and sneeze at 3.454-5.767, 7.844-10.0", "frequencyCaption": "duck quacking two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3330.wav", "onoffCaption": "thump thud at 1.915-6.365", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3365.wav", "onoffCaption": "burping belching at 2.845-6.351, 7.13-9.237 and cow mooing at 6.748-9.717", "frequencyCaption": "burping belching two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3380.wav", "onoffCaption": "tapping clicking clanking at 0.218-3.658, 4.449-7.889 and burping belching at 3.457-5.564, 7.966-10.0", "frequencyCaption": "tapping clicking clanking two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_3515.wav", "onoffCaption": "car horn honking at 0.091-2.091, 
3.107-5.559", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3540.wav", "onoffCaption": "burping belching at 2.061-5.561", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3628.wav", "onoffCaption": "door slamming at 0.036-1.551, 2.263-3.402, 3.906-6.623", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3698.wav", "onoffCaption": "car horn honking at 0.33-3.243, 4.313-6.313 and door slamming at 8.776-9.276", "frequencyCaption": "car horn honking two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3733.wav", "onoffCaption": "whistling at 1.749-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3766.wav", "onoffCaption": "door slamming at 3.189-4.208, 5.391-7.391, 8.414-9.929", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3783.wav", "onoffCaption": "woman laughing at 0.348-2.767, 4.374-6.793", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3811.wav", "onoffCaption": "cow mooing at 0.547-3.529 and dog barking at 6.774-8.774", "frequencyCaption": "cow mooing one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3844.wav", "onoffCaption": "duck quacking at 0.575-2.575, 3.754-5.754 and spraying at 2.908-4.083, 5.036-6.211, 7.264-8.439", "frequencyCaption": "duck quacking two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_3945.wav", "onoffCaption": "burping belching at 2.053-8.14 and door knocking at 3.198-6.254", "frequencyCaption": "burping belching one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_34.wav", "onoffCaption": "train horn at 1.481-5.549, 7.631-9.823", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_61.wav", "onoffCaption": "duck quacking at 0.2-2.2", 
"frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_84.wav", "onoffCaption": "car horn honking at 0.489-4.143, 5.186-8.038", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_137.wav", "onoffCaption": "door slamming at 0.068-2.849", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_146.wav", "onoffCaption": "tapping clicking clanking at 1.273-4.713, 6.496-9.936 and spraying at 1.889-3.146, 3.685-4.536, 5.216-5.8", "frequencyCaption": "tapping clicking clanking two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_335.wav", "onoffCaption": "whistling at 0.294-8.679", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_360.wav", "onoffCaption": "gunshot at 2.465-4.971, 6.528-9.034", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_385.wav", "onoffCaption": "cow mooing at 3.411-6.421", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_510.wav", "onoffCaption": "whistling at 0.347-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_561.wav", "onoffCaption": "door slamming at 0.024-0.964, 2.21-3.975 and thump thud at 6.63-9.13", "frequencyCaption": "door slamming two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_678.wav", "onoffCaption": "burping belching at 0.375-3.554, 4.177-6.336", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_736.wav", "onoffCaption": "cat meowing at 1.867-2.877, 3.724-5.295", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_763.wav", "onoffCaption": "sheep goat bleating at 2.894-4.894 and cat meowing at 3.637-4.649", "frequencyCaption": "sheep goat bleating one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_786.wav", 
"onoffCaption": "door slamming at 3.21-4.734", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_814.wav", "onoffCaption": "spraying at 3.628-4.378, 5.721-6.471", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_841.wav", "onoffCaption": "train horn at 3.28-7.04", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1020.wav", "onoffCaption": "sneeze at 2.574-6.63, 7.459-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1090.wav", "onoffCaption": "door knocking at 3.538-5.602, 7.279-9.343", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1206.wav", "onoffCaption": "sheep goat bleating at 1.226-3.226", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1253.wav", "onoffCaption": "dog barking at 2.277-4.277, 6.028-8.028", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1348.wav", "onoffCaption": "car horn honking at 3.545-7.945", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1476.wav", "onoffCaption": "cow mooing at 1.587-4.569, 5.899-8.881", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1493.wav", "onoffCaption": "tapping clicking clanking at 0.435-3.875, 5.267-7.646 and spraying at 1.453-2.08", "frequencyCaption": "tapping clicking clanking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1538.wav", "onoffCaption": "door knocking at 0.508-2.91, 5.169-7.571", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1588.wav", "onoffCaption": "cow mooing at 0.188-3.17, 5.076-8.058", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1605.wav", "onoffCaption": "spraying at 3.885-5.58, 7.472-8.556", "frequencyCaption": 
"spraying two times"} +{"filepath": "data/multi_event_train/syn_1650.wav", "onoffCaption": "door knocking at 0.332-3.388, 4.519-7.366", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1869.wav", "onoffCaption": "door knocking at 0.021-2.148, 4.194-6.321, 7.719-9.846", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1927.wav", "onoffCaption": "duck quacking at 0.096-2.096, 3.435-5.435", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1972.wav", "onoffCaption": "train horn at 0.413-4.853", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1997.wav", "onoffCaption": "spraying at 1.763-3.02 and sheep goat bleating at 4.59-6.59", "frequencyCaption": "spraying one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3017.wav", "onoffCaption": "duck quacking at 0.399-2.399 and explosion at 0.665-5.665 and cat meowing at 7.386-8.386", "frequencyCaption": "duck quacking one times and explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3042.wav", "onoffCaption": "train horn at 0.39-8.59 and spraying at 2.788-3.692, 4.267-4.836, 5.639-6.147", "frequencyCaption": "train horn one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_3159.wav", "onoffCaption": "tapping clicking clanking at 0.107-3.547", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3231.wav", "onoffCaption": "burping belching at 0.186-5.787, 7.524-9.555 and door slamming at 2.231-3.996", "frequencyCaption": "burping belching two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3264.wav", "onoffCaption": "whistling at 2.172-5.047, 6.058-8.933", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3281.wav", "onoffCaption": "burping belching at 
0.199-3.639, 5.384-7.667 and cat meowing at 4.673-6.568, 7.795-9.69", "frequencyCaption": "burping belching two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3414.wav", "onoffCaption": "thump thud at 3.291-5.791, 7.021-9.521", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3441.wav", "onoffCaption": "spraying at 0.092-2.552, 4.511-5.161", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3632.wav", "onoffCaption": "thump thud at 1.952-4.452, 6.848-9.348", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3667.wav", "onoffCaption": "tapping clicking clanking at 3.039-6.479, 7.37-9.609", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3682.wav", "onoffCaption": "burping belching at 1.994-4.755, 5.399-8.16", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3729.wav", "onoffCaption": "thump thud at 2.368-4.83", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3799.wav", "onoffCaption": "woman laughing at 1.673-4.773, 5.556-8.656 and spraying at 2.654-3.738, 5.882-8.025", "frequencyCaption": "woman laughing two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3910.wav", "onoffCaption": "woman laughing at 0.775-3.194, 5.597-8.563", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3961.wav", "onoffCaption": "sneeze at 0.001-1.255, 1.77-3.024 and explosion at 6.263-10.0", "frequencyCaption": "sneeze two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_10.wav", "onoffCaption": "sneeze at 0.063-1.976, 2.801-4.714 and explosion at 7.673-10.0", "frequencyCaption": "sneeze two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_45.wav", "onoffCaption": "cow mooing at 0.355-3.337, 4.617-7.599", 
"frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_162.wav", "onoffCaption": "woman laughing at 1.442-4.496", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_187.wav", "onoffCaption": "burping belching at 0.292-7.269 and spraying at 6.756-7.34", "frequencyCaption": "burping belching one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_311.wav", "onoffCaption": "dog barking at 2.879-4.879, 5.486-7.486", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_344.wav", "onoffCaption": "gunshot at 3.671-5.671, 7.994-9.994", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_534.wav", "onoffCaption": "cow mooing at 0.076-5.056 and door knocking at 0.179-2.559 and woman laughing at 5.2-7.281", "frequencyCaption": "cow mooing one times and door knocking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_584.wav", "onoffCaption": "gunshot at 0.179-2.179 and burping belching at 0.417-2.647 and door slamming at 5.169-7.169", "frequencyCaption": "gunshot one times and burping belching one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_609.wav", "onoffCaption": "train horn at 0.029-2.684, 3.212-5.405 and door slamming at 2.16-3.138", "frequencyCaption": "train horn two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_712.wav", "onoffCaption": "burping belching at 0.041-2.076, 2.801-4.836, 5.602-7.637", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_747.wav", "onoffCaption": "duck quacking at 0.484-2.484, 3.334-5.334, 5.954-7.954", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_830.wav", "onoffCaption": "woman laughing at 0.458-2.574 and spraying at 0.798-1.32 and burping belching at 5.364-8.364", "frequencyCaption": "woman laughing one 
times and spraying one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_865.wav", "onoffCaption": "duck quacking at 2.366-4.366, 5.105-7.105 and cow mooing at 3.278-6.26", "frequencyCaption": "duck quacking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_880.wav", "onoffCaption": "car horn honking at 1.336-4.249 and spraying at 2.806-4.063", "frequencyCaption": "car horn honking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1004.wav", "onoffCaption": "whistling at 3.77-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1051.wav", "onoffCaption": "car horn honking at 2.969-5.316, 6.834-8.89", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1222.wav", "onoffCaption": "burping belching at 3.161-7.497", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1277.wav", "onoffCaption": "explosion at 0.578-5.578", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1292.wav", "onoffCaption": "dog barking at 0.479-2.479, 4.616-6.616", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1339.wav", "onoffCaption": "train horn at 1.157-7.217", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1389.wav", "onoffCaption": "cat meowing at 0.527-1.674, 2.783-3.93, 4.549-5.696 and gunshot at 2.167-4.167, 6.628-8.628", "frequencyCaption": "cat meowing three times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1407.wav", "onoffCaption": "gunshot at 2.758-4.758, 6.25-8.25", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1452.wav", "onoffCaption": "thump thud at 0.107-4.557, 6.901-10.0 and gunshot at 0.577-2.596, 4.566-6.585", "frequencyCaption": "thump thud two times and gunshot two times"} +{"filepath": 
"data/multi_event_train/syn_1549.wav", "onoffCaption": "burping belching at 0.348-3.348 and door knocking at 2.068-4.445, 5.651-8.028", "frequencyCaption": "burping belching one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1621.wav", "onoffCaption": "door knocking at 1.772-6.211", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1674.wav", "onoffCaption": "thump thud at 3.223-5.685", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1691.wav", "onoffCaption": "woman laughing at 0.558-2.752 and duck quacking at 1.9-3.9, 5.909-7.909", "frequencyCaption": "woman laughing one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1818.wav", "onoffCaption": "woman laughing at 1.357-4.452, 5.885-8.98", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1903.wav", "onoffCaption": "spraying at 2.334-2.842, 3.905-4.413, 5.561-6.069", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1956.wav", "onoffCaption": "sheep goat bleating at 2.9-4.9 and dog barking at 2.959-4.959", "frequencyCaption": "sheep goat bleating one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3033.wav", "onoffCaption": "thump thud at 1.152-5.527", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3066.wav", "onoffCaption": "car horn honking at 0.514-3.027 and sheep goat bleating at 0.548-2.548", "frequencyCaption": "car horn honking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3083.wav", "onoffCaption": "whistling at 3.592-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3128.wav", "onoffCaption": "burping belching at 0.775-4.335 and tapping clicking clanking at 2.973-6.413", "frequencyCaption": "burping belching one times and tapping clicking clanking 
one times"} +{"filepath": "data/multi_event_train/syn_3198.wav", "onoffCaption": "cow mooing at 0.412-5.392, 6.283-10.0 and dog barking at 0.442-2.442, 3.318-5.318, 6.398-8.398", "frequencyCaption": "cow mooing two times and dog barking three times"} +{"filepath": "data/multi_event_train/syn_3215.wav", "onoffCaption": "dog barking at 0.091-2.091, 3.213-5.213, 6.426-8.426", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_3240.wav", "onoffCaption": "tapping clicking clanking at 1.525-4.965, 5.786-9.226", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3430.wav", "onoffCaption": "whistling at 1.553-9.491", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3465.wav", "onoffCaption": "duck quacking at 1.309-3.309", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3480.wav", "onoffCaption": "spraying at 0.255-2.383, 4.595-6.723", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3616.wav", "onoffCaption": "train horn at 0.002-2.482, 3.51-5.99 and door slamming at 0.294-1.669", "frequencyCaption": "train horn two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3643.wav", "onoffCaption": "cow mooing at 0.5-3.469", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3758.wav", "onoffCaption": "door knocking at 0.079-2.847, 4.619-6.84 and dog barking at 5.333-7.333", "frequencyCaption": "door knocking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3934.wav", "onoffCaption": "door knocking at 0.916-4.684", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3984.wav", "onoffCaption": "train horn at 1.384-4.744 and thump thud at 2.294-4.794, 5.817-7.852 and car horn honking at 3.846-6.759", "frequencyCaption": "train horn one times and thump thud 
two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_178.wav", "onoffCaption": "cow mooing at 3.381-7.81", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_210.wav", "onoffCaption": "whistling at 0.071-2.08, 2.921-4.93 and explosion at 0.913-3.913, 5.782-8.782", "frequencyCaption": "whistling two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_245.wav", "onoffCaption": "explosion at 1.121-4.177, 6.294-9.35", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_435.wav", "onoffCaption": "woman laughing at 0.561-3.199, 4.189-6.827 and gunshot at 3.425-5.425", "frequencyCaption": "woman laughing two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_460.wav", "onoffCaption": "duck quacking at 3.484-5.484, 6.175-8.175", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_485.wav", "onoffCaption": "tapping clicking clanking at 0.224-3.664 and door knocking at 6.512-9.28", "frequencyCaption": "tapping clicking clanking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_613.wav", "onoffCaption": "whistling at 0.43-8.085 and dog barking at 2.187-4.625, 5.225-7.225 and cow mooing at 3.707-6.689", "frequencyCaption": "whistling one times and dog barking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_646.wav", "onoffCaption": "train horn at 3.064-7.922", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_708.wav", "onoffCaption": "gunshot at 2.261-4.767, 7.175-9.175", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_931.wav", "onoffCaption": "thump thud at 0.167-2.667", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_964.wav", "onoffCaption": "sheep goat bleating at 1.528-3.528, 4.975-6.975", "frequencyCaption": "sheep goat 
bleating two times"} +{"filepath": "data/multi_event_train/syn_981.wav", "onoffCaption": "car horn honking at 0.323-4.645 and gunshot at 7.394-9.394", "frequencyCaption": "car horn honking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1105.wav", "onoffCaption": "sheep goat bleating at 2.371-4.371, 6.611-8.611", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1150.wav", "onoffCaption": "duck quacking at 2.106-4.106, 5.268-7.268", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1238.wav", "onoffCaption": "sheep goat bleating at 3.412-6.492", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1288.wav", "onoffCaption": "cat meowing at 0.746-2.363", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1323.wav", "onoffCaption": "dog barking at 0.979-2.979, 4.467-6.467, 7.269-9.269", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_1376.wav", "onoffCaption": "explosion at 2.257-7.257", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1393.wav", "onoffCaption": "tapping clicking clanking at 0.14-3.58, 5.201-7.253 and duck quacking at 2.426-4.426", "frequencyCaption": "tapping clicking clanking two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1448.wav", "onoffCaption": "woman laughing at 0.367-2.851, 5.089-7.789", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1506.wav", "onoffCaption": "dog barking at 0.726-2.726, 4.078-6.078", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1553.wav", "onoffCaption": "tapping clicking clanking at 0.156-3.596, 4.737-7.641", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1720.wav", "onoffCaption": "gunshot 
at 1.155-3.155, 5.398-7.528 and thump thud at 3.363-7.738", "frequencyCaption": "gunshot two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1775.wav", "onoffCaption": "door knocking at 2.969-5.816, 7.75-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1790.wav", "onoffCaption": "door knocking at 0.011-2.391, 4.036-6.546, 7.09-9.7 and train horn at 1.029-5.469 and thump thud at 1.711-4.211", "frequencyCaption": "door knocking three times and train horn one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1802.wav", "onoffCaption": "sneeze at 0.384-2.697, 4.475-6.788", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1857.wav", "onoffCaption": "whistling at 1.733-4.608", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1919.wav", "onoffCaption": "sheep goat bleating at 0.776-2.776, 3.517-5.517, 6.164-8.164 and car horn honking at 4.011-8.411", "frequencyCaption": "sheep goat bleating three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3029.wav", "onoffCaption": "cow mooing at 3.488-6.47, 7.941-10.0 and burping belching at 3.869-6.869", "frequencyCaption": "cow mooing two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3099.wav", "onoffCaption": "gunshot at 1.876-3.876, 4.38-6.38 and woman laughing at 2.584-4.778", "frequencyCaption": "gunshot two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3132.wav", "onoffCaption": "door knocking at 0.692-5.525, 6.427-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3167.wav", "onoffCaption": "woman laughing at 3.333-10.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3182.wav", "onoffCaption": "tapping clicking clanking at 1.809-5.249, 6.999-9.867", "frequencyCaption": 
"tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3314.wav", "onoffCaption": "sheep goat bleating at 1.93-3.93", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3341.wav", "onoffCaption": "spraying at 3.082-5.666, 7.209-9.793", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3531.wav", "onoffCaption": "sneeze at 0.185-3.26, 3.84-6.316, 7.242-10.0", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3564.wav", "onoffCaption": "sheep goat bleating at 0.809-2.809, 4.118-6.65, 7.416-9.416", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3581.wav", "onoffCaption": "explosion at 0.101-2.189", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3659.wav", "onoffCaption": "sneeze at 0.638-2.751, 3.548-5.661, 6.812-8.925", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3717.wav", "onoffCaption": "door knocking at 1.199-3.359, 4.572-7.531", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3742.wav", "onoffCaption": "cat meowing at 0.275-1.286", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3835.wav", "onoffCaption": "cat meowing at 2.698-3.707, 4.225-6.202", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3860.wav", "onoffCaption": "duck quacking at 0.859-2.859, 3.637-5.637, 6.544-8.544", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3885.wav", "onoffCaption": "cow mooing at 3.53-8.51", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2120.wav", "onoffCaption": "spraying at 0.057-2.449, 3.255-3.763", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2175.wav", "onoffCaption": 
"spraying at 2.071-3.071 and sheep goat bleating at 3.431-7.351", "frequencyCaption": "spraying one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2190.wav", "onoffCaption": "train horn at 1.771-5.305", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2248.wav", "onoffCaption": "explosion at 1.654-4.372, 5.029-7.747 and car horn honking at 2.113-4.626, 5.21-7.675 and burping belching at 5.214-7.54", "frequencyCaption": "explosion two times and car horn honking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2306.wav", "onoffCaption": "cow mooing at 0.725-3.694, 6.08-8.942", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2353.wav", "onoffCaption": "car horn honking at 1.862-3.862, 4.452-6.452, 7.24-9.24", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_2438.wav", "onoffCaption": "sheep goat bleating at 1.107-4.107 and duck quacking at 3.052-5.052", "frequencyCaption": "sheep goat bleating one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2477.wav", "onoffCaption": "whistling at 2.127-5.002, 5.545-8.42", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2488.wav", "onoffCaption": "sheep goat bleating at 0.735-2.735, 3.386-5.386 and door slamming at 1.8-3.033, 3.723-4.956 and tapping clicking clanking at 2.198-5.638, 6.329-9.769", "frequencyCaption": "sheep goat bleating two times and door slamming two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2523.wav", "onoffCaption": "woman laughing at 2.767-4.967, 5.806-8.006", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2576.wav", "onoffCaption": "explosion at 0.277-2.868 and cat meowing at 6.026-10.0", "frequencyCaption": "explosion one times and cat meowing one times"} 
+{"filepath": "data/multi_event_train/syn_2593.wav", "onoffCaption": "dog barking at 1.825-3.825, 4.517-6.517", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2705.wav", "onoffCaption": "woman laughing at 0.971-4.071 and explosion at 7.935-10.0", "frequencyCaption": "woman laughing one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2750.wav", "onoffCaption": "gunshot at 2.845-4.845, 5.516-7.516", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2827.wav", "onoffCaption": "burping belching at 0.845-3.171", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2872.wav", "onoffCaption": "door slamming at 3.272-6.233", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_2897.wav", "onoffCaption": "car horn honking at 1.909-5.75, 7.713-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2969.wav", "onoffCaption": "duck quacking at 0.892-2.892", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2996.wav", "onoffCaption": "dog barking at 2.902-4.902, 7.003-9.003", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4078.wav", "onoffCaption": "door slamming at 0.238-0.738", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4136.wav", "onoffCaption": "burping belching at 3.19-7.213 and duck quacking at 3.224-5.224, 5.953-7.953", "frequencyCaption": "burping belching one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4163.wav", "onoffCaption": "train horn at 1.452-4.332 and cow mooing at 6.448-9.746", "frequencyCaption": "train horn one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4186.wav", "onoffCaption": "car horn honking at 1.257-3.604 and dog barking at 6.705-10.0", 
"frequencyCaption": "car horn honking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4310.wav", "onoffCaption": "train horn at 2.161-6.342, 6.935-9.277", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4345.wav", "onoffCaption": "gunshot at 0.772-2.772", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4535.wav", "onoffCaption": "sheep goat bleating at 2.485-6.125", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4560.wav", "onoffCaption": "burping belching at 0.436-3.197 and gunshot at 1.84-4.346", "frequencyCaption": "burping belching one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4585.wav", "onoffCaption": "thump thud at 0.62-4.538, 6.81-9.31", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4608.wav", "onoffCaption": "whistling at 3.318-7.802", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4713.wav", "onoffCaption": "train horn at 0.267-3.427, 5.079-8.239", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4746.wav", "onoffCaption": "gunshot at 1.968-3.968, 4.801-6.801", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4831.wav", "onoffCaption": "tapping clicking clanking at 0.749-4.189", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4864.wav", "onoffCaption": "sheep goat bleating at 0.308-2.308, 4.509-6.509", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4881.wav", "onoffCaption": "sneeze at 0.109-2.512, 3.965-6.397 and whistling at 2.282-7.782", "frequencyCaption": "sneeze two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_6.wav", "onoffCaption": "thump thud at 0.14-3.807, 5.41-9.077 and spraying at 
0.454-0.954, 1.799-2.65, 4.17-4.745", "frequencyCaption": "thump thud two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_2021.wav", "onoffCaption": "door knocking at 0.429-5.429, 6.778-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2074.wav", "onoffCaption": "whistling at 0.727-5.211, 6.879-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2091.wav", "onoffCaption": "spraying at 1.831-4.85, 6.142-7.837, 8.47-9.12", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2207.wav", "onoffCaption": "thump thud at 2.65-5.15", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2252.wav", "onoffCaption": "explosion at 0.012-3.565, 5.39-7.843", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2349.wav", "onoffCaption": "thump thud at 0.309-2.809, 3.923-6.423, 7.405-9.905", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_2422.wav", "onoffCaption": "car horn honking at 2.361-6.761, 7.274-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2453.wav", "onoffCaption": "thump thud at 3.225-5.687, 6.839-9.605", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2492.wav", "onoffCaption": "explosion at 0.455-5.455 and burping belching at 2.437-6.306, 7.467-9.59", "frequencyCaption": "explosion one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_2539.wav", "onoffCaption": "sneeze at 0.488-2.612, 3.718-6.686, 7.684-9.212", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2548.wav", "onoffCaption": "door knocking at 2.153-4.616, 5.711-8.174", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2589.wav", "onoffCaption": "gunshot at 
3.892-5.892", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2604.wav", "onoffCaption": "thump thud at 0.045-3.092, 3.677-5.905 and train horn at 1.656-4.856, 5.734-8.934", "frequencyCaption": "thump thud two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2651.wav", "onoffCaption": "explosion at 2.423-5.151, 6.028-8.756", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2868.wav", "onoffCaption": "thump thud at 0.855-5.23, 6.386-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2926.wav", "onoffCaption": "dog barking at 0.219-3.14, 4.211-7.132", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2973.wav", "onoffCaption": "tapping clicking clanking at 2.703-6.143, 7.187-9.19", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4037.wav", "onoffCaption": "train horn at 1.92-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4062.wav", "onoffCaption": "cat meowing at 0.012-1.596, 2.868-3.877, 5.167-7.299 and dog barking at 6.274-8.274", "frequencyCaption": "cat meowing three times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4087.wav", "onoffCaption": "explosion at 2.553-6.106, 7.68-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4179.wav", "onoffCaption": "duck quacking at 2.786-4.786, 5.324-7.324", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4211.wav", "onoffCaption": "thump thud at 0.315-2.654, 4.121-6.645 and spraying at 5.969-6.75", "frequencyCaption": "thump thud two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4244.wav", "onoffCaption": "car horn honking at 0.049-3.636 and dog barking at 0.885-2.885 and spraying at 1.371-2.435", "frequencyCaption": "car 
horn honking one times and dog barking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4434.wav", "onoffCaption": "sneeze at 3.28-5.519, 7.824-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4461.wav", "onoffCaption": "cat meowing at 3.162-4.311, 5.935-6.946", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4484.wav", "onoffCaption": "door knocking at 3.211-5.674, 6.972-9.435", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4612.wav", "onoffCaption": "door knocking at 0.089-4.528, 5.195-9.634", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4647.wav", "onoffCaption": "sneeze at 0.569-2.156, 3.144-4.731, 5.782-7.369 and gunshot at 5.878-7.878", "frequencyCaption": "sneeze three times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4709.wav", "onoffCaption": "whistling at 0.206-6.583, 7.637-10.0 and door knocking at 2.165-4.735", "frequencyCaption": "whistling two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4930.wav", "onoffCaption": "explosion at 2.446-7.446", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4965.wav", "onoffCaption": "explosion at 3.189-6.245, 7.568-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4980.wav", "onoffCaption": "cat meowing at 2.685-3.712 and door slamming at 2.893-4.032, 6.014-7.153", "frequencyCaption": "cat meowing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2005.wav", "onoffCaption": "thump thud at 0.343-4.01, 4.582-7.082 and car horn honking at 1.183-5.583, 7.486-10.0", "frequencyCaption": "thump thud two times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2050.wav", "onoffCaption": "sneeze at 2.987-4.09 and spraying at 4.169-5.073, 5.726-6.327, 
7.545-8.629", "frequencyCaption": "sneeze one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_2223.wav", "onoffCaption": "duck quacking at 0.332-2.332, 2.942-4.942, 6.627-8.627", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2239.wav", "onoffCaption": "sheep goat bleating at 0.339-2.339, 3.156-5.156", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2276.wav", "onoffCaption": "thump thud at 0.322-4.697, 6.983-9.599", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2293.wav", "onoffCaption": "car horn honking at 0.996-3.461, 4.536-7.001, 7.588-10.0 and tapping clicking clanking at 2.689-6.129", "frequencyCaption": "car horn honking three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2322.wav", "onoffCaption": "thump thud at 0.418-3.465, 4.569-7.161", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2338.wav", "onoffCaption": "spraying at 0.027-1.513, 3.975-5.232, 7.548-8.723 and burping belching at 2.07-4.593", "frequencyCaption": "spraying three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2388.wav", "onoffCaption": "woman laughing at 0.794-3.002, 4.227-6.435", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2406.wav", "onoffCaption": "car horn honking at 0.603-2.603 and train horn at 3.813-6.973", "frequencyCaption": "car horn honking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2620.wav", "onoffCaption": "cat meowing at 2.446-4.582, 5.348-7.484", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2675.wav", "onoffCaption": "dog barking at 2.962-4.962, 7.387-9.387", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2690.wav", "onoffCaption": "duck 
quacking at 0.393-2.393, 3.405-5.405, 5.915-7.915", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2774.wav", "onoffCaption": "sheep goat bleating at 2.619-4.619, 5.242-7.242", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2819.wav", "onoffCaption": "sheep goat bleating at 0.032-2.032, 3.521-5.521 and door knocking at 0.864-3.984, 6.406-9.526", "frequencyCaption": "sheep goat bleating two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2902.wav", "onoffCaption": "sneeze at 0.001-2.462, 4.092-6.553", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2957.wav", "onoffCaption": "door slamming at 0.986-3.865, 4.823-7.702", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4013.wav", "onoffCaption": "tapping clicking clanking at 0.088-3.528, 5.482-8.922", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4046.wav", "onoffCaption": "cow mooing at 0.641-5.07 and door slamming at 1.475-3.24, 4.431-6.196, 6.906-8.671", "frequencyCaption": "cow mooing one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_4108.wav", "onoffCaption": "spraying at 1.708-2.649, 4.881-5.508", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4112.wav", "onoffCaption": "sneeze at 3.141-4.418", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4235.wav", "onoffCaption": "car horn honking at 0.77-5.092, 6.546-9.404", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4260.wav", "onoffCaption": "woman laughing at 0.463-3.517, 5.483-8.537", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4285.wav", "onoffCaption": "cow mooing at 0.753-3.722, 4.97-7.106", "frequencyCaption": "cow mooing 
two times"} +{"filepath": "data/multi_event_train/syn_4410.wav", "onoffCaption": "woman laughing at 0.635-7.369, 7.95-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4445.wav", "onoffCaption": "whistling at 2.422-8.914", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4544.wav", "onoffCaption": "spraying at 1.576-2.326, 2.849-3.599, 4.646-5.396 and gunshot at 1.949-3.949, 5.622-7.622", "frequencyCaption": "spraying three times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4636.wav", "onoffCaption": "train horn at 0.178-2.852", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4663.wav", "onoffCaption": "sneeze at 2.824-5.484, 6.738-7.832 and duck quacking at 2.914-4.914", "frequencyCaption": "sneeze two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4686.wav", "onoffCaption": "car horn honking at 1.868-5.363, 6.193-9.688", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4778.wav", "onoffCaption": "dog barking at 1.856-3.856, 4.606-6.606, 7.924-9.924 and tapping clicking clanking at 2.775-6.215", "frequencyCaption": "dog barking three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4840.wav", "onoffCaption": "whistling at 0.068-4.552", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4914.wav", "onoffCaption": "dog barking at 2.546-4.546, 5.133-7.484", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4941.wav", "onoffCaption": "duck quacking at 0.364-2.364, 4.648-6.648", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2104.wav", "onoffCaption": "car horn honking at 2.211-4.558, 6.798-9.145", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2151.wav", 
"onoffCaption": "gunshot at 2.256-4.256, 4.891-6.891", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2156.wav", "onoffCaption": "dog barking at 0.052-2.052, 2.948-4.948, 5.728-7.728 and door slamming at 1.308-4.282, 5.326-8.3", "frequencyCaption": "dog barking three times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2289.wav", "onoffCaption": "spraying at 1.58-2.312, 3.778-4.51, 5.034-5.766 and gunshot at 2.716-4.716, 5.541-7.541", "frequencyCaption": "spraying three times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2377.wav", "onoffCaption": "cat meowing at 3.482-7.842", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2392.wav", "onoffCaption": "sneeze at 3.873-5.869, 6.876-8.872", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2449.wav", "onoffCaption": "train horn at 0.152-2.632 and spraying at 0.716-1.583 and woman laughing at 5.347-7.702", "frequencyCaption": "train horn one times and spraying one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2500.wav", "onoffCaption": "door slamming at 1.297-2.135 and spraying at 4.361-4.869, 7.135-7.704", "frequencyCaption": "door slamming one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2507.wav", "onoffCaption": "door knocking at 0.199-2.387", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2552.wav", "onoffCaption": "sheep goat bleating at 0.008-2.008 and woman laughing at 0.341-3.441, 4.758-7.858", "frequencyCaption": "sheep goat bleating one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2668.wav", "onoffCaption": "tapping clicking clanking at 2.077-5.517, 6.443-8.613 and cow mooing at 5.63-8.612", "frequencyCaption": "tapping clicking clanking two times and cow mooing one times"} +{"filepath": 
"data/multi_event_train/syn_2721.wav", "onoffCaption": "gunshot at 3.487-5.487", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2773.wav", "onoffCaption": "door knocking at 0.433-2.896 and door slamming at 3.612-4.987, 7.366-8.741", "frequencyCaption": "door knocking one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2791.wav", "onoffCaption": "door knocking at 0.652-4.402", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2803.wav", "onoffCaption": "thump thud at 0.011-2.35 and spraying at 4.888-6.145", "frequencyCaption": "thump thud one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2804.wav", "onoffCaption": "cow mooing at 0.049-4.478, 6.606-9.378 and thump thud at 2.151-4.922, 6.328-8.828 and burping belching at 3.01-5.108", "frequencyCaption": "cow mooing two times and thump thud two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2856.wav", "onoffCaption": "duck quacking at 1.872-3.872, 6.151-8.151", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2918.wav", "onoffCaption": "gunshot at 3.13-5.13, 6.539-8.539", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4009.wav", "onoffCaption": "whistling at 1.047-6.547, 7.7-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4147.wav", "onoffCaption": "whistling at 1.949-7.124", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4298.wav", "onoffCaption": "train horn at 0.227-2.667, 4.148-6.296", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4334.wav", "onoffCaption": "explosion at 0.551-3.551 and cat meowing at 0.943-3.079", "frequencyCaption": "explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4361.wav", "onoffCaption": "cow 
mooing at 2.731-5.7, 6.649-9.618", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4366.wav", "onoffCaption": "duck quacking at 0.872-2.872, 4.328-6.328 and explosion at 1.918-4.79, 5.543-8.271", "frequencyCaption": "duck quacking two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_4383.wav", "onoffCaption": "car horn honking at 1.956-5.175, 5.822-8.12 and burping belching at 4.017-7.017", "frequencyCaption": "car horn honking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4384.wav", "onoffCaption": "door knocking at 0.945-4.001 and train horn at 6.737-8.892", "frequencyCaption": "door knocking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4511.wav", "onoffCaption": "car horn honking at 3.701-8.213", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4679.wav", "onoffCaption": "thump thud at 0.164-4.614, 5.439-9.889", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4730.wav", "onoffCaption": "train horn at 1.755-4.429, 5.275-7.949", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4737.wav", "onoffCaption": "gunshot at 1.048-3.048, 4.748-6.748 and spraying at 3.137-4.201 and sheep goat bleating at 3.936-5.936", "frequencyCaption": "gunshot two times and spraying one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4762.wav", "onoffCaption": "door slamming at 2.032-4.49, 6.927-8.927", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4787.wav", "onoffCaption": "woman laughing at 0.311-2.511, 3.934-6.526 and sneeze at 2.213-3.8, 6.089-7.676", "frequencyCaption": "woman laughing two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_4815.wav", "onoffCaption": "sneeze at 0.265-1.499 and whistling at 5.673-7.902", 
"frequencyCaption": "sneeze one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4847.wav", "onoffCaption": "whistling at 0.013-7.763 and tapping clicking clanking at 0.111-3.551 and sneeze at 1.548-3.135, 4.059-5.646", "frequencyCaption": "whistling one times and tapping clicking clanking one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_2002.wav", "onoffCaption": "woman laughing at 3.333-10.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2018.wav", "onoffCaption": "spraying at 0.405-1.032, 1.643-2.27, 3.029-3.656 and explosion at 0.527-5.527, 6.694-10.0", "frequencyCaption": "spraying three times and explosion two times"} +{"filepath": "data/multi_event_train/syn_2103.wav", "onoffCaption": "sneeze at 1.671-3.984, 5.21-7.523 and burping belching at 1.76-3.99, 5.309-7.346 and door slamming at 4.12-6.837", "frequencyCaption": "sneeze two times and burping belching two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2119.wav", "onoffCaption": "dog barking at 2.203-4.203, 6.323-8.323", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2271.wav", "onoffCaption": "tapping clicking clanking at 0.592-4.032, 5.924-9.364", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2294.wav", "onoffCaption": "gunshot at 0.764-2.764 and spraying at 5.053-6.134, 8.451-9.532", "frequencyCaption": "gunshot one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2325.wav", "onoffCaption": "dog barking at 0.642-2.642, 3.414-5.414, 6.681-8.681 and cow mooing at 4.632-7.601", "frequencyCaption": "dog barking three times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2370.wav", "onoffCaption": "thump thud at 0.04-2.502", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2395.wav", "onoffCaption": 
"spraying at 0.189-0.697, 1.468-1.976, 2.924-3.432 and burping belching at 1.098-3.328", "frequencyCaption": "spraying three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2454.wav", "onoffCaption": "cat meowing at 0.477-2.425, 3.605-5.553, 7.46-9.408", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2555.wav", "onoffCaption": "cat meowing at 0.068-1.484, 3.808-5.768", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2627.wav", "onoffCaption": "burping belching at 0.031-4.031, 5.198-9.198", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2697.wav", "onoffCaption": "whistling at 2.205-4.214, 4.925-6.934 and sneeze at 4.976-6.15, 8.57-9.744", "frequencyCaption": "whistling two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_2726.wav", "onoffCaption": "sneeze at 1.043-3.446 and thump thud at 6.486-8.825", "frequencyCaption": "sneeze one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2796.wav", "onoffCaption": "spraying at 0.031-1.206, 2.852-4.099, 5.641-8.225 and tapping clicking clanking at 6.006-9.446", "frequencyCaption": "spraying three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2851.wav", "onoffCaption": "burping belching at 0.624-4.13, 5.824-9.33", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2950.wav", "onoffCaption": "explosion at 3.244-5.973 and sheep goat bleating at 3.421-5.421", "frequencyCaption": "explosion one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4014.wav", "onoffCaption": "door knocking at 2.683-5.063, 7.022-9.402", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4041.wav", "onoffCaption": "explosion at 2.544-7.544", "frequencyCaption": "explosion one times"} 
+{"filepath": "data/multi_event_train/syn_4115.wav", "onoffCaption": "cow mooing at 3.295-8.275", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4140.wav", "onoffCaption": "door slamming at 1.027-3.906 and whistling at 6.755-9.73", "frequencyCaption": "door slamming one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4228.wav", "onoffCaption": "woman laughing at 0.009-2.107, 4.329-6.427", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4232.wav", "onoffCaption": "cat meowing at 0.174-1.449, 2.265-3.54, 4.479-5.754", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4329.wav", "onoffCaption": "burping belching at 2.373-5.873", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4333.wav", "onoffCaption": "door knocking at 0.096-4.629 and cow mooing at 2.461-5.471, 6.445-9.405", "frequencyCaption": "door knocking one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4417.wav", "onoffCaption": "explosion at 0.436-3.027, 4.242-6.244, 7.375-10.0", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_4458.wav", "onoffCaption": "door slamming at 2.041-3.806", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4516.wav", "onoffCaption": "thump thud at 3.061-6.108, 6.938-9.985", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4543.wav", "onoffCaption": "dog barking at 3.213-5.213 and sheep goat bleating at 7.736-9.736", "frequencyCaption": "dog barking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4664.wav", "onoffCaption": "car horn honking at 0.073-2.586 and cat meowing at 0.304-1.304, 2.382-3.382, 4.485-5.485", "frequencyCaption": "car horn honking one times and cat meowing three times"} +{"filepath": 
"data/multi_event_train/syn_4681.wav", "onoffCaption": "explosion at 1.201-6.201, 7.318-10.0 and train horn at 3.436-6.636", "frequencyCaption": "explosion two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4765.wav", "onoffCaption": "sheep goat bleating at 0.926-2.926, 3.878-6.477, 7.176-9.176", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4780.wav", "onoffCaption": "cat meowing at 0.028-1.303, 2.305-3.876, 5.456-6.483", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4808.wav", "onoffCaption": "duck quacking at 2.642-4.642, 6.256-8.256", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4812.wav", "onoffCaption": "burping belching at 3.173-7.509", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4909.wav", "onoffCaption": "train horn at 0.691-4.891, 5.967-8.26", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4913.wav", "onoffCaption": "sneeze at 0.276-1.51 and whistling at 5.819-10.0", "frequencyCaption": "sneeze one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2026.wav", "onoffCaption": "sheep goat bleating at 0.154-2.154, 3.022-5.022, 6.172-8.172", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2057.wav", "onoffCaption": "burping belching at 2.683-6.227, 6.907-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2200.wav", "onoffCaption": "whistling at 2.022-9.772", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2224.wav", "onoffCaption": "burping belching at 2.351-4.382", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2255.wav", "onoffCaption": "tapping clicking clanking at 2.125-5.565 and sneeze at 2.99-5.466, 
6.608-7.772", "frequencyCaption": "tapping clicking clanking one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_2401.wav", "onoffCaption": "cow mooing at 0.674-5.103, 6.783-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2470.wav", "onoffCaption": "door slamming at 0.396-1.771, 2.767-4.142", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2495.wav", "onoffCaption": "door slamming at 0.565-2.928, 3.911-4.762, 6.499-7.439", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2603.wav", "onoffCaption": "sheep goat bleating at 1.829-3.829 and cow mooing at 6.879-9.848", "frequencyCaption": "sheep goat bleating one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2672.wav", "onoffCaption": "cat meowing at 1.868-3.485 and door slamming at 2.29-4.516, 6.876-9.102", "frequencyCaption": "cat meowing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2718.wav", "onoffCaption": "sneeze at 0.124-1.581, 3.955-5.483", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2769.wav", "onoffCaption": "car horn honking at 0.692-4.941", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2905.wav", "onoffCaption": "burping belching at 0.043-4.442, 6.234-10.0 and sheep goat bleating at 0.792-2.792, 3.816-5.816, 6.776-8.776", "frequencyCaption": "burping belching two times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2974.wav", "onoffCaption": "sheep goat bleating at 3.18-6.5", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2991.wav", "onoffCaption": "thump thud at 0.605-4.272 and tapping clicking clanking at 0.78-4.22 and sheep goat bleating at 1.8-3.8, 4.865-6.865, 7.674-9.674", "frequencyCaption": "thump thud one times and tapping 
clicking clanking one times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4030.wav", "onoffCaption": "sheep goat bleating at 2.192-4.192, 4.895-6.895 and whistling at 3.502-9.002", "frequencyCaption": "sheep goat bleating two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4065.wav", "onoffCaption": "thump thud at 0.945-3.992, 5.083-8.13", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4080.wav", "onoffCaption": "whistling at 1.188-3.197, 5.237-7.246 and car horn honking at 3.639-7.961", "frequencyCaption": "whistling two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4216.wav", "onoffCaption": "woman laughing at 0.33-3.611, 4.272-6.404", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4267.wav", "onoffCaption": "tapping clicking clanking at 2.832-6.272 and car horn honking at 5.367-7.832", "frequencyCaption": "tapping clicking clanking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4282.wav", "onoffCaption": "dog barking at 1.217-3.217 and train horn at 6.035-9.515", "frequencyCaption": "dog barking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4399.wav", "onoffCaption": "door knocking at 1.174-3.637 and sheep goat bleating at 6.536-10.0", "frequencyCaption": "door knocking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4433.wav", "onoffCaption": "woman laughing at 0.612-2.98, 5.346-7.714", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4442.wav", "onoffCaption": "door knocking at 2.41-4.631, 5.512-7.733", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4466.wav", "onoffCaption": "thump thud at 0.065-2.565, 3.598-5.778, 6.412-8.912 and gunshot at 2.169-4.169, 4.865-6.865, 7.796-9.796", 
"frequencyCaption": "thump thud three times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_4483.wav", "onoffCaption": "cow mooing at 0.202-3.171, 4.097-7.066, 7.97-10.0", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/multi_event_train/syn_4528.wav", "onoffCaption": "burping belching at 0.152-3.152, 5.315-8.315 and woman laughing at 3.008-5.613 and door slamming at 3.552-4.785, 6.323-7.556", "frequencyCaption": "burping belching two times and woman laughing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4559.wav", "onoffCaption": "tapping clicking clanking at 0.302-3.742, 4.486-6.714 and burping belching at 1.799-6.799", "frequencyCaption": "tapping clicking clanking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4598.wav", "onoffCaption": "duck quacking at 1.01-3.01, 3.848-5.848, 6.532-8.532", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4631.wav", "onoffCaption": "burping belching at 0.8-3.621, 5.735-8.514", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4640.wav", "onoffCaption": "cat meowing at 2.515-3.825, 4.843-6.593", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4937.wav", "onoffCaption": "spraying at 0.644-1.891, 3.986-4.986", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4946.wav", "onoffCaption": "car horn honking at 0.421-3.347, 3.927-6.853, 7.568-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_4987.wav", "onoffCaption": "gunshot at 0.526-2.526 and dog barking at 5.505-7.505", "frequencyCaption": "gunshot one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1.wav", "onoffCaption": "burping belching at 1.324-8.492", "frequencyCaption": "burping belching one times"} +{"filepath": 
"data/multi_event_train/syn_2069.wav", "onoffCaption": "sheep goat bleating at 0.568-2.568, 3.305-5.305 and door slamming at 7.71-8.561", "frequencyCaption": "sheep goat bleating two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2073.wav", "onoffCaption": "dog barking at 0.81-2.81, 4.698-6.698 and sneeze at 1.441-3.687, 5.397-7.873", "frequencyCaption": "dog barking two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_2096.wav", "onoffCaption": "sneeze at 0.54-1.643 and car horn honking at 0.905-5.305, 6.304-10.0", "frequencyCaption": "sneeze one times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2168.wav", "onoffCaption": "woman laughing at 2.695-5.976, 6.949-9.534", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2172.wav", "onoffCaption": "woman laughing at 2.653-6.229", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2197.wav", "onoffCaption": "woman laughing at 0.631-3.05, 5.474-7.715", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2301.wav", "onoffCaption": "door slamming at 0.391-2.309, 3.454-5.372 and sneeze at 2.098-5.208", "frequencyCaption": "door slamming two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2425.wav", "onoffCaption": "car horn honking at 0.069-3.564, 5.485-8.276", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2524.wav", "onoffCaption": "cow mooing at 3.119-7.548", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2656.wav", "onoffCaption": "tapping clicking clanking at 0.232-3.672, 4.981-8.421", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2757.wav", "onoffCaption": "car horn honking at 1.995-6.395, 7.171-10.0", "frequencyCaption": "car horn honking two times"} 
+{"filepath": "data/multi_event_train/syn_2820.wav", "onoffCaption": "burping belching at 0.103-2.138, 3.472-5.507, 6.832-8.867", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_2875.wav", "onoffCaption": "door knocking at 1.909-6.442", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2921.wav", "onoffCaption": "sheep goat bleating at 1.122-3.122, 4.877-6.877", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4131.wav", "onoffCaption": "train horn at 0.546-3.22 and sheep goat bleating at 1.516-3.516, 4.159-6.159, 7.47-9.47", "frequencyCaption": "train horn one times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4243.wav", "onoffCaption": "woman laughing at 1.329-3.411, 5.369-7.451", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4259.wav", "onoffCaption": "tapping clicking clanking at 0.446-3.886, 5.188-7.422 and spraying at 5.404-6.404", "frequencyCaption": "tapping clicking clanking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4342.wav", "onoffCaption": "woman laughing at 1.285-4.357 and sneeze at 1.929-4.405, 5.129-7.605", "frequencyCaption": "woman laughing one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_4358.wav", "onoffCaption": "dog barking at 1.161-3.161, 5.169-7.169", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4499.wav", "onoffCaption": "car horn honking at 0.823-5.73 and door slamming at 2.357-3.748, 5.822-7.213 and duck quacking at 3.094-5.094, 7.242-9.242", "frequencyCaption": "car horn honking one times and door slamming two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4567.wav", "onoffCaption": "dog barking at 2.991-4.991, 7.131-9.131", "frequencyCaption": "dog barking two times"} +{"filepath": 
"data/multi_event_train/syn_4582.wav", "onoffCaption": "explosion at 0.407-3.125 and gunshot at 3.084-5.084, 6.253-8.253 and burping belching at 4.468-7.647", "frequencyCaption": "explosion one times and gunshot two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4615.wav", "onoffCaption": "tapping clicking clanking at 0.349-3.789, 5.332-7.939 and cow mooing at 2.213-5.511, 7.329-10.0", "frequencyCaption": "tapping clicking clanking two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4714.wav", "onoffCaption": "thump thud at 1.441-4.488, 5.626-8.673", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4863.wav", "onoffCaption": "sheep goat bleating at 2.651-5.731, 7.327-9.327", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4879.wav", "onoffCaption": "sneeze at 1.717-3.005, 4.541-5.829", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4886.wav", "onoffCaption": "woman laughing at 2.456-5.048, 5.701-8.293", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4962.wav", "onoffCaption": "spraying at 0.625-2.32, 2.931-3.798", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4978.wav", "onoffCaption": "door knocking at 1.907-4.095", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2098.wav", "onoffCaption": "duck quacking at 1.532-3.532", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2127.wav", "onoffCaption": "explosion at 3.111-5.983, 6.942-9.582", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2183.wav", "onoffCaption": "thump thud at 2.161-5.208, 6.064-8.472", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2315.wav", "onoffCaption": "explosion at 1.424-6.424 and 
cat meowing at 2.368-4.316 and car horn honking at 2.424-4.424", "frequencyCaption": "explosion one times and cat meowing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2354.wav", "onoffCaption": "door knocking at 0.652-4.152, 5.012-7.135, 7.746-9.81", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_2571.wav", "onoffCaption": "spraying at 2.456-4.584, 5.711-7.473", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2594.wav", "onoffCaption": "car horn honking at 2.033-4.533, 5.2-7.7", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2619.wav", "onoffCaption": "duck quacking at 1.522-3.522 and cat meowing at 1.603-3.791", "frequencyCaption": "duck quacking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2658.wav", "onoffCaption": "explosion at 0.811-5.677, 7.813-10.0 and door knocking at 2.02-5.77", "frequencyCaption": "explosion two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2702.wav", "onoffCaption": "whistling at 0.515-3.39, 3.92-6.795 and explosion at 2.138-5.01, 6.901-9.773", "frequencyCaption": "whistling two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_2743.wav", "onoffCaption": "tapping clicking clanking at 2.399-5.839, 6.987-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2834.wav", "onoffCaption": "cow mooing at 2.952-5.962, 7.639-10.0 and woman laughing at 3.706-6.406", "frequencyCaption": "cow mooing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2890.wav", "onoffCaption": "duck quacking at 2.845-4.845", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4125.wav", "onoffCaption": "thump thud at 1.182-3.682 and door knocking at 6.723-9.57", "frequencyCaption": "thump thud 
one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4164.wav", "onoffCaption": "whistling at 3.425-5.654", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4181.wav", "onoffCaption": "car horn honking at 0.313-3.488, 5.773-8.948", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4317.wav", "onoffCaption": "sneeze at 0.088-2.201, 3.06-5.377, 6.262-7.496 and door slamming at 0.488-2.708, 3.344-4.244 and train horn at 0.872-6.587, 7.647-10.0", "frequencyCaption": "sneeze three times and door slamming two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_4356.wav", "onoffCaption": "train horn at 2.962-6.722", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4429.wav", "onoffCaption": "duck quacking at 0.833-2.833, 3.531-5.531, 6.133-8.133 and woman laughing at 5.524-8.624", "frequencyCaption": "duck quacking three times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4468.wav", "onoffCaption": "door slamming at 0.051-2.509", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4532.wav", "onoffCaption": "door slamming at 2.31-4.793, 5.41-6.701", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4573.wav", "onoffCaption": "burping belching at 1.245-5.114 and door slamming at 2.463-4.463, 5.179-7.179", "frequencyCaption": "burping belching one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4596.wav", "onoffCaption": "dog barking at 0.023-2.023, 2.648-4.648, 6.804-8.804", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_4700.wav", "onoffCaption": "door slamming at 1.79-4.01 and door knocking at 7.65-10.0", "frequencyCaption": "door slamming one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4741.wav", 
"onoffCaption": "sneeze at 3.285-7.785", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4836.wav", "onoffCaption": "car horn honking at 0.067-2.993, 3.783-6.709, 7.913-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_4877.wav", "onoffCaption": "explosion at 0.204-2.524, 3.236-5.41, 6.621-8.623", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_4892.wav", "onoffCaption": "thump thud at 0.249-3.916, 5.15-8.817 and cat meowing at 0.688-1.697, 2.233-3.242, 3.778-4.787", "frequencyCaption": "thump thud two times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4989.wav", "onoffCaption": "sheep goat bleating at 0.79-2.79 and woman laughing at 1.659-4.251, 4.803-7.395 and gunshot at 3.514-5.514, 6.423-8.696", "frequencyCaption": "sheep goat bleating one times and woman laughing two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2028.wav", "onoffCaption": "tapping clicking clanking at 3.282-6.722", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2032.wav", "onoffCaption": "thump thud at 1.878-4.217", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2129.wav", "onoffCaption": "cat meowing at 2.578-3.663, 4.726-6.036, 7.089-8.089", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2133.wav", "onoffCaption": "train horn at 3.33-6.57, 7.334-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2166.wav", "onoffCaption": "cow mooing at 2.414-6.843, 7.633-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2241.wav", "onoffCaption": "whistling at 3.139-8.314", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2340.wav", "onoffCaption": "car horn honking at 0.508-4.349 
and explosion at 7.279-9.281", "frequencyCaption": "car horn honking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2464.wav", "onoffCaption": "tapping clicking clanking at 0.896-4.336 and duck quacking at 1.153-3.153, 4.089-6.089, 6.905-8.905", "frequencyCaption": "tapping clicking clanking one times and duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2481.wav", "onoffCaption": "spraying at 1.831-2.331 and door slamming at 4.61-6.61, 7.521-9.521", "frequencyCaption": "spraying one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2530.wav", "onoffCaption": "train horn at 3.401-6.641", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2565.wav", "onoffCaption": "sneeze at 0.005-1.505", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2580.wav", "onoffCaption": "gunshot at 3.815-5.815, 7.3-9.3", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2617.wav", "onoffCaption": "duck quacking at 1.56-3.56, 4.821-6.821", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2716.wav", "onoffCaption": "sheep goat bleating at 0.826-2.826 and thump thud at 5.91-8.41", "frequencyCaption": "sheep goat bleating one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2861.wav", "onoffCaption": "sneeze at 2.056-7.056 and tapping clicking clanking at 4.55-7.99 and thump thud at 5.027-7.798", "frequencyCaption": "sneeze one times and tapping clicking clanking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2884.wav", "onoffCaption": "door knocking at 0.648-2.951, 4.123-6.605, 7.909-9.931", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_2960.wav", "onoffCaption": "woman laughing at 0.175-2.257 and door slamming at 0.823-2.823 and sneeze at 6.102-7.559", "frequencyCaption": 
"woman laughing one times and door slamming one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2985.wav", "onoffCaption": "thump thud at 0.261-2.6, 3.314-5.653 and sneeze at 4.087-6.172", "frequencyCaption": "thump thud two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4071.wav", "onoffCaption": "cat meowing at 2.995-4.097, 4.741-5.843", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4094.wav", "onoffCaption": "whistling at 3.031-9.342", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4170.wav", "onoffCaption": "thump thud at 1.319-5.237 and cat meowing at 4.051-5.587", "frequencyCaption": "thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4195.wav", "onoffCaption": "whistling at 2.822-5.051, 5.722-8.376 and explosion at 6.118-8.836", "frequencyCaption": "whistling two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4202.wav", "onoffCaption": "whistling at 2.434-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4218.wav", "onoffCaption": "cow mooing at 0.267-3.565 and woman laughing at 1.052-3.657", "frequencyCaption": "cow mooing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4303.wav", "onoffCaption": "tapping clicking clanking at 0.66-4.1, 6.49-9.93 and cat meowing at 1.059-2.603, 4.596-6.14 and sheep goat bleating at 3.085-5.085", "frequencyCaption": "tapping clicking clanking two times and cat meowing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4319.wav", "onoffCaption": "door slamming at 3.854-5.003, 5.659-7.885", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4427.wav", "onoffCaption": "sheep goat bleating at 3.094-5.094, 6.139-8.139", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": 
"data/multi_event_train/syn_4526.wav", "onoffCaption": "spraying at 0.254-1.335, 3.355-5.05", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4654.wav", "onoffCaption": "cat meowing at 3.661-6.565, 7.709-8.895", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4755.wav", "onoffCaption": "cow mooing at 4.05-7.019 and spraying at 6.688-7.769, 9.086-9.713", "frequencyCaption": "cow mooing one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4822.wav", "onoffCaption": "train horn at 3.056-5.523 and car horn honking at 3.381-5.846 and burping belching at 4.458-7.458", "frequencyCaption": "train horn one times and car horn honking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4838.wav", "onoffCaption": "door knocking at 0.085-2.306 and spraying at 5.037-5.621 and duck quacking at 5.649-7.649", "frequencyCaption": "door knocking one times and spraying one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4923.wav", "onoffCaption": "gunshot at 0.444-2.444, 3.118-5.248 and explosion at 2.474-7.474", "frequencyCaption": "gunshot two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4939.wav", "onoffCaption": "gunshot at 2.558-5.032, 5.941-7.941", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2016.wav", "onoffCaption": "whistling at 0.036-5.211 and burping belching at 2.935-5.058, 6.513-9.513", "frequencyCaption": "whistling one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_2067.wav", "onoffCaption": "sneeze at 0.627-2.69", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2082.wav", "onoffCaption": "spraying at 2.94-5.959", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2158.wav", "onoffCaption": "whistling at 0.357-8.012 and gunshot at 3.468-5.468, 
7.306-9.306", "frequencyCaption": "whistling one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2199.wav", "onoffCaption": "thump thud at 3.283-6.95", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2214.wav", "onoffCaption": "cat meowing at 0.151-1.417, 3.482-4.748", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2265.wav", "onoffCaption": "whistling at 1.369-5.853 and tapping clicking clanking at 2.623-6.063, 7.495-10.0", "frequencyCaption": "whistling one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2280.wav", "onoffCaption": "burping belching at 0.359-5.359, 6.238-8.564 and tapping clicking clanking at 3.033-6.473", "frequencyCaption": "burping belching two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2415.wav", "onoffCaption": "car horn honking at 0.386-3.881, 5.676-9.171", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2431.wav", "onoffCaption": "explosion at 0.062-5.062, 7.284-9.469", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2440.wav", "onoffCaption": "door slamming at 0.123-0.623, 2.142-2.642 and sheep goat bleating at 1.348-3.348, 3.893-5.893, 6.837-8.837", "frequencyCaption": "door slamming two times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2633.wav", "onoffCaption": "whistling at 0.759-3.634, 4.275-6.577", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2642.wav", "onoffCaption": "tapping clicking clanking at 1.34-4.78, 7.05-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2728.wav", "onoffCaption": "cat meowing at 0.493-3.523 and woman laughing at 0.638-2.73", "frequencyCaption": "cat meowing one times and woman laughing one times"} 
+{"filepath": "data/multi_event_train/syn_2759.wav", "onoffCaption": "gunshot at 1.592-3.611", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2935.wav", "onoffCaption": "door knocking at 0.728-4.478 and duck quacking at 2.452-4.452, 5.784-7.784", "frequencyCaption": "door knocking one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2944.wav", "onoffCaption": "door slamming at 3.33-5.33", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4024.wav", "onoffCaption": "car horn honking at 2.651-5.164, 5.914-8.427 and thump thud at 3.286-6.057, 7.195-9.966", "frequencyCaption": "car horn honking two times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_4055.wav", "onoffCaption": "tapping clicking clanking at 2.017-5.457, 7.274-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4226.wav", "onoffCaption": "tapping clicking clanking at 0.981-4.421, 5.748-8.421", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4257.wav", "onoffCaption": "cow mooing at 2.78-5.762", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4296.wav", "onoffCaption": "cat meowing at 0.666-2.626, 3.341-5.301", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4403.wav", "onoffCaption": "door slamming at 2.323-3.626, 4.97-6.273, 7.601-8.904", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4472.wav", "onoffCaption": "door knocking at 2.045-4.422, 5.41-7.787", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4497.wav", "onoffCaption": "whistling at 2.277-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4518.wav", "onoffCaption": "spraying at 3.256-4.503, 5.811-7.058, 
8.236-9.483", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4569.wav", "onoffCaption": "train horn at 2.431-5.965, 7.28-9.954", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4601.wav", "onoffCaption": "cow mooing at 0.4-5.38, 7.824-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4670.wav", "onoffCaption": "whistling at 3.544-9.044", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4695.wav", "onoffCaption": "burping belching at 0.668-4.668, 6.056-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4888.wav", "onoffCaption": "burping belching at 3.083-6.083", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4907.wav", "onoffCaption": "whistling at 1.293-9.304", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4976.wav", "onoffCaption": "tapping clicking clanking at 0.552-3.992, 5.863-9.303", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4993.wav", "onoffCaption": "train horn at 0.195-2.869 and cat meowing at 6.247-7.291, 8.178-9.488", "frequencyCaption": "train horn one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2043.wav", "onoffCaption": "cat meowing at 3.812-5.752", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2059.wav", "onoffCaption": "burping belching at 0.09-2.197 and door knocking at 0.721-3.788, 5.023-8.09", "frequencyCaption": "burping belching one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2142.wav", "onoffCaption": "door slamming at 0.363-1.616, 2.579-4.805, 6.932-8.223", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2230.wav", "onoffCaption": "train horn at 
0.572-6.632", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2331.wav", "onoffCaption": "thump thud at 0.794-3.022, 3.604-5.832, 6.585-8.813", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_2364.wav", "onoffCaption": "explosion at 2.299-5.293, 6.682-9.682", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2381.wav", "onoffCaption": "woman laughing at 0.245-2.327, 2.828-5.334, 7.526-9.642", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_2514.wav", "onoffCaption": "gunshot at 3.315-5.315, 7.012-9.012", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2629.wav", "onoffCaption": "spraying at 0.982-2.715, 4.089-5.822 and cow mooing at 1.108-6.088, 7.379-10.0", "frequencyCaption": "spraying two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2666.wav", "onoffCaption": "thump thud at 0.125-2.896, 4.014-6.785, 7.495-10.0 and woman laughing at 2.778-4.978", "frequencyCaption": "thump thud three times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2683.wav", "onoffCaption": "train horn at 2.278-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2699.wav", "onoffCaption": "sheep goat bleating at 3.383-5.383, 7.401-9.401", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2732.wav", "onoffCaption": "cow mooing at 3.624-8.604", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2767.wav", "onoffCaption": "dog barking at 2.432-4.432, 5.383-7.383", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2782.wav", "onoffCaption": "door knocking at 0.89-4.01, 5.434-7.971 and burping belching at 4.746-7.072", "frequencyCaption": "door knocking two times and burping belching one times"} 
+{"filepath": "data/multi_event_train/syn_2798.wav", "onoffCaption": "thump thud at 0.059-2.559, 3.231-5.68, 6.442-8.781 and spraying at 2.588-3.096", "frequencyCaption": "thump thud three times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2810.wav", "onoffCaption": "door slamming at 2.837-3.642", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_2911.wav", "onoffCaption": "gunshot at 1.534-3.704, 6.11-8.11", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4000.wav", "onoffCaption": "burping belching at 2.612-6.118, 6.622-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4101.wav", "onoffCaption": "whistling at 0.049-7.142 and door slamming at 1.786-3.786, 5.681-7.681", "frequencyCaption": "whistling one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4269.wav", "onoffCaption": "car horn honking at 0.1-2.565", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4273.wav", "onoffCaption": "woman laughing at 3.317-5.672", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4368.wav", "onoffCaption": "sneeze at 3.702-5.615, 6.946-8.859", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4372.wav", "onoffCaption": "cat meowing at 1.592-2.592, 3.323-4.333, 4.964-7.647", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4397.wav", "onoffCaption": "dog barking at 0.044-2.444 and cow mooing at 6.758-10.0", "frequencyCaption": "dog barking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4419.wav", "onoffCaption": "car horn honking at 3.16-5.946, 6.874-9.66", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4456.wav", "onoffCaption": "cat meowing at 3.282-4.592, 6.073-8.963 and woman 
laughing at 3.328-5.933", "frequencyCaption": "cat meowing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4557.wav", "onoffCaption": "car horn honking at 0.072-3.913, 6.076-9.917 and duck quacking at 0.139-2.139, 3.74-5.74", "frequencyCaption": "car horn honking two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4625.wav", "onoffCaption": "woman laughing at 0.112-3.5, 4.993-7.276 and cow mooing at 2.294-7.274", "frequencyCaption": "woman laughing two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4724.wav", "onoffCaption": "whistling at 3.107-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4806.wav", "onoffCaption": "explosion at 1.533-6.399", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4849.wav", "onoffCaption": "burping belching at 2.476-5.237, 6.847-9.668", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4853.wav", "onoffCaption": "car horn honking at 2.537-4.884, 6.616-8.963", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4948.wav", "onoffCaption": "woman laughing at 0.01-3.11 and door slamming at 0.11-2.593, 4.13-7.091", "frequencyCaption": "woman laughing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4952.wav", "onoffCaption": "tapping clicking clanking at 2.225-5.665", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2110.wav", "onoffCaption": "whistling at 0.11-8.121 and gunshot at 0.66-2.66, 4.32-6.32", "frequencyCaption": "whistling one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2117.wav", "onoffCaption": "duck quacking at 2.303-4.303, 4.821-6.821, 7.375-9.375", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2145.wav", 
"onoffCaption": "car horn honking at 2.249-5.067, 6.86-9.325", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2278.wav", "onoffCaption": "car horn honking at 0.005-3.18 and cat meowing at 0.584-2.532 and sneeze at 1.507-2.784", "frequencyCaption": "car horn honking one times and cat meowing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2336.wav", "onoffCaption": "burping belching at 2.727-6.75, 7.974-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2363.wav", "onoffCaption": "explosion at 0.209-2.937, 4.197-6.925", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2408.wav", "onoffCaption": "duck quacking at 0.919-2.919, 4.192-6.192, 6.918-8.918", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2513.wav", "onoffCaption": "tapping clicking clanking at 1.575-5.015, 5.631-7.968", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2541.wav", "onoffCaption": "door knocking at 2.431-7.431 and spraying at 3.38-3.984, 6.273-8.401", "frequencyCaption": "door knocking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2546.wav", "onoffCaption": "car horn honking at 0.134-3.629, 4.572-6.776", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2735.wav", "onoffCaption": "train horn at 2.719-7.049", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2760.wav", "onoffCaption": "woman laughing at 0.243-2.829 and whistling at 6.242-10.0", "frequencyCaption": "woman laughing one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2785.wav", "onoffCaption": "sheep goat bleating at 0.364-3.364 and gunshot at 2.49-4.49, 5.326-7.326 and thump thud at 4.036-6.375, 7.809-10.0", "frequencyCaption": "sheep goat bleating 
one times and gunshot two times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_2817.wav", "onoffCaption": "door slamming at 0.034-2.162, 3.44-6.414", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2842.wav", "onoffCaption": "woman laughing at 3.832-6.398, 7.087-9.653 and thump thud at 3.997-7.664", "frequencyCaption": "woman laughing two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2845.wav", "onoffCaption": "burping belching at 0.066-2.189 and dog barking at 4.94-6.94, 7.927-9.927", "frequencyCaption": "burping belching one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2959.wav", "onoffCaption": "thump thud at 2.464-6.131", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4106.wav", "onoffCaption": "burping belching at 2.396-5.396, 6.29-9.29", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4154.wav", "onoffCaption": "burping belching at 0.1-3.38 and duck quacking at 4.7-6.7", "frequencyCaption": "burping belching one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4320.wav", "onoffCaption": "woman laughing at 1.992-4.347 and sheep goat bleating at 2.326-4.326, 6.491-8.491", "frequencyCaption": "woman laughing one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4327.wav", "onoffCaption": "tapping clicking clanking at 0.526-3.966, 4.575-8.015", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4375.wav", "onoffCaption": "car horn honking at 1.515-5.837, 7.812-9.812", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4390.wav", "onoffCaption": "door knocking at 0.626-2.978, 3.775-6.087, 6.804-9.392", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_4502.wav", 
"onoffCaption": "duck quacking at 2.496-4.496 and car horn honking at 2.541-5.041, 5.542-8.042", "frequencyCaption": "duck quacking one times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4550.wav", "onoffCaption": "gunshot at 0.379-2.379, 3.425-5.425 and cat meowing at 6.709-9.613", "frequencyCaption": "gunshot two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4638.wav", "onoffCaption": "tapping clicking clanking at 2.149-5.589, 6.582-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4688.wav", "onoffCaption": "train horn at 1.924-4.564", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4723.wav", "onoffCaption": "train horn at 0.301-8.501 and spraying at 1.872-2.813, 3.956-5.718, 6.281-7.062", "frequencyCaption": "train horn one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_4771.wav", "onoffCaption": "thump thud at 2.044-6.494, 7.6-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4776.wav", "onoffCaption": "burping belching at 1.744-5.288, 7.129-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4793.wav", "onoffCaption": "duck quacking at 0.789-2.789, 3.91-5.91", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4794.wav", "onoffCaption": "thump thud at 3.096-7.471", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4801.wav", "onoffCaption": "sheep goat bleating at 2.842-4.842, 5.542-7.542", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4854.wav", "onoffCaption": "sneeze at 0.166-1.711, 3.091-4.185", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2011.wav", "onoffCaption": "cow mooing at 3.455-7.884", "frequencyCaption": "cow mooing 
one times"} +{"filepath": "data/multi_event_train/syn_2044.wav", "onoffCaption": "cow mooing at 2.752-6.05", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2237.wav", "onoffCaption": "spraying at 2.505-3.68, 5.61-8.07 and burping belching at 4.712-7.335", "frequencyCaption": "spraying two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2246.wav", "onoffCaption": "train horn at 2.386-4.866, 5.514-7.994", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2262.wav", "onoffCaption": "door slamming at 0.296-1.82, 2.403-3.927, 4.953-6.477", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2287.wav", "onoffCaption": "sneeze at 2.468-4.08, 4.643-6.255, 7.115-8.727", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2379.wav", "onoffCaption": "thump thud at 1.14-3.602, 4.287-6.749", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2386.wav", "onoffCaption": "sneeze at 0.998-3.401, 4.07-5.347", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2412.wav", "onoffCaption": "whistling at 2.899-8.074", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2447.wav", "onoffCaption": "explosion at 0.429-5.429, 6.245-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2509.wav", "onoffCaption": "car horn honking at 0.405-3.9, 5.336-7.484", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2610.wav", "onoffCaption": "spraying at 0.285-2.721, 3.874-6.31, 7.014-9.45", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2634.wav", "onoffCaption": "cow mooing at 1.887-6.316 and dog barking at 2.493-4.493, 5.504-7.504", "frequencyCaption": "cow mooing one times and dog barking two times"} 
+{"filepath": "data/multi_event_train/syn_2661.wav", "onoffCaption": "spraying at 0.128-1.823, 2.851-4.337, 5.4-6.05 and duck quacking at 1.565-3.565, 5.072-7.072 and dog barking at 1.874-3.874", "frequencyCaption": "spraying three times and duck quacking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2684.wav", "onoffCaption": "sheep goat bleating at 0.422-2.422", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2858.wav", "onoffCaption": "spraying at 1.549-2.281, 3.504-4.236, 4.962-5.694", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2916.wav", "onoffCaption": "train horn at 1.358-4.598, 5.88-9.12", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2943.wav", "onoffCaption": "door slamming at 2.913-4.913, 6.059-8.059", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4007.wav", "onoffCaption": "train horn at 0.741-4.221, 6.423-9.063 and cat meowing at 2.233-3.593, 5.797-7.692", "frequencyCaption": "train horn two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4048.wav", "onoffCaption": "woman laughing at 3.569-6.364 and sneeze at 4.859-6.855", "frequencyCaption": "woman laughing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4052.wav", "onoffCaption": "duck quacking at 2.19-4.19, 6.433-8.433", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4076.wav", "onoffCaption": "thump thud at 0.403-3.174 and sheep goat bleating at 2.8-4.8, 6.068-8.068", "frequencyCaption": "thump thud one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4093.wav", "onoffCaption": "door knocking at 3.703-5.924, 7.166-9.387", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4149.wav", "onoffCaption": "whistling at 3.415-9.124", 
"frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4153.wav", "onoffCaption": "door knocking at 0.839-3.569, 4.834-7.564 and sneeze at 1.952-4.037", "frequencyCaption": "door knocking two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4188.wav", "onoffCaption": "duck quacking at 2.14-4.14", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4221.wav", "onoffCaption": "whistling at 0.086-8.471", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4274.wav", "onoffCaption": "cat meowing at 0.042-1.144, 2.589-3.598 and woman laughing at 0.866-3.452, 4.245-6.831", "frequencyCaption": "cat meowing two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4291.wav", "onoffCaption": "thump thud at 1.138-5.588 and whistling at 1.843-6.327 and woman laughing at 5.378-7.797", "frequencyCaption": "thump thud one times and whistling one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4404.wav", "onoffCaption": "burping belching at 2.51-5.51, 7.101-9.541 and gunshot at 2.836-4.836", "frequencyCaption": "burping belching two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4420.wav", "onoffCaption": "duck quacking at 2.843-4.843, 5.486-7.486 and thump thud at 5.54-8.311", "frequencyCaption": "duck quacking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4451.wav", "onoffCaption": "train horn at 0.239-3.039, 5.026-7.426", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4505.wav", "onoffCaption": "door knocking at 1.084-3.654", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4622.wav", "onoffCaption": "whistling at 2.926-5.155, 7.141-9.447", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4677.wav", "onoffCaption": "cow mooing at 
3.047-7.476 and woman laughing at 3.474-6.269", "frequencyCaption": "cow mooing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4692.wav", "onoffCaption": "duck quacking at 0.013-2.013", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4739.wav", "onoffCaption": "door slamming at 1.895-3.813", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4789.wav", "onoffCaption": "sneeze at 1.002-5.058", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4900.wav", "onoffCaption": "cow mooing at 0.223-3.233, 4.638-7.648 and door knocking at 4.601-6.761", "frequencyCaption": "cow mooing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4924.wav", "onoffCaption": "burping belching at 0.009-5.953", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4955.wav", "onoffCaption": "door knocking at 0.394-5.096, 6.053-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_8.wav", "onoffCaption": "sheep goat bleating at 0.236-2.236, 3.143-5.143, 6.147-8.147", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2035.wav", "onoffCaption": "thump thud at 3.809-6.148, 7.831-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2060.wav", "onoffCaption": "sheep goat bleating at 0.166-5.046, 6.154-8.154 and explosion at 0.486-2.66, 3.478-6.478", "frequencyCaption": "sheep goat bleating two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_2085.wav", "onoffCaption": "burping belching at 3.03-5.395, 5.899-8.216", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2161.wav", "onoffCaption": "door knocking at 2.239-6.511", "frequencyCaption": "door knocking one times"} +{"filepath": 
"data/multi_event_train/syn_2184.wav", "onoffCaption": "woman laughing at 2.236-4.473, 6.682-8.919 and sneeze at 2.47-6.97 and sheep goat bleating at 6.116-8.116", "frequencyCaption": "woman laughing two times and sneeze one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2213.wav", "onoffCaption": "duck quacking at 2.521-4.521, 6.283-8.283", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2308.wav", "onoffCaption": "cow mooing at 0.549-3.531, 5.902-8.609", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2436.wav", "onoffCaption": "thump thud at 3.198-6.865", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2463.wav", "onoffCaption": "whistling at 1.38-9.391", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2486.wav", "onoffCaption": "gunshot at 1.07-3.343", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2537.wav", "onoffCaption": "train horn at 2.728-7.586", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2578.wav", "onoffCaption": "gunshot at 2.509-4.509, 6.605-8.605", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2645.wav", "onoffCaption": "sheep goat bleating at 0.42-2.42, 4.869-6.869 and cat meowing at 2.605-3.915, 4.495-6.631", "frequencyCaption": "sheep goat bleating two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2829.wav", "onoffCaption": "whistling at 0.202-5.377", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2833.wav", "onoffCaption": "sneeze at 2.403-6.051, 7.994-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2899.wav", "onoffCaption": "door knocking at 0.036-2.66, 4.527-7.364 and dog barking at 0.341-2.341, 2.973-4.973", "frequencyCaption": "door 
knocking two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2928.wav", "onoffCaption": "burping belching at 0.293-6.973 and woman laughing at 2.69-4.806, 5.463-7.568", "frequencyCaption": "burping belching one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2932.wav", "onoffCaption": "spraying at 0.047-0.651, 1.26-2.164, 3.361-4.608 and dog barking at 0.972-2.972, 3.608-5.608", "frequencyCaption": "spraying three times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2967.wav", "onoffCaption": "cow mooing at 2.971-5.94, 7.066-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2982.wav", "onoffCaption": "door knocking at 2.719-5.099", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4023.wav", "onoffCaption": "door knocking at 1.077-4.577, 5.819-7.908", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4039.wav", "onoffCaption": "thump thud at 2.056-4.556, 6.038-8.538", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4122.wav", "onoffCaption": "cat meowing at 3.331-4.342, 4.92-5.931, 6.61-7.621", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4138.wav", "onoffCaption": "door knocking at 2.83-5.182, 6.312-8.664", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4205.wav", "onoffCaption": "spraying at 0.092-2.484, 4.849-7.241", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4250.wav", "onoffCaption": "burping belching at 0.753-3.932", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4351.wav", "onoffCaption": "dog barking at 0.601-2.601, 5.002-7.002", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4475.wav", "onoffCaption": "burping 
belching at 1.004-7.684 and whistling at 1.432-5.916, 6.702-9.65 and gunshot at 2.55-4.55, 5.582-7.582", "frequencyCaption": "burping belching one times and whistling two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4490.wav", "onoffCaption": "door knocking at 2.648-4.775 and tapping clicking clanking at 7.088-10.0", "frequencyCaption": "door knocking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4574.wav", "onoffCaption": "sneeze at 4.1-6.346", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4606.wav", "onoffCaption": "thump thud at 1.232-4.003, 6.177-8.891", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4653.wav", "onoffCaption": "tapping clicking clanking at 2.647-6.087", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4707.wav", "onoffCaption": "explosion at 0.051-5.051", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4748.wav", "onoffCaption": "thump thud at 1.279-3.779, 5.309-7.809", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4895.wav", "onoffCaption": "gunshot at 3.74-6.214, 7.578-9.578", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4971.wav", "onoffCaption": "cat meowing at 0.262-2.256", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4994.wav", "onoffCaption": "cat meowing at 3.746-4.746, 5.536-6.536 and sheep goat bleating at 4.329-6.329", "frequencyCaption": "cat meowing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2048.wav", "onoffCaption": "burping belching at 0.76-4.304, 5.102-8.646", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2134.wav", "onoffCaption": "spraying at 0.015-1.19, 2.932-5.368", 
"frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2153.wav", "onoffCaption": "spraying at 0.046-1.046 and explosion at 0.666-5.666, 6.451-10.0", "frequencyCaption": "spraying one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_2209.wav", "onoffCaption": "gunshot at 2.013-4.013", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2312.wav", "onoffCaption": "explosion at 2.43-7.351", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2347.wav", "onoffCaption": "explosion at 0.017-3.017, 3.938-6.938 and dog barking at 1.463-3.463", "frequencyCaption": "explosion two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2479.wav", "onoffCaption": "burping belching at 1.689-7.776", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2505.wav", "onoffCaption": "thump thud at 1.734-6.109, 6.786-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2562.wav", "onoffCaption": "car horn honking at 0.787-5.694, 7.691-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2587.wav", "onoffCaption": "sheep goat bleating at 3.286-5.286", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2711.wav", "onoffCaption": "train horn at 2.669-4.824, 6.196-8.66", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2744.wav", "onoffCaption": "car horn honking at 0.047-2.973 and cat meowing at 6.678-8.219", "frequencyCaption": "car horn honking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2801.wav", "onoffCaption": "burping belching at 1.71-5.0", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2866.wav", "onoffCaption": "cat meowing at 0.382-2.57, 3.17-4.181, 
4.892-6.078 and whistling at 0.515-8.9 and duck quacking at 5.413-7.413", "frequencyCaption": "cat meowing three times and whistling one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2883.wav", "onoffCaption": "sheep goat bleating at 3.204-5.204", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2998.wav", "onoffCaption": "spraying at 0.088-1.345, 3.612-4.869", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4089.wav", "onoffCaption": "sneeze at 0.337-1.591 and train horn at 3.809-9.524", "frequencyCaption": "sneeze one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4177.wav", "onoffCaption": "dog barking at 0.177-2.177 and sneeze at 0.871-3.485, 5.324-7.465 and woman laughing at 3.376-6.014, 7.273-10.0", "frequencyCaption": "dog barking one times and sneeze two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4192.wav", "onoffCaption": "woman laughing at 0.638-3.433, 5.851-8.646", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4278.wav", "onoffCaption": "gunshot at 1.428-3.521, 4.261-6.354, 7.169-9.262 and door slamming at 5.994-7.369", "frequencyCaption": "gunshot three times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4304.wav", "onoffCaption": "train horn at 2.458-6.639, 7.492-9.932", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4363.wav", "onoffCaption": "thump thud at 0.301-2.64 and duck quacking at 0.65-2.65, 4.552-6.552", "frequencyCaption": "thump thud one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4386.wav", "onoffCaption": "sneeze at 0.984-4.059, 5.924-8.999", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4521.wav", "onoffCaption": "sneeze at 0.704-2.663, 3.814-5.773", "frequencyCaption": "sneeze two 
times"} +{"filepath": "data/multi_event_train/syn_4546.wav", "onoffCaption": "dog barking at 0.715-2.715, 5.2-7.2", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4591.wav", "onoffCaption": "sheep goat bleating at 2.507-4.507, 5.973-7.973", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4649.wav", "onoffCaption": "spraying at 0.22-1.087, 2.362-3.229 and cat meowing at 6.586-8.526", "frequencyCaption": "spraying two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4735.wav", "onoffCaption": "train horn at 0.231-5.308", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4752.wav", "onoffCaption": "cow mooing at 1.023-3.992, 4.756-7.738 and cat meowing at 2.326-3.87, 4.908-6.452", "frequencyCaption": "cow mooing two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4825.wav", "onoffCaption": "burping belching at 0.564-5.564, 7.134-9.46", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4870.wav", "onoffCaption": "thump thud at 1.655-6.03, 6.852-9.508", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2106.wav", "onoffCaption": "car horn honking at 1.789-4.302, 5.357-7.87", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2274.wav", "onoffCaption": "explosion at 0.13-5.051, 6.09-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2291.wav", "onoffCaption": "cat meowing at 1.616-2.628, 3.395-6.299, 7.041-8.143", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2320.wav", "onoffCaption": "gunshot at 0.53-2.803, 3.85-5.85, 7.066-9.066", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_2375.wav", "onoffCaption": "door slamming at 1.481-2.872, 3.714-5.714, 
6.695-7.844 and explosion at 5.251-8.438", "frequencyCaption": "door slamming three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2390.wav", "onoffCaption": "duck quacking at 0.077-2.077, 4.153-6.153", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2451.wav", "onoffCaption": "burping belching at 0.021-3.311 and cat meowing at 5.102-7.042", "frequencyCaption": "burping belching one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2550.wav", "onoffCaption": "train horn at 2.524-8.268", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2622.wav", "onoffCaption": "tapping clicking clanking at 0.613-4.053 and train horn at 0.753-3.393, 4.437-7.077", "frequencyCaption": "tapping clicking clanking one times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2638.wav", "onoffCaption": "gunshot at 0.324-2.324, 3.995-6.501 and whistling at 6.819-8.828", "frequencyCaption": "gunshot two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2688.wav", "onoffCaption": "dog barking at 0.336-2.336, 3.264-5.264, 5.967-7.967 and tapping clicking clanking at 2.286-5.726", "frequencyCaption": "dog barking three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2723.wav", "onoffCaption": "spraying at 0.799-1.974, 2.594-3.178, 4.428-6.19 and woman laughing at 2.694-4.977, 6.495-9.48", "frequencyCaption": "spraying three times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2739.wav", "onoffCaption": "spraying at 1.824-2.451, 3.203-4.898, 7.269-8.516", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2776.wav", "onoffCaption": "sheep goat bleating at 2.702-4.702, 6.792-8.792", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2793.wav", "onoffCaption": "spraying at 
2.441-3.068, 5.222-5.849 and explosion at 7.549-10.0", "frequencyCaption": "spraying two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2854.wav", "onoffCaption": "cow mooing at 2.28-7.26", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2955.wav", "onoffCaption": "sneeze at 3.434-4.82, 5.419-6.805", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4044.wav", "onoffCaption": "duck quacking at 1.493-3.493, 4.339-6.339, 7.456-9.456", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4110.wav", "onoffCaption": "tapping clicking clanking at 1.135-4.575, 5.943-8.406", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4145.wav", "onoffCaption": "sheep goat bleating at 3.411-5.411, 6.238-8.238", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4237.wav", "onoffCaption": "spraying at 0.24-1.726", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_4336.wav", "onoffCaption": "burping belching at 2.645-5.824, 7.891-9.922", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4408.wav", "onoffCaption": "explosion at 1.757-4.757, 5.479-8.479 and car horn honking at 6.199-8.199", "frequencyCaption": "explosion two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4412.wav", "onoffCaption": "cat meowing at 0.27-1.63", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4509.wav", "onoffCaption": "woman laughing at 0.787-4.363, 6.706-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4513.wav", "onoffCaption": "gunshot at 0.752-2.752, 3.852-5.852, 6.881-8.881", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4661.wav", 
"onoffCaption": "thump thud at 1.443-3.782, 5.47-7.809", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4684.wav", "onoffCaption": "door slamming at 2.704-4.079, 6.287-7.811", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4760.wav", "onoffCaption": "tapping clicking clanking at 0.533-3.973, 5.147-8.587", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4785.wav", "onoffCaption": "whistling at 0.652-8.663", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4817.wav", "onoffCaption": "train horn at 0.488-4.488 and sheep goat bleating at 0.833-2.833, 3.373-5.373, 5.952-7.952", "frequencyCaption": "train horn one times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4842.wav", "onoffCaption": "tapping clicking clanking at 2.652-6.092 and whistling at 2.885-8.385 and sheep goat bleating at 6.014-8.014", "frequencyCaption": "tapping clicking clanking one times and whistling one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4916.wav", "onoffCaption": "duck quacking at 2.204-4.204, 6.278-8.278", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4959.wav", "onoffCaption": "cat meowing at 0.048-1.06, 1.852-2.864, 4.697-5.709 and train horn at 0.069-3.549, 4.338-7.276", "frequencyCaption": "cat meowing three times and train horn two times"} +{"filepath": "data/multi_event_train/syn_4.wav", "onoffCaption": "spraying at 0.375-2.503 and car horn honking at 4.549-8.136", "frequencyCaption": "spraying one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2007.wav", "onoffCaption": "train horn at 3.118-6.278, 7.28-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2023.wav", "onoffCaption": "car horn honking at 4.143-7.638", 
"frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2052.wav", "onoffCaption": "spraying at 2.342-2.992 and cat meowing at 3.473-4.483, 5.231-6.258, 7.243-8.553", "frequencyCaption": "spraying one times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2076.wav", "onoffCaption": "spraying at 0.727-1.508 and thump thud at 5.495-9.413", "frequencyCaption": "spraying one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2089.wav", "onoffCaption": "duck quacking at 0.81-2.81 and spraying at 3.644-4.144, 5.805-6.656", "frequencyCaption": "duck quacking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2093.wav", "onoffCaption": "sneeze at 0.898-2.175, 3.979-5.305 and spraying at 3.016-3.524, 4.306-4.814", "frequencyCaption": "sneeze two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2138.wav", "onoffCaption": "burping belching at 1.702-6.038, 7.211-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2149.wav", "onoffCaption": "door knocking at 1.85-6.683, 7.825-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2188.wav", "onoffCaption": "train horn at 3.407-9.876", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2205.wav", "onoffCaption": "thump thud at 3.709-6.171", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2221.wav", "onoffCaption": "woman laughing at 2.818-5.89, 7.129-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2250.wav", "onoffCaption": "explosion at 2.937-7.937", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2404.wav", "onoffCaption": "tapping clicking clanking at 2.396-5.836, 7.121-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": 
"data/multi_event_train/syn_2420.wav", "onoffCaption": "cat meowing at 0.79-2.15 and woman laughing at 5.494-7.61", "frequencyCaption": "cat meowing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2475.wav", "onoffCaption": "door knocking at 3.062-5.374, 6.46-8.772 and woman laughing at 3.618-6.037, 7.169-9.518", "frequencyCaption": "door knocking two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2490.wav", "onoffCaption": "cow mooing at 2.202-5.171, 6.377-8.411", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2606.wav", "onoffCaption": "door knocking at 2.842-5.61", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2653.wav", "onoffCaption": "cat meowing at 3.516-4.601", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2677.wav", "onoffCaption": "whistling at 0.283-4.767, 5.551-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2692.wav", "onoffCaption": "explosion at 0.361-3.417, 5.348-8.404", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2748.wav", "onoffCaption": "thump thud at 0.302-2.802, 4.125-6.625 and dog barking at 0.683-2.683", "frequencyCaption": "thump thud two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2789.wav", "onoffCaption": "explosion at 1.018-4.571", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2825.wav", "onoffCaption": "door knocking at 0.964-4.732", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2900.wav", "onoffCaption": "train horn at 0.446-3.12, 4.402-6.539 and thump thud at 3.737-7.404", "frequencyCaption": "train horn two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2924.wav", "onoffCaption": "burping belching at 0.202-6.882", 
"frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2971.wav", "onoffCaption": "burping belching at 1.991-6.39, 7.29-9.52", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2994.wav", "onoffCaption": "tapping clicking clanking at 2.3-5.74, 6.598-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4011.wav", "onoffCaption": "sneeze at 2.458-3.561, 4.16-6.297", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4035.wav", "onoffCaption": "car horn honking at 3.885-6.703, 7.76-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4060.wav", "onoffCaption": "gunshot at 2.22-4.22, 4.777-6.777, 7.486-9.486", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4085.wav", "onoffCaption": "dog barking at 0.613-2.613, 3.151-5.151, 6.447-8.447", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_4213.wav", "onoffCaption": "explosion at 0.298-5.298, 5.925-8.075", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4246.wav", "onoffCaption": "duck quacking at 1.745-3.745, 4.943-6.943", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4262.wav", "onoffCaption": "car horn honking at 0.025-2.025 and spraying at 3.726-4.353, 6.476-7.103, 8.766-9.393", "frequencyCaption": "car horn honking one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_4287.wav", "onoffCaption": "door knocking at 2.855-5.207", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4308.wav", "onoffCaption": "explosion at 0.564-5.564, 7.629-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4379.wav", "onoffCaption": "dog barking at 0.247-2.247", 
"frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4436.wav", "onoffCaption": "gunshot at 3.193-5.433, 7.188-9.188", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4447.wav", "onoffCaption": "thump thud at 3.806-7.724", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4463.wav", "onoffCaption": "tapping clicking clanking at 0.406-3.846, 6.106-8.202", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4486.wav", "onoffCaption": "car horn honking at 3.658-6.584, 7.528-9.535", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4578.wav", "onoffCaption": "car horn honking at 1.804-5.023", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4610.wav", "onoffCaption": "sneeze at 0.396-3.604, 4.394-7.602", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4634.wav", "onoffCaption": "door slamming at 0.545-1.778 and duck quacking at 4.483-6.483", "frequencyCaption": "door slamming one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4645.wav", "onoffCaption": "door slamming at 0.033-1.324, 2.029-3.32, 4.632-5.923", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4829.wav", "onoffCaption": "car horn honking at 0.514-3.427, 4.395-7.321, 7.878-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_4858.wav", "onoffCaption": "train horn at 0.62-4.688, 5.25-9.318", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4899.wav", "onoffCaption": "explosion at 2.455-7.455", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4932.wav", "onoffCaption": "burping belching at 0.383-3.927, 5.815-9.359", "frequencyCaption": "burping 
belching two times"} +{"filepath": "data/multi_event_train/syn_4943.wav", "onoffCaption": "explosion at 0.369-3.363, 4.428-7.086 and cow mooing at 1.762-5.06", "frequencyCaption": "explosion two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4967.wav", "onoffCaption": "door knocking at 1.44-5.59, 6.272-8.575 and cat meowing at 2.072-3.285, 3.8-5.013", "frequencyCaption": "door knocking two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4982.wav", "onoffCaption": "door knocking at 0.035-2.535, 3.47-5.97, 6.806-9.306 and door slamming at 0.149-0.83, 1.334-2.185", "frequencyCaption": "door knocking three times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2039.wav", "onoffCaption": "explosion at 3.189-7.029", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2122.wav", "onoffCaption": "spraying at 2.788-3.639, 4.382-5.233", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2177.wav", "onoffCaption": "cat meowing at 0.579-2.715 and door knocking at 1.119-4.672, 5.375-8.928", "frequencyCaption": "cat meowing one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2192.wav", "onoffCaption": "whistling at 2.771-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2304.wav", "onoffCaption": "cat meowing at 0.014-1.55, 2.249-3.785, 5.054-6.59 and door knocking at 0.12-2.341, 4.211-6.432", "frequencyCaption": "cat meowing three times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2351.wav", "onoffCaption": "train horn at 0.696-5.136 and door knocking at 6.603-9.978", "frequencyCaption": "train horn one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2521.wav", "onoffCaption": "door knocking at 0.54-6.6, 7.245-9.422", "frequencyCaption": "door knocking two times"} +{"filepath": 
"data/multi_event_train/syn_2574.wav", "onoffCaption": "train horn at 0.214-5.072, 7.224-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2591.wav", "onoffCaption": "car horn honking at 0.359-3.145", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2649.wav", "onoffCaption": "car horn honking at 0.111-4.511 and thump thud at 2.579-5.626 and door slamming at 2.596-3.596, 4.32-5.3, 6.247-7.622", "frequencyCaption": "car horn honking one times and thump thud one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_2707.wav", "onoffCaption": "whistling at 1.983-6.467 and gunshot at 3.437-5.437, 5.946-7.946", "frequencyCaption": "whistling one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2752.wav", "onoffCaption": "burping belching at 0.679-6.28", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2870.wav", "onoffCaption": "door knocking at 2.203-5.259, 7.501-9.947", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2895.wav", "onoffCaption": "cow mooing at 0.668-3.65, 4.571-7.553 and dog barking at 2.804-4.804, 6.928-9.366", "frequencyCaption": "cow mooing two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_4134.wav", "onoffCaption": "tapping clicking clanking at 2.236-5.676", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4184.wav", "onoffCaption": "gunshot at 2.648-4.648 and woman laughing at 7.575-10.0", "frequencyCaption": "gunshot one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4209.wav", "onoffCaption": "door slamming at 2.279-3.532 and thump thud at 6.078-8.306 and door knocking at 7.164-9.352", "frequencyCaption": "door slamming one times and thump thud one times and door knocking one times"} +{"filepath": 
"data/multi_event_train/syn_4312.wav", "onoffCaption": "tapping clicking clanking at 0.624-4.064, 4.836-8.276", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4347.wav", "onoffCaption": "cat meowing at 1.85-4.754, 6.696-7.708", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4479.wav", "onoffCaption": "duck quacking at 3.224-5.224 and whistling at 7.322-10.0", "frequencyCaption": "duck quacking one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4562.wav", "onoffCaption": "sheep goat bleating at 0.675-2.675, 5.049-7.049 and spraying at 2.84-4.968", "frequencyCaption": "sheep goat bleating two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4587.wav", "onoffCaption": "door slamming at 1.539-4.256, 5.225-7.942 and gunshot at 5.959-7.959", "frequencyCaption": "door slamming two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4711.wav", "onoffCaption": "cow mooing at 2.135-5.145, 6.111-8.674 and explosion at 3.265-8.265", "frequencyCaption": "cow mooing two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4744.wav", "onoffCaption": "explosion at 2.169-4.43, 5.09-7.351", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4833.wav", "onoffCaption": "duck quacking at 1.645-3.645, 5.743-7.743", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4866.wav", "onoffCaption": "sneeze at 0.481-5.01, 6.331-8.038", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4883.wav", "onoffCaption": "door slamming at 2.32-4.233, 5.384-7.384", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4928.wav", "onoffCaption": "thump thud at 0.16-2.622, 3.878-6.34", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4998.wav", 
"onoffCaption": "train horn at 0.32-2.787, 4.37-7.25 and sheep goat bleating at 3.461-6.781, 7.415-9.415", "frequencyCaption": "train horn two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2125.wav", "onoffCaption": "burping belching at 3.079-6.079, 7.222-9.897", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2170.wav", "onoffCaption": "spraying at 0.023-2.607, 3.651-4.255, 4.818-6.065 and duck quacking at 0.269-2.269", "frequencyCaption": "spraying three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2195.wav", "onoffCaption": "burping belching at 0.087-3.346, 4.593-6.616 and explosion at 4.732-7.732", "frequencyCaption": "burping belching two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2218.wav", "onoffCaption": "tapping clicking clanking at 2.113-5.553, 6.579-9.274", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2303.wav", "onoffCaption": "burping belching at 2.154-4.975, 5.67-8.491", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2356.wav", "onoffCaption": "cow mooing at 2.672-5.641, 7.662-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2526.wav", "onoffCaption": "cow mooing at 0.244-3.226, 4.079-6.625, 7.594-10.0 and car horn honking at 3.697-6.872", "frequencyCaption": "cow mooing three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2700.wav", "onoffCaption": "dog barking at 1.524-3.524, 4.817-6.817", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2755.wav", "onoffCaption": "car horn honking at 1.663-5.317, 6.053-8.202", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2822.wav", "onoffCaption": "whistling at 2.19-5.065, 6.03-8.905", "frequencyCaption": 
"whistling two times"} +{"filepath": "data/multi_event_train/syn_2892.wav", "onoffCaption": "car horn honking at 0.074-3.728 and door slamming at 6.804-8.328", "frequencyCaption": "car horn honking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2939.wav", "onoffCaption": "cow mooing at 1.111-4.093, 4.704-6.768, 7.68-10.0", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/multi_event_train/syn_2989.wav", "onoffCaption": "cat meowing at 2.02-3.575, 4.742-6.286 and door knocking at 2.836-6.452", "frequencyCaption": "cat meowing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4028.wav", "onoffCaption": "whistling at 2.589-7.073", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4098.wav", "onoffCaption": "sheep goat bleating at 1.437-3.437, 3.974-6.969 and door knocking at 1.638-6.638, 7.29-10.0", "frequencyCaption": "sheep goat bleating two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_4133.wav", "onoffCaption": "explosion at 0.035-2.209, 4.651-7.242 and door slamming at 0.721-1.86, 2.689-4.454, 5.508-7.728", "frequencyCaption": "explosion two times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_4161.wav", "onoffCaption": "sneeze at 2.452-4.928, 5.8-8.276", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4166.wav", "onoffCaption": "cat meowing at 1.548-3.438, 4.507-6.397, 7.079-8.969", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4183.wav", "onoffCaption": "burping belching at 1.619-5.125, 6.442-9.948", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4340.wav", "onoffCaption": "cow mooing at 0.835-3.817, 5.853-8.84", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4530.wav", "onoffCaption": "whistling at 1.37-10.0 and gunshot at 
3.876-5.876", "frequencyCaption": "whistling one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4537.wav", "onoffCaption": "burping belching at 1.095-4.639, 5.725-8.07 and spraying at 1.935-2.802, 3.928-4.555", "frequencyCaption": "burping belching two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4565.wav", "onoffCaption": "burping belching at 1.951-4.712, 5.866-8.138", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4580.wav", "onoffCaption": "thump thud at 0.07-2.841, 5.122-7.893 and sheep goat bleating at 1.296-3.296", "frequencyCaption": "thump thud two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4716.wav", "onoffCaption": "burping belching at 3.69-7.25", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4834.wav", "onoffCaption": "door slamming at 2.952-5.952, 7.214-10.0", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4861.wav", "onoffCaption": "duck quacking at 0.25-2.25 and thump thud at 0.885-3.224 and door knocking at 6.572-10.0", "frequencyCaption": "duck quacking one times and thump thud one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4884.wav", "onoffCaption": "cat meowing at 0.347-1.357 and spraying at 0.846-2.021", "frequencyCaption": "cat meowing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3.wav", "onoffCaption": "duck quacking at 0.203-2.203, 2.739-4.739 and train horn at 1.131-5.461, 6.713-9.193", "frequencyCaption": "duck quacking two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2000.wav", "onoffCaption": "burping belching at 0.839-3.839, 5.914-8.914", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2024.wav", "onoffCaption": "thump thud at 0.692-5.142", "frequencyCaption": "thump thud one 
times"} +{"filepath": "data/multi_event_train/syn_2071.wav", "onoffCaption": "woman laughing at 2.728-5.147, 6.23-8.649", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2094.wav", "onoffCaption": "burping belching at 0.22-2.843, 3.448-5.813, 7.816-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_2202.wav", "onoffCaption": "tapping clicking clanking at 0.947-4.387, 5.953-9.393 and woman laughing at 1.93-4.167", "frequencyCaption": "tapping clicking clanking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2257.wav", "onoffCaption": "door knocking at 0.06-3.676 and sneeze at 0.255-3.365 and door slamming at 2.1-3.119", "frequencyCaption": "door knocking one times and sneeze one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2296.wav", "onoffCaption": "gunshot at 0.013-2.013, 3.206-5.206, 6.18-8.18", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_2319.wav", "onoffCaption": "thump thud at 0.838-3.3 and woman laughing at 2.775-5.57, 6.239-9.034", "frequencyCaption": "thump thud one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2427.wav", "onoffCaption": "sneeze at 2.616-3.79, 4.47-6.177 and cat meowing at 7.473-9.017", "frequencyCaption": "sneeze two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2456.wav", "onoffCaption": "door slamming at 0.534-3.017, 4.185-6.668, 7.849-10.0", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2468.wav", "onoffCaption": "dog barking at 0.272-2.272, 4.17-6.17, 7.135-9.135", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2472.wav", "onoffCaption": "woman laughing at 2.006-8.74", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2497.wav", "onoffCaption": "sneeze 
at 1.87-4.109, 4.688-6.927", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2569.wav", "onoffCaption": "car horn honking at 0.611-5.518, 7.265-10.0 and sheep goat bleating at 0.81-2.81, 4.698-6.698", "frequencyCaption": "car horn honking two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2573.wav", "onoffCaption": "dog barking at 0.815-2.815, 3.608-5.608 and explosion at 6.902-9.493", "frequencyCaption": "dog barking two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2596.wav", "onoffCaption": "duck quacking at 0.302-2.302 and door slamming at 6.473-9.254", "frequencyCaption": "duck quacking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2601.wav", "onoffCaption": "burping belching at 2.939-5.037 and spraying at 4.187-4.837, 5.94-7.949, 9.176-9.78", "frequencyCaption": "burping belching one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_2654.wav", "onoffCaption": "cat meowing at 0.362-1.778, 2.393-3.809, 4.914-6.33 and car horn honking at 3.445-6.231", "frequencyCaption": "cat meowing three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2838.wav", "onoffCaption": "door slamming at 1.159-3.522 and dog barking at 2.177-4.177, 4.702-6.702, 7.836-9.836", "frequencyCaption": "door slamming one times and dog barking three times"} +{"filepath": "data/multi_event_train/syn_2849.wav", "onoffCaption": "door knocking at 2.197-4.965, 5.543-8.39", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2877.wav", "onoffCaption": "woman laughing at 1.891-4.457, 5.083-7.451 and dog barking at 4.627-7.065, 7.622-10.0", "frequencyCaption": "woman laughing two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2888.wav", "onoffCaption": "gunshot at 3.402-5.402, 6.683-8.683", "frequencyCaption": "gunshot two times"} +{"filepath": 
"data/multi_event_train/syn_2923.wav", "onoffCaption": "spraying at 2.881-3.785", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2952.wav", "onoffCaption": "duck quacking at 1.431-3.431, 5.499-7.499", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2976.wav", "onoffCaption": "dog barking at 0.106-2.106, 3.066-5.066, 7.405-9.405", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2993.wav", "onoffCaption": "spraying at 0.875-2.637, 3.389-4.389, 5.063-6.004 and door slamming at 2.355-4.268", "frequencyCaption": "spraying three times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4032.wav", "onoffCaption": "car horn honking at 0.015-2.515, 3.464-5.964", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4067.wav", "onoffCaption": "cow mooing at 1.537-5.966 and burping belching at 3.187-5.81", "frequencyCaption": "cow mooing one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4082.wav", "onoffCaption": "door knocking at 0.068-4.443 and duck quacking at 0.264-2.264 and dog barking at 2.234-4.234, 4.989-6.989", "frequencyCaption": "door knocking one times and duck quacking one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_4129.wav", "onoffCaption": "tapping clicking clanking at 1.318-4.758 and sheep goat bleating at 3.041-5.041", "frequencyCaption": "tapping clicking clanking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4199.wav", "onoffCaption": "dog barking at 0.158-2.158, 3.19-5.19, 7.467-9.467", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_4214.wav", "onoffCaption": "train horn at 1.922-5.456", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4230.wav", "onoffCaption": "train horn at 0.323-3.563, 
5.508-8.471", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4241.wav", "onoffCaption": "whistling at 0.723-3.698 and tapping clicking clanking at 5.638-9.078", "frequencyCaption": "whistling one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4315.wav", "onoffCaption": "burping belching at 1.063-7.743 and cat meowing at 6.9-7.921, 8.476-9.52", "frequencyCaption": "burping belching one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4415.wav", "onoffCaption": "car horn honking at 0.981-5.381, 6.324-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4431.wav", "onoffCaption": "woman laughing at 0.175-6.909, 7.845-9.926", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4464.wav", "onoffCaption": "door slamming at 2.516-3.494, 4.587-5.527, 6.595-7.97", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4481.wav", "onoffCaption": "door knocking at 2.762-5.492, 6.815-8.975", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4617.wav", "onoffCaption": "gunshot at 1.616-3.616, 4.192-6.192, 7.032-9.032", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4642.wav", "onoffCaption": "explosion at 3.239-8.105", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4658.wav", "onoffCaption": "train horn at 1.537-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4666.wav", "onoffCaption": "cow mooing at 1.708-5.006, 6.168-9.466", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4683.wav", "onoffCaption": "tapping clicking clanking at 3.129-6.569", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4743.wav", 
"onoffCaption": "gunshot at 2.914-4.914, 7.35-9.35", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4759.wav", "onoffCaption": "sneeze at 3.36-5.473, 6.677-8.79", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4798.wav", "onoffCaption": "tapping clicking clanking at 0.768-4.208, 5.52-8.96", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4935.wav", "onoffCaption": "cow mooing at 1.516-4.814, 7.173-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4960.wav", "onoffCaption": "spraying at 1.552-2.799, 4.582-7.042 and train horn at 2.416-5.896", "frequencyCaption": "spraying two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4985.wav", "onoffCaption": "train horn at 0.388-4.388 and sneeze at 1.181-2.458, 4.879-6.113", "frequencyCaption": "train horn one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_2055.wav", "onoffCaption": "door slamming at 0.237-1.215, 1.804-3.345, 4.557-7.436 and duck quacking at 0.321-2.321", "frequencyCaption": "door slamming three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2101.wav", "onoffCaption": "gunshot at 1.697-3.827", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2154.wav", "onoffCaption": "dog barking at 0.723-2.723, 3.316-5.316, 6.483-8.483", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2226.wav", "onoffCaption": "door slamming at 0.091-2.091, 2.603-3.454, 4.045-4.883", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2269.wav", "onoffCaption": "door slamming at 1.059-2.972 and gunshot at 2.674-4.674, 5.625-7.625", "frequencyCaption": "door slamming one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2273.wav", "onoffCaption": "woman laughing 
at 1.117-3.354, 3.88-6.117", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2327.wav", "onoffCaption": "door slamming at 1.389-4.363, 6.253-9.227", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2368.wav", "onoffCaption": "car horn honking at 2.647-7.159", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2372.wav", "onoffCaption": "whistling at 0.111-2.986", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2397.wav", "onoffCaption": "tapping clicking clanking at 0.796-4.236", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2403.wav", "onoffCaption": "explosion at 2.342-5.469 and gunshot at 7.933-10.0", "frequencyCaption": "explosion one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2419.wav", "onoffCaption": "gunshot at 0.477-2.477, 3.149-5.149 and door knocking at 1.493-4.613", "frequencyCaption": "gunshot two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2502.wav", "onoffCaption": "gunshot at 0.827-2.827, 4.117-6.117", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2518.wav", "onoffCaption": "cow mooing at 0.012-2.981", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2557.wav", "onoffCaption": "sneeze at 3.357-5.818, 7.048-8.576", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2625.wav", "onoffCaption": "thump thud at 1.569-6.019, 6.528-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2670.wav", "onoffCaption": "cow mooing at 1.159-4.128, 5.227-8.196 and explosion at 2.413-7.413", "frequencyCaption": "cow mooing two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2695.wav", "onoffCaption": "train horn at 0.6-4.781 and 
spraying at 0.721-1.453", "frequencyCaption": "train horn one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2724.wav", "onoffCaption": "woman laughing at 0.35-7.362 and tapping clicking clanking at 3.906-7.346", "frequencyCaption": "woman laughing one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2771.wav", "onoffCaption": "door slamming at 0.63-3.604, 4.655-7.629 and cat meowing at 2.295-4.235, 5.62-6.629 and duck quacking at 6.304-8.304", "frequencyCaption": "door slamming two times and cat meowing two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2794.wav", "onoffCaption": "gunshot at 1.926-4.199, 5.42-7.42", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2806.wav", "onoffCaption": "burping belching at 0.386-2.589, 3.628-5.831, 6.503-8.706 and door knocking at 2.198-7.031", "frequencyCaption": "burping belching three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2853.wav", "onoffCaption": "thump thud at 1.981-4.443, 6.001-8.463", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2907.wav", "onoffCaption": "cow mooing at 0.749-3.759 and sheep goat bleating at 2.754-7.474 and dog barking at 3.083-5.083, 6.184-8.184", "frequencyCaption": "cow mooing one times and sheep goat bleating one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2948.wav", "onoffCaption": "cat meowing at 0.559-2.291 and car horn honking at 6.429-10.0", "frequencyCaption": "cat meowing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4016.wav", "onoffCaption": "sneeze at 2.058-3.289, 5.747-6.978 and door knocking at 4.891-7.194", "frequencyCaption": "sneeze two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4043.wav", "onoffCaption": "door knocking at 0.075-4.225 and train horn at 6.32-10.0", 
"frequencyCaption": "door knocking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4059.wav", "onoffCaption": "gunshot at 0.745-2.745, 4.879-6.879", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4117.wav", "onoffCaption": "cat meowing at 0.091-1.112, 2.04-4.57, 5.573-7.533", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4142.wav", "onoffCaption": "dog barking at 0.414-2.414, 3.557-5.557, 6.334-8.334 and train horn at 1.087-3.242", "frequencyCaption": "dog barking three times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4158.wav", "onoffCaption": "burping belching at 0.49-4.49, 5.449-9.449", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4265.wav", "onoffCaption": "dog barking at 0.175-5.812, 6.78-10.0", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4280.wav", "onoffCaption": "dog barking at 2.669-4.669 and sheep goat bleating at 7.512-9.512", "frequencyCaption": "dog barking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4331.wav", "onoffCaption": "cow mooing at 1.208-6.188 and duck quacking at 4.284-6.284", "frequencyCaption": "cow mooing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4364.wav", "onoffCaption": "burping belching at 0.19-3.19, 5.308-8.308 and dog barking at 0.267-2.267 and gunshot at 1.186-3.186, 5.249-7.249", "frequencyCaption": "burping belching two times and dog barking one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4381.wav", "onoffCaption": "spraying at 0.23-1.925 and car horn honking at 4.134-8.646", "frequencyCaption": "spraying one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4440.wav", "onoffCaption": "cow mooing at 0.007-2.976, 5.371-7.46", "frequencyCaption": "cow mooing two times"} 
+{"filepath": "data/multi_event_train/syn_4514.wav", "onoffCaption": "tapping clicking clanking at 1.267-4.707, 5.587-9.027", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4541.wav", "onoffCaption": "dog barking at 2.832-4.832, 5.772-7.772", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4629.wav", "onoffCaption": "duck quacking at 0.526-2.526, 3.179-5.179", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4633.wav", "onoffCaption": "duck quacking at 0.812-2.812, 3.631-5.631, 6.45-8.45 and cat meowing at 2.522-4.077 and burping belching at 2.965-6.965", "frequencyCaption": "duck quacking three times and cat meowing one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4699.wav", "onoffCaption": "door knocking at 3.099-5.121, 5.729-7.751", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4728.wav", "onoffCaption": "explosion at 0.713-5.634, 6.498-9.498", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4732.wav", "onoffCaption": "tapping clicking clanking at 2.353-5.793, 6.59-9.076", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4767.wav", "onoffCaption": "dog barking at 2.492-4.492, 6.811-8.811", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4782.wav", "onoffCaption": "whistling at 3.517-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4810.wav", "onoffCaption": "cat meowing at 2.171-3.171", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4845.wav", "onoffCaption": "door knocking at 2.751-5.321, 6.377-8.947", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4911.wav", "onoffCaption": "thump thud at 1.568-3.907, 
4.45-6.789, 7.746-10.0", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_4944.wav", "onoffCaption": "woman laughing at 0.34-2.54, 4.896-7.096", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2115.wav", "onoffCaption": "sheep goat bleating at 0.902-2.902, 4.596-6.596 and cow mooing at 1.699-4.709, 5.792-8.247", "frequencyCaption": "sheep goat bleating two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2140.wav", "onoffCaption": "burping belching at 0.728-3.728, 4.523-7.523 and car horn honking at 2.581-7.488", "frequencyCaption": "burping belching two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2228.wav", "onoffCaption": "whistling at 1.17-6.67", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2298.wav", "onoffCaption": "car horn honking at 0.039-2.857, 3.365-6.183, 6.974-9.792", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_2333.wav", "onoffCaption": "gunshot at 3.701-5.701, 6.615-8.615", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2366.wav", "onoffCaption": "woman laughing at 2.103-4.328, 4.915-7.115", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2383.wav", "onoffCaption": "door slamming at 3.631-6.06, 6.913-7.764", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2458.wav", "onoffCaption": "duck quacking at 1.91-3.91, 5.691-7.691", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2516.wav", "onoffCaption": "tapping clicking clanking at 2.089-5.529 and train horn at 2.833-7.014", "frequencyCaption": "tapping clicking clanking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2543.wav", "onoffCaption": "cow mooing at 3.059-6.028, 7.252-10.0", 
"frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2730.wav", "onoffCaption": "dog barking at 0.421-2.421, 4.285-6.285", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2765.wav", "onoffCaption": "whistling at 0.918-3.793, 4.747-7.622", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2780.wav", "onoffCaption": "sneeze at 3.002-4.166, 4.977-7.343", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2812.wav", "onoffCaption": "train horn at 1.579-5.579 and sheep goat bleating at 3.344-5.344", "frequencyCaption": "train horn one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2847.wav", "onoffCaption": "dog barking at 0.439-2.439 and tapping clicking clanking at 5.168-8.608", "frequencyCaption": "dog barking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2909.wav", "onoffCaption": "duck quacking at 0.518-2.518, 4.844-6.844", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4018.wav", "onoffCaption": "spraying at 0.372-0.976, 2.675-3.302, 5.276-5.851 and car horn honking at 3.509-7.831", "frequencyCaption": "spraying three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4103.wav", "onoffCaption": "cat meowing at 2.286-5.316, 6.353-8.313 and woman laughing at 5.253-7.453 and sneeze at 5.34-6.666", "frequencyCaption": "cat meowing two times and woman laughing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4156.wav", "onoffCaption": "explosion at 0.661-3.661", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4325.wav", "onoffCaption": "gunshot at 0.211-2.211", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4370.wav", "onoffCaption": "gunshot at 1.844-3.844, 5.097-7.097, 
7.762-9.762", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4395.wav", "onoffCaption": "dog barking at 0.207-2.607, 4.311-6.311", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4500.wav", "onoffCaption": "explosion at 0.364-5.23, 7.676-10.0 and burping belching at 1.243-5.579", "frequencyCaption": "explosion two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4555.wav", "onoffCaption": "door knocking at 0.82-5.353, 6.393-8.795", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4668.wav", "onoffCaption": "duck quacking at 1.603-3.603, 5.311-7.311", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4726.wav", "onoffCaption": "thump thud at 2.21-5.877", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4773.wav", "onoffCaption": "sheep goat bleating at 1.094-3.094, 4.244-6.244, 7.536-9.536", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4796.wav", "onoffCaption": "sneeze at 0.242-1.568, 2.708-4.034, 4.92-6.246", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4804.wav", "onoffCaption": "duck quacking at 0.366-2.366, 3.404-5.404 and woman laughing at 7.732-10.0", "frequencyCaption": "duck quacking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4851.wav", "onoffCaption": "door slamming at 0.634-1.937, 3.148-4.451, 6.589-7.892", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4905.wav", "onoffCaption": "burping belching at 1.445-7.389 and cow mooing at 4.409-7.707", "frequencyCaption": "burping belching one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2014.wav", "onoffCaption": "duck quacking at 0.1-2.1, 2.681-4.681, 5.3-7.3 and woman laughing at 
0.878-3.103, 5.052-7.548", "frequencyCaption": "duck quacking three times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2041.wav", "onoffCaption": "gunshot at 0.756-2.756, 4.93-6.93", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2232.wav", "onoffCaption": "cow mooing at 0.046-3.015 and sneeze at 0.117-1.704", "frequencyCaption": "cow mooing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2267.wav", "onoffCaption": "thump thud at 0.246-4.621 and sheep goat bleating at 1.704-3.704, 5.086-8.086 and dog barking at 4.822-6.822", "frequencyCaption": "thump thud one times and sheep goat bleating two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2282.wav", "onoffCaption": "spraying at 0.196-0.797", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2329.wav", "onoffCaption": "sneeze at 0.061-2.378 and gunshot at 0.877-2.877 and dog barking at 1.829-3.829", "frequencyCaption": "sneeze one times and gunshot one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2399.wav", "onoffCaption": "cow mooing at 0.806-5.235", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2417.wav", "onoffCaption": "gunshot at 2.087-4.087, 4.956-6.956, 7.941-9.941", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_2442.wav", "onoffCaption": "door knocking at 3.887-6.289, 7.303-9.705", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2559.wav", "onoffCaption": "car horn honking at 1.034-4.529 and dog barking at 6.57-8.57", "frequencyCaption": "car horn honking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2631.wav", "onoffCaption": "burping belching at 0.394-3.596 and door slamming at 1.021-3.021 and thump thud at 5.105-9.555", "frequencyCaption": "burping belching one times and 
door slamming one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2664.wav", "onoffCaption": "car horn honking at 0.045-2.831 and thump thud at 6.644-9.415", "frequencyCaption": "car horn honking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2681.wav", "onoffCaption": "woman laughing at 0.0-3.085 and door knocking at 5.304-8.804", "frequencyCaption": "woman laughing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2808.wav", "onoffCaption": "cat meowing at 0.026-1.026, 1.84-2.84 and woman laughing at 4.173-6.456", "frequencyCaption": "cat meowing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2913.wav", "onoffCaption": "woman laughing at 3.473-6.268", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2946.wav", "onoffCaption": "burping belching at 2.011-7.612", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4002.wav", "onoffCaption": "gunshot at 0.13-2.13 and woman laughing at 1.245-3.445, 5.933-8.133", "frequencyCaption": "gunshot one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4057.wav", "onoffCaption": "door knocking at 1.551-3.854, 6.338-8.641", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4119.wav", "onoffCaption": "gunshot at 4.016-6.016", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4224.wav", "onoffCaption": "duck quacking at 0.733-2.733, 3.793-5.793, 6.393-8.393 and sheep goat bleating at 2.124-5.204 and cat meowing at 6.595-8.543", "frequencyCaption": "duck quacking three times and sheep goat bleating one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4271.wav", "onoffCaption": "cat meowing at 2.651-3.736, 4.47-6.63, 7.481-8.502", "frequencyCaption": "cat meowing three times"} +{"filepath": 
"data/multi_event_train/syn_4294.wav", "onoffCaption": "whistling at 1.904-4.779, 6.225-8.454", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4401.wav", "onoffCaption": "cow mooing at 0.424-3.406, 5.35-7.852 and car horn honking at 0.85-5.099, 5.722-8.254", "frequencyCaption": "cow mooing two times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4454.wav", "onoffCaption": "door knocking at 2.973-5.436 and spraying at 7.871-8.621", "frequencyCaption": "door knocking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4627.wav", "onoffCaption": "cat meowing at 0.803-1.814, 2.735-4.045 and woman laughing at 3.119-6.406", "frequencyCaption": "cat meowing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4672.wav", "onoffCaption": "car horn honking at 0.057-3.552, 4.582-7.095", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4697.wav", "onoffCaption": "gunshot at 0.129-2.129, 2.887-4.887, 5.731-8.232 and door knocking at 5.47-8.2", "frequencyCaption": "gunshot three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4769.wav", "onoffCaption": "woman laughing at 2.871-5.29 and duck quacking at 3.644-5.644, 7.393-9.393", "frequencyCaption": "woman laughing one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4921.wav", "onoffCaption": "dog barking at 0.349-2.787, 4.269-6.269", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4950.wav", "onoffCaption": "cat meowing at 0.651-2.268, 3.577-4.588, 5.723-7.267 and duck quacking at 3.073-5.073", "frequencyCaption": "cat meowing three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2030.wav", "onoffCaption": "thump thud at 3.336-7.786", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2065.wav", 
"onoffCaption": "sneeze at 0.087-1.375, 2.994-5.307, 6.757-8.702", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2080.wav", "onoffCaption": "dog barking at 2.86-4.86, 6.259-8.259", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2164.wav", "onoffCaption": "duck quacking at 1.029-3.029, 4.586-6.586, 7.745-9.745", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2216.wav", "onoffCaption": "car horn honking at 1.183-4.77 and cat meowing at 3.31-4.927, 6.66-8.277", "frequencyCaption": "car horn honking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2243.wav", "onoffCaption": "sneeze at 0.224-1.327 and duck quacking at 0.322-2.322", "frequencyCaption": "sneeze one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2358.wav", "onoffCaption": "burping belching at 2.599-5.801, 7.699-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2433.wav", "onoffCaption": "gunshot at 0.442-2.442", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2466.wav", "onoffCaption": "whistling at 1.816-6.3", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2483.wav", "onoffCaption": "dog barking at 1.83-3.83, 5.526-7.526", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2528.wav", "onoffCaption": "explosion at 1.045-3.798, 4.36-6.68", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2598.wav", "onoffCaption": "burping belching at 2.37-6.37, 7.357-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2615.wav", "onoffCaption": "duck quacking at 3.183-5.183", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2640.wav", "onoffCaption": "dog barking at 
0.901-2.901, 5.102-7.102", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2879.wav", "onoffCaption": "spraying at 0.77-3.354, 5.572-6.353 and woman laughing at 3.378-6.473, 7.664-10.0 and sheep goat bleating at 4.777-6.777", "frequencyCaption": "spraying two times and woman laughing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2937.wav", "onoffCaption": "door knocking at 0.913-3.98, 5.526-7.766", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2962.wav", "onoffCaption": "burping belching at 0.503-3.503, 5.097-8.097 and door knocking at 7.1-9.502", "frequencyCaption": "burping belching two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2987.wav", "onoffCaption": "dog barking at 0.633-2.633, 4.02-6.02, 7.631-9.742", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_4026.wav", "onoffCaption": "door slamming at 0.769-1.269 and dog barking at 4.994-6.994", "frequencyCaption": "door slamming one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4073.wav", "onoffCaption": "duck quacking at 0.686-2.686, 4.676-6.676", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4096.wav", "onoffCaption": "tapping clicking clanking at 0.677-4.117", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4168.wav", "onoffCaption": "door knocking at 2.99-5.342 and tapping clicking clanking at 7.271-10.0", "frequencyCaption": "door knocking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4200.wav", "onoffCaption": "whistling at 2.405-5.38 and car horn honking at 3.25-7.65", "frequencyCaption": "whistling one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4255.wav", "onoffCaption": "door slamming at 2.703-3.82, 
4.869-6.16, 6.968-8.509", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4425.wav", "onoffCaption": "whistling at 1.576-7.683", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4470.wav", "onoffCaption": "door slamming at 0.129-0.81, 1.333-3.559, 4.597-5.097 and door knocking at 7.313-10.0", "frequencyCaption": "door slamming three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4495.wav", "onoffCaption": "sneeze at 3.101-5.186, 7.527-9.234", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4603.wav", "onoffCaption": "cow mooing at 1.547-4.557, 5.816-8.826 and cat meowing at 2.421-4.415", "frequencyCaption": "cow mooing two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4656.wav", "onoffCaption": "burping belching at 1.681-4.681, 5.243-8.243", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4718.wav", "onoffCaption": "train horn at 2.361-4.801 and sheep goat bleating at 7.701-9.701", "frequencyCaption": "train horn one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4974.wav", "onoffCaption": "cow mooing at 0.276-3.574 and burping belching at 0.774-3.774 and thump thud at 1.698-4.16", "frequencyCaption": "cow mooing one times and burping belching one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4991.wav", "onoffCaption": "explosion at 3.281-8.281", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2131.wav", "onoffCaption": "spraying at 2.591-3.113 and duck quacking at 4.191-6.191 and cow mooing at 4.489-7.499", "frequencyCaption": "spraying one times and duck quacking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2181.wav", "onoffCaption": "door knocking at 2.812-7.345, 7.981-10.0", "frequencyCaption": "door knocking two 
times"} +{"filepath": "data/multi_event_train/syn_2259.wav", "onoffCaption": "dog barking at 0.445-2.445, 3.999-5.999 and gunshot at 4.324-6.324", "frequencyCaption": "dog barking two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2317.wav", "onoffCaption": "tapping clicking clanking at 0.145-3.585", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2342.wav", "onoffCaption": "burping belching at 1.788-6.124, 7.024-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2429.wav", "onoffCaption": "duck quacking at 2.986-4.986 and door slamming at 6.513-9.23", "frequencyCaption": "duck quacking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2499.wav", "onoffCaption": "sheep goat bleating at 0.024-3.32 and dog barking at 1.92-3.92", "frequencyCaption": "sheep goat bleating one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2532.wav", "onoffCaption": "explosion at 2.04-4.631, 7.094-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2567.wav", "onoffCaption": "door slamming at 0.156-2.074", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_2582.wav", "onoffCaption": "car horn honking at 3.286-7.535", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2714.wav", "onoffCaption": "sneeze at 2.176-4.121, 6.135-8.08", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2741.wav", "onoffCaption": "sneeze at 0.258-1.422, 3.657-4.821 and woman laughing at 7.507-10.0", "frequencyCaption": "sneeze two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2836.wav", "onoffCaption": "thump thud at 2.554-5.054", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2863.wav", "onoffCaption": "duck 
quacking at 0.365-2.365 and car horn honking at 4.688-8.183 and cat meowing at 5.873-7.148", "frequencyCaption": "duck quacking one times and car horn honking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2886.wav", "onoffCaption": "burping belching at 0.982-4.542", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2978.wav", "onoffCaption": "cat meowing at 0.357-1.901, 4.148-5.692, 6.877-8.421", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4069.wav", "onoffCaption": "train horn at 0.267-3.747, 4.969-7.643", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4127.wav", "onoffCaption": "explosion at 1.4-3.407, 5.53-7.544", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4172.wav", "onoffCaption": "thump thud at 3.642-6.104", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4197.wav", "onoffCaption": "car horn honking at 0.548-2.895, 4.036-6.962 and duck quacking at 2.823-4.823, 5.992-7.992", "frequencyCaption": "car horn honking two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4301.wav", "onoffCaption": "door knocking at 0.963-3.315, 4.703-7.055", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4354.wav", "onoffCaption": "car horn honking at 2.552-5.465", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4488.wav", "onoffCaption": "gunshot at 0.894-3.064, 4.684-6.684 and spraying at 1.03-1.762, 3.015-3.747", "frequencyCaption": "gunshot two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4524.wav", "onoffCaption": "explosion at 1.076-3.083, 5.017-7.024", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4571.wav", "onoffCaption": "thump thud at 0.313-2.541 and door 
slamming at 4.244-5.082, 5.615-6.453, 7.056-7.894", "frequencyCaption": "thump thud one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_4594.wav", "onoffCaption": "woman laughing at 3.108-5.713, 6.503-8.987 and door slamming at 5.806-7.197", "frequencyCaption": "woman laughing two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4619.wav", "onoffCaption": "door knocking at 0.244-2.814, 4.673-7.012 and gunshot at 0.491-2.491 and sneeze at 3.661-4.895", "frequencyCaption": "door knocking two times and gunshot one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4702.wav", "onoffCaption": "thump thud at 2.28-6.73, 7.816-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4757.wav", "onoffCaption": "dog barking at 0.149-2.149 and tapping clicking clanking at 0.464-3.904 and burping belching at 5.381-8.925", "frequencyCaption": "dog barking one times and tapping clicking clanking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4820.wav", "onoffCaption": "cow mooing at 2.125-5.135 and gunshot at 3.983-5.983, 7.818-9.818", "frequencyCaption": "cow mooing one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4872.wav", "onoffCaption": "thump thud at 2.268-6.718", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4875.wav", "onoffCaption": "door slamming at 0.609-2.609, 4.435-6.435 and sheep goat bleating at 2.272-4.272, 5.291-7.291", "frequencyCaption": "door slamming two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4890.wav", "onoffCaption": "sheep goat bleating at 3.192-5.192, 5.962-7.962 and spraying at 3.225-5.353, 6.646-8.774", "frequencyCaption": "sheep goat bleating two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4969.wav", "onoffCaption": "sneeze at 0.02-1.979 and door slamming at 
1.078-2.453", "frequencyCaption": "sneeze one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2037.wav", "onoffCaption": "cow mooing at 1.731-4.713, 6.368-9.35 and spraying at 2.99-4.247, 5.36-6.617", "frequencyCaption": "cow mooing two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2078.wav", "onoffCaption": "cat meowing at 0.134-3.038, 4.259-7.163 and explosion at 0.649-2.742, 3.601-5.694 and whistling at 1.104-5.588, 6.512-9.029", "frequencyCaption": "cat meowing two times and explosion two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_2136.wav", "onoffCaption": "spraying at 0.061-0.665, 2.012-2.793 and explosion at 6.775-8.839", "frequencyCaption": "spraying two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2163.wav", "onoffCaption": "train horn at 1.284-4.084, 5.069-7.95 and gunshot at 2.44-4.44", "frequencyCaption": "train horn two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2186.wav", "onoffCaption": "train horn at 2.174-7.918 and door knocking at 2.306-4.494, 6.83-9.018", "frequencyCaption": "train horn one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2310.wav", "onoffCaption": "cow mooing at 0.644-5.073, 7.545-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2345.wav", "onoffCaption": "cow mooing at 0.329-5.309, 6.179-9.161", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2461.wav", "onoffCaption": "explosion at 1.207-3.3, 3.876-5.969 and car horn honking at 2.517-5.43, 7.292-10.0", "frequencyCaption": "explosion two times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2535.wav", "onoffCaption": "cow mooing at 0.06-3.07, 5.163-8.173 and thump thud at 0.481-4.148, 4.943-7.171", "frequencyCaption": "cow mooing two times and thump thud two times"} +{"filepath": 
"data/multi_event_train/syn_2560.wav", "onoffCaption": "thump thud at 3.064-6.111", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2585.wav", "onoffCaption": "cow mooing at 0.75-5.73, 6.405-8.768", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2608.wav", "onoffCaption": "train horn at 1.016-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2713.wav", "onoffCaption": "explosion at 1.31-4.304, 5.298-8.298", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2746.wav", "onoffCaption": "explosion at 0.892-3.764, 4.383-6.907, 7.915-10.0", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_2831.wav", "onoffCaption": "gunshot at 2.394-4.394, 6.612-9.086", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2864.wav", "onoffCaption": "train horn at 0.519-5.377, 6.619-9.02", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2881.wav", "onoffCaption": "dog barking at 0.363-2.363, 4.329-6.329, 7.313-9.313", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2980.wav", "onoffCaption": "door slamming at 2.98-4.495, 5.851-7.366", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4120.wav", "onoffCaption": "sneeze at 0.206-2.452, 4.198-5.372, 6.923-9.537", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4175.wav", "onoffCaption": "duck quacking at 2.753-4.753, 6.321-8.321", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4190.wav", "onoffCaption": "sheep goat bleating at 0.805-2.805", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4248.wav", "onoffCaption": "duck quacking at 0.163-2.163 and tapping clicking clanking at 
5.673-9.113", "frequencyCaption": "duck quacking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4306.wav", "onoffCaption": "tapping clicking clanking at 3.279-6.719 and sheep goat bleating at 7.072-9.072", "frequencyCaption": "tapping clicking clanking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4353.wav", "onoffCaption": "duck quacking at 0.65-2.65, 4.02-6.02", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4438.wav", "onoffCaption": "thump thud at 0.905-4.572 and cat meowing at 6.995-8.566", "frequencyCaption": "thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4523.wav", "onoffCaption": "duck quacking at 2.504-4.504 and train horn at 6.766-10.0", "frequencyCaption": "duck quacking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4576.wav", "onoffCaption": "gunshot at 0.73-2.73, 4.574-6.574 and train horn at 3.274-6.594", "frequencyCaption": "gunshot two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4593.wav", "onoffCaption": "car horn honking at 0.119-5.026 and sneeze at 0.407-1.638 and door slamming at 0.512-2.64", "frequencyCaption": "car horn honking one times and sneeze one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4705.wav", "onoffCaption": "sheep goat bleating at 0.586-4.226, 6.002-9.642", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4750.wav", "onoffCaption": "tapping clicking clanking at 1.972-5.412, 6.469-9.909 and woman laughing at 2.03-4.128", "frequencyCaption": "tapping clicking clanking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4827.wav", "onoffCaption": "dog barking at 0.577-2.577 and train horn at 0.636-4.756, 5.88-10.0", "frequencyCaption": "dog barking one times and train horn two times"} 
+{"filepath": "data/multi_event_train/syn_4897.wav", "onoffCaption": "cow mooing at 2.385-5.367 and spraying at 8.29-8.917", "frequencyCaption": "cow mooing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2013.wav", "onoffCaption": "dog barking at 1.338-3.338, 4.879-6.879", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2062.wav", "onoffCaption": "car horn honking at 1.119-3.632, 5.271-7.784 and dog barking at 4.978-6.978, 7.545-9.545", "frequencyCaption": "car horn honking two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2087.wav", "onoffCaption": "door knocking at 0.82-5.259 and burping belching at 1.654-7.255 and door slamming at 6.373-9.347", "frequencyCaption": "door knocking one times and burping belching one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2108.wav", "onoffCaption": "cow mooing at 3.083-6.093", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2179.wav", "onoffCaption": "gunshot at 0.366-2.366 and door slamming at 4.91-5.761, 7.513-8.364", "frequencyCaption": "gunshot one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2211.wav", "onoffCaption": "sneeze at 0.014-4.514, 5.427-7.903", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2244.wav", "onoffCaption": "train horn at 1.183-4.583, 5.705-9.105", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2434.wav", "onoffCaption": "dog barking at 0.142-2.142, 3.472-5.472, 6.656-8.656", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2484.wav", "onoffCaption": "cat meowing at 0.11-3.351, 4.446-7.687", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2612.wav", "onoffCaption": "cat meowing at 4.007-5.562, 6.692-7.967", "frequencyCaption": "cat meowing two times"} 
+{"filepath": "data/multi_event_train/syn_2647.wav", "onoffCaption": "spraying at 0.301-0.876, 3.183-3.758", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2709.wav", "onoffCaption": "train horn at 1.594-5.594", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2930.wav", "onoffCaption": "tapping clicking clanking at 0.463-3.903, 4.444-6.621, 7.812-10.0", "frequencyCaption": "tapping clicking clanking three times"} +{"filepath": "data/multi_event_train/syn_2965.wav", "onoffCaption": "door knocking at 0.177-3.552", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4021.wav", "onoffCaption": "dog barking at 2.558-4.558 and sheep goat bleating at 6.288-8.288", "frequencyCaption": "dog barking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4074.wav", "onoffCaption": "gunshot at 1.364-3.364, 4.698-6.698, 7.739-9.739", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4091.wav", "onoffCaption": "train horn at 3.3-6.18, 6.958-9.838", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4207.wav", "onoffCaption": "spraying at 0.091-0.995", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_4252.wav", "onoffCaption": "duck quacking at 0.773-2.773, 4.659-6.659 and cat meowing at 6.245-7.257, 8.139-9.683", "frequencyCaption": "duck quacking two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4349.wav", "onoffCaption": "whistling at 0.804-2.813, 4.259-7.09 and sheep goat bleating at 1.958-6.678", "frequencyCaption": "whistling two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4422.wav", "onoffCaption": "door knocking at 3.405-5.785", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4477.wav", "onoffCaption": "cat meowing at 
0.083-1.349, 2.041-3.625 and gunshot at 2.761-4.761", "frequencyCaption": "cat meowing two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4492.wav", "onoffCaption": "gunshot at 0.382-2.382, 3.831-5.831", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4539.wav", "onoffCaption": "duck quacking at 3.101-5.101 and thump thud at 7.995-10.0", "frequencyCaption": "duck quacking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4589.wav", "onoffCaption": "burping belching at 0.511-3.511, 4.303-7.246, 7.967-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_4604.wav", "onoffCaption": "thump thud at 3.427-5.927 and tapping clicking clanking at 3.66-7.1", "frequencyCaption": "thump thud one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4651.wav", "onoffCaption": "gunshot at 1.084-3.084, 3.952-5.952, 7.198-9.198 and whistling at 1.399-9.784", "frequencyCaption": "gunshot three times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4690.wav", "onoffCaption": "tapping clicking clanking at 1.909-5.349 and dog barking at 7.675-9.675", "frequencyCaption": "tapping clicking clanking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4868.wav", "onoffCaption": "tapping clicking clanking at 0.529-3.969", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4926.wav", "onoffCaption": "gunshot at 1.876-3.876, 4.842-6.842, 7.718-9.718", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4973.wav", "onoffCaption": "woman laughing at 2.777-5.546", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4996.wav", "onoffCaption": "gunshot at 2.571-4.701, 5.405-7.535", "frequencyCaption": "gunshot two times"} +{"filepath": 
"data/multi_event_train/syn_2046.wav", "onoffCaption": "thump thud at 0.536-3.307 and gunshot at 3.348-5.348", "frequencyCaption": "thump thud one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2235.wav", "onoffCaption": "explosion at 2.514-5.514, 7.383-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2260.wav", "onoffCaption": "train horn at 2.617-7.694", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2285.wav", "onoffCaption": "tapping clicking clanking at 0.136-3.576 and dog barking at 0.345-2.745", "frequencyCaption": "tapping clicking clanking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2334.wav", "onoffCaption": "thump thud at 0.05-3.097, 4.756-7.218 and dog barking at 0.212-2.212", "frequencyCaption": "thump thud two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2410.wav", "onoffCaption": "gunshot at 1.449-3.449 and cat meowing at 2.183-4.143 and sneeze at 3.218-4.392", "frequencyCaption": "gunshot one times and cat meowing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2445.wav", "onoffCaption": "cat meowing at 0.275-1.811, 3.469-5.005, 6.461-7.997 and dog barking at 2.825-4.825 and sneeze at 5.34-8.0", "frequencyCaption": "cat meowing three times and dog barking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2636.wav", "onoffCaption": "train horn at 2.96-6.36, 7.243-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2663.wav", "onoffCaption": "whistling at 0.052-2.281, 4.45-7.014 and train horn at 4.814-8.214", "frequencyCaption": "whistling two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2679.wav", "onoffCaption": "spraying at 2.8-3.3, 5.687-7.42", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2686.wav", "onoffCaption": 
"train horn at 2.685-7.762", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2762.wav", "onoffCaption": "door slamming at 0.1-2.32, 4.242-6.462 and dog barking at 3.512-5.512, 6.099-8.099", "frequencyCaption": "door slamming two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2778.wav", "onoffCaption": "car horn honking at 0.462-3.681 and sheep goat bleating at 5.35-7.35", "frequencyCaption": "car horn honking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2787.wav", "onoffCaption": "tapping clicking clanking at 1.579-5.019 and burping belching at 2.926-5.926, 7.082-10.0", "frequencyCaption": "tapping clicking clanking one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_2914.wav", "onoffCaption": "cow mooing at 2.723-7.152", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2941.wav", "onoffCaption": "duck quacking at 0.171-2.171 and spraying at 3.39-3.994 and sneeze at 5.409-6.795", "frequencyCaption": "duck quacking one times and spraying one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4005.wav", "onoffCaption": "sneeze at 1.605-4.066, 6.518-8.437", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4050.wav", "onoffCaption": "cat meowing at 0.481-1.525, 3.829-4.873 and tapping clicking clanking at 1.071-4.511", "frequencyCaption": "cat meowing two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4104.wav", "onoffCaption": "explosion at 0.572-2.665, 5.145-7.238 and door slamming at 3.172-4.289, 5.792-6.909", "frequencyCaption": "explosion two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4223.wav", "onoffCaption": "duck quacking at 0.184-2.184, 3.018-5.018, 6.317-8.317 and sheep goat bleating at 6.077-8.077", "frequencyCaption": "duck quacking three times and 
sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4276.wav", "onoffCaption": "gunshot at 3.244-5.718, 6.806-9.28", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4293.wav", "onoffCaption": "sneeze at 2.122-3.579, 5.94-7.397 and cow mooing at 3.03-6.012", "frequencyCaption": "sneeze two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4338.wav", "onoffCaption": "burping belching at 1.041-3.406 and sneeze at 2.307-4.392", "frequencyCaption": "burping belching one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4388.wav", "onoffCaption": "sneeze at 1.411-2.657 and explosion at 2.597-5.591", "frequencyCaption": "sneeze one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4406.wav", "onoffCaption": "door knocking at 0.49-3.61, 4.378-7.498 and train horn at 5.486-7.966", "frequencyCaption": "door knocking two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4453.wav", "onoffCaption": "explosion at 0.123-3.25, 5.341-8.468", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4548.wav", "onoffCaption": "whistling at 2.538-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4620.wav", "onoffCaption": "woman laughing at 1.411-4.465, 4.987-8.041 and gunshot at 1.892-3.892", "frequencyCaption": "woman laughing two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4675.wav", "onoffCaption": "whistling at 0.21-8.595 and sneeze at 0.27-2.229", "frequencyCaption": "whistling one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4819.wav", "onoffCaption": "cat meowing at 1.092-4.333", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4856.wav", "onoffCaption": "thump thud at 0.115-2.577, 4.284-6.746, 7.956-10.0", "frequencyCaption": "thump thud three times"} +{"filepath": 
"data/multi_event_train/syn_4902.wav", "onoffCaption": "spraying at 0.111-0.611, 1.41-1.979, 2.877-4.052 and gunshot at 6.942-8.942", "frequencyCaption": "spraying three times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4957.wav", "onoffCaption": "whistling at 1.592-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2009.wav", "onoffCaption": "thump thud at 0.776-5.151", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2077.wav", "onoffCaption": "whistling at 1.112-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2112.wav", "onoffCaption": "explosion at 2.23-5.23 and door slamming at 8.138-9.157", "frequencyCaption": "explosion one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2147.wav", "onoffCaption": "woman laughing at 1.159-4.547", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2189.wav", "onoffCaption": "train horn at 0.649-3.323, 4.516-7.037", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2361.wav", "onoffCaption": "train horn at 0.466-2.621, 3.841-5.996, 7.71-9.865", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_train/syn_2384.wav", "onoffCaption": "door slamming at 0.36-2.488, 4.172-6.3", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2421.wav", "onoffCaption": "spraying at 0.679-1.926, 3.703-4.95 and door knocking at 3.2-5.93", "frequencyCaption": "spraying two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2511.wav", "onoffCaption": "sheep goat bleating at 0.209-2.209", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2544.wav", "onoffCaption": "whistling at 2.084-4.313 and door slamming at 4.281-5.534", "frequencyCaption": "whistling one times and door 
slamming one times"} +{"filepath": "data/multi_event_train/syn_2737.wav", "onoffCaption": "sneeze at 0.645-2.558, 3.721-5.634, 6.689-8.602", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2815.wav", "onoffCaption": "cat meowing at 2.48-4.23", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2840.wav", "onoffCaption": "burping belching at 0.204-3.484, 4.239-7.156 and whistling at 0.478-8.863", "frequencyCaption": "burping belching two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2925.wav", "onoffCaption": "tapping clicking clanking at 0.44-3.88", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4151.wav", "onoffCaption": "gunshot at 0.002-2.002, 3.068-5.068, 6.124-8.124", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4239.wav", "onoffCaption": "cow mooing at 1.84-5.138, 6.741-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4247.wav", "onoffCaption": "gunshot at 3.188-5.318, 7.34-9.34", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4289.wav", "onoffCaption": "spraying at 2.817-3.992, 5.823-6.823", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4322.wav", "onoffCaption": "cow mooing at 0.833-3.802, 5.116-8.098", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4377.wav", "onoffCaption": "whistling at 0.24-5.74 and spraying at 0.76-2.007, 2.668-3.915 and explosion at 1.028-6.028", "frequencyCaption": "whistling one times and spraying two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4392.wav", "onoffCaption": "spraying at 0.221-0.79, 1.312-1.881, 4.348-4.917", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4449.wav", "onoffCaption": "sheep goat bleating at 
3.247-5.247", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4507.wav", "onoffCaption": "cat meowing at 0.37-2.364 and woman laughing at 2.598-5.184 and spraying at 4.817-7.836", "frequencyCaption": "cat meowing one times and woman laughing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4552.wav", "onoffCaption": "whistling at 1.266-6.441, 7.836-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4721.wav", "onoffCaption": "car horn honking at 1.973-6.295", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4774.wav", "onoffCaption": "woman laughing at 0.137-2.331, 4.071-6.657 and cat meowing at 1.489-2.799", "frequencyCaption": "woman laughing two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4791.wav", "onoffCaption": "thump thud at 1.876-4.338, 5.469-7.931 and door knocking at 5.537-8.0", "frequencyCaption": "thump thud two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4803.wav", "onoffCaption": "cat meowing at 1.28-2.546 and door slamming at 3.938-4.789, 5.958-8.184", "frequencyCaption": "cat meowing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4918.wav", "onoffCaption": "car horn honking at 0.143-3.056, 5.143-8.069", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_5.wav", "onoffCaption": "train horn at 3.048-7.116 and cat meowing at 6.564-7.75", "frequencyCaption": "train horn one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2022.wav", "onoffCaption": "sneeze at 0.421-2.738", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2092.wav", "onoffCaption": "train horn at 0.127-4.457, 5.738-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2139.wav", "onoffCaption": 
"tapping clicking clanking at 3.088-6.528, 7.884-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2204.wav", "onoffCaption": "cat meowing at 3.308-5.058, 7.31-9.06", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2251.wav", "onoffCaption": "thump thud at 0.879-5.329", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2350.wav", "onoffCaption": "train horn at 0.102-4.302, 5.661-8.516", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2474.wav", "onoffCaption": "gunshot at 0.141-2.141, 4.521-6.521, 7.761-9.891", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_2491.wav", "onoffCaption": "spraying at 0.317-0.921 and sheep goat bleating at 2.923-4.923 and burping belching at 3.97-6.97", "frequencyCaption": "spraying one times and sheep goat bleating one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2607.wav", "onoffCaption": "door knocking at 0.375-3.105 and duck quacking at 2.675-4.675", "frequencyCaption": "door knocking one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2652.wav", "onoffCaption": "thump thud at 3.045-7.495", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2706.wav", "onoffCaption": "duck quacking at 1.028-3.028, 4.755-6.755, 7.606-9.606", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2749.wav", "onoffCaption": "spraying at 1.223-2.304", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2970.wav", "onoffCaption": "sneeze at 0.91-3.524", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2995.wav", "onoffCaption": "train horn at 3.448-9.917", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4034.wav", 
"onoffCaption": "dog barking at 1.213-4.533, 6.858-10.0", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4061.wav", "onoffCaption": "door slamming at 3.947-4.798 and dog barking at 7.487-9.487", "frequencyCaption": "door slamming one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4084.wav", "onoffCaption": "sheep goat bleating at 0.916-2.916, 4.112-6.918, 7.548-9.548", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4160.wav", "onoffCaption": "gunshot at 1.982-4.483, 5.194-7.194", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4185.wav", "onoffCaption": "cow mooing at 1.666-4.964, 6.339-9.308", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4212.wav", "onoffCaption": "gunshot at 2.485-4.485, 5.365-7.365", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4309.wav", "onoffCaption": "car horn honking at 0.258-4.507, 5.729-9.978", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4437.wav", "onoffCaption": "cow mooing at 0.883-3.852, 4.639-7.608", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4462.wav", "onoffCaption": "spraying at 1.642-4.661 and train horn at 2.0-5.32", "frequencyCaption": "spraying one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4487.wav", "onoffCaption": "woman laughing at 0.162-3.257, 4.068-6.552 and dog barking at 0.198-2.198", "frequencyCaption": "woman laughing two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4536.wav", "onoffCaption": "woman laughing at 2.541-5.636, 7.344-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4579.wav", "onoffCaption": "burping belching at 0.802-4.242, 5.248-8.688 and gunshot at 2.58-4.58, 6.553-8.553", 
"frequencyCaption": "burping belching two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4611.wav", "onoffCaption": "dog barking at 2.046-4.046, 5.252-7.252", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4644.wav", "onoffCaption": "gunshot at 1.209-3.209, 5.051-7.324 and train horn at 2.986-7.106, 7.822-10.0", "frequencyCaption": "gunshot two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_4828.wav", "onoffCaption": "car horn honking at 2.865-7.114 and thump thud at 4.049-6.277", "frequencyCaption": "car horn honking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4832.wav", "onoffCaption": "whistling at 1.547-9.202", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4898.wav", "onoffCaption": "spraying at 0.281-1.538, 2.407-3.664, 4.717-5.974", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4929.wav", "onoffCaption": "gunshot at 1.601-3.601", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4933.wav", "onoffCaption": "sheep goat bleating at 2.998-4.998, 6.444-8.444", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4966.wav", "onoffCaption": "cow mooing at 3.137-6.106", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4983.wav", "onoffCaption": "whistling at 1.786-4.015, 5.199-7.52", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2038.wav", "onoffCaption": "duck quacking at 0.312-2.312, 3.475-5.475, 6.684-8.684 and thump thud at 1.912-4.412", "frequencyCaption": "duck quacking three times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2088.wav", "onoffCaption": "cat meowing at 0.012-1.902 and burping belching at 5.241-8.531", "frequencyCaption": "cat meowing one times and burping belching one 
times"} +{"filepath": "data/multi_event_train/syn_2123.wav", "onoffCaption": "tapping clicking clanking at 0.644-4.084, 5.681-8.568", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2176.wav", "onoffCaption": "cow mooing at 2.931-7.911", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2193.wav", "onoffCaption": "whistling at 0.783-9.168", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2305.wav", "onoffCaption": "sneeze at 0.788-2.747, 3.395-4.683 and gunshot at 1.228-3.228", "frequencyCaption": "sneeze two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2374.wav", "onoffCaption": "sneeze at 2.903-4.067, 4.883-6.047 and train horn at 3.325-5.965, 7.216-9.683", "frequencyCaption": "sneeze two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2520.wav", "onoffCaption": "sheep goat bleating at 0.244-4.164", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2575.wav", "onoffCaption": "door knocking at 0.167-2.519, 3.115-5.467, 7.305-9.657", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_2590.wav", "onoffCaption": "door slamming at 0.593-1.533, 3.478-4.418", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2639.wav", "onoffCaption": "train horn at 3.428-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2648.wav", "onoffCaption": "gunshot at 0.06-2.06", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2722.wav", "onoffCaption": "duck quacking at 1.492-3.492, 5.29-7.29", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2753.wav", "onoffCaption": "thump thud at 0.078-2.306, 4.188-6.416 and train horn at 0.566-3.206 and dog barking at 2.021-5.341, 
6.271-9.591", "frequencyCaption": "thump thud two times and train horn one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2824.wav", "onoffCaption": "woman laughing at 0.649-3.004, 5.141-7.589 and door knocking at 3.792-6.859, 7.738-10.0", "frequencyCaption": "woman laughing two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2871.wav", "onoffCaption": "cow mooing at 1.316-4.614, 5.217-8.144", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2894.wav", "onoffCaption": "gunshot at 1.267-3.267, 4.861-6.861", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4135.wav", "onoffCaption": "duck quacking at 0.516-2.516", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4144.wav", "onoffCaption": "sneeze at 0.05-2.163, 4.515-6.628, 7.634-9.747", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4208.wav", "onoffCaption": "dog barking at 3.387-5.387", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4313.wav", "onoffCaption": "burping belching at 1.703-3.826, 4.405-7.405 and dog barking at 5.109-7.109, 7.775-9.775", "frequencyCaption": "burping belching two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_4346.wav", "onoffCaption": "tapping clicking clanking at 0.824-4.264 and cow mooing at 6.055-10.0", "frequencyCaption": "tapping clicking clanking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4478.wav", "onoffCaption": "duck quacking at 0.198-2.198, 2.933-4.933 and cow mooing at 1.378-4.347", "frequencyCaption": "duck quacking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4563.wav", "onoffCaption": "explosion at 0.394-3.394", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4586.wav", "onoffCaption": "door 
knocking at 3.024-6.133 and cat meowing at 3.165-4.525", "frequencyCaption": "door knocking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4710.wav", "onoffCaption": "burping belching at 1.228-4.407 and sneeze at 2.434-3.728", "frequencyCaption": "burping belching one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4745.wav", "onoffCaption": "burping belching at 0.033-3.212, 4.738-7.917 and sheep goat bleating at 3.435-5.435, 6.487-8.487", "frequencyCaption": "burping belching two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4816.wav", "onoffCaption": "woman laughing at 3.804-6.029 and dog barking at 5.915-7.915", "frequencyCaption": "woman laughing one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4867.wav", "onoffCaption": "duck quacking at 0.123-2.123, 4.217-6.217 and gunshot at 4.27-6.27", "frequencyCaption": "duck quacking two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4882.wav", "onoffCaption": "spraying at 0.017-0.868, 2.396-3.247", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4999.wav", "onoffCaption": "car horn honking at 0.007-3.226, 5.675-8.894 and cat meowing at 0.879-1.981", "frequencyCaption": "car horn honking two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2049.wav", "onoffCaption": "sheep goat bleating at 2.376-4.376, 5.004-7.004", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2053.wav", "onoffCaption": "cow mooing at 3.22-6.23", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2107.wav", "onoffCaption": "door knocking at 0.202-2.579 and thump thud at 7.565-10.0", "frequencyCaption": "door knocking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2148.wav", "onoffCaption": "spraying at 1.992-3.754 and door 
slamming at 2.991-4.382, 5.138-6.529, 7.82-9.211", "frequencyCaption": "spraying one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_2152.wav", "onoffCaption": "tapping clicking clanking at 3.493-6.933 and cow mooing at 3.754-7.052", "frequencyCaption": "tapping clicking clanking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2321.wav", "onoffCaption": "whistling at 2.723-7.207", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2391.wav", "onoffCaption": "cat meowing at 2.812-4.772, 6.893-8.853", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2405.wav", "onoffCaption": "whistling at 0.352-3.227", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2504.wav", "onoffCaption": "sneeze at 3.377-5.336", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2551.wav", "onoffCaption": "door slamming at 2.538-3.677, 4.833-5.972", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2689.wav", "onoffCaption": "woman laughing at 2.249-8.983", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2777.wav", "onoffCaption": "burping belching at 0.147-3.691", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2792.wav", "onoffCaption": "burping belching at 1.622-5.491", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2800.wav", "onoffCaption": "door slamming at 0.311-1.544, 3.521-4.754", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2855.wav", "onoffCaption": "car horn honking at 0.434-5.341", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2901.wav", "onoffCaption": "whistling at 0.354-5.854", "frequencyCaption": "whistling one times"} 
+{"filepath": "data/multi_event_train/syn_4111.wav", "onoffCaption": "woman laughing at 2.429-4.534, 5.334-7.689 and dog barking at 3.371-5.371, 6.548-8.548", "frequencyCaption": "woman laughing two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_4279.wav", "onoffCaption": "car horn honking at 2.345-7.252", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4286.wav", "onoffCaption": "cow mooing at 0.317-5.297", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4337.wav", "onoffCaption": "spraying at 2.863-3.73 and cat meowing at 6.571-9.601", "frequencyCaption": "spraying one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4362.wav", "onoffCaption": "dog barking at 2.986-6.306", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4387.wav", "onoffCaption": "cow mooing at 1.656-4.666, 6.806-9.081", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4409.wav", "onoffCaption": "cat meowing at 0.497-2.491, 4.841-6.835 and car horn honking at 1.246-5.568", "frequencyCaption": "cat meowing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4512.wav", "onoffCaption": "car horn honking at 1.26-5.101, 5.957-8.883", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4547.wav", "onoffCaption": "train horn at 1.784-4.944", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4734.wav", "onoffCaption": "gunshot at 3.456-5.475, 7.633-9.633", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4761.wav", "onoffCaption": "car horn honking at 1.518-5.172, 6.256-9.91", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4784.wav", "onoffCaption": "dog barking at 2.425-4.425, 5.507-7.507 and spraying at 
5.914-7.171, 8.215-9.39", "frequencyCaption": "dog barking two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4843.wav", "onoffCaption": "door slamming at 1.734-3.862, 5.016-7.144 and woman laughing at 4.551-6.632, 7.972-10.0", "frequencyCaption": "door slamming two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4958.wav", "onoffCaption": "cat meowing at 0.111-1.213, 2.139-3.241 and tapping clicking clanking at 1.733-5.173 and gunshot at 4.216-6.216", "frequencyCaption": "cat meowing two times and tapping clicking clanking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2006.wav", "onoffCaption": "explosion at 0.8-3.794, 5.316-8.31", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2220.wav", "onoffCaption": "cow mooing at 0.297-3.307, 4.166-7.148", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2227.wav", "onoffCaption": "train horn at 0.365-2.765, 3.678-6.078, 6.632-9.032 and cat meowing at 3.73-5.301", "frequencyCaption": "train horn three times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2275.wav", "onoffCaption": "dog barking at 0.194-2.194, 4.569-6.569 and tapping clicking clanking at 0.203-3.643", "frequencyCaption": "dog barking two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2290.wav", "onoffCaption": "door knocking at 0.345-4.617, 6.113-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2450.wav", "onoffCaption": "explosion at 3.699-8.293 and sheep goat bleating at 4.38-6.38, 7.404-9.404", "frequencyCaption": "explosion one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2623.wav", "onoffCaption": "sheep goat bleating at 1.467-6.187", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2671.wav", 
"onoffCaption": "sneeze at 0.735-2.012, 4.199-5.476", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2676.wav", "onoffCaption": "gunshot at 2.71-4.71", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2693.wav", "onoffCaption": "sheep goat bleating at 0.134-2.134, 2.98-4.98, 6.05-8.05", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2694.wav", "onoffCaption": "cat meowing at 2.385-4.333, 5.505-7.453", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2738.wav", "onoffCaption": "whistling at 0.685-5.169, 7.608-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2788.wav", "onoffCaption": "burping belching at 0.0-4.023, 5.919-8.245 and whistling at 0.417-2.426", "frequencyCaption": "burping belching two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2954.wav", "onoffCaption": "sneeze at 0.438-2.397, 3.235-5.194 and spraying at 3.26-3.835, 5.502-6.077", "frequencyCaption": "sneeze two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4010.wav", "onoffCaption": "door slamming at 1.845-2.962, 5.286-6.403, 7.731-8.848", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4017.wav", "onoffCaption": "explosion at 2.384-5.112 and woman laughing at 7.997-10.0", "frequencyCaption": "explosion one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4045.wav", "onoffCaption": "spraying at 1.773-2.273, 3.359-3.859, 5.793-6.293", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4236.wav", "onoffCaption": "door slamming at 3.015-5.241, 6.472-8.698 and spraying at 3.798-4.373, 5.108-7.544, 8.162-8.894", "frequencyCaption": "door slamming two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_4263.wav", 
"onoffCaption": "train horn at 3.019-6.789 and sneeze at 3.37-5.483", "frequencyCaption": "train horn one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4378.wav", "onoffCaption": "tapping clicking clanking at 0.679-4.119, 5.177-7.992 and cow mooing at 3.921-6.89", "frequencyCaption": "tapping clicking clanking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4413.wav", "onoffCaption": "whistling at 0.02-7.77 and car horn honking at 3.683-6.858", "frequencyCaption": "whistling one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4441.wav", "onoffCaption": "car horn honking at 0.078-4.478, 5.779-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4446.wav", "onoffCaption": "thump thud at 0.432-2.932, 4.167-6.395", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4508.wav", "onoffCaption": "door knocking at 0.277-4.027, 5.925-8.174 and dog barking at 1.887-3.887", "frequencyCaption": "door knocking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4635.wav", "onoffCaption": "sneeze at 0.402-2.715, 5.087-7.4", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4660.wav", "onoffCaption": "thump thud at 0.05-2.278, 3.47-5.565", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4685.wav", "onoffCaption": "thump thud at 1.015-4.682, 6.109-8.296", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4859.wav", "onoffCaption": "door knocking at 0.375-2.75, 4.289-6.664 and thump thud at 4.902-7.364", "frequencyCaption": "door knocking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4917.wav", "onoffCaption": "door knocking at 0.45-3.559 and cow mooing at 1.189-4.171, 5.069-7.646", "frequencyCaption": "door knocking one times and cow mooing two 
times"} +{"filepath": "data/multi_event_train/syn_4942.wav", "onoffCaption": "duck quacking at 2.841-4.841, 6.439-8.439", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4945.wav", "onoffCaption": "sneeze at 3.167-4.421, 5.355-6.609", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2001.wav", "onoffCaption": "cat meowing at 3.737-6.767, 7.269-10.0", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2054.wav", "onoffCaption": "explosion at 3.827-8.421", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2100.wav", "onoffCaption": "spraying at 2.764-3.368, 4.537-5.141", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2124.wav", "onoffCaption": "gunshot at 3.266-5.436, 7.546-9.546", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2155.wav", "onoffCaption": "cow mooing at 1.989-4.999, 7.306-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2268.wav", "onoffCaption": "whistling at 2.877-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2272.wav", "onoffCaption": "door slamming at 2.858-3.997, 4.816-5.621, 6.487-8.405", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2297.wav", "onoffCaption": "car horn honking at 3.459-5.924, 7.102-9.567", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2326.wav", "onoffCaption": "train horn at 1.47-5.67 and gunshot at 1.569-3.569, 4.393-6.393, 7.694-9.694 and whistling at 2.957-5.832", "frequencyCaption": "train horn one times and gunshot three times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2369.wav", "onoffCaption": "woman laughing at 1.147-3.739, 4.978-7.57", "frequencyCaption": "woman laughing two times"} +{"filepath": 
"data/multi_event_train/syn_2373.wav", "onoffCaption": "cow mooing at 0.035-4.464 and cat meowing at 7.286-8.426", "frequencyCaption": "cow mooing one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2396.wav", "onoffCaption": "door slamming at 0.228-2.448, 3.938-6.158", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2402.wav", "onoffCaption": "door knocking at 2.793-5.849", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2418.wav", "onoffCaption": "dog barking at 0.291-2.291 and explosion at 0.294-5.294, 6.146-8.21", "frequencyCaption": "dog barking one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_2457.wav", "onoffCaption": "dog barking at 3.02-5.02, 6.881-8.881", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2503.wav", "onoffCaption": "sheep goat bleating at 0.386-3.706", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2519.wav", "onoffCaption": "spraying at 3.627-4.228, 5.239-5.971, 6.818-8.976", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2556.wav", "onoffCaption": "train horn at 0.173-5.917", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2572.wav", "onoffCaption": "spraying at 0.005-0.589, 1.147-1.669, 2.456-3.703 and sheep goat bleating at 5.9-8.98", "frequencyCaption": "spraying three times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2624.wav", "onoffCaption": "gunshot at 1.164-3.638", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2725.wav", "onoffCaption": "cat meowing at 0.226-1.439, 2.668-3.881 and duck quacking at 7.397-9.397", "frequencyCaption": "cat meowing two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2770.wav", "onoffCaption": "train horn at 
0.143-5.858, 6.746-9.611", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2795.wav", "onoffCaption": "woman laughing at 3.182-5.298, 5.997-8.113", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2807.wav", "onoffCaption": "woman laughing at 2.828-9.562 and train horn at 3.108-7.966", "frequencyCaption": "woman laughing one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2848.wav", "onoffCaption": "whistling at 2.81-9.727", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2852.wav", "onoffCaption": "sneeze at 2.011-3.718 and duck quacking at 2.159-4.159", "frequencyCaption": "sneeze one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2906.wav", "onoffCaption": "cow mooing at 2.08-5.049", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2949.wav", "onoffCaption": "woman laughing at 3.906-6.261", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2953.wav", "onoffCaption": "explosion at 0.864-3.864, 5.072-8.009", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4042.wav", "onoffCaption": "dog barking at 2.567-5.488, 7.079-10.0", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4058.wav", "onoffCaption": "sneeze at 0.092-2.495, 3.276-4.863 and duck quacking at 0.416-2.416, 4.8-6.8 and gunshot at 3.904-5.904, 6.856-8.856", "frequencyCaption": "sneeze two times and duck quacking two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4116.wav", "onoffCaption": "sheep goat bleating at 0.718-2.718, 4.892-6.892 and door knocking at 1.376-5.909, 6.721-8.865 and dog barking at 5.228-7.228", "frequencyCaption": "sheep goat bleating two times and door knocking two times and dog barking one times"} +{"filepath": 
"data/multi_event_train/syn_4143.wav", "onoffCaption": "whistling at 0.576-8.961 and dog barking at 0.748-6.385 and woman laughing at 2.533-5.138", "frequencyCaption": "whistling one times and dog barking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4159.wav", "onoffCaption": "thump thud at 0.331-4.249 and duck quacking at 2.631-4.631, 5.476-7.476", "frequencyCaption": "thump thud one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4231.wav", "onoffCaption": "car horn honking at 1.169-3.634 and door slamming at 7.65-9.563", "frequencyCaption": "car horn honking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4264.wav", "onoffCaption": "burping belching at 1.205-4.711, 5.317-8.823 and woman laughing at 2.257-5.311", "frequencyCaption": "burping belching two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4281.wav", "onoffCaption": "woman laughing at 0.493-2.718", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4330.wav", "onoffCaption": "gunshot at 1.206-3.206, 4.669-6.669 and sheep goat bleating at 2.87-4.87, 6.162-8.162", "frequencyCaption": "gunshot two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4365.wav", "onoffCaption": "tapping clicking clanking at 0.181-3.621, 4.89-7.477 and door slamming at 0.518-2.059, 2.809-4.35", "frequencyCaption": "tapping clicking clanking two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4380.wav", "onoffCaption": "cow mooing at 1.129-4.427 and tapping clicking clanking at 2.306-5.746, 7.032-9.339", "frequencyCaption": "cow mooing one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4414.wav", "onoffCaption": "door slamming at 1.825-2.663, 4.903-5.741", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4515.wav", 
"onoffCaption": "sneeze at 0.751-2.664, 3.184-5.097", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4540.wav", "onoffCaption": "train horn at 0.83-6.545, 7.829-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4628.wav", "onoffCaption": "duck quacking at 0.235-2.235, 3.981-5.981, 7.014-9.014 and door slamming at 0.25-1.15", "frequencyCaption": "duck quacking three times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4632.wav", "onoffCaption": "spraying at 0.339-1.071, 2.214-2.798, 3.436-4.5 and sneeze at 3.126-5.211, 6.084-8.169", "frequencyCaption": "spraying three times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_4667.wav", "onoffCaption": "door knocking at 2.552-4.642, 6.518-8.608", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4682.wav", "onoffCaption": "door slamming at 0.297-1.776, 2.503-3.354, 4.599-5.577", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4698.wav", "onoffCaption": "cow mooing at 0.428-5.408 and whistling at 0.539-8.924", "frequencyCaption": "cow mooing one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4729.wav", "onoffCaption": "car horn honking at 1.494-4.42, 5.18-7.375", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4733.wav", "onoffCaption": "woman laughing at 1.817-4.011 and door knocking at 5.746-8.593", "frequencyCaption": "woman laughing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4766.wav", "onoffCaption": "sheep goat bleating at 0.695-2.695, 3.642-5.642, 6.254-8.254", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4783.wav", "onoffCaption": "cow mooing at 2.223-5.233, 7.477-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": 
"data/multi_event_train/syn_4799.wav", "onoffCaption": "sneeze at 0.383-4.912, 6.313-7.59", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4811.wav", "onoffCaption": "door knocking at 3.503-5.815, 7.402-9.714", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4844.wav", "onoffCaption": "car horn honking at 0.575-3.04 and whistling at 1.136-6.311, 7.842-10.0 and door knocking at 5.702-8.47", "frequencyCaption": "car horn honking one times and whistling two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4910.wav", "onoffCaption": "dog barking at 3.332-5.332, 6.55-8.55", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2171.wav", "onoffCaption": "gunshot at 3.247-5.247", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2194.wav", "onoffCaption": "door knocking at 2.709-6.262, 7.986-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2219.wav", "onoffCaption": "cow mooing at 0.867-5.296, 6.169-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2302.wav", "onoffCaption": "burping belching at 2.824-5.824, 6.756-8.973", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2318.wav", "onoffCaption": "whistling at 0.004-2.233, 3.194-6.069, 7.167-9.818", "frequencyCaption": "whistling three times"} +{"filepath": "data/multi_event_train/syn_2357.wav", "onoffCaption": "sneeze at 3.056-4.148, 6.17-7.262, 8.722-9.814", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2469.wav", "onoffCaption": "burping belching at 0.092-3.092, 3.702-5.932, 7.024-9.674", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_2527.wav", "onoffCaption": "spraying at 1.357-3.941, 4.523-7.107", "frequencyCaption": "spraying two 
times"} +{"filepath": "data/multi_event_train/syn_2597.wav", "onoffCaption": "cat meowing at 3.19-5.13, 6.832-8.772", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2701.wav", "onoffCaption": "explosion at 1.956-4.685, 5.357-8.086", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2754.wav", "onoffCaption": "sneeze at 0.324-1.824, 2.589-3.843 and tapping clicking clanking at 6.289-9.729", "frequencyCaption": "sneeze two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2823.wav", "onoffCaption": "sheep goat bleating at 0.14-3.14", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2876.wav", "onoffCaption": "duck quacking at 2.428-4.428, 6.174-8.174", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2893.wav", "onoffCaption": "door slamming at 1.258-3.741", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_2938.wav", "onoffCaption": "duck quacking at 0.341-2.341, 3.83-5.83", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2988.wav", "onoffCaption": "cow mooing at 1.038-4.02", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4029.wav", "onoffCaption": "tapping clicking clanking at 3.41-6.85", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4099.wav", "onoffCaption": "thump thud at 0.883-3.222, 5.031-7.37 and sneeze at 1.897-4.557, 5.208-7.167", "frequencyCaption": "thump thud two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_4132.wav", "onoffCaption": "whistling at 0.88-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4167.wav", "onoffCaption": "burping belching at 0.189-7.357 and explosion at 2.581-7.581 and cat meowing at 
7.678-8.689", "frequencyCaption": "burping belching one times and explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4182.wav", "onoffCaption": "gunshot at 1.168-3.168, 5.131-7.131", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4314.wav", "onoffCaption": "burping belching at 2.924-5.924, 6.913-8.948", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4341.wav", "onoffCaption": "burping belching at 1.82-4.82", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4531.wav", "onoffCaption": "whistling at 2.216-9.966", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4564.wav", "onoffCaption": "burping belching at 1.236-4.236, 5.53-8.53 and sneeze at 5.994-7.288", "frequencyCaption": "burping belching two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4581.wav", "onoffCaption": "door knocking at 0.688-4.241 and door slamming at 7.512-8.412", "frequencyCaption": "door knocking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4659.wav", "onoffCaption": "whistling at 0.504-8.08 and explosion at 2.227-6.227, 6.929-9.249 and spraying at 6.965-7.473, 7.993-8.501, 9.439-9.947", "frequencyCaption": "whistling one times and explosion two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_4717.wav", "onoffCaption": "sheep goat bleating at 0.537-2.537, 3.04-5.78 and duck quacking at 1.164-3.164", "frequencyCaption": "sheep goat bleating two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4742.wav", "onoffCaption": "woman laughing at 3.527-6.165 and duck quacking at 5.318-7.318", "frequencyCaption": "woman laughing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4835.wav", "onoffCaption": "cow mooing at 2.109-5.119 and gunshot at 7.477-9.477", 
"frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4860.wav", "onoffCaption": "thump thud at 0.222-2.561, 4.496-7.267", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4885.wav", "onoffCaption": "train horn at 0.303-3.663 and sheep goat bleating at 0.483-2.483 and cat meowing at 4.115-5.262, 7.386-8.533", "frequencyCaption": "train horn one times and sheep goat bleating one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2.wav", "onoffCaption": "gunshot at 3.979-5.979, 7.863-9.863", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2070.wav", "onoffCaption": "tapping clicking clanking at 0.312-3.752, 6.233-8.612", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2095.wav", "onoffCaption": "explosion at 0.659-3.531, 5.683-8.555", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2203.wav", "onoffCaption": "train horn at 0.751-3.991, 4.924-7.607 and burping belching at 1.081-3.842", "frequencyCaption": "train horn two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2256.wav", "onoffCaption": "whistling at 0.039-7.789", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2426.wav", "onoffCaption": "burping belching at 0.083-3.952, 4.847-8.716", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2496.wav", "onoffCaption": "burping belching at 0.734-2.841, 4.296-7.291 and duck quacking at 5.046-7.046, 7.787-9.787", "frequencyCaption": "burping belching two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2568.wav", "onoffCaption": "door knocking at 1.848-4.088, 5.701-7.941", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2600.wav", "onoffCaption": "spraying 
at 1.873-3.606, 5.628-7.361", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2655.wav", "onoffCaption": "door slamming at 3.365-5.728, 6.77-9.133 and woman laughing at 5.218-7.784", "frequencyCaption": "door slamming two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2839.wav", "onoffCaption": "train horn at 2.721-7.623", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2889.wav", "onoffCaption": "woman laughing at 0.784-3.389, 5.389-7.994", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2922.wav", "onoffCaption": "sheep goat bleating at 2.514-4.514, 6.364-9.354", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2977.wav", "onoffCaption": "door knocking at 3.285-6.838, 7.652-9.812", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2992.wav", "onoffCaption": "sneeze at 2.421-4.66, 5.402-7.641", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4033.wav", "onoffCaption": "door knocking at 1.626-5.922, 6.87-9.119", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4066.wav", "onoffCaption": "cat meowing at 0.594-2.73, 3.248-5.384, 7.175-9.311", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4083.wav", "onoffCaption": "door slamming at 2.678-3.911 and tapping clicking clanking at 6.349-9.789", "frequencyCaption": "door slamming one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4128.wav", "onoffCaption": "car horn honking at 0.637-5.149, 6.439-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4198.wav", "onoffCaption": "door knocking at 2.713-5.025, 6.532-9.144 and burping belching at 2.951-4.986, 6.772-8.807", "frequencyCaption": "door 
knocking two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_4215.wav", "onoffCaption": "explosion at 0.955-3.949 and cat meowing at 2.343-3.759", "frequencyCaption": "explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4240.wav", "onoffCaption": "train horn at 3.919-8.249", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4430.wav", "onoffCaption": "burping belching at 0.108-3.367, 4.557-7.03 and explosion at 3.841-8.841", "frequencyCaption": "burping belching two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4465.wav", "onoffCaption": "spraying at 1.705-2.213, 4.527-5.102", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4480.wav", "onoffCaption": "door slamming at 0.287-2.052, 4.508-7.482", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4616.wav", "onoffCaption": "woman laughing at 2.164-4.769 and spraying at 6.976-7.976", "frequencyCaption": "woman laughing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4643.wav", "onoffCaption": "burping belching at 0.436-2.559 and explosion at 0.7-5.621 and spraying at 7.99-9.723", "frequencyCaption": "burping belching one times and explosion one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4758.wav", "onoffCaption": "explosion at 0.898-5.898 and train horn at 2.17-8.639 and duck quacking at 2.778-4.778, 5.802-7.802", "frequencyCaption": "explosion one times and train horn one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4934.wav", "onoffCaption": "duck quacking at 2.328-4.328, 5.244-7.244 and sneeze at 2.934-4.997, 5.606-7.669", "frequencyCaption": "duck quacking two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_4961.wav", "onoffCaption": "train horn at 0.419-3.219, 5.239-7.706", "frequencyCaption": 
"train horn two times"} +{"filepath": "data/multi_event_train/syn_4984.wav", "onoffCaption": "door knocking at 0.082-2.434, 4.087-6.099 and thump thud at 1.577-5.244 and dog barking at 5.732-7.732", "frequencyCaption": "door knocking two times and thump thud one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2025.wav", "onoffCaption": "cat meowing at 0.213-2.349, 3.579-5.163", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2031.wav", "onoffCaption": "tapping clicking clanking at 0.15-3.59, 4.176-7.053, 7.619-9.914", "frequencyCaption": "tapping clicking clanking three times"} +{"filepath": "data/multi_event_train/syn_2064.wav", "onoffCaption": "gunshot at 1.132-3.132", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2081.wav", "onoffCaption": "cow mooing at 0.092-3.074, 5.366-8.348", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2217.wav", "onoffCaption": "door knocking at 0.221-3.983, 4.815-8.577 and whistling at 0.271-8.282 and car horn honking at 1.033-5.94", "frequencyCaption": "door knocking two times and whistling one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2242.wav", "onoffCaption": "burping belching at 3.447-6.727, 7.825-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2359.wav", "onoffCaption": "spraying at 0.454-2.149 and whistling at 5.572-10.0", "frequencyCaption": "spraying one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2432.wav", "onoffCaption": "whistling at 0.178-9.045 and sneeze at 2.702-5.91", "frequencyCaption": "whistling one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2467.wav", "onoffCaption": "door slamming at 2.516-4.879, 5.771-8.745", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2473.wav", "onoffCaption": 
"explosion at 0.536-3.536 and door knocking at 3.283-5.595, 6.181-8.493", "frequencyCaption": "explosion one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2482.wav", "onoffCaption": "thump thud at 2.321-5.988, 6.902-9.402", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2529.wav", "onoffCaption": "cat meowing at 1.85-6.85, 7.75-10.0 and thump thud at 3.577-6.039", "frequencyCaption": "cat meowing two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2599.wav", "onoffCaption": "sneeze at 0.377-1.611, 3.551-4.785", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2614.wav", "onoffCaption": "train horn at 0.234-2.701, 3.897-6.569", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2641.wav", "onoffCaption": "woman laughing at 0.106-3.206 and sheep goat bleating at 3.387-5.387, 6.381-8.381 and cat meowing at 5.944-6.971, 8.04-9.067", "frequencyCaption": "woman laughing one times and sheep goat bleating two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2878.wav", "onoffCaption": "sneeze at 2.708-4.621, 6.857-9.318", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2936.wav", "onoffCaption": "car horn honking at 2.339-6.588", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2963.wav", "onoffCaption": "duck quacking at 0.591-2.591, 3.535-5.535", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2986.wav", "onoffCaption": "thump thud at 0.431-2.931, 3.694-6.194, 6.919-9.854", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_4027.wav", "onoffCaption": "burping belching at 2.781-5.304, 7.447-9.97", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4072.wav", "onoffCaption": "gunshot at 
0.659-2.659, 4.787-6.787", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4097.wav", "onoffCaption": "duck quacking at 2.279-4.279", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4169.wav", "onoffCaption": "thump thud at 1.232-5.682 and sheep goat bleating at 1.31-3.31", "frequencyCaption": "thump thud one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4201.wav", "onoffCaption": "thump thud at 0.928-4.595 and duck quacking at 1.99-3.99, 4.976-6.976", "frequencyCaption": "thump thud one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4254.wav", "onoffCaption": "burping belching at 0.617-4.117, 5.91-9.41", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4424.wav", "onoffCaption": "explosion at 4.143-9.064", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4471.wav", "onoffCaption": "door knocking at 0.0-6.06 and gunshot at 2.217-4.258", "frequencyCaption": "door knocking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4494.wav", "onoffCaption": "spraying at 0.609-1.673 and duck quacking at 0.711-2.711, 3.85-5.85", "frequencyCaption": "spraying one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4602.wav", "onoffCaption": "tapping clicking clanking at 2.77-6.21, 7.235-9.279 and door knocking at 3.111-7.621", "frequencyCaption": "tapping clicking clanking two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4657.wav", "onoffCaption": "car horn honking at 0.411-4.66, 5.261-7.726", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4719.wav", "onoffCaption": "sneeze at 3.096-5.041", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4920.wav", "onoffCaption": "sneeze at 0.108-2.722, 
4.24-5.528, 6.997-9.077", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4975.wav", "onoffCaption": "tapping clicking clanking at 2.428-5.868, 7.423-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4990.wav", "onoffCaption": "cow mooing at 0.034-3.016 and train horn at 0.166-5.881", "frequencyCaption": "cow mooing one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2130.wav", "onoffCaption": "door slamming at 0.343-2.826, 4.639-7.122 and sheep goat bleating at 0.499-2.499, 4.127-6.127", "frequencyCaption": "door slamming two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2165.wav", "onoffCaption": "door slamming at 1.723-3.264, 4.189-5.73", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2180.wav", "onoffCaption": "gunshot at 0.152-2.152, 3.02-5.02, 5.995-8.496", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_2258.wav", "onoffCaption": "door knocking at 0.871-5.573", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2316.wav", "onoffCaption": "dog barking at 0.47-3.79 and car horn honking at 1.013-5.413 and door knocking at 3.196-5.384", "frequencyCaption": "dog barking one times and car horn honking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2343.wav", "onoffCaption": "dog barking at 0.514-2.514 and train horn at 2.989-8.704 and woman laughing at 5.063-7.701", "frequencyCaption": "dog barking one times and train horn one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2382.wav", "onoffCaption": "duck quacking at 1.839-3.839", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2428.wav", "onoffCaption": "sneeze at 2.249-4.863, 5.588-6.819, 7.797-10.0", "frequencyCaption": "sneeze three 
times"} +{"filepath": "data/multi_event_train/syn_2498.wav", "onoffCaption": "sheep goat bleating at 3.531-5.531, 6.48-8.48", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2533.wav", "onoffCaption": "sheep goat bleating at 3.267-5.267", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2566.wav", "onoffCaption": "spraying at 0.111-0.611, 2.052-2.656, 4.625-7.015 and door knocking at 3.291-5.479, 7.437-9.625", "frequencyCaption": "spraying three times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2583.wav", "onoffCaption": "woman laughing at 1.036-3.404", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2715.wav", "onoffCaption": "sheep goat bleating at 0.66-5.54, 7.267-9.267", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2740.wav", "onoffCaption": "train horn at 0.131-3.901, 6.116-9.886", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2837.wav", "onoffCaption": "whistling at 0.681-3.656, 5.848-8.823", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2862.wav", "onoffCaption": "dog barking at 0.087-2.525, 4.272-6.272", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2887.wav", "onoffCaption": "sheep goat bleating at 1.009-3.009, 4.409-6.409 and spraying at 2.342-4.778", "frequencyCaption": "sheep goat bleating two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2979.wav", "onoffCaption": "dog barking at 1.195-3.195, 4.515-6.515, 7.789-9.789 and woman laughing at 3.19-5.776 and door slamming at 3.266-5.486", "frequencyCaption": "dog barking three times and woman laughing one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4068.wav", "onoffCaption": "sneeze at 0.318-4.818, 5.939-7.265, 
8.03-9.284", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4126.wav", "onoffCaption": "whistling at 2.345-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4173.wav", "onoffCaption": "sheep goat bleating at 0.475-2.475 and cat meowing at 0.576-1.661 and sneeze at 2.632-3.735", "frequencyCaption": "sheep goat bleating one times and cat meowing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4196.wav", "onoffCaption": "gunshot at 3.737-5.737", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4300.wav", "onoffCaption": "car horn honking at 1.553-5.14", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4355.wav", "onoffCaption": "cow mooing at 0.892-4.19, 5.52-7.717", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4501.wav", "onoffCaption": "whistling at 0.143-7.798", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4525.wav", "onoffCaption": "woman laughing at 0.406-2.631, 3.797-6.022", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4570.wav", "onoffCaption": "door slamming at 0.189-1.129, 3.598-4.538, 5.299-6.239", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4595.wav", "onoffCaption": "tapping clicking clanking at 0.081-3.521", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4618.wav", "onoffCaption": "explosion at 2.497-4.504 and door knocking at 7.28-10.0", "frequencyCaption": "explosion one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4703.wav", "onoffCaption": "woman laughing at 1.525-3.725, 5.878-8.086", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4756.wav", "onoffCaption": "cow mooing at 
4.047-9.027", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4821.wav", "onoffCaption": "cat meowing at 2.463-4.423 and burping belching at 7.505-10.0", "frequencyCaption": "cat meowing one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4874.wav", "onoffCaption": "burping belching at 1.013-4.013, 4.734-6.857", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4891.wav", "onoffCaption": "explosion at 0.084-3.924 and sneeze at 3.631-5.314, 7.57-9.529", "frequencyCaption": "explosion one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_2040.wav", "onoffCaption": "sneeze at 0.191-2.136, 2.684-4.629", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2114.wav", "onoffCaption": "gunshot at 0.672-2.672, 3.76-5.76 and tapping clicking clanking at 7.833-10.0", "frequencyCaption": "gunshot two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2141.wav", "onoffCaption": "whistling at 0.057-8.442", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2229.wav", "onoffCaption": "explosion at 1.961-4.961, 6.699-9.699 and sheep goat bleating at 2.522-4.522", "frequencyCaption": "explosion two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2299.wav", "onoffCaption": "door slamming at 0.233-1.748, 2.833-4.348", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2332.wav", "onoffCaption": "train horn at 1.612-4.932", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2367.wav", "onoffCaption": "burping belching at 2.641-5.006 and door slamming at 3.208-5.925 and dog barking at 4.614-6.614", "frequencyCaption": "burping belching one times and door slamming one times and dog barking one times"} +{"filepath": 
"data/multi_event_train/syn_2416.wav", "onoffCaption": "whistling at 0.288-8.901", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2459.wav", "onoffCaption": "whistling at 1.055-6.636", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2517.wav", "onoffCaption": "dog barking at 2.923-4.923, 6.67-8.67", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2542.wav", "onoffCaption": "cow mooing at 0.253-3.263 and cat meowing at 0.397-1.707 and spraying at 1.526-2.393", "frequencyCaption": "cow mooing one times and cat meowing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2731.wav", "onoffCaption": "burping belching at 2.229-8.316", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2764.wav", "onoffCaption": "tapping clicking clanking at 1.128-4.568, 5.185-7.486", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2781.wav", "onoffCaption": "spraying at 2.741-3.345 and dog barking at 2.749-4.749", "frequencyCaption": "spraying one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2809.wav", "onoffCaption": "dog barking at 1.625-3.625, 4.457-6.457", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2813.wav", "onoffCaption": "whistling at 0.073-8.084 and duck quacking at 0.917-2.917, 4.215-6.215 and sheep goat bleating at 2.786-4.786", "frequencyCaption": "whistling one times and duck quacking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2846.wav", "onoffCaption": "thump thud at 3.629-6.129", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2908.wav", "onoffCaption": "sheep goat bleating at 0.053-2.053, 4.129-6.129", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": 
"data/multi_event_train/syn_2912.wav", "onoffCaption": "thump thud at 0.997-3.497, 4.826-7.597", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4003.wav", "onoffCaption": "burping belching at 2.477-8.564", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4019.wav", "onoffCaption": "explosion at 1.534-5.534, 6.985-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4102.wav", "onoffCaption": "door knocking at 2.936-7.232 and door slamming at 5.741-8.458", "frequencyCaption": "door knocking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4157.wav", "onoffCaption": "tapping clicking clanking at 1.987-5.427, 6.586-8.643 and car horn honking at 3.357-6.576", "frequencyCaption": "tapping clicking clanking two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4270.wav", "onoffCaption": "gunshot at 0.201-2.201, 4.088-6.088", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4295.wav", "onoffCaption": "dog barking at 2.052-4.052", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4324.wav", "onoffCaption": "sneeze at 1.859-4.934, 6.814-9.889", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4371.wav", "onoffCaption": "spraying at 0.137-0.869, 2.847-3.911, 5.493-8.077", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4394.wav", "onoffCaption": "cat meowing at 0.311-1.586, 2.6-3.875 and tapping clicking clanking at 2.157-5.597", "frequencyCaption": "cat meowing two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4455.wav", "onoffCaption": "duck quacking at 0.261-2.261, 3.583-5.583, 6.193-8.193", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4554.wav", "onoffCaption": 
"car horn honking at 2.582-6.423, 7.928-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4626.wav", "onoffCaption": "burping belching at 2.098-5.642, 6.868-9.323", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4669.wav", "onoffCaption": "thump thud at 2.425-4.887, 5.471-8.074", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4727.wav", "onoffCaption": "tapping clicking clanking at 2.643-6.083, 7.748-9.849", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4772.wav", "onoffCaption": "sneeze at 0.269-1.5, 3.185-4.416", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4797.wav", "onoffCaption": "thump thud at 1.096-5.014", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4805.wav", "onoffCaption": "cat meowing at 0.861-3.765", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4850.wav", "onoffCaption": "whistling at 3.246-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2015.wav", "onoffCaption": "tapping clicking clanking at 0.308-3.748, 4.664-8.104 and spraying at 1.951-2.732, 4.676-5.457", "frequencyCaption": "tapping clicking clanking two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2047.wav", "onoffCaption": "tapping clicking clanking at 3.433-6.873", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2233.wav", "onoffCaption": "train horn at 3.961-6.428", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2234.wav", "onoffCaption": "duck quacking at 0.296-2.296, 2.961-4.961", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2266.wav", "onoffCaption": "car horn honking at 
0.862-4.449, 5.271-8.858", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2283.wav", "onoffCaption": "explosion at 0.487-2.58, 3.857-6.725", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2328.wav", "onoffCaption": "whistling at 0.999-6.047 and sheep goat bleating at 1.176-3.176, 3.966-5.966, 6.751-8.751", "frequencyCaption": "whistling one times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2398.wav", "onoffCaption": "door knocking at 1.309-4.418, 6.562-9.671 and woman laughing at 1.446-4.146, 4.759-6.771 and door slamming at 2.147-3.662, 5.397-6.397", "frequencyCaption": "door knocking two times and woman laughing two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2411.wav", "onoffCaption": "tapping clicking clanking at 0.115-3.555, 5.71-8.203", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2443.wav", "onoffCaption": "cat meowing at 2.254-4.004, 4.915-6.19 and train horn at 2.628-5.988, 6.935-9.02", "frequencyCaption": "cat meowing two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2558.wav", "onoffCaption": "whistling at 1.279-3.288", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2630.wav", "onoffCaption": "whistling at 0.199-9.864 and thump thud at 0.926-4.593", "frequencyCaption": "whistling one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2662.wav", "onoffCaption": "car horn honking at 2.538-6.938", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2665.wav", "onoffCaption": "door knocking at 0.383-5.085 and door slamming at 7.078-9.561", "frequencyCaption": "door knocking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2680.wav", "onoffCaption": "cow mooing at 0.205-5.185, 7.154-10.0", 
"frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2687.wav", "onoffCaption": "woman laughing at 2.787-6.074, 6.661-8.766", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2779.wav", "onoffCaption": "explosion at 2.806-5.067, 6.494-8.755", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2915.wav", "onoffCaption": "sheep goat bleating at 0.649-2.649, 3.203-5.203, 5.869-7.869 and dog barking at 3.465-5.465", "frequencyCaption": "sheep goat bleating three times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2947.wav", "onoffCaption": "dog barking at 0.971-2.971 and thump thud at 5.698-10.0", "frequencyCaption": "dog barking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4004.wav", "onoffCaption": "tapping clicking clanking at 1.792-5.232, 7.002-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4056.wav", "onoffCaption": "duck quacking at 3.429-5.429, 6.15-8.15", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4118.wav", "onoffCaption": "sheep goat bleating at 3.47-5.47, 6.715-8.715", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4225.wav", "onoffCaption": "gunshot at 0.572-2.572 and cat meowing at 7.208-8.22", "frequencyCaption": "gunshot one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4277.wav", "onoffCaption": "thump thud at 2.987-6.034", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4292.wav", "onoffCaption": "explosion at 0.097-2.185, 4.513-6.926", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4389.wav", "onoffCaption": "burping belching at 0.173-6.117, 6.715-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": 
"data/multi_event_train/syn_4400.wav", "onoffCaption": "sheep goat bleating at 2.724-7.604 and thump thud at 3.747-6.247", "frequencyCaption": "sheep goat bleating one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4452.wav", "onoffCaption": "cat meowing at 3.312-4.856, 6.196-8.904", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4549.wav", "onoffCaption": "sneeze at 1.038-4.113, 5.958-7.665 and door slamming at 5.131-8.092", "frequencyCaption": "sneeze two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4621.wav", "onoffCaption": "whistling at 0.065-5.24", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4673.wav", "onoffCaption": "tapping clicking clanking at 0.128-3.568", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4696.wav", "onoffCaption": "dog barking at 3.747-9.384", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4768.wav", "onoffCaption": "tapping clicking clanking at 1.367-4.807", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4904.wav", "onoffCaption": "train horn at 3.17-5.637, 7.827-9.933", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4951.wav", "onoffCaption": "sneeze at 2.27-3.434", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4956.wav", "onoffCaption": "cat meowing at 2.574-4.145, 4.744-6.315, 7.461-9.032", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2008.wav", "onoffCaption": "gunshot at 0.098-2.098 and burping belching at 0.614-3.816, 5.977-8.798", "frequencyCaption": "gunshot one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_2012.wav", "onoffCaption": "thump thud at 0.055-2.394, 3.578-6.349", "frequencyCaption": 
"thump thud two times"} +{"filepath": "data/multi_event_train/syn_2109.wav", "onoffCaption": "sneeze at 1.144-4.792, 6.589-7.835", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2113.wav", "onoffCaption": "thump thud at 0.539-3.039, 4.952-7.18", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2146.wav", "onoffCaption": "car horn honking at 0.552-3.338, 5.746-8.532", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2261.wav", "onoffCaption": "car horn honking at 0.166-3.341 and dog barking at 2.611-4.611 and sheep goat bleating at 5.761-7.761", "frequencyCaption": "car horn honking one times and dog barking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2284.wav", "onoffCaption": "whistling at 1.837-4.812, 5.973-8.948 and sheep goat bleating at 5.456-8.536", "frequencyCaption": "whistling two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2360.wav", "onoffCaption": "tapping clicking clanking at 2.241-5.681, 7.788-9.791", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2385.wav", "onoffCaption": "cat meowing at 2.894-3.921", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2444.wav", "onoffCaption": "gunshot at 0.397-2.397", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2510.wav", "onoffCaption": "gunshot at 0.453-2.693, 4.329-6.459", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2545.wav", "onoffCaption": "woman laughing at 0.559-3.145, 5.591-8.177", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2637.wav", "onoffCaption": "woman laughing at 0.151-3.438, 5.006-8.293", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2736.wav", 
"onoffCaption": "woman laughing at 0.176-3.752, 4.722-8.298 and explosion at 0.858-2.951 and door knocking at 4.024-6.376, 7.281-9.633", "frequencyCaption": "woman laughing two times and explosion one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2814.wav", "onoffCaption": "gunshot at 1.732-3.732, 6.222-8.222", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2841.wav", "onoffCaption": "burping belching at 0.477-3.477 and cat meowing at 2.608-4.358, 6.377-8.127", "frequencyCaption": "burping belching one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2940.wav", "onoffCaption": "door slamming at 0.759-3.476, 5.126-6.45, 7.827-9.13", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4051.wav", "onoffCaption": "dog barking at 0.092-2.092", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4150.wav", "onoffCaption": "whistling at 0.595-5.77, 7.744-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4222.wav", "onoffCaption": "spraying at 0.814-1.755, 3.619-4.369 and cow mooing at 2.19-5.2 and gunshot at 3.053-5.053, 6.329-8.329", "frequencyCaption": "spraying two times and cow mooing one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4238.wav", "onoffCaption": "whistling at 0.013-7.763 and thump thud at 0.796-3.296, 4.227-6.566 and sneeze at 4.06-6.005, 7.006-8.951", "frequencyCaption": "whistling one times and thump thud two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_4288.wav", "onoffCaption": "door knocking at 0.772-3.152 and gunshot at 0.952-2.952, 3.707-5.707, 6.781-8.781", "frequencyCaption": "door knocking one times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_4323.wav", "onoffCaption": "door knocking at 2.129-6.831", "frequencyCaption": "door knocking one times"} +{"filepath": 
"data/multi_event_train/syn_4339.wav", "onoffCaption": "door knocking at 2.612-4.702", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4393.wav", "onoffCaption": "explosion at 0.73-3.73, 5.196-8.196", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4407.wav", "onoffCaption": "thump thud at 0.715-3.215 and spraying at 2.174-3.869, 5.959-6.534, 7.932-8.713", "frequencyCaption": "thump thud one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_4506.wav", "onoffCaption": "duck quacking at 2.427-4.427, 5.446-7.446", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4674.wav", "onoffCaption": "woman laughing at 0.359-3.459 and gunshot at 0.473-2.473, 4.596-6.596 and cat meowing at 1.726-3.914", "frequencyCaption": "woman laughing one times and gunshot two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4691.wav", "onoffCaption": "dog barking at 3.46-5.86", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4775.wav", "onoffCaption": "dog barking at 1.351-3.351, 5.849-7.849", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4790.wav", "onoffCaption": "spraying at 2.35-3.434, 3.962-5.046, 5.902-6.986 and car horn honking at 3.116-7.438", "frequencyCaption": "spraying three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4802.wav", "onoffCaption": "cow mooing at 0.621-3.59, 5.64-8.609 and woman laughing at 1.406-6.445, 7.25-9.348", "frequencyCaption": "cow mooing two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4818.wav", "onoffCaption": "burping belching at 1.606-8.774 and gunshot at 5.63-7.63", "frequencyCaption": "burping belching one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4903.wav", "onoffCaption": "train horn at 3.661-8.101", 
"frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4919.wav", "onoffCaption": "door knocking at 2.36-5.207, 6.01-8.74", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2079.wav", "onoffCaption": "gunshot at 2.234-4.234", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2137.wav", "onoffCaption": "cow mooing at 2.569-5.538", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2162.wav", "onoffCaption": "thump thud at 0.332-4.707, 5.963-8.463 and car horn honking at 2.236-4.701", "frequencyCaption": "thump thud two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2335.wav", "onoffCaption": "tapping clicking clanking at 0.144-3.584, 5.766-8.185", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2344.wav", "onoffCaption": "whistling at 3.642-8.126", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2534.wav", "onoffCaption": "burping belching at 2.018-4.049, 4.623-6.654, 7.545-9.576", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_2561.wav", "onoffCaption": "thump thud at 2.0-5.667, 6.541-8.88", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2584.wav", "onoffCaption": "explosion at 1.234-5.828, 6.719-10.0 and burping belching at 2.001-4.036", "frequencyCaption": "explosion two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2609.wav", "onoffCaption": "door knocking at 0.956-5.106", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2678.wav", "onoffCaption": "whistling at 2.467-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2712.wav", "onoffCaption": "car horn honking at 2.13-6.53, 7.444-9.957", 
"frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2763.wav", "onoffCaption": "sneeze at 2.235-3.466, 4.736-7.053, 7.596-10.0", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2786.wav", "onoffCaption": "thump thud at 0.526-3.297, 5.681-7.846 and whistling at 3.429-6.404", "frequencyCaption": "thump thud two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2830.wav", "onoffCaption": "burping belching at 1.964-4.167", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2865.wav", "onoffCaption": "sheep goat bleating at 0.352-2.352 and cow mooing at 1.577-4.559, 5.826-8.808", "frequencyCaption": "sheep goat bleating one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2880.wav", "onoffCaption": "thump thud at 0.527-3.027 and spraying at 5.201-6.896, 9.043-9.543", "frequencyCaption": "thump thud one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4105.wav", "onoffCaption": "thump thud at 2.466-6.916", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4174.wav", "onoffCaption": "train horn at 3.731-5.886 and cat meowing at 6.148-7.719", "frequencyCaption": "train horn one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4191.wav", "onoffCaption": "tapping clicking clanking at 1.128-4.568, 5.787-8.419", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4249.wav", "onoffCaption": "woman laughing at 3.737-6.809", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4307.wav", "onoffCaption": "woman laughing at 2.521-9.255", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4352.wav", "onoffCaption": "duck quacking at 2.31-4.31, 5.752-7.752 and door knocking at 6.221-8.684", "frequencyCaption": 
"duck quacking two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4376.wav", "onoffCaption": "door slamming at 3.066-4.39, 4.96-6.284, 7.039-8.363", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4439.wav", "onoffCaption": "spraying at 1.833-2.7, 3.222-4.089, 5.309-6.176", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4448.wav", "onoffCaption": "train horn at 2.19-8.25", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4522.wav", "onoffCaption": "door knocking at 0.198-2.575, 4.561-6.938 and woman laughing at 1.012-8.024", "frequencyCaption": "door knocking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4553.wav", "onoffCaption": "cat meowing at 0.582-2.576", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4720.wav", "onoffCaption": "sneeze at 0.726-4.374 and spraying at 1.105-2.352, 3.415-4.662, 5.838-7.085", "frequencyCaption": "sneeze one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_4751.wav", "onoffCaption": "cow mooing at 0.27-3.28 and burping belching at 6.449-9.449", "frequencyCaption": "cow mooing one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4826.wav", "onoffCaption": "dog barking at 0.773-2.773 and cow mooing at 1.422-6.402, 7.024-10.0 and tapping clicking clanking at 3.237-6.677", "frequencyCaption": "dog barking one times and cow mooing two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4857.wav", "onoffCaption": "burping belching at 0.561-2.596, 3.115-5.15, 6.224-8.259", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_2036.wav", "onoffCaption": "whistling at 0.608-3.483, 4.251-6.383", "frequencyCaption": "whistling two times"} +{"filepath": 
"data/multi_event_train/syn_2063.wav", "onoffCaption": "cow mooing at 0.154-3.123 and car horn honking at 5.831-10.0", "frequencyCaption": "cow mooing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2086.wav", "onoffCaption": "thump thud at 2.044-4.815, 6.182-8.692", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2178.wav", "onoffCaption": "cow mooing at 2.45-7.43", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2187.wav", "onoffCaption": "thump thud at 2.111-4.339, 5.423-8.194", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2210.wav", "onoffCaption": "dog barking at 0.214-2.214, 3.126-5.126, 6.017-8.017 and burping belching at 1.459-5.003, 6.152-8.659", "frequencyCaption": "dog barking three times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_2245.wav", "onoffCaption": "gunshot at 0.323-2.323", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2311.wav", "onoffCaption": "duck quacking at 0.64-2.64, 4.525-6.525, 7.803-9.803", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2435.wav", "onoffCaption": "gunshot at 2.92-4.92, 6.165-8.165", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2613.wav", "onoffCaption": "sneeze at 3.626-4.79", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2646.wav", "onoffCaption": "sneeze at 1.137-2.431, 3.879-6.36", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2708.wav", "onoffCaption": "woman laughing at 1.941-6.98", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2747.wav", "onoffCaption": "train horn at 2.336-6.536, 7.35-9.66", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2931.wav", 
"onoffCaption": "woman laughing at 2.301-4.495", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4020.wav", "onoffCaption": "dog barking at 2.093-4.093, 6.532-8.532 and gunshot at 5.252-7.252 and sneeze at 5.483-7.19", "frequencyCaption": "dog barking two times and gunshot one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4075.wav", "onoffCaption": "cow mooing at 0.295-3.277", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4090.wav", "onoffCaption": "sheep goat bleating at 3.411-5.411, 6.681-8.681", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4121.wav", "onoffCaption": "thump thud at 0.085-4.46, 5.692-8.463 and door slamming at 0.091-2.091", "frequencyCaption": "thump thud two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4253.wav", "onoffCaption": "woman laughing at 3.8-7.087", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4348.wav", "onoffCaption": "sneeze at 2.276-3.564, 4.675-6.358", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4423.wav", "onoffCaption": "door slamming at 2.555-3.858 and spraying at 7.213-9.341", "frequencyCaption": "door slamming one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4476.wav", "onoffCaption": "burping belching at 0.592-4.928, 6.205-8.838", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4489.wav", "onoffCaption": "duck quacking at 1.562-3.562, 4.618-6.618, 7.146-9.146", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4493.wav", "onoffCaption": "burping belching at 1.073-4.332, 5.408-8.408 and cat meowing at 4.695-6.585, 7.258-9.148", "frequencyCaption": "burping belching two times and cat meowing two times"} +{"filepath": 
"data/multi_event_train/syn_4538.wav", "onoffCaption": "thump thud at 0.955-3.294, 4.777-7.332 and cat meowing at 1.094-2.989, 5.376-7.316", "frequencyCaption": "thump thud two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4577.wav", "onoffCaption": "train horn at 0.185-4.366 and gunshot at 7.36-9.36", "frequencyCaption": "train horn one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4588.wav", "onoffCaption": "sneeze at 0.785-3.024, 5.081-7.194, 7.912-9.975", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4592.wav", "onoffCaption": "duck quacking at 2.429-4.429, 6.607-8.607", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4605.wav", "onoffCaption": "cat meowing at 0.056-1.056, 2.31-4.677, 5.816-7.352 and door slamming at 1.937-2.437, 3.086-3.586", "frequencyCaption": "cat meowing three times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4704.wav", "onoffCaption": "spraying at 2.106-2.681, 3.676-6.112 and cat meowing at 2.174-3.44, 4.07-5.336, 6.08-7.346", "frequencyCaption": "spraying two times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4869.wav", "onoffCaption": "gunshot at 2.43-4.43 and sneeze at 6.425-10.0", "frequencyCaption": "gunshot one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4873.wav", "onoffCaption": "woman laughing at 2.935-6.02, 6.817-9.1", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4896.wav", "onoffCaption": "cow mooing at 1.844-5.142, 6.268-9.566", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4927.wav", "onoffCaption": "cow mooing at 2.408-5.418, 7.326-9.414", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4968.wav", "onoffCaption": "duck quacking at 0.788-2.788, 4.242-6.242 and cow mooing at 2.553-5.522, 
7.683-9.783", "frequencyCaption": "duck quacking two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4972.wav", "onoffCaption": "gunshot at 0.697-2.697, 4.279-6.279", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4997.wav", "onoffCaption": "sneeze at 0.037-1.283 and dog barking at 4.35-6.35, 7.332-9.332", "frequencyCaption": "sneeze one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2051.wav", "onoffCaption": "burping belching at 0.475-3.296, 5.659-8.092", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2222.wav", "onoffCaption": "explosion at 3.458-5.551 and cat meowing at 5.386-6.386, 7.208-8.208", "frequencyCaption": "explosion one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2277.wav", "onoffCaption": "explosion at 1.115-3.208, 4.127-6.189", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2292.wav", "onoffCaption": "gunshot at 1.525-3.525", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2339.wav", "onoffCaption": "tapping clicking clanking at 3.039-6.479", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2389.wav", "onoffCaption": "explosion at 1.037-6.037, 6.705-9.214", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2407.wav", "onoffCaption": "duck quacking at 0.106-2.106, 3.694-5.694", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2452.wav", "onoffCaption": "gunshot at 2.289-4.289, 5.735-7.735", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2460.wav", "onoffCaption": "duck quacking at 0.55-2.55 and tapping clicking clanking at 4.483-7.923", "frequencyCaption": "duck quacking one times and tapping clicking clanking one times"} +{"filepath": 
"data/multi_event_train/syn_2485.wav", "onoffCaption": "tapping clicking clanking at 2.678-6.118", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2549.wav", "onoffCaption": "thump thud at 1.047-4.094, 4.803-7.303", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2621.wav", "onoffCaption": "car horn honking at 1.926-4.839, 6.023-8.936", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2674.wav", "onoffCaption": "door knocking at 0.096-3.846, 5.923-9.673", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2691.wav", "onoffCaption": "door knocking at 0.614-4.91, 5.98-8.229 and cow mooing at 4.553-7.535", "frequencyCaption": "door knocking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2818.wav", "onoffCaption": "spraying at 2.945-3.945, 5.674-6.674, 7.659-8.659", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2903.wav", "onoffCaption": "cow mooing at 2.138-5.436, 6.232-9.201", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2964.wav", "onoffCaption": "duck quacking at 3.185-5.185, 6.724-8.724", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2981.wav", "onoffCaption": "woman laughing at 0.149-2.349, 3.471-6.171", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4012.wav", "onoffCaption": "sheep goat bleating at 2.964-6.044", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4047.wav", "onoffCaption": "whistling at 1.46-9.065", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4109.wav", "onoffCaption": "gunshot at 2.838-4.838, 6.567-8.567", "frequencyCaption": "gunshot two times"} +{"filepath": 
"data/multi_event_train/syn_4206.wav", "onoffCaption": "car horn honking at 1.392-6.299, 7.213-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4261.wav", "onoffCaption": "gunshot at 0.303-2.809, 5.036-7.036", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4284.wav", "onoffCaption": "spraying at 0.458-1.062 and cat meowing at 3.769-5.044, 5.973-7.248, 7.752-9.027", "frequencyCaption": "spraying one times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4411.wav", "onoffCaption": "train horn at 0.37-2.507", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4444.wav", "onoffCaption": "duck quacking at 0.591-2.591", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4637.wav", "onoffCaption": "cow mooing at 3.246-6.228", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4650.wav", "onoffCaption": "dog barking at 3.055-5.055", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4687.wav", "onoffCaption": "car horn honking at 2.758-4.758, 7.034-9.034", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4915.wav", "onoffCaption": "gunshot at 0.044-2.044, 2.611-4.611, 5.506-7.506", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4940.wav", "onoffCaption": "explosion at 0.203-3.203, 5.192-8.192", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2004.wav", "onoffCaption": "burping belching at 2.243-4.337, 5.194-8.075", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2105.wav", "onoffCaption": "gunshot at 0.096-2.336, 3.644-5.774, 6.28-8.28 and door slamming at 0.203-0.703", "frequencyCaption": "gunshot three times and door slamming one times"} +{"filepath": 
"data/multi_event_train/syn_2121.wav", "onoffCaption": "door knocking at 3.089-5.713, 7.505-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2150.wav", "onoffCaption": "tapping clicking clanking at 3.558-6.998 and duck quacking at 6.009-8.009", "frequencyCaption": "tapping clicking clanking one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2174.wav", "onoffCaption": "dog barking at 0.653-2.653, 4.141-6.141", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2238.wav", "onoffCaption": "sheep goat bleating at 2.407-6.047, 6.802-10.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2249.wav", "onoffCaption": "spraying at 0.058-0.925", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2288.wav", "onoffCaption": "thump thud at 3.546-5.885 and spraying at 7.512-8.687", "frequencyCaption": "thump thud one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2323.wav", "onoffCaption": "door slamming at 0.137-2.137, 2.976-4.976, 6.469-8.469", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2352.wav", "onoffCaption": "whistling at 1.105-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2376.wav", "onoffCaption": "woman laughing at 2.081-5.368, 6.66-9.298 and gunshot at 3.24-5.24, 5.749-7.749", "frequencyCaption": "woman laughing two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2393.wav", "onoffCaption": "sneeze at 0.13-4.63, 5.693-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2439.wav", "onoffCaption": "thump thud at 1.519-5.186 and woman laughing at 3.888-5.97, 7.447-9.529", "frequencyCaption": "thump thud one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2448.wav", 
"onoffCaption": "whistling at 2.912-8.412 and tapping clicking clanking at 5.008-8.448 and gunshot at 7.107-9.107", "frequencyCaption": "whistling one times and tapping clicking clanking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2489.wav", "onoffCaption": "tapping clicking clanking at 1.137-4.577, 6.143-8.158", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2506.wav", "onoffCaption": "tapping clicking clanking at 0.235-3.675, 5.904-9.344", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2522.wav", "onoffCaption": "thump thud at 0.022-2.793 and car horn honking at 1.65-6.557", "frequencyCaption": "thump thud one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2553.wav", "onoffCaption": "spraying at 0.053-0.628, 1.756-2.82, 3.538-4.795 and train horn at 0.586-5.026, 7.084-9.849", "frequencyCaption": "spraying three times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2577.wav", "onoffCaption": "whistling at 0.231-5.731, 6.91-8.919", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2592.wav", "onoffCaption": "explosion at 2.034-4.562, 5.841-8.369", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2704.wav", "onoffCaption": "spraying at 1.223-1.792, 2.643-3.393 and dog barking at 4.984-6.984", "frequencyCaption": "spraying two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2720.wav", "onoffCaption": "whistling at 2.643-4.872, 5.859-8.467", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2775.wav", "onoffCaption": "sheep goat bleating at 2.154-4.154, 6.08-8.08", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2790.wav", "onoffCaption": "sneeze at 2.831-4.894, 6.753-8.816", "frequencyCaption": 
"sneeze two times"} +{"filepath": "data/multi_event_train/syn_2802.wav", "onoffCaption": "whistling at 3.011-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2857.wav", "onoffCaption": "sneeze at 3.411-6.025, 6.998-9.45", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2873.wav", "onoffCaption": "cow mooing at 0.596-3.578, 6.07-9.052", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2896.wav", "onoffCaption": "explosion at 3.203-8.203", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2919.wav", "onoffCaption": "thump thud at 0.206-2.977, 3.705-6.476", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2956.wav", "onoffCaption": "woman laughing at 1.045-3.637, 4.725-7.363", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2968.wav", "onoffCaption": "sneeze at 0.034-1.36 and tapping clicking clanking at 5.197-8.637", "frequencyCaption": "sneeze one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4008.wav", "onoffCaption": "woman laughing at 1.326-3.52, 5.953-8.147", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4079.wav", "onoffCaption": "spraying at 0.063-1.549 and car horn honking at 3.496-6.422", "frequencyCaption": "spraying one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4113.wav", "onoffCaption": "car horn honking at 0.329-3.983, 4.793-7.611 and cow mooing at 5.071-8.04", "frequencyCaption": "car horn honking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4146.wav", "onoffCaption": "sheep goat bleating at 3.618-5.618, 6.355-8.355", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4162.wav", "onoffCaption": "tapping clicking clanking at 
1.893-5.333, 7.249-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4187.wav", "onoffCaption": "cow mooing at 0.198-3.496, 5.511-7.81", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4234.wav", "onoffCaption": "door knocking at 1.232-3.695 and duck quacking at 7.38-9.38", "frequencyCaption": "door knocking one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4311.wav", "onoffCaption": "gunshot at 0.178-2.178, 3.114-5.114, 5.762-8.002 and explosion at 2.45-7.45", "frequencyCaption": "gunshot three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4335.wav", "onoffCaption": "sheep goat bleating at 0.499-2.499, 4.063-6.063, 7.651-9.651", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4360.wav", "onoffCaption": "train horn at 2.318-6.648", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4385.wav", "onoffCaption": "burping belching at 0.088-5.088, 5.724-7.818", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4510.wav", "onoffCaption": "sneeze at 0.848-2.807 and car horn honking at 7.218-10.0", "frequencyCaption": "sneeze one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4534.wav", "onoffCaption": "duck quacking at 0.48-2.48, 3.415-5.415, 7.851-9.851", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4545.wav", "onoffCaption": "train horn at 1.683-4.883, 6.583-9.433", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4662.wav", "onoffCaption": "thump thud at 2.995-5.334, 5.838-8.209 and door knocking at 4.48-8.033", "frequencyCaption": "thump thud two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4678.wav", "onoffCaption": "cow mooing at 1.341-4.351, 
5.937-8.017", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4736.wav", "onoffCaption": "door knocking at 2.538-7.24 and sheep goat bleating at 5.123-7.123", "frequencyCaption": "door knocking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4747.wav", "onoffCaption": "train horn at 2.593-4.748, 7.117-9.772", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4763.wav", "onoffCaption": "door knocking at 0.231-3.981", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4779.wav", "onoffCaption": "cow mooing at 3.146-6.156 and door knocking at 3.765-6.265, 7.273-9.33", "frequencyCaption": "cow mooing one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_4786.wav", "onoffCaption": "train horn at 1.798-6.875 and dog barking at 5.284-7.284", "frequencyCaption": "train horn one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4814.wav", "onoffCaption": "woman laughing at 2.443-5.238, 7.538-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4830.wav", "onoffCaption": "spraying at 1.24-2.107, 4.396-5.263, 6.077-6.944", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4841.wav", "onoffCaption": "explosion at 0.673-5.673", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_7.wav", "onoffCaption": "sheep goat bleating at 1.687-3.687, 4.963-6.963", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2075.wav", "onoffCaption": "sneeze at 2.897-7.397", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2090.wav", "onoffCaption": "cat meowing at 3.18-4.393, 6.703-7.916", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2191.wav", "onoffCaption": "door 
knocking at 1.462-3.839 and thump thud at 5.248-9.623", "frequencyCaption": "door knocking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2206.wav", "onoffCaption": "sneeze at 2.72-4.008, 4.819-6.502, 7.747-9.073", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2253.wav", "onoffCaption": "sneeze at 0.069-1.161, 1.844-4.595, 5.283-7.491", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2307.wav", "onoffCaption": "train horn at 0.621-3.061, 5.358-8.033", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2348.wav", "onoffCaption": "door slamming at 2.104-3.869, 5.812-8.786 and dog barking at 2.325-4.325", "frequencyCaption": "door slamming two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2423.wav", "onoffCaption": "gunshot at 2.171-4.672, 7.152-9.152", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2538.wav", "onoffCaption": "door knocking at 2.954-6.329", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2605.wav", "onoffCaption": "sheep goat bleating at 2.344-4.344, 6.559-8.559", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2650.wav", "onoffCaption": "burping belching at 1.323-4.829, 5.63-7.737", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2751.wav", "onoffCaption": "cow mooing at 0.385-3.354, 5.307-7.518", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2826.wav", "onoffCaption": "thump thud at 2.171-6.546, 7.936-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2927.wav", "onoffCaption": "sheep goat bleating at 2.616-4.616, 5.942-7.942", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4036.wav", 
"onoffCaption": "door slamming at 1.098-3.294 and explosion at 2.32-5.32, 6.136-9.136 and sneeze at 8.145-9.309", "frequencyCaption": "door slamming one times and explosion two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4086.wav", "onoffCaption": "thump thud at 0.396-2.735, 3.807-6.146, 7.197-9.536", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_4137.wav", "onoffCaption": "explosion at 0.362-3.362, 5.024-8.024 and tapping clicking clanking at 3.969-7.409", "frequencyCaption": "explosion two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4245.wav", "onoffCaption": "car horn honking at 0.888-4.729, 6.844-9.24", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4344.wav", "onoffCaption": "gunshot at 2.648-4.648, 5.271-7.312", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4460.wav", "onoffCaption": "gunshot at 2.04-4.04, 4.742-6.742", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4485.wav", "onoffCaption": "cat meowing at 0.363-2.303, 3.082-5.022", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4561.wav", "onoffCaption": "cat meowing at 0.256-5.256", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4584.wav", "onoffCaption": "thump thud at 0.001-4.451, 5.57-10.0 and sneeze at 0.165-1.665", "frequencyCaption": "thump thud two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4609.wav", "onoffCaption": "duck quacking at 0.204-2.204 and sneeze at 4.938-7.341", "frequencyCaption": "duck quacking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4613.wav", "onoffCaption": "cat meowing at 0.077-1.104, 1.625-2.652 and woman laughing at 5.845-8.94", "frequencyCaption": "cat meowing two times and woman laughing one times"} 
+{"filepath": "data/multi_event_train/syn_4708.wav", "onoffCaption": "burping belching at 2.626-4.751, 5.854-7.979", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4712.wav", "onoffCaption": "sneeze at 0.454-2.066, 4.387-5.915 and sheep goat bleating at 4.63-6.63", "frequencyCaption": "sneeze two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4865.wav", "onoffCaption": "thump thud at 2.49-4.718, 6.791-9.291", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4880.wav", "onoffCaption": "sheep goat bleating at 0.069-2.069 and spraying at 1.372-2.122", "frequencyCaption": "sheep goat bleating one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4931.wav", "onoffCaption": "door slamming at 2.992-4.516, 5.7-7.224", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4964.wav", "onoffCaption": "door knocking at 0.461-4.9, 5.822-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4981.wav", "onoffCaption": "train horn at 0.592-8.792", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2020.wav", "onoffCaption": "burping belching at 0.805-4.805, 7.041-9.152", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2072.wav", "onoffCaption": "gunshot at 0.343-2.343, 4.093-6.093 and whistling at 0.77-6.27", "frequencyCaption": "gunshot two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2097.wav", "onoffCaption": "cat meowing at 0.499-2.07 and woman laughing at 4.3-6.382", "frequencyCaption": "cat meowing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2169.wav", "onoffCaption": "thump thud at 1.556-4.327 and gunshot at 3.176-5.176", "frequencyCaption": "thump thud one times and gunshot one times"} +{"filepath": 
"data/multi_event_train/syn_2201.wav", "onoffCaption": "door slamming at 3.56-4.365, 6.282-7.087, 8.662-9.467", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2424.wav", "onoffCaption": "cow mooing at 1.181-4.191 and explosion at 1.485-3.573 and gunshot at 2.08-4.08", "frequencyCaption": "cow mooing one times and explosion one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2471.wav", "onoffCaption": "tapping clicking clanking at 0.354-3.794 and car horn honking at 0.545-5.452", "frequencyCaption": "tapping clicking clanking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2476.wav", "onoffCaption": "woman laughing at 2.307-4.59", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2493.wav", "onoffCaption": "gunshot at 2.205-4.205 and thump thud at 6.122-9.789", "frequencyCaption": "gunshot one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2588.wav", "onoffCaption": "sheep goat bleating at 0.077-4.957, 5.948-10.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2657.wav", "onoffCaption": "door slamming at 0.527-2.747, 3.423-4.947, 6.797-8.71", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2869.wav", "onoffCaption": "burping belching at 0.032-3.472, 4.014-6.414 and explosion at 0.35-3.35, 4.571-7.435 and whistling at 3.319-8.494", "frequencyCaption": "burping belching two times and explosion two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2920.wav", "onoffCaption": "burping belching at 1.183-5.183, 6.898-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2972.wav", "onoffCaption": "thump thud at 0.306-2.806, 3.519-6.279, 6.88-9.06", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_2997.wav", 
"onoffCaption": "burping belching at 1.791-5.791, 7.164-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4031.wav", "onoffCaption": "sheep goat bleating at 0.181-2.181 and door knocking at 0.401-3.457", "frequencyCaption": "sheep goat bleating one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4063.wav", "onoffCaption": "gunshot at 0.06-2.101, 3.276-5.317, 6.463-8.504", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4178.wav", "onoffCaption": "gunshot at 1.149-3.149, 3.925-5.925, 6.522-8.522 and tapping clicking clanking at 5.417-8.857", "frequencyCaption": "gunshot three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4210.wav", "onoffCaption": "cow mooing at 2.521-5.49", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4242.wav", "onoffCaption": "duck quacking at 2.613-4.613", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4359.wav", "onoffCaption": "door slamming at 2.052-3.169, 3.806-4.923, 5.493-6.61 and woman laughing at 3.52-6.106", "frequencyCaption": "door slamming three times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4435.wav", "onoffCaption": "spraying at 4.083-4.934", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_4467.wav", "onoffCaption": "sheep goat bleating at 1.74-3.74 and thump thud at 6.828-9.328", "frequencyCaption": "sheep goat bleating one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4482.wav", "onoffCaption": "explosion at 2.5-7.5 and duck quacking at 4.221-6.221", "frequencyCaption": "explosion one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4599.wav", "onoffCaption": "sneeze at 0.066-1.16 and door knocking at 5.548-9.101", "frequencyCaption": "sneeze one times and door 
knocking one times"} +{"filepath": "data/multi_event_train/syn_4614.wav", "onoffCaption": "duck quacking at 2.031-4.031 and train horn at 2.112-4.267, 5.729-8.564", "frequencyCaption": "duck quacking one times and train horn two times"} +{"filepath": "data/multi_event_train/syn_4646.wav", "onoffCaption": "sneeze at 0.795-4.443 and sheep goat bleating at 6.423-8.423", "frequencyCaption": "sneeze one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4878.wav", "onoffCaption": "dog barking at 0.779-2.779", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4963.wav", "onoffCaption": "door knocking at 1.64-4.14, 5.135-7.775", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4986.wav", "onoffCaption": "explosion at 0.519-2.612, 3.807-5.9, 7.053-9.146", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_2027.wav", "onoffCaption": "cat meowing at 2.271-5.512, 7.359-8.499", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2126.wav", "onoffCaption": "spraying at 1.5-3.628, 4.53-6.658, 7.824-9.952", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2254.wav", "onoffCaption": "spraying at 3.935-4.51, 5.563-6.344, 7.392-9.784", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2300.wav", "onoffCaption": "sneeze at 3.324-5.8, 6.929-9.405", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2355.wav", "onoffCaption": "duck quacking at 0.3-2.3, 3.841-5.841, 6.667-8.667", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2494.wav", "onoffCaption": "car horn honking at 1.413-3.926, 4.97-7.349 and tapping clicking clanking at 2.467-5.907, 7.887-10.0", "frequencyCaption": "car horn honking two times and tapping clicking clanking two times"} +{"filepath": 
"data/multi_event_train/syn_2570.wav", "onoffCaption": "door slamming at 0.573-1.876, 2.84-4.143", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2595.wav", "onoffCaption": "woman laughing at 0.777-3.145, 5.247-7.615 and sheep goat bleating at 1.517-3.517", "frequencyCaption": "woman laughing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2602.wav", "onoffCaption": "burping belching at 3.235-6.235, 7.539-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2618.wav", "onoffCaption": "thump thud at 1.002-3.773", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2703.wav", "onoffCaption": "cow mooing at 0.447-5.427, 7.486-9.883", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2719.wav", "onoffCaption": "burping belching at 0.966-4.246, 5.414-7.779 and cat meowing at 3.195-4.812", "frequencyCaption": "burping belching two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2756.wav", "onoffCaption": "thump thud at 1.042-3.27, 4.464-6.692", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2874.wav", "onoffCaption": "cow mooing at 2.648-5.63, 6.95-9.932", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2891.wav", "onoffCaption": "explosion at 0.143-3.143", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2975.wav", "onoffCaption": "burping belching at 2.339-5.619, 6.718-9.701", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2990.wav", "onoffCaption": "burping belching at 0.11-2.141, 2.953-4.984 and woman laughing at 7.255-9.538", "frequencyCaption": "burping belching two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4064.wav", "onoffCaption": "whistling at 
1.653-6.137, 7.449-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4081.wav", "onoffCaption": "door knocking at 0.283-4.722 and woman laughing at 1.01-3.71, 4.329-6.947", "frequencyCaption": "door knocking one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4165.wav", "onoffCaption": "tapping clicking clanking at 1.022-4.462, 5.978-9.418 and gunshot at 1.274-3.274, 5.12-7.12", "frequencyCaption": "tapping clicking clanking two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4180.wav", "onoffCaption": "cow mooing at 1.464-6.444", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4217.wav", "onoffCaption": "dog barking at 1.46-3.46, 4.833-6.833 and cat meowing at 1.471-3.411, 5.693-7.237", "frequencyCaption": "dog barking two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4316.wav", "onoffCaption": "car horn honking at 0.347-3.165 and cow mooing at 5.694-10.0", "frequencyCaption": "car horn honking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4428.wav", "onoffCaption": "door knocking at 1.117-3.338, 4.015-6.392, 7.322-9.807", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_4432.wav", "onoffCaption": "door knocking at 0.13-2.977 and train horn at 1.697-4.577, 5.557-8.437", "frequencyCaption": "door knocking one times and train horn two times"} +{"filepath": "data/multi_event_train/syn_4529.wav", "onoffCaption": "sheep goat bleating at 1.449-3.449, 4.761-6.761 and door knocking at 4.19-6.69, 7.357-9.857", "frequencyCaption": "sheep goat bleating two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_4533.wav", "onoffCaption": "door slamming at 0.536-1.555, 2.537-3.537, 5.811-6.616", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4641.wav", "onoffCaption": 
"explosion at 0.631-5.631 and sheep goat bleating at 2.075-5.995", "frequencyCaption": "explosion one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4740.wav", "onoffCaption": "tapping clicking clanking at 0.306-3.746, 5.611-8.049", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4837.wav", "onoffCaption": "thump thud at 1.079-5.454", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4936.wav", "onoffCaption": "spraying at 0.214-2.798, 3.672-6.256, 7.074-9.658", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2019.wav", "onoffCaption": "dog barking at 0.089-2.089, 2.72-4.72, 5.569-7.569 and train horn at 4.343-7.877 and door slamming at 5.704-7.924", "frequencyCaption": "dog barking three times and train horn one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2068.wav", "onoffCaption": "duck quacking at 0.047-2.047, 2.698-4.698, 5.474-7.474 and cat meowing at 2.237-3.377 and dog barking at 6.219-8.219", "frequencyCaption": "duck quacking three times and cat meowing one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2102.wav", "onoffCaption": "spraying at 1.298-3.06, 3.89-5.065, 6.352-8.788", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2173.wav", "onoffCaption": "car horn honking at 0.126-3.78 and cat meowing at 0.557-4.917, 6.481-7.628", "frequencyCaption": "car horn honking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2196.wav", "onoffCaption": "sneeze at 1.355-4.43, 5.374-8.449", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2371.wav", "onoffCaption": "cat meowing at 1.912-3.178, 4.339-5.605, 6.882-8.148", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2394.wav", "onoffCaption": "tapping 
clicking clanking at 2.112-5.552, 6.278-9.718", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2525.wav", "onoffCaption": "train horn at 0.234-3.768, 4.385-7.919 and sneeze at 5.081-8.729", "frequencyCaption": "train horn two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2554.wav", "onoffCaption": "thump thud at 2.155-4.655", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2669.wav", "onoffCaption": "door knocking at 0.303-3.856 and train horn at 5.632-9.7", "frequencyCaption": "door knocking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2727.wav", "onoffCaption": "train horn at 0.066-4.066, 4.98-8.98 and cat meowing at 1.727-2.727", "frequencyCaption": "train horn two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2772.wav", "onoffCaption": "burping belching at 1.808-4.808, 5.553-8.183 and car horn honking at 3.667-8.067", "frequencyCaption": "burping belching two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2821.wav", "onoffCaption": "door knocking at 0.226-3.073, 4.879-7.422", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2850.wav", "onoffCaption": "dog barking at 0.215-2.215, 3.162-5.162, 6.429-8.429", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_4130.wav", "onoffCaption": "dog barking at 0.005-2.005 and explosion at 3.331-6.387, 7.394-10.0", "frequencyCaption": "dog barking one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_4141.wav", "onoffCaption": "burping belching at 1.866-6.202, 7.316-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4229.wav", "onoffCaption": "cow mooing at 2.388-5.398, 7.519-9.548", "frequencyCaption": "cow mooing two times"} +{"filepath": 
"data/multi_event_train/syn_4258.wav", "onoffCaption": "explosion at 2.276-5.276, 6.95-9.95", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4332.wav", "onoffCaption": "cat meowing at 1.898-3.482, 5.203-6.935", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4343.wav", "onoffCaption": "sheep goat bleating at 0.153-2.153, 2.766-4.766", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4498.wav", "onoffCaption": "door slamming at 1.793-2.966, 4.605-6.129, 8.222-9.162", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4517.wav", "onoffCaption": "cat meowing at 0.438-1.578, 3.851-6.376 and door slamming at 2.388-4.388, 4.942-6.942", "frequencyCaption": "cat meowing two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4566.wav", "onoffCaption": "whistling at 2.552-7.727 and car horn honking at 3.898-8.41", "frequencyCaption": "whistling one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4583.wav", "onoffCaption": "spraying at 3.279-5.671, 6.189-7.056, 9.134-9.866", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4715.wav", "onoffCaption": "duck quacking at 1.168-3.168, 3.913-5.913, 7.163-9.163 and dog barking at 1.171-3.171", "frequencyCaption": "duck quacking three times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4764.wav", "onoffCaption": "woman laughing at 1.085-3.368 and door slamming at 5.864-6.702, 7.848-8.987", "frequencyCaption": "woman laughing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4781.wav", "onoffCaption": "woman laughing at 0.104-2.709, 3.749-5.831, 6.999-9.199", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_4813.wav", "onoffCaption": "burping belching at 0.089-3.089, 4.311-7.192", 
"frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4862.wav", "onoffCaption": "cow mooing at 1.786-4.768, 5.558-8.527", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4887.wav", "onoffCaption": "woman laughing at 0.005-3.292", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4908.wav", "onoffCaption": "gunshot at 0.567-2.608, 3.838-5.879, 6.969-9.01", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4979.wav", "onoffCaption": "cow mooing at 0.323-4.752, 5.985-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2003.wav", "onoffCaption": "door slamming at 2.017-4.475 and dog barking at 6.941-8.941", "frequencyCaption": "door slamming one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2056.wav", "onoffCaption": "whistling at 2.868-4.877 and burping belching at 2.97-6.149, 7.093-10.0", "frequencyCaption": "whistling one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_2118.wav", "onoffCaption": "door slamming at 1.955-3.104, 4.415-5.564, 6.925-8.074 and car horn honking at 4.279-8.679", "frequencyCaption": "door slamming three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2157.wav", "onoffCaption": "door slamming at 3.384-4.617, 5.78-7.013, 7.976-9.209", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2225.wav", "onoffCaption": "duck quacking at 2.673-4.673, 6.37-8.37 and spraying at 3.012-5.596", "frequencyCaption": "duck quacking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2324.wav", "onoffCaption": "cow mooing at 2.87-6.168, 7.884-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2400.wav", "onoffCaption": "cat meowing at 0.016-1.229", "frequencyCaption": "cat 
meowing one times"} +{"filepath": "data/multi_event_train/syn_2455.wav", "onoffCaption": "cat meowing at 1.766-2.778, 3.833-4.973, 5.712-7.444", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2501.wav", "onoffCaption": "gunshot at 0.124-2.124, 2.96-4.96 and door slamming at 0.289-1.438", "frequencyCaption": "gunshot two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2673.wav", "onoffCaption": "woman laughing at 0.241-2.466", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2696.wav", "onoffCaption": "cow mooing at 2.987-6.285", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2768.wav", "onoffCaption": "sneeze at 0.124-4.653, 5.641-6.935", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2797.wav", "onoffCaption": "sheep goat bleating at 0.044-2.044, 3.147-5.147, 6.598-8.598", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2805.wav", "onoffCaption": "sneeze at 0.029-1.123, 3.204-5.521", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2904.wav", "onoffCaption": "whistling at 0.037-9.702 and dog barking at 0.082-2.082 and burping belching at 6.199-9.199", "frequencyCaption": "whistling one times and dog barking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2951.wav", "onoffCaption": "whistling at 2.043-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4015.wav", "onoffCaption": "car horn honking at 2.146-6.468, 7.709-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4114.wav", "onoffCaption": "sneeze at 0.7-3.103, 5.122-7.525", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4233.wav", "onoffCaption": "whistling at 1.754-4.729 and woman 
laughing at 2.721-5.076", "frequencyCaption": "whistling one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4266.wav", "onoffCaption": "burping belching at 1.694-5.563, 6.453-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4283.wav", "onoffCaption": "cow mooing at 0.564-3.862", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4299.wav", "onoffCaption": "car horn honking at 0.783-5.69, 7.139-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4367.wav", "onoffCaption": "train horn at 0.133-3.373, 5.09-7.557 and dog barking at 2.429-4.429, 6.478-8.878", "frequencyCaption": "train horn two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_4382.wav", "onoffCaption": "gunshot at 0.052-2.553, 3.667-5.667, 6.354-8.354", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4398.wav", "onoffCaption": "cat meowing at 0.031-1.18 and gunshot at 0.572-2.742, 5.068-7.238", "frequencyCaption": "cat meowing one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4443.wav", "onoffCaption": "whistling at 0.006-7.756", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4459.wav", "onoffCaption": "cat meowing at 0.808-2.363, 3.023-4.035 and tapping clicking clanking at 6.989-10.0", "frequencyCaption": "cat meowing two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4542.wav", "onoffCaption": "explosion at 0.903-4.03, 6.338-9.465", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4558.wav", "onoffCaption": "whistling at 1.603-9.258", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4630.wav", "onoffCaption": "cow mooing at 3.094-6.076, 7.671-10.0", "frequencyCaption": "cow mooing two times"} 
+{"filepath": "data/multi_event_train/syn_4665.wav", "onoffCaption": "burping belching at 4.194-7.453", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4680.wav", "onoffCaption": "cow mooing at 0.292-3.302, 4.366-7.335 and duck quacking at 6.865-8.865", "frequencyCaption": "cow mooing two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4731.wav", "onoffCaption": "burping belching at 0.174-3.174, 4.471-7.471 and woman laughing at 1.095-3.733", "frequencyCaption": "burping belching two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4846.wav", "onoffCaption": "sheep goat bleating at 3.216-5.216, 7.445-9.445", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4947.wav", "onoffCaption": "cow mooing at 0.181-3.15, 4.35-7.332 and gunshot at 2.445-4.946, 5.779-7.779 and whistling at 4.017-6.892, 7.993-10.0", "frequencyCaption": "cow mooing two times and gunshot two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_2042.wav", "onoffCaption": "sneeze at 2.179-4.175 and thump thud at 2.763-5.81, 7.173-9.414", "frequencyCaption": "sneeze one times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_2159.wav", "onoffCaption": "door knocking at 2.29-5.137, 6.138-8.906", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2231.wav", "onoffCaption": "cow mooing at 3.414-6.396, 7.031-9.087", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2264.wav", "onoffCaption": "car horn honking at 3.121-5.468", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2270.wav", "onoffCaption": "train horn at 1.497-5.678, 6.589-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2281.wav", "onoffCaption": "spraying at 0.544-2.306, 3.813-5.575 and train horn 
at 2.976-6.976", "frequencyCaption": "spraying two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2295.wav", "onoffCaption": "spraying at 3.423-4.05, 5.308-5.912, 7.048-7.632", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2414.wav", "onoffCaption": "sheep goat bleating at 1.213-3.213, 5.402-7.402", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2626.wav", "onoffCaption": "cow mooing at 0.604-3.902, 5.015-7.984", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2632.wav", "onoffCaption": "duck quacking at 2.26-4.26, 6.028-8.028", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2667.wav", "onoffCaption": "sheep goat bleating at 0.259-2.259, 3.658-5.658 and cat meowing at 2.779-4.511", "frequencyCaption": "sheep goat bleating two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2682.wav", "onoffCaption": "cat meowing at 3.69-5.106, 7.025-8.965", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2729.wav", "onoffCaption": "woman laughing at 0.702-3.774, 4.528-7.6 and burping belching at 3.235-5.329", "frequencyCaption": "woman laughing two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2799.wav", "onoffCaption": "sheep goat bleating at 0.527-2.527", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2910.wav", "onoffCaption": "sneeze at 1.626-3.309, 3.833-5.516, 6.281-7.964", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2945.wav", "onoffCaption": "thump thud at 0.075-2.846, 3.421-6.192 and burping belching at 7.915-10.0", "frequencyCaption": "thump thud two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4001.wav", "onoffCaption": "train horn at 0.358-6.073, 
7.942-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4040.wav", "onoffCaption": "door slamming at 1.741-4.522", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4054.wav", "onoffCaption": "explosion at 0.122-5.122", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4227.wav", "onoffCaption": "tapping clicking clanking at 0.449-3.889, 4.56-6.915, 7.848-9.866", "frequencyCaption": "tapping clicking clanking three times"} +{"filepath": "data/multi_event_train/syn_4272.wav", "onoffCaption": "sheep goat bleating at 0.103-2.103 and thump thud at 5.105-9.48", "frequencyCaption": "sheep goat bleating one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4297.wav", "onoffCaption": "door knocking at 1.869-3.891, 4.557-6.579, 7.312-9.334", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_4328.wav", "onoffCaption": "door slamming at 0.071-2.5, 3.7-5.618, 6.407-8.32", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4369.wav", "onoffCaption": "thump thud at 4.339-6.839", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4402.wav", "onoffCaption": "train horn at 3.277-6.597", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4416.wav", "onoffCaption": "sneeze at 0.054-1.641, 2.766-4.353, 5.195-6.782", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4457.wav", "onoffCaption": "tapping clicking clanking at 1.373-4.813, 5.85-8.035", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4519.wav", "onoffCaption": "burping belching at 2.319-8.999 and door slamming at 3.957-5.957, 7.089-10.0", "frequencyCaption": "burping belching one times and door slamming two times"} +{"filepath": 
"data/multi_event_train/syn_4624.wav", "onoffCaption": "train horn at 0.444-4.884, 5.959-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4671.wav", "onoffCaption": "door knocking at 3.288-6.125", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4694.wav", "onoffCaption": "thump thud at 2.224-4.724, 6.344-8.844", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4809.wav", "onoffCaption": "door slamming at 0.796-3.513, 5.446-6.424", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4848.wav", "onoffCaption": "spraying at 1.562-2.643, 3.178-4.259, 5.809-6.89", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4906.wav", "onoffCaption": "thump thud at 1.126-3.588 and cow mooing at 2.735-6.033", "frequencyCaption": "thump thud one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4912.wav", "onoffCaption": "thump thud at 1.033-5.408 and explosion at 6.642-9.514", "frequencyCaption": "thump thud one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4953.wav", "onoffCaption": "sneeze at 2.441-3.767 and dog barking at 2.683-4.683, 6.445-8.445", "frequencyCaption": "sneeze one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2017.wav", "onoffCaption": "duck quacking at 2.591-4.591, 6.473-8.473", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2029.wav", "onoffCaption": "thump thud at 0.12-4.57 and dog barking at 0.898-2.898, 4.555-6.555", "frequencyCaption": "thump thud one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2058.wav", "onoffCaption": "door slamming at 0.54-1.713, 2.823-3.996, 5.038-6.211", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2099.wav", "onoffCaption": "spraying at 
1.117-1.625", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2116.wav", "onoffCaption": "sneeze at 3.4-4.634, 5.74-6.974", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2132.wav", "onoffCaption": "tapping clicking clanking at 2.156-5.596", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2143.wav", "onoffCaption": "door slamming at 0.016-1.319, 2.87-4.173, 5.945-7.248 and spraying at 4.635-5.135, 7.255-7.882", "frequencyCaption": "door slamming three times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2167.wav", "onoffCaption": "tapping clicking clanking at 0.555-3.995, 4.982-7.959 and car horn honking at 2.45-4.915 and dog barking at 3.612-5.612", "frequencyCaption": "tapping clicking clanking two times and car horn honking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2182.wav", "onoffCaption": "tapping clicking clanking at 2.81-6.25", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2330.wav", "onoffCaption": "explosion at 0.7-3.572 and thump thud at 5.734-9.652", "frequencyCaption": "explosion one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2341.wav", "onoffCaption": "duck quacking at 0.59-2.59 and burping belching at 4.988-7.611", "frequencyCaption": "duck quacking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2365.wav", "onoffCaption": "tapping clicking clanking at 1.395-4.835 and door knocking at 3.354-5.731", "frequencyCaption": "tapping clicking clanking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2380.wav", "onoffCaption": "explosion at 3.921-6.915", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2441.wav", "onoffCaption": "sneeze at 0.431-1.523, 2.574-3.666 and gunshot at 
0.854-2.854, 4.67-6.67", "frequencyCaption": "sneeze two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2515.wav", "onoffCaption": "dog barking at 3.882-5.882, 6.574-8.574", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2540.wav", "onoffCaption": "sheep goat bleating at 0.606-2.606, 5.052-7.052 and door slamming at 5.485-6.624, 8.528-9.781", "frequencyCaption": "sheep goat bleating two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2564.wav", "onoffCaption": "burping belching at 1.28-3.387, 4.428-6.535, 7.767-9.874", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_2581.wav", "onoffCaption": "cat meowing at 0.248-1.388, 3.452-4.727, 6.932-8.926", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2628.wav", "onoffCaption": "thump thud at 0.371-4.038", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2698.wav", "onoffCaption": "explosion at 1.193-3.911, 5.087-7.427", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2717.wav", "onoffCaption": "door slamming at 0.151-2.151", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_2733.wav", "onoffCaption": "sheep goat bleating at 2.91-4.91", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2766.wav", "onoffCaption": "woman laughing at 1.206-3.288, 3.961-6.868 and car horn honking at 2.938-6.113", "frequencyCaption": "woman laughing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2783.wav", "onoffCaption": "door knocking at 2.496-4.736, 6.515-8.755", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2811.wav", "onoffCaption": "whistling at 3.028-8.203", "frequencyCaption": "whistling one times"} +{"filepath": 
"data/multi_event_train/syn_2835.wav", "onoffCaption": "whistling at 0.039-9.704 and spraying at 0.105-1.362, 3.426-4.33", "frequencyCaption": "whistling one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2844.wav", "onoffCaption": "thump thud at 3.178-6.225, 7.297-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2860.wav", "onoffCaption": "whistling at 0.213-3.088, 3.926-6.389 and door knocking at 3.404-7.554", "frequencyCaption": "whistling two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2885.wav", "onoffCaption": "duck quacking at 0.313-2.313, 4.547-6.547", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4100.wav", "onoffCaption": "tapping clicking clanking at 3.081-6.521", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4155.wav", "onoffCaption": "door slamming at 0.152-1.052, 1.604-2.584, 3.551-4.668 and woman laughing at 7.554-10.0", "frequencyCaption": "door slamming three times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4171.wav", "onoffCaption": "car horn honking at 0.875-3.661", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4194.wav", "onoffCaption": "duck quacking at 0.045-2.045, 4.189-6.189 and woman laughing at 0.11-7.122", "frequencyCaption": "duck quacking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4219.wav", "onoffCaption": "whistling at 3.867-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4268.wav", "onoffCaption": "train horn at 0.779-3.246, 4.852-6.993, 7.924-10.0 and car horn honking at 4.157-7.376", "frequencyCaption": "train horn three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4302.wav", "onoffCaption": "whistling at 1.87-9.525", "frequencyCaption": 
"whistling one times"} +{"filepath": "data/multi_event_train/syn_4326.wav", "onoffCaption": "sheep goat bleating at 0.284-4.204, 5.202-9.122 and tapping clicking clanking at 2.879-6.319", "frequencyCaption": "sheep goat bleating two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4357.wav", "onoffCaption": "car horn honking at 0.415-2.762 and spraying at 1.964-2.591, 3.444-3.944, 5.15-6.407 and duck quacking at 2.399-4.399", "frequencyCaption": "car horn honking one times and spraying three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4373.wav", "onoffCaption": "woman laughing at 2.689-9.423", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4396.wav", "onoffCaption": "woman laughing at 2.564-5.202, 6.278-8.561", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4418.wav", "onoffCaption": "sneeze at 0.199-1.363, 2.216-3.38, 3.88-5.044", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4503.wav", "onoffCaption": "cat meowing at 2.088-3.115 and whistling at 2.403-5.278, 6.471-9.346", "frequencyCaption": "cat meowing one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_4527.wav", "onoffCaption": "whistling at 1.299-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4556.wav", "onoffCaption": "sneeze at 0.693-2.193, 3.282-5.395, 6.632-9.449", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4701.wav", "onoffCaption": "cat meowing at 1.878-3.153", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4725.wav", "onoffCaption": "door slamming at 0.723-2.026 and whistling at 1.016-8.671 and thump thud at 4.221-8.671", "frequencyCaption": "door slamming one times and whistling one times and thump thud one times"} +{"filepath": 
"data/multi_event_train/syn_4754.wav", "onoffCaption": "car horn honking at 0.071-2.536, 4.312-6.777", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4770.wav", "onoffCaption": "cow mooing at 0.271-5.251, 6.816-9.785", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4795.wav", "onoffCaption": "sheep goat bleating at 1.536-3.536 and train horn at 7.424-10.0", "frequencyCaption": "sheep goat bleating one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4807.wav", "onoffCaption": "whistling at 0.378-7.496 and duck quacking at 0.978-2.978", "frequencyCaption": "whistling one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4823.wav", "onoffCaption": "door knocking at 0.464-2.776, 3.809-6.112, 6.749-8.876", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_4852.wav", "onoffCaption": "cat meowing at 0.345-2.235 and door knocking at 5.757-7.978", "frequencyCaption": "cat meowing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4938.wav", "onoffCaption": "woman laughing at 3.72-6.774", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4949.wav", "onoffCaption": "cat meowing at 2.909-3.994 and dog barking at 6.602-8.602", "frequencyCaption": "cat meowing one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2033.wav", "onoffCaption": "cow mooing at 3.185-7.614", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2061.wav", "onoffCaption": "door slamming at 1.789-4.668 and door knocking at 2.337-4.586, 6.731-9.083 and dog barking at 3.416-5.416", "frequencyCaption": "door slamming one times and door knocking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2066.wav", "onoffCaption": "woman laughing at 0.892-3.247 and spraying at 
8.202-9.283", "frequencyCaption": "woman laughing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2083.wav", "onoffCaption": "spraying at 0.188-1.092", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2084.wav", "onoffCaption": "dog barking at 0.685-2.685 and tapping clicking clanking at 6.937-10.0", "frequencyCaption": "dog barking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2128.wav", "onoffCaption": "tapping clicking clanking at 1.229-4.669 and spraying at 6.867-7.471, 8.495-9.099", "frequencyCaption": "tapping clicking clanking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2198.wav", "onoffCaption": "dog barking at 0.7-2.7, 4.34-7.261 and gunshot at 2.054-4.073, 4.78-6.78, 7.354-9.354", "frequencyCaption": "dog barking two times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_2215.wav", "onoffCaption": "whistling at 3.911-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2240.wav", "onoffCaption": "cat meowing at 0.114-3.144, 4.645-6.585 and sheep goat bleating at 0.204-2.204", "frequencyCaption": "cat meowing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2309.wav", "onoffCaption": "cow mooing at 1.54-5.969, 6.587-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2314.wav", "onoffCaption": "whistling at 0.043-2.052, 3.186-5.195, 6.106-8.115 and tapping clicking clanking at 1.55-4.99, 5.612-9.052", "frequencyCaption": "whistling three times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2430.wav", "onoffCaption": "spraying at 3.021-3.872, 4.802-5.706 and cow mooing at 7.834-10.0", "frequencyCaption": "spraying two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2437.wav", "onoffCaption": "gunshot at 2.885-4.885 
and door slamming at 7.153-8.093", "frequencyCaption": "gunshot one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2465.wav", "onoffCaption": "cow mooing at 3.343-6.353", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2480.wav", "onoffCaption": "spraying at 1.693-2.544, 3.905-4.756", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2531.wav", "onoffCaption": "thump thud at 0.081-4.531, 6.844-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2616.wav", "onoffCaption": "whistling at 0.165-8.792 and door slamming at 0.317-1.168, 2.4-3.251, 4.02-4.871", "frequencyCaption": "whistling one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_2643.wav", "onoffCaption": "gunshot at 0.789-2.789, 3.662-5.662 and train horn at 2.218-6.399, 7.69-10.0", "frequencyCaption": "gunshot two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2644.wav", "onoffCaption": "cat meowing at 0.168-1.195, 1.774-3.751, 4.349-6.416", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2659.wav", "onoffCaption": "whistling at 0.1-5.275", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2742.wav", "onoffCaption": "train horn at 2.253-5.653, 7.221-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2758.wav", "onoffCaption": "car horn honking at 0.059-3.234, 5.075-8.25", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2828.wav", "onoffCaption": "whistling at 0.57-8.32", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2933.wav", "onoffCaption": "explosion at 0.276-3.148, 3.917-6.789", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2934.wav", "onoffCaption": "burping belching 
at 0.338-7.315", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2961.wav", "onoffCaption": "spraying at 4.013-6.473, 8.516-9.266", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2984.wav", "onoffCaption": "sheep goat bleating at 0.618-2.618 and gunshot at 5.151-7.151, 7.731-9.731", "frequencyCaption": "sheep goat bleating one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4025.wav", "onoffCaption": "train horn at 3.629-6.096", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4070.wav", "onoffCaption": "door slamming at 0.029-1.32 and cow mooing at 2.727-6.025, 7.22-9.384 and sneeze at 4.803-7.279", "frequencyCaption": "door slamming one times and cow mooing two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4095.wav", "onoffCaption": "thump thud at 3.766-6.537", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4124.wav", "onoffCaption": "duck quacking at 0.025-2.025", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4203.wav", "onoffCaption": "spraying at 0.356-2.816, 4.552-5.202", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4251.wav", "onoffCaption": "woman laughing at 0.47-3.062, 5.365-7.586", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4256.wav", "onoffCaption": "sheep goat bleating at 0.135-2.135, 3.296-5.296, 6.567-8.567", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4318.wav", "onoffCaption": "gunshot at 3.162-5.162, 5.706-7.706 and duck quacking at 3.962-5.962, 7.3-9.3", "frequencyCaption": "gunshot two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4426.wav", "onoffCaption": "door knocking at 2.595-4.783, 5.891-8.728", "frequencyCaption": "door 
knocking two times"} +{"filepath": "data/multi_event_train/syn_4469.wav", "onoffCaption": "gunshot at 0.545-2.545 and woman laughing at 0.563-3.358, 4.196-6.991 and sheep goat bleating at 1.829-3.829", "frequencyCaption": "gunshot one times and woman laughing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4473.wav", "onoffCaption": "sneeze at 3.586-8.086", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4496.wav", "onoffCaption": "explosion at 2.265-5.265, 6.072-8.944", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4568.wav", "onoffCaption": "cat meowing at 0.568-1.653, 2.506-3.591 and car horn honking at 7.41-10.0", "frequencyCaption": "cat meowing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4572.wav", "onoffCaption": "duck quacking at 0.41-2.41, 4.509-6.509 and gunshot at 1.916-3.916", "frequencyCaption": "duck quacking two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4597.wav", "onoffCaption": "cow mooing at 1.045-4.027, 4.606-7.588", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4600.wav", "onoffCaption": "explosion at 0.464-4.464, 6.669-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4607.wav", "onoffCaption": "cat meowing at 2.122-3.739, 4.759-6.069, 7.198-9.192", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4655.wav", "onoffCaption": "train horn at 0.649-3.889, 6.249-9.489", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4839.wav", "onoffCaption": "door knocking at 3.565-6.94, 7.981-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4876.wav", "onoffCaption": "sneeze at 0.389-3.499 and whistling at 0.703-5.878", "frequencyCaption": "sneeze one times and whistling one 
times"} +{"filepath": "data/multi_event_train/syn_4889.wav", "onoffCaption": "spraying at 2.837-3.412 and car horn honking at 6.908-10.0", "frequencyCaption": "spraying one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4893.wav", "onoffCaption": "door slamming at 3.845-4.864, 5.397-6.416 and duck quacking at 4.624-6.624", "frequencyCaption": "door slamming two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4922.wav", "onoffCaption": "door slamming at 0.069-0.75", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4977.wav", "onoffCaption": "duck quacking at 1.623-3.623, 4.63-6.63, 7.915-9.915", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4988.wav", "onoffCaption": "spraying at 2.075-2.807, 4.719-5.57", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4992.wav", "onoffCaption": "door knocking at 0.681-5.056 and gunshot at 3.1-5.1, 7.191-9.191", "frequencyCaption": "door knocking one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2034.wav", "onoffCaption": "duck quacking at 0.504-2.504, 3.307-5.307", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2212.wav", "onoffCaption": "train horn at 2.927-6.697", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2247.wav", "onoffCaption": "dog barking at 0.081-2.081 and car horn honking at 0.117-4.366 and cat meowing at 6.32-7.904", "frequencyCaption": "dog barking one times and car horn honking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2346.wav", "onoffCaption": "door knocking at 0.207-3.582", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2462.wav", "onoffCaption": "tapping clicking clanking at 0.119-3.559, 4.558-6.763", "frequencyCaption": "tapping clicking 
clanking two times"} +{"filepath": "data/multi_event_train/syn_2487.wav", "onoffCaption": "burping belching at 0.648-7.328 and door knocking at 2.358-6.126, 7.457-9.846", "frequencyCaption": "burping belching one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2563.wav", "onoffCaption": "door slamming at 0.181-1.472 and explosion at 3.64-6.168 and dog barking at 4.954-6.954", "frequencyCaption": "door slamming one times and explosion one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2579.wav", "onoffCaption": "cat meowing at 0.093-2.087", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2611.wav", "onoffCaption": "tapping clicking clanking at 0.362-3.802, 6.013-9.453", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2710.wav", "onoffCaption": "duck quacking at 3.339-5.339, 7.228-9.228", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2898.wav", "onoffCaption": "car horn honking at 2.151-6.473, 6.993-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2966.wav", "onoffCaption": "train horn at 0.045-3.365, 4.328-7.648", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2983.wav", "onoffCaption": "gunshot at 0.554-2.554, 3.272-5.272 and train horn at 3.312-7.312", "frequencyCaption": "gunshot two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4022.wav", "onoffCaption": "car horn honking at 0.689-3.908", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4077.wav", "onoffCaption": "sheep goat bleating at 0.949-2.949, 3.937-5.937, 6.447-8.447", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4088.wav", "onoffCaption": "spraying at 2.992-5.452", "frequencyCaption": "spraying one 
times"} +{"filepath": "data/multi_event_train/syn_4092.wav", "onoffCaption": "explosion at 2.473-5.073", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4139.wav", "onoffCaption": "whistling at 0.518-8.173", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4176.wav", "onoffCaption": "car horn honking at 0.455-4.042", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4189.wav", "onoffCaption": "cat meowing at 0.138-1.498, 2.544-3.904, 5.026-6.386", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4193.wav", "onoffCaption": "woman laughing at 0.153-5.192 and cow mooing at 3.326-7.755", "frequencyCaption": "woman laughing one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4204.wav", "onoffCaption": "train horn at 3.537-7.977 and duck quacking at 6.259-8.259", "frequencyCaption": "train horn one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4421.wav", "onoffCaption": "cat meowing at 0.141-1.288 and sheep goat bleating at 0.715-4.035 and door knocking at 6.083-8.583", "frequencyCaption": "cat meowing one times and sheep goat bleating one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4474.wav", "onoffCaption": "burping belching at 2.963-5.378, 6.525-8.94", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4491.wav", "onoffCaption": "tapping clicking clanking at 2.783-6.223, 7.925-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4520.wav", "onoffCaption": "dog barking at 1.345-4.665, 6.247-9.567", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4652.wav", "onoffCaption": "thump thud at 0.178-2.678, 4.247-6.747 and gunshot at 0.859-2.859, 3.774-5.774, 6.922-8.922", "frequencyCaption": "thump 
thud two times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_4749.wav", "onoffCaption": "duck quacking at 1.65-3.65, 5.955-7.955", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4824.wav", "onoffCaption": "woman laughing at 0.292-2.408 and tapping clicking clanking at 4.885-8.325", "frequencyCaption": "woman laughing one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4925.wav", "onoffCaption": "spraying at 0.314-1.314 and cow mooing at 5.093-8.075", "frequencyCaption": "spraying one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4970.wav", "onoffCaption": "gunshot at 1.78-3.78, 5.766-7.766", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4995.wav", "onoffCaption": "duck quacking at 1.467-3.467, 5.668-7.668", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_9.wav", "onoffCaption": "gunshot at 0.635-2.635", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2135.wav", "onoffCaption": "cat meowing at 4.338-5.423, 7.709-8.794", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2160.wav", "onoffCaption": "car horn honking at 0.225-5.132, 6.96-9.007 and burping belching at 2.253-4.776", "frequencyCaption": "car horn honking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2185.wav", "onoffCaption": "thump thud at 2.942-7.392", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2208.wav", "onoffCaption": "woman laughing at 1.114-8.559", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2279.wav", "onoffCaption": "thump thud at 1.323-5.773, 7.153-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2313.wav", "onoffCaption": "duck quacking at 
1.898-3.898, 4.892-6.892 and sheep goat bleating at 4.573-6.573", "frequencyCaption": "duck quacking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2478.wav", "onoffCaption": "dog barking at 0.087-2.087, 3.299-5.299, 6.614-8.614", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2536.wav", "onoffCaption": "cat meowing at 0.427-3.668, 5.917-9.158 and woman laughing at 0.858-3.627", "frequencyCaption": "cat meowing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2586.wav", "onoffCaption": "dog barking at 1.358-3.358, 5.489-7.489", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2734.wav", "onoffCaption": "burping belching at 1.247-3.354 and spraying at 2.437-3.064, 4.131-5.215", "frequencyCaption": "burping belching one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2745.wav", "onoffCaption": "cow mooing at 0.875-3.857, 5.932-8.914", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2832.wav", "onoffCaption": "explosion at 0.339-2.341, 4.836-7.714 and car horn honking at 1.971-4.757", "frequencyCaption": "explosion two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2867.wav", "onoffCaption": "tapping clicking clanking at 0.598-4.038, 5.634-9.074", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2882.wav", "onoffCaption": "cow mooing at 2.804-7.784", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2929.wav", "onoffCaption": "tapping clicking clanking at 1.97-5.41 and sheep goat bleating at 2.461-4.461, 4.975-6.975", "frequencyCaption": "tapping clicking clanking one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2999.wav", "onoffCaption": "thump thud at 0.135-2.597, 3.618-6.08", 
"frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4038.wav", "onoffCaption": "cow mooing at 2.757-7.186", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4123.wav", "onoffCaption": "thump thud at 0.25-4.7, 5.564-8.064", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4305.wav", "onoffCaption": "whistling at 2.362-7.862", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4350.wav", "onoffCaption": "burping belching at 3.881-6.207, 6.742-9.742", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4575.wav", "onoffCaption": "explosion at 2.14-7.14, 7.84-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4590.wav", "onoffCaption": "thump thud at 1.677-4.724", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4648.wav", "onoffCaption": "gunshot at 3.838-6.078", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4706.wav", "onoffCaption": "explosion at 0.24-2.993, 4.501-7.501 and duck quacking at 2.746-4.746, 6.768-8.768", "frequencyCaption": "explosion two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4753.wav", "onoffCaption": "gunshot at 2.893-4.893", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4871.wav", "onoffCaption": "car horn honking at 1.632-3.979, 5.537-8.355", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4894.wav", "onoffCaption": "gunshot at 1.001-3.001", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2111.wav", "onoffCaption": "duck quacking at 0.295-2.295, 2.851-4.851 and woman laughing at 1.563-5.615, 7.148-10.0", "frequencyCaption": "duck quacking two times and woman laughing two times"} +{"filepath": 
"data/multi_event_train/syn_2144.wav", "onoffCaption": "dog barking at 0.587-2.587, 3.375-5.375, 6.652-8.652", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2337.wav", "onoffCaption": "cow mooing at 0.134-5.114, 6.062-8.242", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2362.wav", "onoffCaption": "sheep goat bleating at 3.271-5.271, 7.111-9.111", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2387.wav", "onoffCaption": "sheep goat bleating at 2.984-4.984", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2409.wav", "onoffCaption": "cow mooing at 1.935-5.233, 6.077-9.059", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2413.wav", "onoffCaption": "gunshot at 0.733-2.733", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2512.wav", "onoffCaption": "whistling at 3.548-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2547.wav", "onoffCaption": "explosion at 0.219-2.221, 3.47-5.534 and sheep goat bleating at 6.765-8.765", "frequencyCaption": "explosion two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2761.wav", "onoffCaption": "duck quacking at 2.498-4.498, 5.686-7.686 and explosion at 5.745-7.809", "frequencyCaption": "duck quacking two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2784.wav", "onoffCaption": "thump thud at 1.867-4.206, 5.36-7.699", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2816.wav", "onoffCaption": "door slamming at 2.818-5.599 and cow mooing at 2.905-7.885", "frequencyCaption": "door slamming one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2843.wav", "onoffCaption": "burping belching at 2.029-7.973", "frequencyCaption": 
"burping belching one times"} +{"filepath": "data/multi_event_train/syn_2958.wav", "onoffCaption": "spraying at 0.929-3.321", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_4049.wav", "onoffCaption": "woman laughing at 0.476-7.921 and car horn honking at 3.279-5.779", "frequencyCaption": "woman laughing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4107.wav", "onoffCaption": "cow mooing at 0.839-5.268, 6.236-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4152.wav", "onoffCaption": "burping belching at 1.002-3.625, 6.031-8.654", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4321.wav", "onoffCaption": "sneeze at 0.133-1.379, 2.353-3.965 and whistling at 1.027-7.317", "frequencyCaption": "sneeze two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4374.wav", "onoffCaption": "explosion at 0.919-3.672, 5.793-8.522 and burping belching at 5.774-8.976", "frequencyCaption": "explosion two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4391.wav", "onoffCaption": "sheep goat bleating at 2.629-4.629", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4504.wav", "onoffCaption": "cat meowing at 0.188-1.374 and dog barking at 0.587-2.587, 3.86-5.86, 6.47-8.47", "frequencyCaption": "cat meowing one times and dog barking three times"} +{"filepath": "data/multi_event_train/syn_4551.wav", "onoffCaption": "woman laughing at 3.557-6.642", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4639.wav", "onoffCaption": "car horn honking at 1.638-4.813 and burping belching at 7.602-9.633", "frequencyCaption": "car horn honking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4689.wav", "onoffCaption": "sheep goat bleating at 1.268-3.268, 
5.556-7.556", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4722.wav", "onoffCaption": "whistling at 1.885-7.385, 7.979-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4777.wav", "onoffCaption": "train horn at 1.37-5.81, 7.867-10.0 and gunshot at 2.589-4.589", "frequencyCaption": "train horn two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4792.wav", "onoffCaption": "woman laughing at 1.201-3.395, 5.62-7.814", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4800.wav", "onoffCaption": "train horn at 0.444-3.924 and sneeze at 2.515-4.198 and duck quacking at 6.469-8.469", "frequencyCaption": "train horn one times and sneeze one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4855.wav", "onoffCaption": "door slamming at 0.924-3.353 and tapping clicking clanking at 6.856-10.0", "frequencyCaption": "door slamming one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2010.wav", "onoffCaption": "thump thud at 0.321-2.783 and cat meowing at 4.756-9.756", "frequencyCaption": "thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2045.wav", "onoffCaption": "door slamming at 0.243-2.469 and cow mooing at 4.409-7.707", "frequencyCaption": "door slamming one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2236.wav", "onoffCaption": "thump thud at 1.714-3.942, 6.215-8.443", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2263.wav", "onoffCaption": "car horn honking at 2.479-6.801 and door slamming at 2.505-3.524, 5.637-6.656 and door knocking at 3.955-6.455", "frequencyCaption": "car horn honking one times and door slamming two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2286.wav", "onoffCaption": "sneeze at 
0.115-1.349, 1.861-3.155, 3.884-6.727", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2378.wav", "onoffCaption": "duck quacking at 1.898-3.898, 4.706-6.706, 7.974-9.974 and car horn honking at 3.594-6.38", "frequencyCaption": "duck quacking three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2446.wav", "onoffCaption": "door knocking at 3.375-5.999, 7.052-9.129", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2508.wav", "onoffCaption": "woman laughing at 1.307-3.532, 4.108-6.316 and explosion at 3.427-6.427", "frequencyCaption": "woman laughing two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2635.wav", "onoffCaption": "sheep goat bleating at 0.781-2.781", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2660.wav", "onoffCaption": "woman laughing at 3.713-7.765", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2685.wav", "onoffCaption": "sheep goat bleating at 0.99-2.99, 5.032-7.088", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2859.wav", "onoffCaption": "burping belching at 2.044-5.55, 7.897-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2917.wav", "onoffCaption": "tapping clicking clanking at 2.146-5.586, 6.162-9.602", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2942.wav", "onoffCaption": "duck quacking at 0.672-2.672, 5.106-7.106, 7.837-9.837", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4006.wav", "onoffCaption": "tapping clicking clanking at 2.901-6.341", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4053.wav", "onoffCaption": "cat meowing at 0.589-1.599, 2.313-3.34, 
4.373-5.52", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4148.wav", "onoffCaption": "spraying at 1.484-3.217, 3.795-5.052, 5.923-7.007", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4220.wav", "onoffCaption": "cow mooing at 0.893-4.191", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4275.wav", "onoffCaption": "train horn at 0.043-3.443 and sheep goat bleating at 5.656-7.656", "frequencyCaption": "train horn one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4290.wav", "onoffCaption": "dog barking at 0.053-2.053 and sneeze at 1.421-3.008 and cat meowing at 1.901-3.087", "frequencyCaption": "dog barking one times and sneeze one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4405.wav", "onoffCaption": "explosion at 0.051-3.051, 3.851-6.574", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4450.wav", "onoffCaption": "sneeze at 2.386-3.62, 4.447-6.154, 7.403-10.0", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4623.wav", "onoffCaption": "cat meowing at 2.217-7.217", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4676.wav", "onoffCaption": "sheep goat bleating at 0.572-2.572", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4693.wav", "onoffCaption": "cow mooing at 2.486-5.455, 7.47-9.667", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4738.wav", "onoffCaption": "door slamming at 3.65-4.628, 6.613-8.613", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4788.wav", "onoffCaption": "explosion at 0.578-4.447, 5.213-9.082 and car horn honking at 0.746-3.564", "frequencyCaption": "explosion two times and car horn honking one times"} +{"filepath": 
"data/multi_event_train/syn_4901.wav", "onoffCaption": "car horn honking at 2.729-5.642, 6.454-8.801", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4954.wav", "onoffCaption": "cat meowing at 0.74-3.77", "frequencyCaption": "cat meowing one times"} diff --git a/picoaudio/audioldm/__init__.py b/picoaudio/audioldm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2bbf85f01ccc72b6f18e7405d940adf07a26b500 --- /dev/null +++ b/picoaudio/audioldm/__init__.py @@ -0,0 +1,8 @@ +from .ldm import LatentDiffusion +from .utils import seed_everything, save_wave, get_time, get_duration +from .pipeline import * + + + + + diff --git a/picoaudio/audioldm/__main__.py b/picoaudio/audioldm/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..13f8bafa839f512a156dd6380d2cf43c573a970a --- /dev/null +++ b/picoaudio/audioldm/__main__.py @@ -0,0 +1,183 @@ +#!/usr/bin/python3 +import os +from audioldm import text_to_audio, style_transfer, build_model, save_wave, get_time, round_up_duration, get_duration +import argparse + +CACHE_DIR = os.getenv( + "AUDIOLDM_CACHE_DIR", + os.path.join(os.path.expanduser("~"), ".cache/audioldm")) + +parser = argparse.ArgumentParser() + +parser.add_argument( + "--mode", + type=str, + required=False, + default="generation", + help="generation: text-to-audio generation; transfer: style transfer", + choices=["generation", "transfer"] +) + +parser.add_argument( + "-t", + "--text", + type=str, + required=False, + default="", + help="Text prompt to the model for audio generation", +) + +parser.add_argument( + "-f", + "--file_path", + type=str, + required=False, + default=None, + help="(--mode transfer): Original audio file for style transfer; Or (--mode generation): the guidance audio file for generating simialr audio", +) + +parser.add_argument( + "--transfer_strength", + type=float, + required=False, + default=0.5, + help="A value between 0 and 1. 
0 means original audio without transfer, 1 means completely transfer to the audio indicated by text", +) + +parser.add_argument( + "-s", + "--save_path", + type=str, + required=False, + help="The path to save model output", + default="./output", +) + +parser.add_argument( + "--model_name", + type=str, + required=False, + help="The checkpoint you gonna use", + default="audioldm-s-full", + choices=["audioldm-s-full", "audioldm-l-full", "audioldm-s-full-v2"] +) + +parser.add_argument( + "-ckpt", + "--ckpt_path", + type=str, + required=False, + help="The path to the pretrained .ckpt model", + default=None, +) + +parser.add_argument( + "-b", + "--batchsize", + type=int, + required=False, + default=1, + help="Generate how many samples at the same time", +) + +parser.add_argument( + "--ddim_steps", + type=int, + required=False, + default=200, + help="The sampling step for DDIM", +) + +parser.add_argument( + "-gs", + "--guidance_scale", + type=float, + required=False, + default=2.5, + help="Guidance scale (Large => better quality and relavancy to text; Small => better diversity)", +) + +parser.add_argument( + "-dur", + "--duration", + type=float, + required=False, + default=10.0, + help="The duration of the samples", +) + +parser.add_argument( + "-n", + "--n_candidate_gen_per_text", + type=int, + required=False, + default=3, + help="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). 
A Larger value usually lead to better quality with heavier computation", +) + +parser.add_argument( + "--seed", + type=int, + required=False, + default=42, + help="Change this value (any integer number) will lead to a different generation result.", +) + +args = parser.parse_args() + +if(args.ckpt_path is not None): + print("Warning: ckpt_path has no effect after version 0.0.20.") + +assert args.duration % 2.5 == 0, "Duration must be a multiple of 2.5" + +mode = args.mode +if(mode == "generation" and args.file_path is not None): + mode = "generation_audio_to_audio" + if(len(args.text) > 0): + print("Warning: You have specified the --file_path. --text will be ignored") + args.text = "" + +save_path = os.path.join(args.save_path, mode) + +if(args.file_path is not None): + save_path = os.path.join(save_path, os.path.basename(args.file_path.split(".")[0])) + +text = args.text +random_seed = args.seed +duration = args.duration +guidance_scale = args.guidance_scale +n_candidate_gen_per_text = args.n_candidate_gen_per_text + +os.makedirs(save_path, exist_ok=True) +audioldm = build_model(model_name=args.model_name) + +if(args.mode == "generation"): + waveform = text_to_audio( + audioldm, + text, + args.file_path, + random_seed, + duration=duration, + guidance_scale=guidance_scale, + ddim_steps=args.ddim_steps, + n_candidate_gen_per_text=n_candidate_gen_per_text, + batchsize=args.batchsize, + ) + +elif(args.mode == "transfer"): + assert args.file_path is not None + assert os.path.exists(args.file_path), "The original audio file \'%s\' for style transfer does not exist." 
def window_sumsquare(
    window,
    n_frames,
    hop_length,
    win_length,
    n_fft,
    dtype=np.float32,
    norm=None,
):
    """
    # from librosa 0.6
    Compute the sum-square envelope of a window function at a given hop length.

    This is used to estimate modulation effects induced by windowing
    observations in short-time fourier transforms.

    Parameters
    ----------
    window : string, tuple, number, callable, or list-like
        Window specification, as in `get_window`

    n_frames : int > 0
        The number of analysis frames

    hop_length : int > 0
        The number of samples to advance between frames

    win_length : [optional]
        The length of the window function.  By default, this matches `n_fft`.

    n_fft : int > 0
        The length of each analysis frame.

    dtype : np.dtype
        The data type of the output

    norm : [optional]
        Forwarded unchanged as the `norm` argument of `librosa.util.normalize`
        before the window is squared.

    Returns
    -------
    wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
        The sum-squared envelope of the window function
    """
    if win_length is None:
        win_length = n_fft

    n = n_fft + hop_length * (n_frames - 1)
    x = np.zeros(n, dtype=dtype)

    # Compute the squared window at the desired length
    win_sq = get_window(window, win_length, fftbins=True)
    win_sq = librosa_util.normalize(win_sq, norm=norm) ** 2
    # `size` is passed by keyword: newer librosa releases made it keyword-only,
    # and the keyword form is also accepted by the older positional signature.
    win_sq = librosa_util.pad_center(win_sq, size=n_fft)

    # Fill the envelope (overlap-add of the squared window, clipped at the end)
    for i in range(n_frames):
        sample = i * hop_length
        x[sample : min(n, sample + n_fft)] += win_sq[: max(0, min(n_fft, n - sample))]
    return x
class STFT(torch.nn.Module):
    """adapted from Prem Seetharaman's https://github.com/pseeth/pytorch-stft

    Short-time Fourier transform implemented as 1-D (transposed) convolutions
    against fixed Fourier bases that are stored as module buffers, so the bases
    follow the module across devices.
    """

    def __init__(self, filter_length, hop_length, win_length, window="hann"):
        """Build the (optionally windowed) forward and inverse Fourier bases.

        Args:
            filter_length: FFT size; also the convolution kernel length.
            hop_length: stride between consecutive frames, in samples.
            win_length: window length; must not exceed ``filter_length``.
            window: window specification passed to ``scipy.signal.get_window``,
                or ``None`` to leave the bases un-windowed.
        """
        super(STFT, self).__init__()
        self.filter_length = filter_length
        self.hop_length = hop_length
        self.win_length = win_length
        self.window = window
        self.forward_transform = None
        scale = self.filter_length / self.hop_length
        fourier_basis = np.fft.fft(np.eye(self.filter_length))

        # Keep only the non-redundant half of the spectrum and stack the real
        # rows on top of the imaginary rows.
        cutoff = int((self.filter_length / 2 + 1))
        fourier_basis = np.vstack(
            [np.real(fourier_basis[:cutoff, :]), np.imag(fourier_basis[:cutoff, :])]
        )

        forward_basis = torch.FloatTensor(fourier_basis[:, None, :])
        inverse_basis = torch.FloatTensor(
            np.linalg.pinv(scale * fourier_basis).T[:, None, :]
        )

        if window is not None:
            assert filter_length >= win_length
            # get window and zero center pad it to filter_length
            fft_window = get_window(window, win_length, fftbins=True)
            fft_window = pad_center(fft_window, size=filter_length)
            fft_window = torch.from_numpy(fft_window).float()

            # window the bases
            forward_basis *= fft_window
            inverse_basis *= fft_window

        self.register_buffer("forward_basis", forward_basis.float())
        self.register_buffer("inverse_basis", inverse_basis.float())

    def transform(self, input_data):
        """Return ``(magnitude, phase)`` of the STFT of ``input_data``.

        ``input_data`` is indexed as ``(batch, samples)``; it is moved to the
        device of the basis buffers before the convolution.
        """
        device = self.forward_basis.device
        input_data = input_data.to(device)

        num_batches = input_data.size(0)
        num_samples = input_data.size(1)

        self.num_samples = num_samples

        # similar to librosa, reflect-pad the input
        input_data = input_data.view(num_batches, 1, num_samples)
        half = int(self.filter_length / 2)
        input_data = F.pad(
            input_data.unsqueeze(1),
            (half, half, 0, 0),
            mode="reflect",
        )
        input_data = input_data.squeeze(1)

        forward_transform = F.conv1d(
            input_data,
            self.forward_basis,
            stride=self.hop_length,
            padding=0,
        )

        # Channels [0, cutoff) are the real part, the rest the imaginary part.
        cutoff = int((self.filter_length / 2) + 1)
        real_part = forward_transform[:, :cutoff, :]
        imag_part = forward_transform[:, cutoff:, :]

        magnitude = torch.sqrt(real_part**2 + imag_part**2)
        # Phase is computed on detached values, as in the original code.
        phase = torch.atan2(imag_part.detach(), real_part.detach())

        return magnitude, phase

    def inverse(self, magnitude, phase):
        """Reconstruct a waveform from ``magnitude`` and ``phase``.

        Both inputs are moved to the device of the basis buffers. The result
        has the reflect-padding added by :meth:`transform` trimmed from both
        ends.
        """
        device = self.forward_basis.device
        magnitude, phase = magnitude.to(device), phase.to(device)

        recombine_magnitude_phase = torch.cat(
            [magnitude * torch.cos(phase), magnitude * torch.sin(phase)], dim=1
        )

        inverse_transform = F.conv_transpose1d(
            recombine_magnitude_phase,
            self.inverse_basis,
            stride=self.hop_length,
            padding=0,
        )

        if self.window is not None:
            window_sum = window_sumsquare(
                self.window,
                magnitude.size(-1),
                hop_length=self.hop_length,
                win_length=self.win_length,
                n_fft=self.filter_length,
                dtype=np.float32,
            )
            # remove modulation effects
            # The envelope is computed with NumPy on the CPU; move both the
            # envelope and the index tensor to the signal's device so the
            # in-place division below also works when the module is on a GPU.
            approx_nonzero_indices = torch.from_numpy(
                np.where(window_sum > tiny(window_sum))[0]
            ).to(inverse_transform.device)
            window_sum = torch.from_numpy(window_sum).to(inverse_transform.device)
            inverse_transform[:, :, approx_nonzero_indices] /= window_sum[
                approx_nonzero_indices
            ]

            # scale by hop ratio
            inverse_transform *= float(self.filter_length) / self.hop_length

        # Drop the half-frame of padding on each side.
        half = int(self.filter_length / 2)
        inverse_transform = inverse_transform[:, :, half:]
        inverse_transform = inverse_transform[:, :, :-half]

        return inverse_transform

    def forward(self, input_data):
        """Round-trip ``input_data`` through :meth:`transform` and :meth:`inverse`.

        The intermediate magnitude and phase are kept on ``self.magnitude`` and
        ``self.phase``.
        """
        self.magnitude, self.phase = self.transform(input_data)
        reconstruction = self.inverse(self.magnitude, self.phase)
        return reconstruction
+ self, + filter_length, + hop_length, + win_length, + n_mel_channels, + sampling_rate, + mel_fmin, + mel_fmax, + ): + super(TacotronSTFT, self).__init__() + self.n_mel_channels = n_mel_channels + self.sampling_rate = sampling_rate + self.stft_fn = STFT(filter_length, hop_length, win_length) + mel_basis = librosa_mel_fn( + sr=sampling_rate, n_fft=filter_length, n_mels=n_mel_channels, fmin=mel_fmin, fmax=mel_fmax + ) + mel_basis = torch.from_numpy(mel_basis).float() + self.register_buffer("mel_basis", mel_basis) + + def spectral_normalize(self, magnitudes, normalize_fun): + output = dynamic_range_compression(magnitudes, normalize_fun) + return output + + def spectral_de_normalize(self, magnitudes): + output = dynamic_range_decompression(magnitudes) + return output + + def mel_spectrogram(self, y, normalize_fun=torch.log): + """Computes mel-spectrograms from a batch of waves + PARAMS + ------ + y: Variable(torch.FloatTensor) with shape (B, T) in range [-1, 1] + + RETURNS + ------- + mel_output: torch.FloatTensor of shape (B, n_mel_channels, T) + """ + assert torch.min(y.data) >= -1, torch.min(y.data) + assert torch.max(y.data) <= 1, torch.max(y.data) + + magnitudes, phases = self.stft_fn.transform(y) + magnitudes = magnitudes.data + mel_output = torch.matmul(self.mel_basis, magnitudes) + mel_output = self.spectral_normalize(mel_output, normalize_fun) + energy = torch.norm(magnitudes, dim=1) + + log_magnitudes = self.spectral_normalize(magnitudes, normalize_fun) + + return mel_output, log_magnitudes, energy diff --git a/picoaudio/audioldm/audio/tools.py b/picoaudio/audioldm/audio/tools.py new file mode 100644 index 0000000000000000000000000000000000000000..d641a982664b6673822c8528a1929c593f011b11 --- /dev/null +++ b/picoaudio/audioldm/audio/tools.py @@ -0,0 +1,85 @@ +import torch +import numpy as np +import torchaudio + + +def get_mel_from_wav(audio, _stft): + audio = torch.clip(torch.FloatTensor(audio).unsqueeze(0), -1, 1) + audio = torch.autograd.Variable(audio, 
def pad_wav(waveform, segment_length):
    """Truncate or zero-pad ``waveform`` along its last axis to ``segment_length``.

    Args:
        waveform: NumPy array whose last axis is the sample axis.
        segment_length: target number of samples, or ``None`` to return the
            input unchanged.

    Returns:
        The input itself when no change is needed; a view truncated on the last
        axis when it is too long; otherwise a new zero-filled float array of
        shape ``(1, segment_length)`` with the samples copied to the front.

    Raises:
        AssertionError: if the waveform has 100 samples or fewer.
    """
    waveform_length = waveform.shape[-1]
    assert waveform_length > 100, "Waveform is too short, %s" % waveform_length
    if segment_length is None or waveform_length == segment_length:
        return waveform
    if waveform_length > segment_length:
        # Slice the sample (last) axis. Slicing axis 0 would be a no-op for the
        # (1, T) arrays produced by read_wav_file and leave them untruncated.
        return waveform[..., :segment_length]
    temp_wav = np.zeros((1, segment_length))
    temp_wav[:, :waveform_length] = waveform
    return temp_wav
class CLAPAudioEmbeddingClassifierFreev2(nn.Module):
    """Frozen CLAP wrapper that embeds either audio or text, selected by ``embed_mode``.

    The wrapped model is created through ``create_model``, has gradients
    disabled on every parameter and is put in eval mode. ``forward`` randomly
    replaces individual embeddings with the embedding of the empty string with
    probability ``unconditional_prob``.
    """

    def __init__(
        self,
        pretrained_path="",
        key="class",
        sampling_rate=16000,
        embed_mode="audio",
        amodel="HTSAT-tiny",
        unconditional_prob=0.1,
        random_mute=False,
        max_random_mute_portion=0.5,
        training_mode=True,
    ):
        super().__init__()

        self.key = key
        self.device = "cpu"
        self.precision = "fp32"
        self.amodel = amodel  # or 'PANN-14'
        self.tmodel = "roberta"  # the best text encoder in our training
        self.enable_fusion = False  # False if you do not want to use the fusion model
        self.fusion_type = "aff_2d"
        self.pretrained = pretrained_path
        self.embed_mode = embed_mode
        self.embed_mode_orig = embed_mode
        self.sampling_rate = sampling_rate
        self.unconditional_prob = unconditional_prob
        self.random_mute = random_mute
        self.tokenize = RobertaTokenizer.from_pretrained("roberta-base")
        self.max_random_mute_portion = max_random_mute_portion
        self.training_mode = training_mode
        self.model, self.model_cfg = create_model(
            self.amodel,
            self.tmodel,
            self.pretrained,
            precision=self.precision,
            device=self.device,
            enable_fusion=self.enable_fusion,
            fusion_type=self.fusion_type,
        )
        for p in self.model.parameters():
            p.requires_grad = False

        self.model.eval()

    def get_unconditional_condition(self, batchsize):
        """Return the empty-prompt text embedding repeated ``batchsize`` times.

        Also caches the single embedding on ``self.unconditional_token``.
        """
        self.unconditional_token = self.model.get_text_embedding(
            self.tokenizer(["", ""])
        )[0:1]
        return torch.cat([self.unconditional_token.unsqueeze(0)] * batchsize, dim=0)

    def batch_to_list(self, batch):
        """Split ``batch`` along its first dimension into a Python list."""
        return [batch[i] for i in range(batch.size(0))]

    def make_decision(self, probability):
        """Return ``True`` with the given probability (uniform draw in [0, 1))."""
        return float(torch.rand(1)) < probability

    def random_uniform(self, start, end):
        """Draw a float uniformly between ``start`` and ``end``."""
        val = torch.rand(1).item()
        return start + (end - start) * val

    def _random_mute(self, waveform):
        """Zero a random contiguous span of each waveform, in place.

        The span length is at most ``max_random_mute_portion`` of the last
        dimension; the (mutated) input tensor is returned.
        """
        # waveform: [bs, t-steps]
        t_steps = waveform.size(-1)
        for i in range(waveform.size(0)):
            mute_size = int(
                self.random_uniform(0, end=int(t_steps * self.max_random_mute_portion))
            )
            mute_start = int(self.random_uniform(0, t_steps - mute_size))
            waveform[i, mute_start : mute_start + mute_size] = 0
        return waveform

    def cos_similarity(self, waveform, text):
        """Return the cosine similarity between audio and text embeddings.

        Note: this switches ``self.embed_mode`` and leaves it set to ``"text"``
        afterwards, and moves ``waveform`` to CUDA before embedding it.
        """
        # waveform: [bs, t_steps]
        with torch.no_grad():
            self.embed_mode = "audio"
            audio_emb = self(waveform.cuda())
            self.embed_mode = "text"
            text_emb = self(text)
            # Keep only the similarity tensor. Previously a
            # (similarity, audio_emb, text_emb) tuple was bound here and the
            # following `.squeeze()` on that tuple raised AttributeError.
            similarity = F.cosine_similarity(audio_emb, text_emb, dim=2)
            return similarity.squeeze()

    def forward(self, batch, key=None):
        """Embed ``batch`` as audio or text according to ``self.embed_mode``.

        Returns a detached tensor with a singleton dimension inserted at
        position 1 (``[bs, 1, 512]`` per the original comment). ``key`` is
        accepted but not used.

        Raises:
            ValueError: if ``self.embed_mode`` is neither ``"audio"`` nor ``"text"``.
        """
        # If you want this conditioner to be unconditional, set self.unconditional_prob = 1.0
        # If you want this conditioner to be fully conditional, set self.unconditional_prob = 0.0
        if self.model.training and not self.training_mode:
            print(
                "The pretrained CLAP model should always be in eval mode. Reloading model just in case you change the parameters."
            )
            self.model, self.model_cfg = create_model(
                self.amodel,
                self.tmodel,
                self.pretrained,
                precision=self.precision,
                device="cuda",
                enable_fusion=self.enable_fusion,
                fusion_type=self.fusion_type,
            )
            for p in self.model.parameters():
                p.requires_grad = False
            self.model.eval()

        # the 'fusion' truncate mode can be changed to 'rand_trunc' if run in unfusion mode
        if self.embed_mode == "audio":
            with torch.no_grad():
                audio_dict_list = []
                assert (
                    self.sampling_rate == 16000
                ), "We only support 16000 sampling rate"
                if self.random_mute:
                    batch = self._random_mute(batch)
                # batch: [bs, 1, t-samples]
                batch = torchaudio.functional.resample(
                    batch, orig_freq=self.sampling_rate, new_freq=48000
                )
                for waveform in self.batch_to_list(batch):
                    audio_dict = {}
                    audio_dict = get_audio_features(
                        audio_dict,
                        waveform,
                        480000,
                        data_truncating="fusion",
                        data_filling="repeatpad",
                        audio_cfg=self.model_cfg["audio_cfg"],
                    )
                    audio_dict_list.append(audio_dict)
                # [bs, 512]
                embed = self.model.get_audio_embedding(audio_dict_list)
        elif self.embed_mode == "text":
            with torch.no_grad():
                # the 'fusion' truncate mode can be changed to 'rand_trunc' if run in unfusion mode
                text_data = self.tokenizer(batch)
                embed = self.model.get_text_embedding(text_data)
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError("Unsupported embed_mode: %r" % (self.embed_mode,))

        embed = embed.unsqueeze(1)
        self.unconditional_token = self.model.get_text_embedding(
            self.tokenizer(["", ""])
        )[0:1]

        # Classifier-free-guidance style dropout: swap individual items for the
        # empty-prompt embedding.
        for i in range(embed.size(0)):
            if self.make_decision(self.unconditional_prob):
                embed[i] = self.unconditional_token

        # [bs, 1, 512]
        return embed.detach()

    def tokenizer(self, text):
        """Tokenize ``text`` with the RoBERTa tokenizer, padded/truncated to 512 tokens.

        Returns a dict of PyTorch tensors with any leading singleton dimension
        squeezed away.
        """
        result = self.tokenize(
            text,
            padding="max_length",
            truncation=True,
            max_length=512,
            return_tensors="pt",
        )
        return {k: v.squeeze(0) for k, v in result.items()}
+ encoded_input = tokenizer(text, return_tensors="pt") + output = model(**encoded_input) + return output + + +from transformers import RobertaTokenizer, RobertaModel + +tokenizer = RobertaTokenizer.from_pretrained("roberta-base") +model = RobertaModel.from_pretrained("roberta-base") +text = "Replace me by any text you'd like." + + +def Roberta_embeddings(text): + # text = "Replace me by any text you'd like." + encoded_input = tokenizer(text, return_tensors="pt") + output = model(**encoded_input) + return output + + +from transformers import BartTokenizer, BartModel + +tokenizer = BartTokenizer.from_pretrained("facebook/bart-base") +model = BartModel.from_pretrained("facebook/bart-base") +text = "Replace me by any text you'd like." + + +def bart_embeddings(text): + # text = "Replace me by any text you'd like." + encoded_input = tokenizer(text, return_tensors="pt") + output = model(**encoded_input) + return output diff --git a/picoaudio/audioldm/clap/open_clip/bpe_simple_vocab_16e6.txt.gz b/picoaudio/audioldm/clap/open_clip/bpe_simple_vocab_16e6.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..36a15856e00a06a9fbed8cdd34d2393fea4a3113 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/bpe_simple_vocab_16e6.txt.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:924691ac288e54409236115652ad4aa250f48203de50a9e4722a6ecd48d6804a +size 1356917 diff --git a/picoaudio/audioldm/clap/open_clip/factory.py b/picoaudio/audioldm/clap/open_clip/factory.py new file mode 100644 index 0000000000000000000000000000000000000000..64d5368bf4f14bd9592472de73d6162a93b16d73 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/factory.py @@ -0,0 +1,279 @@ +import json +import logging +import os +import pathlib +import re +from copy import deepcopy +from pathlib import Path + +import torch + +from .model import CLAP, convert_weights_to_fp16 +from .openai import load_openai_model +from .pretrained import get_pretrained_url, 
def _rescan_model_configs():
    """Rebuild the ``_MODEL_CONFIGS`` registry from ``_MODEL_CONFIG_PATHS``.

    Every registered path that is a ``.json`` file, or a directory containing
    ``.json`` files, is scanned. A file is registered under its stem when it
    holds the ``embed_dim``, ``audio_cfg`` and ``text_cfg`` keys. Files whose
    name starts with a dot are skipped. The registry is finally re-created in
    natural (digit-aware) key order.
    """
    global _MODEL_CONFIGS

    suffixes = (".json",)
    required_keys = ("embed_dim", "audio_cfg", "text_cfg")

    # Collect candidate files from explicit file entries and directory globs.
    candidates = []
    for location in _MODEL_CONFIG_PATHS:
        if location.is_file() and location.suffix in suffixes:
            candidates.append(location)
        elif location.is_dir():
            for suffix in suffixes:
                candidates.extend(location.glob(f"*{suffix}"))

    for cfg_file in candidates:
        is_hidden = os.path.basename(cfg_file)[0] == "."
        if is_hidden:
            continue

        with open(cfg_file, "r") as handle:
            cfg = json.load(handle)
        if all(name in cfg for name in required_keys):
            _MODEL_CONFIGS[cfg_file.stem] = cfg

    # Re-insert entries so that iteration follows natural key order.
    ordered_names = sorted(_MODEL_CONFIGS, key=_natural_key)
    _MODEL_CONFIGS = {name: _MODEL_CONFIGS[name] for name in ordered_names}
def create_model(
    amodel_name: str,
    tmodel_name: str,
    pretrained: str = "",
    precision: str = "fp32",
    device: torch.device = torch.device("cpu"),
    jit: bool = False,
    force_quick_gelu: bool = False,
    openai_model_cache_dir: str = os.path.expanduser(f"{CACHE_DIR}/clip"),
    skip_params=True,
    pretrained_audio: str = "",
    pretrained_text: str = "",
    enable_fusion: bool = False,
    fusion_type: str = "None",
):
    """Build a CLAP model from a registered config and optionally load weights.

    Args:
        amodel_name: audio-model config name; ``/`` is replaced by ``-`` before
            it is looked up in ``_MODEL_CONFIGS``.
        tmodel_name: written into ``model_cfg["text_cfg"]["model_type"]``.
        pretrained: ``"openai"`` (case-insensitive) to load through
            ``load_openai_model``; otherwise a pretrained tag resolved with
            ``get_pretrained_url`` or a path to an existing checkpoint file.
            Empty to skip loading full-model weights.
        precision: ``"amp"``/``"fp32"`` keep float weights on the OpenAI path;
            ``"fp16"`` converts the weights and requires a non-CPU device.
        device: target device; a ``torch.device`` or a device string.
        jit: script the model with ``torch.jit.script`` before returning.
        force_quick_gelu: set ``model_cfg["quick_gelu"] = True`` (non-OpenAI path).
        openai_model_cache_dir: download/cache directory for checkpoints.
        skip_params: accepted but not used by this function.
        pretrained_audio: path of an audio-branch checkpoint (PANN or HTSAT
            flavours recognised by file name), loaded non-strictly.
        pretrained_text: accepted but not used by this function.
        enable_fusion: stored in the model config / passed to the OpenAI loader.
        fusion_type: stored in the model config / passed to the OpenAI loader.

    Returns:
        ``(model, model_cfg)``.

    Raises:
        RuntimeError: unknown model config, or pretrained weights not found.
        ValueError: unrecognised audio checkpoint or audio encoder family.
    """
    amodel_name = amodel_name.replace(
        "/", "-"
    )  # for callers using old naming with / in ViT names
    pretrained_orig = pretrained
    pretrained = pretrained.lower()

    # Both loading paths start from the same registry lookup.
    if amodel_name not in _MODEL_CONFIGS:
        logging.error(
            f"Model config for {amodel_name} not found; available models {list_models()}."
        )
        raise RuntimeError(f"Model config for {amodel_name} not found.")
    logging.info(f"Loading {amodel_name} model config.")
    model_cfg = deepcopy(_MODEL_CONFIGS[amodel_name])

    if pretrained == "openai":
        logging.info("Loading pretrained ViT-B-16 text encoder from OpenAI.")
        # Hard Code in model name
        model_cfg["text_cfg"]["model_type"] = tmodel_name
        model = load_openai_model(
            "ViT-B-16",
            model_cfg,
            device=device,
            jit=jit,
            cache_dir=openai_model_cache_dir,
            enable_fusion=enable_fusion,
            fusion_type=fusion_type,
        )
        # See https://discuss.pytorch.org/t/valueerror-attemting-to-unscale-fp16-gradients/81372
        if precision == "amp" or precision == "fp32":
            model = model.float()
    else:
        if force_quick_gelu:
            # override for use of QuickGELU on non-OpenAI transformer models
            model_cfg["quick_gelu"] = True

        model_cfg["text_cfg"]["model_type"] = tmodel_name
        model_cfg["enable_fusion"] = enable_fusion
        model_cfg["fusion_type"] = fusion_type
        model = CLAP(**model_cfg)

        if pretrained:
            checkpoint_path = ""
            url = get_pretrained_url(amodel_name, pretrained)
            if url:
                checkpoint_path = download_pretrained(url, root=openai_model_cache_dir)
            elif os.path.exists(pretrained_orig):
                # Use the caller's original spelling: `pretrained` was lower-cased.
                checkpoint_path = pretrained_orig
            if checkpoint_path:
                logging.info(
                    f"Loading pretrained {amodel_name}-{tmodel_name} weights ({pretrained})."
                )
                ckpt = load_state_dict(checkpoint_path, skip_params=True)
                model.load_state_dict(ckpt)
            else:
                logging.warning(
                    f"Pretrained weights ({pretrained}) not found for model {amodel_name}."
                )
                raise RuntimeError(
                    f"Pretrained weights ({pretrained}) not found for model {amodel_name}."
                )

        if pretrained_audio:
            if amodel_name.startswith("PANN"):
                if "Cnn14_mAP" in pretrained_audio:  # official checkpoint
                    audio_ckpt = torch.load(pretrained_audio, map_location="cpu")
                    audio_ckpt = audio_ckpt["model"]
                    keys = list(audio_ckpt.keys())
                    for key in keys:
                        if (
                            "spectrogram_extractor" not in key
                            and "logmel_extractor" not in key
                        ):
                            v = audio_ckpt.pop(key)
                            audio_ckpt["audio_branch." + key] = v
                elif os.path.basename(pretrained_audio).startswith(
                    "PANN"
                ):  # checkpoint trained via HTSAT codebase
                    audio_ckpt = torch.load(pretrained_audio, map_location="cpu")
                    audio_ckpt = audio_ckpt["state_dict"]
                    keys = list(audio_ckpt.keys())
                    for key in keys:
                        if key.startswith("sed_model"):
                            v = audio_ckpt.pop(key)
                            # key[10:] drops the "sed_model." prefix
                            audio_ckpt["audio_branch." + key[10:]] = v
                elif os.path.basename(pretrained_audio).startswith(
                    "finetuned"
                ):  # checkpoint trained via linear probe codebase
                    audio_ckpt = torch.load(pretrained_audio, map_location="cpu")
                else:
                    raise ValueError("Unknown audio checkpoint")
            elif amodel_name.startswith("HTSAT"):
                if "HTSAT_AudioSet_Saved" in pretrained_audio:  # official checkpoint
                    audio_ckpt = torch.load(pretrained_audio, map_location="cpu")
                    audio_ckpt = audio_ckpt["state_dict"]
                    keys = list(audio_ckpt.keys())
                    for key in keys:
                        if key.startswith("sed_model") and (
                            "spectrogram_extractor" not in key
                            and "logmel_extractor" not in key
                        ):
                            v = audio_ckpt.pop(key)
                            audio_ckpt["audio_branch." + key[10:]] = v
                elif os.path.basename(pretrained_audio).startswith(
                    "HTSAT"
                ):  # checkpoint trained via HTSAT codebase
                    audio_ckpt = torch.load(pretrained_audio, map_location="cpu")
                    audio_ckpt = audio_ckpt["state_dict"]
                    keys = list(audio_ckpt.keys())
                    for key in keys:
                        if key.startswith("sed_model"):
                            v = audio_ckpt.pop(key)
                            audio_ckpt["audio_branch." + key[10:]] = v
                elif os.path.basename(pretrained_audio).startswith(
                    "finetuned"
                ):  # checkpoint trained via linear probe codebase
                    audio_ckpt = torch.load(pretrained_audio, map_location="cpu")
                else:
                    raise ValueError("Unknown audio checkpoint")
            else:
                # A bare string cannot be raised; use a real exception type.
                raise ValueError(
                    "this audio encoder pretrained checkpoint is not support"
                )

            model.load_state_dict(audio_ckpt, strict=False)
            logging.info(
                f"Loading pretrained {amodel_name} weights ({pretrained_audio})."
            )
            param_names = [n for n, p in model.named_parameters()]
            for n in param_names:
                print(n, "\t", "Loaded" if n in audio_ckpt else "Unloaded")

    model.to(device=device)
    if precision == "fp16":
        # Callers may pass a device string (e.g. "cpu"/"cuda"); normalise it
        # before reading `.type`.
        assert torch.device(device).type != "cpu"
        convert_weights_to_fp16(model)

    if jit:
        model = torch.jit.script(model)

    return model, model_cfg
def _fusion_layer_types(kind):
    """Return the ``(conv, norm, pool)`` layer classes for ``"1D"`` or ``"2D"``.

    Raises:
        ValueError: if ``kind`` is neither ``"1D"`` nor ``"2D"``.
    """
    if kind == "1D":
        return nn.Conv1d, nn.BatchNorm1d, nn.AdaptiveAvgPool1d
    if kind == "2D":
        return nn.Conv2d, nn.BatchNorm2d, nn.AdaptiveAvgPool2d
    raise ValueError(f"the type is not supported: {kind!r}")


def _attention_branch(channels, inter_channels, conv, norm, pool=None):
    """Build one bottleneck attention branch (1x1 conv -> norm -> ReLU -> 1x1 conv -> norm).

    When ``pool`` is given, an adaptive average pool to size 1 is prepended,
    which turns the branch into the "global" variant. The layer order (and
    therefore the ``nn.Sequential`` indices used in state-dict keys) is fixed.
    """
    layers = [] if pool is None else [pool(1)]
    layers += [
        conv(channels, inter_channels, kernel_size=1, stride=1, padding=0),
        norm(inter_channels),
        nn.ReLU(inplace=True),
        conv(inter_channels, channels, kernel_size=1, stride=1, padding=0),
        norm(channels),
    ]
    return nn.Sequential(*layers)


class DAF(nn.Module):
    """
    DirectAddFuse: fuse two feature maps by plain addition.
    """

    def __init__(self):
        super(DAF, self).__init__()

    def forward(self, x, residual):
        return x + residual


class iAFF(nn.Module):
    """
    Multi-feature fusion, iAFF (two attention stages).

    Args:
        channels: Number of channels of both inputs.
        r: Channel reduction ratio of the bottleneck (``channels // r``).
        type: ``"1D"`` or ``"2D"``; selects Conv/BatchNorm/AdaptiveAvgPool dimensionality.

    Raises:
        ValueError: if ``type`` is not ``"1D"`` or ``"2D"``.
    """

    def __init__(self, channels=64, r=4, type="2D"):
        super(iAFF, self).__init__()
        inter_channels = int(channels // r)
        conv, norm, pool = _fusion_layer_types(type)

        # first-stage local attention
        self.local_att = _attention_branch(channels, inter_channels, conv, norm)
        # first-stage global attention
        self.global_att = _attention_branch(channels, inter_channels, conv, norm, pool)
        # second-stage local attention
        self.local_att2 = _attention_branch(channels, inter_channels, conv, norm)
        # second-stage global attention
        self.global_att2 = _attention_branch(channels, inter_channels, conv, norm, pool)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x, residual):
        """Fuse ``x`` and ``residual``; the result has the shape of ``x``."""
        flag = False
        xa = x + residual
        if xa.size(0) == 1:
            # Duplicate a single-sample batch before the attention branches
            # (presumably so BatchNorm in training mode sees more than one
            # value per channel after global pooling); undone at the end.
            xa = torch.cat([xa, xa], dim=0)
            flag = True
        xl = self.local_att(xa)
        xg = self.global_att(xa)
        xlg = xl + xg
        wei = self.sigmoid(xlg)
        xi = x * wei + residual * (1 - wei)

        xl2 = self.local_att2(xi)
        # NOTE(review): the second stage reuses ``global_att``; ``global_att2``
        # is constructed but never called. Kept as-is on purpose, since
        # switching branches would change the output of already-trained
        # weights -- confirm before changing.
        xg2 = self.global_att(xi)
        xlg2 = xl2 + xg2
        wei2 = self.sigmoid(xlg2)
        xo = x * wei2 + residual * (1 - wei2)
        if flag:
            xo = xo[0].unsqueeze(0)
        return xo


class AFF(nn.Module):
    """
    Multi-feature fusion, AFF (single attention stage).

    Args:
        channels: Number of channels of both inputs.
        r: Channel reduction ratio of the bottleneck (``channels // r``).
        type: ``"1D"`` or ``"2D"``; selects Conv/BatchNorm/AdaptiveAvgPool dimensionality.

    Raises:
        ValueError: if ``type`` is not ``"1D"`` or ``"2D"``.
    """

    def __init__(self, channels=64, r=4, type="2D"):
        super(AFF, self).__init__()
        inter_channels = int(channels // r)
        conv, norm, pool = _fusion_layer_types(type)

        self.local_att = _attention_branch(channels, inter_channels, conv, norm)
        self.global_att = _attention_branch(channels, inter_channels, conv, norm, pool)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x, residual):
        """Fuse ``x`` and ``residual``; the result has the shape of ``x``."""
        flag = False
        xa = x + residual
        if xa.size(0) == 1:
            # Same single-sample duplication as in iAFF; undone at the end.
            xa = torch.cat([xa, xa], dim=0)
            flag = True
        xl = self.local_att(xa)
        xg = self.global_att(xa)
        xlg = xl + xg
        wei = self.sigmoid(xlg)
        xo = 2 * x * wei + 2 * residual * (1 - wei)
        if flag:
            xo = xo[0].unsqueeze(0)
        return xo
# from PyTorch internals
def _ntuple(n):
    """Return a converter that expands a scalar into an ``n``-tuple.

    Iterables other than strings are passed through unchanged; anything else
    (including a string) is repeated ``n`` times.
    """

    def parse(x):
        # A str is Iterable but is a scalar here, so it must be repeated too.
        if isinstance(x, collections.abc.Iterable) and not isinstance(x, str):
            return x
        return tuple(repeat(x, n))

    return parse


to_1tuple = _ntuple(1)
to_2tuple = _ntuple(2)
to_3tuple = _ntuple(3)
to_4tuple = _ntuple(4)
to_ntuple = _ntuple


def drop_path(x, drop_prob: float = 0.0, training: bool = False):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
    'survival rate' as the argument.

    Args:
        x: Input tensor; its first dimension is treated as the sample axis.
        drop_prob: Probability of zeroing a whole sample. ``None`` and ``0``
            both mean "disabled".
        training: When False the input is returned untouched.

    Returns:
        ``x`` itself when disabled, otherwise a tensor of the same shape in
        which each sample is either zeroed or scaled by ``1 / (1 - drop_prob)``.
    """
    # ``not drop_prob`` also covers ``None``, which is DropPath's default.
    if not drop_prob or not training:
        return x
    keep_prob = 1 - drop_prob
    # One random draw per sample, broadcast over every remaining dimension,
    # so this works with tensors of any rank, not just 2D ConvNets.
    mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    random_tensor = keep_prob + torch.rand(mask_shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize
    return x.div(keep_prob) * random_tensor


class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)
( + self.fusion_type in ["daf_2d", "aff_2d", "iaff_2d"] + ): + self.mel_conv2d = nn.Conv2d( + in_chans, + embed_dim, + kernel_size=(patch_size[0], patch_size[1] * 3), + stride=(patch_stride[0], patch_stride[1] * 3), + padding=padding, + ) + if self.fusion_type == "daf_2d": + self.fusion_model = DAF() + elif self.fusion_type == "aff_2d": + self.fusion_model = AFF(channels=embed_dim, type="2D") + elif self.fusion_type == "iaff_2d": + self.fusion_model = iAFF(channels=embed_dim, type="2D") + + def forward(self, x, longer_idx=None): + if (self.enable_fusion) and ( + self.fusion_type in ["daf_2d", "aff_2d", "iaff_2d"] + ): + global_x = x[:, 0:1, :, :] + + # global processing + B, C, H, W = global_x.shape + assert ( + H == self.img_size[0] and W == self.img_size[1] + ), f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." + global_x = self.proj(global_x) + TW = global_x.size(-1) + if len(longer_idx) > 0: + # local processing + local_x = x[longer_idx, 1:, :, :].contiguous() + B, C, H, W = local_x.shape + local_x = local_x.view(B * C, 1, H, W) + local_x = self.mel_conv2d(local_x) + local_x = local_x.view( + B, C, local_x.size(1), local_x.size(2), local_x.size(3) + ) + local_x = local_x.permute((0, 2, 3, 1, 4)).contiguous().flatten(3) + TB, TC, TH, _ = local_x.size() + if local_x.size(-1) < TW: + local_x = torch.cat( + [ + local_x, + torch.zeros( + (TB, TC, TH, TW - local_x.size(-1)), + device=global_x.device, + ), + ], + dim=-1, + ) + else: + local_x = local_x[:, :, :, :TW] + + global_x[longer_idx] = self.fusion_model(global_x[longer_idx], local_x) + x = global_x + else: + B, C, H, W = x.shape + assert ( + H == self.img_size[0] and W == self.img_size[1] + ), f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 
class Mlp(nn.Module):
    """MLP as used in Vision Transformer, MLP-Mixer and related networks

    Two linear layers with an activation in between. The same dropout module
    is applied after the activation and after the second linear layer.
    """

    def __init__(
        self,
        in_features,
        hidden_features=None,
        out_features=None,
        act_layer=nn.GELU,
        drop=0.0,
    ):
        super().__init__()
        # Unset (or zero) sizes fall back to the input width.
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.drop(self.act(self.fc1(x)))
        x = self.drop(self.fc2(x))
        return x


def _no_grad_trunc_normal_(tensor, mean, std, a, b):
    """Fill ``tensor`` in place with a normal(mean, std) truncated to ``[a, b]``; return it."""
    # Cut & paste from PyTorch official master until it's in a few official releases - RW
    # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
    def norm_cdf(x):
        # Computes standard normal cumulative distribution function
        return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0

    if (mean < a - 2 * std) or (mean > b + 2 * std):
        warnings.warn(
            "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
            "The distribution of values may be incorrect.",
            stacklevel=2,
        )

    with torch.no_grad():
        # Values are generated by using a truncated uniform distribution and
        # then using the inverse CDF for the normal distribution.
        # Get upper and lower cdf values
        lower = norm_cdf((a - mean) / std)
        upper = norm_cdf((b - mean) / std)

        # Uniformly fill tensor with values from [lower, upper], then translate to
        # [2*lower-1, 2*upper-1].
        tensor.uniform_(2 * lower - 1, 2 * upper - 1)

        # Use inverse cdf transform for normal distribution to get truncated
        # standard normal
        tensor.erfinv_()

        # Transform to proper mean, std
        tensor.mul_(std * math.sqrt(2.0))
        tensor.add_(mean)

        # Clamp to ensure it's in the proper range
        tensor.clamp_(min=a, max=b)
        return tensor


def trunc_normal_(tensor, mean=0.0, std=1.0, a=-2.0, b=2.0):
    # type: (Tensor, float, float, float, float) -> Tensor
    r"""Fills the input Tensor with values drawn from a truncated
    normal distribution. The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.
    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value
    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    """
    return _no_grad_trunc_normal_(tensor, mean, std, a, b)


def variance_scaling_(tensor, scale=1.0, mode="fan_in", distribution="normal"):
    """Initialise ``tensor`` in place with variance ``scale / denom``.

    Args:
        tensor: Tensor to fill; its fan-in/fan-out are computed by
            ``_calculate_fan_in_and_fan_out``.
        scale: Numerator of the target variance.
        mode: ``"fan_in"``, ``"fan_out"`` or ``"fan_avg"``; selects ``denom``.
        distribution: ``"truncated_normal"``, ``"normal"`` or ``"uniform"``.

    Raises:
        ValueError: on an unknown ``mode`` or ``distribution``.
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    if mode == "fan_in":
        denom = fan_in
    elif mode == "fan_out":
        denom = fan_out
    elif mode == "fan_avg":
        denom = (fan_in + fan_out) / 2
    else:
        # Without this branch ``denom`` would be unbound below.
        raise ValueError(f"invalid mode {mode}")

    variance = scale / denom

    if distribution == "truncated_normal":
        # constant is stddev of standard normal truncated to (-2, 2)
        trunc_normal_(tensor, std=math.sqrt(variance) / 0.87962566103423978)
    elif distribution == "normal":
        tensor.normal_(std=math.sqrt(variance))
    elif distribution == "uniform":
        bound = math.sqrt(3 * variance)
        tensor.uniform_(-bound, bound)
    else:
        raise ValueError(f"invalid distribution {distribution}")


def lecun_normal_(tensor):
    """Fill ``tensor`` in place using fan-in scaling with a truncated normal."""
    variance_scaling_(tensor, mode="fan_in", distribution="truncated_normal")


def window_partition(x, window_size):
    """
    Args:
        x: (B, H, W, C)
        window_size (int): window size
    Returns:
        windows: (num_windows*B, window_size, window_size, C)
    """
    B, H, W, C = x.shape
    tiled = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
    # Bring the two window-grid axes together, then fold them into the batch axis.
    tiled = tiled.permute(0, 1, 3, 2, 4, 5).contiguous()
    return tiled.view(-1, window_size, window_size, C)


def window_reverse(windows, window_size, H, W):
    """
    Args:
        windows: (num_windows*B, window_size, window_size, C)
        window_size (int): Window size
        H (int): Height of image
        W (int): Width of image
    Returns:
        x: (B, H, W, C)
    """
    rows = H // window_size
    cols = W // window_size
    # Integer arithmetic: batch size is the window count divided by windows per image.
    B = windows.shape[0] // (rows * cols)
    x = windows.view(B, rows, cols, window_size, window_size, -1)
    return x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
Default: 0.0 + """ + + def __init__( + self, + dim, + window_size, + num_heads, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + ): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads) + ) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = ( + coords_flatten[:, :, None] - coords_flatten[:, None, :] + ) # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute( + 1, 2, 0 + ).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table, std=0.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """ + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = ( + self.qkv(x) + .reshape(B_, N, 3, self.num_heads, C // self.num_heads) + .permute(2, 0, 3, 1, 4) + ) + q, k, v = ( + qkv[0], + qkv[1], + 
class _TransposedBatchNorm1d(nn.Module):
    """BatchNorm1d applied over the last axis of a ``(B, L, C)`` tensor."""

    def __init__(self, dim):
        super().__init__()
        self.bn = nn.BatchNorm1d(dim)

    def forward(self, x):
        # BatchNorm1d normalises dim 1, so move channels there and back.
        return self.bn(x.transpose(1, 2)).transpose(1, 2)


# We use the model based on Swintransformer Block, therefore we can use the swin-transformer pretrained model
class SwinTransformerBlock(nn.Module):
    r"""Swin Transformer Block.
    Args:
        dim (int): Number of input channels.
        input_resolution (tuple[int]): Input resolution.
        num_heads (int): Number of attention heads.
        window_size (int): Window size.
        shift_size (int): Shift size for SW-MSA.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float, optional): Stochastic depth rate. Default: 0.0
        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
        norm_layer (nn.Module, optional): Normalization layer used before attention. Default: nn.LayerNorm
        norm_before_mlp (str, optional): "ln" (LayerNorm) or "bn" (BatchNorm1d over
            channels) for the normalization before the MLP. Default: "ln"
    Raises:
        NotImplementedError: if ``norm_before_mlp`` is neither "ln" nor "bn".
    """

    def __init__(
        self,
        dim,
        input_resolution,
        num_heads,
        window_size=7,
        shift_size=0,
        mlp_ratio=4.0,
        qkv_bias=True,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        norm_before_mlp="ln",
    ):
        super().__init__()
        self.dim = dim
        self.input_resolution = input_resolution
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.mlp_ratio = mlp_ratio
        self.norm_before_mlp = norm_before_mlp
        if min(self.input_resolution) <= self.window_size:
            # if window size is larger than input resolution, we don't partition windows
            self.shift_size = 0
            self.window_size = min(self.input_resolution)
        assert (
            0 <= self.shift_size < self.window_size
        ), "shift_size must in 0-window_size"

        self.norm1 = norm_layer(dim)
        self.attn = WindowAttention(
            dim,
            window_size=to_2tuple(self.window_size),
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )

        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
        if self.norm_before_mlp == "ln":
            self.norm2 = nn.LayerNorm(dim)
        elif self.norm_before_mlp == "bn":
            # A registered module, so its statistics and affine parameters are
            # trained, follow .to()/.eval(), and appear in the state dict;
            # it is still called with a (B, L, C) tensor.
            self.norm2 = _TransposedBatchNorm1d(dim)
        else:
            raise NotImplementedError
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )

        if self.shift_size > 0:
            # calculate attention mask for SW-MSA
            H, W = self.input_resolution
            img_mask = torch.zeros((1, H, W, 1))  # 1 H W 1
            h_slices = (
                slice(0, -self.window_size),
                slice(-self.window_size, -self.shift_size),
                slice(-self.shift_size, None),
            )
            w_slices = (
                slice(0, -self.window_size),
                slice(-self.window_size, -self.shift_size),
                slice(-self.shift_size, None),
            )
            # Label each of the 3x3 regions with its own id.
            cnt = 0
            for h in h_slices:
                for w in w_slices:
                    img_mask[:, h, w, :] = cnt
                    cnt += 1

            mask_windows = window_partition(
                img_mask, self.window_size
            )  # nW, window_size, window_size, 1
            mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
            # Token pairs from different regions get -100 (suppressed after
            # softmax); pairs from the same region get 0.
            attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
            attn_mask = attn_mask.masked_fill(
                attn_mask != 0, float(-100.0)
            ).masked_fill(attn_mask == 0, float(0.0))
        else:
            attn_mask = None

        self.register_buffer("attn_mask", attn_mask)

    def forward(self, x):
        """Run windowed attention plus MLP on ``x`` of shape (B, H*W, C).

        Returns:
            ``(x, attn)``: the block output with the input's shape, and the
            attention tensor returned by the window-attention module.
        """
        H, W = self.input_resolution
        B, L, C = x.shape

        shortcut = x
        x = self.norm1(x)
        x = x.view(B, H, W, C)

        # cyclic shift
        if self.shift_size > 0:
            shifted_x = torch.roll(
                x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)
            )
        else:
            shifted_x = x

        # partition windows
        x_windows = window_partition(
            shifted_x, self.window_size
        )  # nW*B, window_size, window_size, C
        x_windows = x_windows.view(
            -1, self.window_size * self.window_size, C
        )  # nW*B, window_size*window_size, C

        # W-MSA/SW-MSA
        attn_windows, attn = self.attn(
            x_windows, mask=self.attn_mask
        )  # nW*B, window_size*window_size, C

        # merge windows
        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
        shifted_x = window_reverse(attn_windows, self.window_size, H, W)  # B H' W' C

        # reverse cyclic shift
        if self.shift_size > 0:
            x = torch.roll(
                shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)
            )
        else:
            x = shifted_x
        x = x.view(B, H * W, C)

        # FFN
        x = shortcut + self.drop_path(x)
        x = x + self.drop_path(self.mlp(self.norm2(x)))

        return x, attn

    def extra_repr(self):
        return (
            f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, "
            f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}"
        )
class PatchMerging(nn.Module):
    r"""Patch Merging Layer.

    Concatenates every 2x2 neighbourhood of tokens along the channel axis
    (C -> 4*C), normalises, and projects linearly to 2*C.

    Args:
        input_resolution (tuple[int]): Resolution of input feature.
        dim (int): Number of input channels.
        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
    """

    def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
        super().__init__()
        self.input_resolution = input_resolution
        self.dim = dim
        self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
        self.norm = norm_layer(4 * dim)

    def forward(self, x):
        """
        x: B, H*W, C
        returns: B, H/2*W/2, 2*C
        """
        H, W = self.input_resolution
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"
        assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even."

        x = x.view(B, H, W, C)

        # The four members of every 2x2 neighbourhood, each B H/2 W/2 C.
        quadrants = [
            x[:, 0::2, 0::2, :],
            x[:, 1::2, 0::2, :],
            x[:, 0::2, 1::2, :],
            x[:, 1::2, 1::2, :],
        ]
        x = torch.cat(quadrants, -1)  # B H/2 W/2 4*C
        x = x.view(B, -1, 4 * C)  # B H/2*W/2 4*C

        x = self.norm(x)
        x = self.reduction(x)

        return x

    def extra_repr(self):
        return f"input_resolution={self.input_resolution}, dim={self.dim}"


class BasicLayer(nn.Module):
    """A basic Swin Transformer layer for one stage.
    Args:
        dim (int): Number of input channels.
        input_resolution (tuple[int]): Input resolution.
        depth (int): Number of blocks.
        num_heads (int): Number of attention heads.
        window_size (int): Local window size.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float | list[float], optional): Stochastic depth rate; a list
            supplies one rate per block. Default: 0.0
        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
        norm_before_mlp (str, optional): Forwarded to every block. Default: "ln"
    """

    def __init__(
        self,
        dim,
        input_resolution,
        depth,
        num_heads,
        window_size,
        mlp_ratio=4.0,
        qkv_bias=True,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        norm_layer=nn.LayerNorm,
        downsample=None,
        use_checkpoint=False,
        norm_before_mlp="ln",
    ):

        super().__init__()
        self.dim = dim
        self.input_resolution = input_resolution
        self.depth = depth
        self.use_checkpoint = use_checkpoint

        # build blocks
        self.blocks = nn.ModuleList(
            [
                SwinTransformerBlock(
                    dim=dim,
                    input_resolution=input_resolution,
                    num_heads=num_heads,
                    window_size=window_size,
                    # Even blocks use plain windows, odd blocks shifted windows.
                    shift_size=0 if (i % 2 == 0) else window_size // 2,
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    qk_scale=qk_scale,
                    drop=drop,
                    attn_drop=attn_drop,
                    drop_path=drop_path[i]
                    if isinstance(drop_path, list)
                    else drop_path,
                    norm_layer=norm_layer,
                    norm_before_mlp=norm_before_mlp,
                )
                for i in range(depth)
            ]
        )

        # patch merging layer
        if downsample is not None:
            self.downsample = downsample(
                input_resolution, dim=dim, norm_layer=norm_layer
            )
        else:
            self.downsample = None

    def forward(self, x):
        """Run all blocks, then the optional downsample.

        Returns:
            ``(x, attn)``. In training mode ``attn`` is the last block's
            attention; in eval mode it is the mean over all blocks. ``attn``
            is ``None`` when the layer has no blocks.
        """
        attn = None
        attns = []
        for blk in self.blocks:
            if self.use_checkpoint:
                # checkpoint returns whatever the block returns, i.e. the
                # (x, attn) pair, so it has to be unpacked as well.
                x, attn = checkpoint.checkpoint(blk, x)
            else:
                x, attn = blk(x)
            if not self.training:
                attns.append(attn.unsqueeze(0))
        if self.downsample is not None:
            x = self.downsample(x)
        if not self.training and attns:
            attn = torch.cat(attns, dim=0)
            attn = torch.mean(attn, dim=0)
        return x, attn

    def extra_repr(self):
        return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}"
Default 256 + patch_size (int | tuple(int)): Patch size. Default: 4 + path_stride (iot | tuple(int)): Patch Stride for Frequency and Time Axis. Default: 4 + in_chans (int): Number of input image channels. Default: 1 (mono) + num_classes (int): Number of classes for classification head. Default: 527 + embed_dim (int): Patch embedding dimension. Default: 96 + depths (tuple(int)): Depth of each HTSAT-Swin Transformer layer. + num_heads (tuple(int)): Number of attention heads in different layers. + window_size (int): Window size. Default: 8 + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None + drop_rate (float): Dropout rate. Default: 0 + attn_drop_rate (float): Attention dropout rate. Default: 0 + drop_path_rate (float): Stochastic depth rate. Default: 0.1 + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False + patch_norm (bool): If True, add normalization after patch embedding. Default: True + use_checkpoint (bool): Whether to use checkpointing to save memory. 
Default: False + config (module): The configuration Module from config.py + """ + + def __init__( + self, + spec_size=256, + patch_size=4, + patch_stride=(4, 4), + in_chans=1, + num_classes=527, + embed_dim=96, + depths=[2, 2, 6, 2], + num_heads=[4, 8, 16, 32], + window_size=8, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_layer=nn.LayerNorm, + ape=False, + patch_norm=True, + use_checkpoint=False, + norm_before_mlp="ln", + config=None, + enable_fusion=False, + fusion_type="None", + **kwargs, + ): + super(HTSAT_Swin_Transformer, self).__init__() + + self.config = config + self.spec_size = spec_size + self.patch_stride = patch_stride + self.patch_size = patch_size + self.window_size = window_size + self.embed_dim = embed_dim + self.depths = depths + self.ape = ape + self.in_chans = in_chans + self.num_classes = num_classes + self.num_heads = num_heads + self.num_layers = len(self.depths) + self.num_features = int(self.embed_dim * 2 ** (self.num_layers - 1)) + + self.drop_rate = drop_rate + self.attn_drop_rate = attn_drop_rate + self.drop_path_rate = drop_path_rate + + self.qkv_bias = qkv_bias + self.qk_scale = None + + self.patch_norm = patch_norm + self.norm_layer = norm_layer if self.patch_norm else None + self.norm_before_mlp = norm_before_mlp + self.mlp_ratio = mlp_ratio + + self.use_checkpoint = use_checkpoint + + self.enable_fusion = enable_fusion + self.fusion_type = fusion_type + + # process mel-spec ; used only once + self.freq_ratio = self.spec_size // self.config.mel_bins + window = "hann" + center = True + pad_mode = "reflect" + ref = 1.0 + amin = 1e-10 + top_db = None + self.interpolate_ratio = 32 # Downsampled ratio + # Spectrogram extractor + self.spectrogram_extractor = Spectrogram( + n_fft=config.window_size, + hop_length=config.hop_size, + win_length=config.window_size, + window=window, + center=center, + pad_mode=pad_mode, + freeze_parameters=True, + ) + # Logmel feature 
extractor + self.logmel_extractor = LogmelFilterBank( + sr=config.sample_rate, + n_fft=config.window_size, + n_mels=config.mel_bins, + fmin=config.fmin, + fmax=config.fmax, + ref=ref, + amin=amin, + top_db=top_db, + freeze_parameters=True, + ) + # Spec augmenter + self.spec_augmenter = SpecAugmentation( + time_drop_width=64, + time_stripes_num=2, + freq_drop_width=8, + freq_stripes_num=2, + ) # 2 2 + self.bn0 = nn.BatchNorm2d(self.config.mel_bins) + + # split spctrogram into non-overlapping patches + self.patch_embed = PatchEmbed( + img_size=self.spec_size, + patch_size=self.patch_size, + in_chans=self.in_chans, + embed_dim=self.embed_dim, + norm_layer=self.norm_layer, + patch_stride=patch_stride, + enable_fusion=self.enable_fusion, + fusion_type=self.fusion_type, + ) + + num_patches = self.patch_embed.num_patches + patches_resolution = self.patch_embed.grid_size + self.patches_resolution = patches_resolution + + # absolute position embedding + if self.ape: + self.absolute_pos_embed = nn.Parameter( + torch.zeros(1, num_patches, self.embed_dim) + ) + trunc_normal_(self.absolute_pos_embed, std=0.02) + + self.pos_drop = nn.Dropout(p=self.drop_rate) + + # stochastic depth + dpr = [ + x.item() for x in torch.linspace(0, self.drop_path_rate, sum(self.depths)) + ] # stochastic depth decay rule + + # build layers + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = BasicLayer( + dim=int(self.embed_dim * 2**i_layer), + input_resolution=( + patches_resolution[0] // (2**i_layer), + patches_resolution[1] // (2**i_layer), + ), + depth=self.depths[i_layer], + num_heads=self.num_heads[i_layer], + window_size=self.window_size, + mlp_ratio=self.mlp_ratio, + qkv_bias=self.qkv_bias, + qk_scale=self.qk_scale, + drop=self.drop_rate, + attn_drop=self.attn_drop_rate, + drop_path=dpr[ + sum(self.depths[:i_layer]) : sum(self.depths[: i_layer + 1]) + ], + norm_layer=self.norm_layer, + downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, + 
use_checkpoint=use_checkpoint, + norm_before_mlp=self.norm_before_mlp, + ) + self.layers.append(layer) + + self.norm = self.norm_layer(self.num_features) + self.avgpool = nn.AdaptiveAvgPool1d(1) + self.maxpool = nn.AdaptiveMaxPool1d(1) + + SF = ( + self.spec_size + // (2 ** (len(self.depths) - 1)) + // self.patch_stride[0] + // self.freq_ratio + ) + self.tscam_conv = nn.Conv2d( + in_channels=self.num_features, + out_channels=self.num_classes, + kernel_size=(SF, 3), + padding=(0, 1), + ) + self.head = nn.Linear(num_classes, num_classes) + + if (self.enable_fusion) and ( + self.fusion_type in ["daf_1d", "aff_1d", "iaff_1d"] + ): + self.mel_conv1d = nn.Sequential( + nn.Conv1d(64, 64, kernel_size=5, stride=3, padding=2), + nn.BatchNorm1d(64), + ) + if self.fusion_type == "daf_1d": + self.fusion_model = DAF() + elif self.fusion_type == "aff_1d": + self.fusion_model = AFF(channels=64, type="1D") + elif self.fusion_type == "iaff_1d": + self.fusion_model = iAFF(channels=64, type="1D") + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + @torch.jit.ignore + def no_weight_decay(self): + return {"absolute_pos_embed"} + + @torch.jit.ignore + def no_weight_decay_keywords(self): + return {"relative_position_bias_table"} + + def forward_features(self, x, longer_idx=None): + # A deprecated optimization for using a hierarchical output from different blocks + + frames_num = x.shape[2] + x = self.patch_embed(x, longer_idx=longer_idx) + if self.ape: + x = x + self.absolute_pos_embed + x = self.pos_drop(x) + for i, layer in enumerate(self.layers): + x, attn = layer(x) + # for x + x = self.norm(x) + B, N, C = x.shape + SF = frames_num // (2 ** (len(self.depths) - 1)) // self.patch_stride[0] + ST = frames_num // (2 ** 
(len(self.depths) - 1)) // self.patch_stride[1] + x = x.permute(0, 2, 1).contiguous().reshape(B, C, SF, ST) + B, C, F, T = x.shape + # group 2D CNN + c_freq_bin = F // self.freq_ratio + x = x.reshape(B, C, F // c_freq_bin, c_freq_bin, T) + x = x.permute(0, 1, 3, 2, 4).contiguous().reshape(B, C, c_freq_bin, -1) + # get latent_output + fine_grained_latent_output = torch.mean(x, dim=2) + fine_grained_latent_output = interpolate( + fine_grained_latent_output.permute(0, 2, 1).contiguous(), + 8 * self.patch_stride[1], + ) + + latent_output = self.avgpool(torch.flatten(x, 2)) + latent_output = torch.flatten(latent_output, 1) + + # display the attention map, if needed + + x = self.tscam_conv(x) + x = torch.flatten(x, 2) # B, C, T + + fpx = interpolate( + torch.sigmoid(x).permute(0, 2, 1).contiguous(), 8 * self.patch_stride[1] + ) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + + output_dict = { + "framewise_output": fpx, # already sigmoided + "clipwise_output": torch.sigmoid(x), + "fine_grained_embedding": fine_grained_latent_output, + "embedding": latent_output, + } + + return output_dict + + def crop_wav(self, x, crop_size, spe_pos=None): + time_steps = x.shape[2] + tx = torch.zeros(x.shape[0], x.shape[1], crop_size, x.shape[3]).to(x.device) + for i in range(len(x)): + if spe_pos is None: + crop_pos = random.randint(0, time_steps - crop_size - 1) + else: + crop_pos = spe_pos + tx[i][0] = x[i, 0, crop_pos : crop_pos + crop_size, :] + return tx + + # Reshape the wavform to a img size, if you want to use the pretrained swin transformer model + def reshape_wav2img(self, x): + B, C, T, F = x.shape + target_T = int(self.spec_size * self.freq_ratio) + target_F = self.spec_size // self.freq_ratio + assert ( + T <= target_T and F <= target_F + ), "the wav size should less than or equal to the swin input size" + # to avoid bicubic zero error + if T < target_T: + x = nn.functional.interpolate( + x, (target_T, x.shape[3]), mode="bicubic", align_corners=True + ) + if F < 
target_F: + x = nn.functional.interpolate( + x, (x.shape[2], target_F), mode="bicubic", align_corners=True + ) + x = x.permute(0, 1, 3, 2).contiguous() + x = x.reshape( + x.shape[0], + x.shape[1], + x.shape[2], + self.freq_ratio, + x.shape[3] // self.freq_ratio, + ) + # print(x.shape) + x = x.permute(0, 1, 3, 2, 4).contiguous() + x = x.reshape(x.shape[0], x.shape[1], x.shape[2] * x.shape[3], x.shape[4]) + return x + + # Repeat the wavform to a img size, if you want to use the pretrained swin transformer model + def repeat_wat2img(self, x, cur_pos): + B, C, T, F = x.shape + target_T = int(self.spec_size * self.freq_ratio) + target_F = self.spec_size // self.freq_ratio + assert ( + T <= target_T and F <= target_F + ), "the wav size should less than or equal to the swin input size" + # to avoid bicubic zero error + if T < target_T: + x = nn.functional.interpolate( + x, (target_T, x.shape[3]), mode="bicubic", align_corners=True + ) + if F < target_F: + x = nn.functional.interpolate( + x, (x.shape[2], target_F), mode="bicubic", align_corners=True + ) + x = x.permute(0, 1, 3, 2).contiguous() # B C F T + x = x[:, :, :, cur_pos : cur_pos + self.spec_size] + x = x.repeat(repeats=(1, 1, 4, 1)) + return x + + def forward( + self, x: torch.Tensor, mixup_lambda=None, infer_mode=False, device=None + ): # out_feat_keys: List[str] = None): + + if self.enable_fusion and x["longer"].sum() == 0: + # if no audio is longer than 10s, then randomly select one audio to be longer + x["longer"][torch.randint(0, x["longer"].shape[0], (1,))] = True + + if not self.enable_fusion: + x = x["waveform"].to(device=device, non_blocking=True) + x = self.spectrogram_extractor(x) # (batch_size, 1, time_steps, freq_bins) + x = self.logmel_extractor(x) # (batch_size, 1, time_steps, mel_bins) + x = x.transpose(1, 3) + x = self.bn0(x) + x = x.transpose(1, 3) + if self.training: + x = self.spec_augmenter(x) + + if self.training and mixup_lambda is not None: + x = do_mixup(x, mixup_lambda) + + x = 
self.reshape_wav2img(x) + output_dict = self.forward_features(x) + else: + longer_list = x["longer"].to(device=device, non_blocking=True) + x = x["mel_fusion"].to(device=device, non_blocking=True) + x = x.transpose(1, 3) + x = self.bn0(x) + x = x.transpose(1, 3) + longer_list_idx = torch.where(longer_list)[0] + if self.fusion_type in ["daf_1d", "aff_1d", "iaff_1d"]: + new_x = x[:, 0:1, :, :].clone().contiguous() + if len(longer_list_idx) > 0: + # local processing + fusion_x_local = x[longer_list_idx, 1:, :, :].clone().contiguous() + FB, FC, FT, FF = fusion_x_local.size() + fusion_x_local = fusion_x_local.view(FB * FC, FT, FF) + fusion_x_local = torch.permute( + fusion_x_local, (0, 2, 1) + ).contiguous() + fusion_x_local = self.mel_conv1d(fusion_x_local) + fusion_x_local = fusion_x_local.view( + FB, FC, FF, fusion_x_local.size(-1) + ) + fusion_x_local = ( + torch.permute(fusion_x_local, (0, 2, 1, 3)) + .contiguous() + .flatten(2) + ) + if fusion_x_local.size(-1) < FT: + fusion_x_local = torch.cat( + [ + fusion_x_local, + torch.zeros( + (FB, FF, FT - fusion_x_local.size(-1)), + device=device, + ), + ], + dim=-1, + ) + else: + fusion_x_local = fusion_x_local[:, :, :FT] + # 1D fusion + new_x = new_x.squeeze(1).permute((0, 2, 1)).contiguous() + new_x[longer_list_idx] = self.fusion_model( + new_x[longer_list_idx], fusion_x_local + ) + x = new_x.permute((0, 2, 1)).contiguous()[:, None, :, :] + else: + x = new_x + + elif self.fusion_type in ["daf_2d", "aff_2d", "iaff_2d", "channel_map"]: + x = x # no change + + if self.training: + x = self.spec_augmenter(x) + if self.training and mixup_lambda is not None: + x = do_mixup(x, mixup_lambda) + + x = self.reshape_wav2img(x) + output_dict = self.forward_features(x, longer_idx=longer_list_idx) + + # if infer_mode: + # # in infer mode. 
we need to handle different length audio input + # frame_num = x.shape[2] + # target_T = int(self.spec_size * self.freq_ratio) + # repeat_ratio = math.floor(target_T / frame_num) + # x = x.repeat(repeats=(1,1,repeat_ratio,1)) + # x = self.reshape_wav2img(x) + # output_dict = self.forward_features(x) + # else: + # if x.shape[2] > self.freq_ratio * self.spec_size: + # if self.training: + # x = self.crop_wav(x, crop_size=self.freq_ratio * self.spec_size) + # x = self.reshape_wav2img(x) + # output_dict = self.forward_features(x) + # else: + # # Change: Hard code here + # overlap_size = (x.shape[2] - 1) // 4 + # output_dicts = [] + # crop_size = (x.shape[2] - 1) // 2 + # for cur_pos in range(0, x.shape[2] - crop_size - 1, overlap_size): + # tx = self.crop_wav(x, crop_size = crop_size, spe_pos = cur_pos) + # tx = self.reshape_wav2img(tx) + # output_dicts.append(self.forward_features(tx)) + # clipwise_output = torch.zeros_like(output_dicts[0]["clipwise_output"]).float().to(x.device) + # framewise_output = torch.zeros_like(output_dicts[0]["framewise_output"]).float().to(x.device) + # for d in output_dicts: + # clipwise_output += d["clipwise_output"] + # framewise_output += d["framewise_output"] + # clipwise_output = clipwise_output / len(output_dicts) + # framewise_output = framewise_output / len(output_dicts) + # output_dict = { + # 'framewise_output': framewise_output, + # 'clipwise_output': clipwise_output + # } + # else: # this part is typically used, and most easy one + # x = self.reshape_wav2img(x) + # output_dict = self.forward_features(x) + # x = self.head(x) + + # We process the data in the dataloader part, in that here we only consider the input_T < fixed_T + + return output_dict + + +def create_htsat_model(audio_cfg, enable_fusion=False, fusion_type="None"): + try: + + assert audio_cfg.model_name in [ + "tiny", + "base", + "large", + ], "model name for HTS-AT is wrong!" 
+ if audio_cfg.model_name == "tiny": + model = HTSAT_Swin_Transformer( + spec_size=256, + patch_size=4, + patch_stride=(4, 4), + num_classes=audio_cfg.class_num, + embed_dim=96, + depths=[2, 2, 6, 2], + num_heads=[4, 8, 16, 32], + window_size=8, + config=audio_cfg, + enable_fusion=enable_fusion, + fusion_type=fusion_type, + ) + elif audio_cfg.model_name == "base": + model = HTSAT_Swin_Transformer( + spec_size=256, + patch_size=4, + patch_stride=(4, 4), + num_classes=audio_cfg.class_num, + embed_dim=128, + depths=[2, 2, 12, 2], + num_heads=[4, 8, 16, 32], + window_size=8, + config=audio_cfg, + enable_fusion=enable_fusion, + fusion_type=fusion_type, + ) + elif audio_cfg.model_name == "large": + model = HTSAT_Swin_Transformer( + spec_size=256, + patch_size=4, + patch_stride=(4, 4), + num_classes=audio_cfg.class_num, + embed_dim=256, + depths=[2, 2, 12, 2], + num_heads=[4, 8, 16, 32], + window_size=8, + config=audio_cfg, + enable_fusion=enable_fusion, + fusion_type=fusion_type, + ) + + return model + except: + raise RuntimeError( + f"Import Model for {audio_cfg.model_name} not found, or the audio cfg parameters are not enough." 
+ ) diff --git a/picoaudio/audioldm/clap/open_clip/linear_probe.py b/picoaudio/audioldm/clap/open_clip/linear_probe.py new file mode 100644 index 0000000000000000000000000000000000000000..9d7e23b6b67a53e16d050d675a99d01d7d04d581 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/linear_probe.py @@ -0,0 +1,66 @@ +import numpy as np +import torch.nn.functional as F +from torch import nn +from .model import MLPLayers + + +class LinearProbe(nn.Module): + def __init__(self, model, mlp, freeze, in_ch, out_ch, act=None): + """ + Args: + model: nn.Module + mlp: bool, if True, then use the MLP layer as the linear probe module + freeze: bool, if Ture, then freeze all the CLAP model's layers when training the linear probe + in_ch: int, the output channel from CLAP model + out_ch: int, the output channel from linear probe (class_num) + act: torch.nn.functional, the activation function before the loss function + """ + super().__init__() + in_ch = 512 + self.clap_model = model + self.clap_model.text_branch = None # to save memory + self.freeze = freeze + if mlp: + self.lp_layer = MLPLayers(units=[in_ch, in_ch * 2, out_ch]) + else: + self.lp_layer = nn.Linear(in_ch, out_ch) + + if self.freeze: + for param in self.clap_model.parameters(): + param.requires_grad = False + + if act == "None": + self.act = None + elif act == "relu": + self.act = nn.ReLU() + elif act == "elu": + self.act = nn.ELU() + elif act == "prelu": + self.act = nn.PReLU(num_parameters=in_ch) + elif act == "softmax": + self.act = nn.Softmax(dim=-1) + elif act == "sigmoid": + self.act = nn.Sigmoid() + + def forward(self, x, mix_lambda=None, device=None): + """ + Args: + x: waveform, torch.tensor [batch, t_samples] / batch of mel_spec and longer list + mix_lambda: torch.tensor [batch], the mixup lambda + Returns: + class_prob: torch.tensor [batch, class_num] + + """ + # batchnorm cancel grandient + if self.freeze: + self.clap_model.eval() + + x = self.clap_model.audio_projection( + self.clap_model.audio_branch(x, 
mixup_lambda=mix_lambda, device=device)[ + "embedding" + ] + ) + out = self.lp_layer(x) + if self.act is not None: + out = self.act(out) + return out diff --git a/picoaudio/audioldm/clap/open_clip/loss.py b/picoaudio/audioldm/clap/open_clip/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..cc66298a14997da4aa2efc71e37c0a6bcda53fd1 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/loss.py @@ -0,0 +1,398 @@ +from multiprocessing.sharedctypes import Value +import torch +import torch.distributed.nn +from torch import distributed as dist, nn as nn +from torch.nn import functional as F +import numpy as np +from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score + +try: + import horovod.torch as hvd +except ImportError: + hvd = None + + +def gather_features( + audio_features, + text_features, + audio_features_mlp=None, + text_features_mlp=None, + local_loss=False, + gather_with_grad=False, + rank=0, + world_size=1, + use_horovod=False, + mlp_loss=False, +): + if use_horovod: + assert hvd is not None, "Please install horovod" + if gather_with_grad: + all_audio_features = hvd.allgather(audio_features) + all_text_features = hvd.allgather(text_features) + if mlp_loss: + all_audio_features_mlp = hvd.allgather(audio_features_mlp) + all_text_features_mlp = hvd.allgather(text_features_mlp) + else: + with torch.no_grad(): + all_audio_features = hvd.allgather(audio_features) + all_text_features = hvd.allgather(text_features) + if mlp_loss: + all_audio_features_mlp = hvd.allgather(audio_features_mlp) + all_text_features_mlp = hvd.allgather(text_features_mlp) + if not local_loss: + # ensure grads for local rank when all_* features don't have a gradient + gathered_audio_features = list( + all_audio_features.chunk(world_size, dim=0) + ) + gathered_text_features = list( + all_text_features.chunk(world_size, dim=0) + ) + gathered_audio_features[rank] = audio_features + gathered_text_features[rank] = text_features + 
all_audio_features = torch.cat(gathered_audio_features, dim=0) + all_text_features = torch.cat(gathered_text_features, dim=0) + if mlp_loss: + gathered_audio_features_mlp = list( + all_audio_features_mlp.chunk(world_size, dim=0) + ) + gathered_text_features_mlp = list( + all_text_features_mlp.chunk(world_size, dim=0) + ) + gathered_audio_features_mlp[rank] = audio_features_mlp + gathered_text_features_mlp[rank] = text_features_mlp + all_audio_features_mlp = torch.cat( + gathered_audio_features_mlp, dim=0 + ) + all_text_features_mlp = torch.cat(gathered_text_features_mlp, dim=0) + else: + # We gather tensors from all gpus + if gather_with_grad: + all_audio_features = torch.cat( + torch.distributed.nn.all_gather(audio_features), dim=0 + ) + all_text_features = torch.cat( + torch.distributed.nn.all_gather(text_features), dim=0 + ) + if mlp_loss: + all_audio_features_mlp = torch.cat( + torch.distributed.nn.all_gather(audio_features_mlp), dim=0 + ) + all_text_features_mlp = torch.cat( + torch.distributed.nn.all_gather(text_features_mlp), dim=0 + ) + else: + gathered_audio_features = [ + torch.zeros_like(audio_features) for _ in range(world_size) + ] + gathered_text_features = [ + torch.zeros_like(text_features) for _ in range(world_size) + ] + dist.all_gather(gathered_audio_features, audio_features) + dist.all_gather(gathered_text_features, text_features) + if mlp_loss: + gathered_audio_features_mlp = [ + torch.zeros_like(audio_features_mlp) for _ in range(world_size) + ] + gathered_text_features_mlp = [ + torch.zeros_like(text_features_mlp) for _ in range(world_size) + ] + dist.all_gather(gathered_audio_features_mlp, audio_features_mlp) + dist.all_gather(gathered_text_features_mlp, text_features_mlp) + if not local_loss: + # ensure grads for local rank when all_* features don't have a gradient + gathered_audio_features[rank] = audio_features + gathered_text_features[rank] = text_features + if mlp_loss: + gathered_audio_features_mlp[rank] = audio_features_mlp + 
gathered_text_features_mlp[rank] = text_features_mlp + + all_audio_features = torch.cat(gathered_audio_features, dim=0) + all_text_features = torch.cat(gathered_text_features, dim=0) + if mlp_loss: + all_audio_features_mlp = torch.cat(gathered_audio_features_mlp, dim=0) + all_text_features_mlp = torch.cat(gathered_text_features_mlp, dim=0) + if mlp_loss: + return ( + all_audio_features, + all_text_features, + all_audio_features_mlp, + all_text_features_mlp, + ) + else: + return all_audio_features, all_text_features + + +class ClipLoss(nn.Module): + def __init__( + self, + local_loss=False, + gather_with_grad=False, + cache_labels=False, + rank=0, + world_size=1, + use_horovod=False, + mlp_loss=False, + weight_loss_kappa=0, + ): + super().__init__() + self.local_loss = local_loss + self.gather_with_grad = gather_with_grad + self.cache_labels = cache_labels + self.rank = rank + self.world_size = world_size + self.use_horovod = use_horovod + self.mlp_loss = mlp_loss + self.weighted_loss = bool(weight_loss_kappa != 0) + self.weight_loss_kappa = weight_loss_kappa + # cache state + self.prev_num_logits = 0 + self.labels = {} + + def forward( + self, + audio_features, + text_features, + logit_scale_a, + logit_scale_t=None, + audio_features_mlp=None, + text_features_mlp=None, + ): + device = audio_features.device + if self.mlp_loss: + if self.world_size > 1: + ( + all_audio_features, + all_text_features, + all_audio_features_mlp, + all_text_features_mlp, + ) = gather_features( + audio_features=audio_features, + text_features=text_features, + audio_features_mlp=audio_features_mlp, + text_features_mlp=text_features_mlp, + local_loss=self.local_loss, + gather_with_grad=self.gather_with_grad, + rank=self.rank, + world_size=self.world_size, + use_horovod=self.use_horovod, + mlp_loss=self.mlp_loss, + ) + if self.local_loss: + a_logits_per_audio = ( + logit_scale_a * audio_features @ all_text_features_mlp.T + ) + a_logits_per_text = ( + logit_scale_a * text_features_mlp @ 
all_audio_features.T + ) + t_logits_per_audio = ( + logit_scale_t * audio_features_mlp @ all_text_features.T + ) + t_logits_per_text = ( + logit_scale_t * text_features @ all_audio_features_mlp.T + ) + else: + a_logits_per_audio = ( + logit_scale_a * all_audio_features @ all_text_features_mlp.T + ) + a_logits_per_text = a_logits_per_audio.T + t_logits_per_audio = ( + logit_scale_t * all_audio_features_mlp @ all_text_features.T + ) + t_logits_per_text = t_logits_per_audio.T + else: + a_logits_per_audio = ( + logit_scale_a * audio_features @ text_features_mlp.T + ) + a_logits_per_text = logit_scale_a * text_features_mlp @ audio_features.T + t_logits_per_audio = ( + logit_scale_t * audio_features_mlp @ text_features.T + ) + t_logits_per_text = logit_scale_t * text_features @ audio_features_mlp.T + + # calculated ground-truth and cache if enabled + num_logits = a_logits_per_audio.shape[0] + if self.prev_num_logits != num_logits or device not in self.labels: + labels = torch.arange(num_logits, device=device, dtype=torch.long) + if self.world_size > 1 and self.local_loss: + labels = labels + num_logits * self.rank + if self.cache_labels: + self.labels[device] = labels + self.prev_num_logits = num_logits + else: + labels = self.labels[device] + + if not self.weighted_loss: + total_loss = ( + F.cross_entropy(a_logits_per_audio, labels) + + F.cross_entropy(a_logits_per_text, labels) + + F.cross_entropy(t_logits_per_audio, labels) + + F.cross_entropy(t_logits_per_text, labels) + ) / 4 + else: + audio_weight = (audio_features @ audio_features.T).detach() + audio_weight = ( + torch.exp( + torch.sum(audio_weight, axis=1) + / (self.weight_loss_kappa * len(audio_weight)) + ) + ).detach() + text_weight = (text_features @ text_features.T).detach() + text_weight = ( + torch.exp( + torch.sum(text_weight, axis=1) + / (self.weight_loss_kappa * len(text_features)) + ) + ).detach() + total_loss = ( + F.cross_entropy(a_logits_per_audio, labels, weight=audio_weight) + + 
F.cross_entropy(a_logits_per_text, labels, weight=audio_weight) + + F.cross_entropy(t_logits_per_audio, labels, weight=text_weight) + + F.cross_entropy(t_logits_per_text, labels, weight=text_weight) + ) / 4 + else: + if self.world_size > 1: + all_audio_features, all_text_features = gather_features( + audio_features=audio_features, + text_features=text_features, + local_loss=self.local_loss, + gather_with_grad=self.gather_with_grad, + rank=self.rank, + world_size=self.world_size, + use_horovod=self.use_horovod, + mlp_loss=self.mlp_loss, + ) + + if self.local_loss: + logits_per_audio = ( + logit_scale_a * audio_features @ all_text_features.T + ) + logits_per_text = ( + logit_scale_a * text_features @ all_audio_features.T + ) + else: + logits_per_audio = ( + logit_scale_a * all_audio_features @ all_text_features.T + ) + logits_per_text = logits_per_audio.T + else: + logits_per_audio = logit_scale_a * audio_features @ text_features.T + logits_per_text = logit_scale_a * text_features @ audio_features.T + + # calculated ground-truth and cache if enabled + num_logits = logits_per_audio.shape[0] + if self.prev_num_logits != num_logits or device not in self.labels: + labels = torch.arange(num_logits, device=device, dtype=torch.long) + if self.world_size > 1 and self.local_loss: + labels = labels + num_logits * self.rank + if self.cache_labels: + self.labels[device] = labels + self.prev_num_logits = num_logits + else: + labels = self.labels[device] + if not self.weighted_loss: + total_loss = ( + F.cross_entropy(logits_per_audio, labels) + + F.cross_entropy(logits_per_text, labels) + ) / 2 + else: + audio_weight = (all_audio_features @ all_audio_features.T).detach() + audio_weight = ( + torch.exp( + torch.sum(audio_weight, axis=1) + / (self.weight_loss_kappa * len(all_audio_features)) + ) + ).detach() + text_weight = (all_text_features @ all_text_features.T).detach() + text_weight = ( + torch.exp( + torch.sum(text_weight, axis=1) + / (self.weight_loss_kappa * 
len(all_text_features)) + ) + ).detach() + total_loss = ( + F.cross_entropy(logits_per_audio, labels, weight=text_weight) + + F.cross_entropy(logits_per_text, labels, weight=audio_weight) + ) / 2 + return total_loss + + +def lp_gather_features(pred, target, world_size=1, use_horovod=False): + if use_horovod: + assert hvd is not None, "Please install horovod" + with torch.no_grad(): + all_preds = hvd.allgather(pred) + all_targets = hvd.allgath(target) + else: + gathered_preds = [torch.zeros_like(pred) for _ in range(world_size)] + gathered_targets = [torch.zeros_like(target) for _ in range(world_size)] + + dist.all_gather(gathered_preds, pred) + dist.all_gather(gathered_targets, target) + all_preds = torch.cat(gathered_preds, dim=0) + all_targets = torch.cat(gathered_targets, dim=0) + + return all_preds, all_targets + + +def get_map(pred, target): + pred = torch.sigmoid(pred).numpy() + target = target.numpy() + return np.mean(average_precision_score(target, pred, average=None)) + + +def get_acc(pred, target): + pred = torch.argmax(pred, 1).numpy() + target = torch.argmax(target, 1).numpy() + return accuracy_score(target, pred) + + +def get_mauc(pred, target): + pred = torch.sigmoid(pred).numpy() + target = target.numpy() + return np.mean(roc_auc_score(target, pred, average=None)) + + +class LPMetrics(object): + def __init__(self, metric_names=["map", "acc", "mauc"]): + self.metrics = [] + for name in metric_names: + self.metrics.append(self.get_metric(name)) + self.metric_names = metric_names + + def get_metric(self, name): + if name == "map": + return get_map + elif name == "acc": + return get_acc + elif name == "mauc": + return get_mauc + else: + raise ValueError(f"the metric should be at least one of [map, acc, mauc]") + + def evaluate_mertics(self, pred, target): + metric_dict = {} + for i in range(len(self.metric_names)): + metric_dict[self.metric_names[i]] = self.metrics[i](pred, target) + return metric_dict + + +def calc_celoss(pred, target): + target = 
torch.argmax(target, 1).long() + return nn.CrossEntropyLoss()(pred, target) + + +class LPLoss(nn.Module): + def __init__(self, loss_name): + super().__init__() + if loss_name == "bce": + self.loss_func = nn.BCEWithLogitsLoss() + elif loss_name == "ce": + self.loss_func = calc_celoss + elif loss_name == "mse": + self.loss_func = nn.MSELoss() + else: + raise ValueError(f"the loss func should be at least one of [bce, ce, mse]") + + def forward(self, pred, target): + loss = self.loss_func(pred, target) + return loss diff --git a/picoaudio/audioldm/clap/open_clip/model.py b/picoaudio/audioldm/clap/open_clip/model.py new file mode 100644 index 0000000000000000000000000000000000000000..b439244f8c293a0b4263b7ac1fd553e9d0adf184 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model.py @@ -0,0 +1,936 @@ +""" CLAP Model + +Adapted from CLIP: https://github.com/openai/CLIP. Originally MIT License, Copyright (c) 2021 OpenAI. +Adapted to the Audio Task. +""" + +from collections import OrderedDict +from dataclasses import dataclass +from email.mime import audio +from typing import Tuple, Union, Callable, Optional + +import numpy as np +import torch +import torch.nn.functional as F +from torch import nn + +from .timm_model import TimmModel +import logging +from .utils import freeze_batch_norm_2d + +from .pann_model import create_pann_model +from .htsat import create_htsat_model +from transformers import BertModel, RobertaModel, BartModel +from transformers.tokenization_utils_base import BatchEncoding + + +class MLPLayers(nn.Module): + def __init__(self, units=[512, 512, 512], nonlin=nn.ReLU(), dropout=0.1): + super(MLPLayers, self).__init__() + self.nonlin = nonlin + self.dropout = dropout + + sequence = [] + for u0, u1 in zip(units[:-1], units[1:]): + sequence.append(nn.Linear(u0, u1)) + sequence.append(self.nonlin) + sequence.append(nn.Dropout(self.dropout)) + sequence = sequence[:-2] + + self.sequential = nn.Sequential(*sequence) + + def forward(self, X): + X = 
self.sequential(X) + return X + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2d(inplanes, planes, 1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + + self.conv2 = nn.Conv2d(planes, planes, 3, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + + self.avgpool = nn.AvgPool2d(stride) if stride > 1 else nn.Identity() + + self.conv3 = nn.Conv2d(planes, planes * self.expansion, 1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + + self.relu = nn.ReLU(inplace=True) + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + # downsampling layer is prepended with an avgpool, and the subsequent convolution has stride 1 + self.downsample = nn.Sequential( + OrderedDict( + [ + ("-1", nn.AvgPool2d(stride)), + ( + "0", + nn.Conv2d( + inplanes, + planes * self.expansion, + 1, + stride=1, + bias=False, + ), + ), + ("1", nn.BatchNorm2d(planes * self.expansion)), + ] + ) + ) + + def forward(self, x: torch.Tensor): + identity = x + + out = self.relu(self.bn1(self.conv1(x))) + out = self.relu(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + return out + + +class AttentionPool2d(nn.Module): + def __init__( + self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None + ): + super().__init__() + self.positional_embedding = nn.Parameter( + torch.randn(spacial_dim**2 + 1, embed_dim) / embed_dim**0.5 + ) + self.k_proj = nn.Linear(embed_dim, embed_dim) + self.q_proj = nn.Linear(embed_dim, embed_dim) + self.v_proj = nn.Linear(embed_dim, embed_dim) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim) + self.num_heads = num_heads + + 
class ModifiedResNet(nn.Module):
    """
    ResNet variant that differs from torchvision's in three ways:
    - The stem has 3 convolutions instead of 1 and ends in an average pool
      rather than a max pool.
    - Strided convolutions are anti-aliased: an avgpool is prepended to
      convolutions with stride > 1.
    - The final pooling layer is QKV attention instead of an average pool.
    """

    def __init__(self, layers, output_dim, heads, image_size=224, width=64):
        super().__init__()
        self.output_dim = output_dim
        self.image_size = image_size

        # Three-layer stem; only the first convolution is strided.
        half_width = width // 2
        self.conv1 = nn.Conv2d(
            3, half_width, kernel_size=3, stride=2, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(half_width)
        self.conv2 = nn.Conv2d(
            half_width, half_width, kernel_size=3, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(half_width)
        self.conv3 = nn.Conv2d(half_width, width, kernel_size=3, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(width)
        self.avgpool = nn.AvgPool2d(2)
        self.relu = nn.ReLU(inplace=True)

        # Residual stages. ``_inplanes`` is mutated by ``_make_layer`` so each
        # stage knows the channel count produced by the previous one.
        self._inplanes = width
        self.layer1 = self._make_layer(width, layers[0])
        self.layer2 = self._make_layer(2 * width, layers[1], stride=2)
        self.layer3 = self._make_layer(4 * width, layers[2], stride=2)
        self.layer4 = self._make_layer(8 * width, layers[3], stride=2)

        # Feature dimension after layer4 is width * 32; the attention pool is
        # sized for an (image_size // 32) x (image_size // 32) grid.
        self.attnpool = AttentionPool2d(
            image_size // 32, 32 * width, heads, output_dim
        )

        self.init_parameters()

    def _make_layer(self, planes, blocks, stride=1):
        """Build one stage: a (possibly strided) block plus ``blocks - 1`` more."""
        first = Bottleneck(self._inplanes, planes, stride)
        self._inplanes = planes * Bottleneck.expansion
        rest = [Bottleneck(self._inplanes, planes) for _ in range(1, blocks)]
        return nn.Sequential(first, *rest)

    def init_parameters(self):
        """Re-initialise the attention-pool projections and zero every bn3 gain."""
        pool = self.attnpool
        if pool is not None:
            std = pool.c_proj.in_features**-0.5
            for proj in (pool.q_proj, pool.k_proj, pool.v_proj, pool.c_proj):
                nn.init.normal_(proj.weight, std=std)

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            for name, param in stage.named_parameters():
                # Zeroing the last BN scale of each bottleneck.
                if name.endswith("bn3.weight"):
                    nn.init.zeros_(param)

    def lock(self, unlocked_groups=0, freeze_bn_stats=False):
        """Freeze all parameters; optionally freeze batch-norm statistics too.

        Only full locking is supported, so ``unlocked_groups`` must be 0.
        """
        assert (
            unlocked_groups == 0
        ), "partial locking not currently supported for this model"
        for weight in self.parameters():
            weight.requires_grad = False
        if freeze_bn_stats:
            freeze_batch_norm_2d(self)

    def stem(self, x):
        """Run the three conv/BN/ReLU stem layers followed by the average pool."""
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.relu(self.bn3(self.conv3(x)))
        return self.avgpool(x)

    def forward(self, x):
        """Stem -> four residual stages -> attention pooling."""
        x = self.stem(x)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        return self.attnpool(x)
class Transformer(nn.Module):
    """A stack of ``layers`` residual attention blocks applied one after another.

    Parameters
    ----------
    width: int
        Model dimension handed to every block.
    layers: int
        Number of blocks in the stack.
    heads: int
        Number of attention heads per block.
    act_layer: Callable
        Activation class forwarded to every block (default ``nn.GELU``).
    """

    def __init__(
        self, width: int, layers: int, heads: int, act_layer: Callable = nn.GELU
    ):
        super().__init__()
        self.width = width
        self.layers = layers

        stack = nn.ModuleList()
        for _ in range(layers):
            stack.append(ResidualAttentionBlock(width, heads, act_layer=act_layer))
        self.resblocks = stack

    def forward(self, x: torch.Tensor, attn_mask: Optional[torch.Tensor] = None):
        """Feed ``x`` through every block, passing the same ``attn_mask`` to each."""
        hidden = x
        for block in self.resblocks:
            hidden = block(hidden, attn_mask=attn_mask)
        return hidden
@dataclass
class CLAPVisionCfg:
    """Configuration fields for a vision tower.

    NOTE(review): nothing in the visible part of this module instantiates
    this class; presumably it is kept from the CLIP code this file was
    adapted from — confirm before relying on or removing it.
    """

    # Either a single depth or one block count per residual stage.
    layers: Union[Tuple[int, int, int, int], int] = 12
    width: int = 768
    patch_size: int = 16
    # Either a single side length or a pair of sizes.
    image_size: Union[Tuple[int, int], int] = 224
    timm_model_name: Optional[str] = (
        None  # a valid model name overrides layers, width, patch_size
    )
    timm_model_pretrained: bool = (
        False  # use (imagenet) pretrained weights for named model
    )
    timm_pool: str = (
        "avg"  # feature pooling for timm model ('abs_attn', 'rot_attn', 'avg', '')
    )
    timm_proj: str = (
        "linear"  # linear projection for timm model output ('linear', 'mlp', '')
    )
OpenAI models are pretrained w/ QuickGELU but native nn.GELU is both faster and more + # memory efficient in recent PyTorch releases (>= 1.10). + # NOTE: timm models always use native GELU regardless of quick_gelu flag. + act_layer = QuickGELU if quick_gelu else nn.GELU + + if mlp_act == "relu": + mlp_act_layer = nn.ReLU() + elif mlp_act == "gelu": + mlp_act_layer = nn.GELU() + else: + raise NotImplementedError + + # audio branch + # audio branch parameters + if audio_cfg.model_type == "PANN": + self.audio_branch = create_pann_model(audio_cfg, enable_fusion, fusion_type) + elif audio_cfg.model_type == "HTSAT": + self.audio_branch = create_htsat_model( + audio_cfg, enable_fusion, fusion_type + ) + else: + logging.error(f"Model config for {audio_cfg.model_type} not found") + raise RuntimeError(f"Model config for {audio_cfg.model_type} not found.") + + # text branch + # text branch parameters + if text_cfg.model_type == "transformer": + self.text_branch = Transformer( + width=text_cfg.width, + layers=text_cfg.layers, + heads=text_cfg.heads, + act_layer=act_layer, + ) + self.vocab_size = text_cfg.vocab_size + self.token_embedding = nn.Embedding(text_cfg.vocab_size, text_cfg.width) + self.positional_embedding = nn.Parameter( + torch.empty(self.context_length, text_cfg.width) + ) + self.ln_final = LayerNorm(text_cfg.width) + self.text_transform = MLPLayers( + units=[ + self.joint_embed_shape, + self.joint_embed_shape, + self.joint_embed_shape, + ], + dropout=0.1, + ) + self.text_projection = nn.Sequential( + nn.Linear(text_cfg.width, self.joint_embed_shape), + mlp_act_layer, + nn.Linear(self.joint_embed_shape, self.joint_embed_shape), + ) + elif text_cfg.model_type == "bert": + self.text_branch = BertModel.from_pretrained("bert-base-uncased") + self.text_transform = MLPLayers( + units=[ + self.joint_embed_shape, + self.joint_embed_shape, + self.joint_embed_shape, + ], + dropout=0.1, + ) + self.text_projection = nn.Sequential( + nn.Linear(768, self.joint_embed_shape), + 
mlp_act_layer, + nn.Linear(self.joint_embed_shape, self.joint_embed_shape), + ) + elif text_cfg.model_type == "roberta": + self.text_branch = RobertaModel.from_pretrained("roberta-base") + self.text_transform = MLPLayers( + units=[ + self.joint_embed_shape, + self.joint_embed_shape, + self.joint_embed_shape, + ], + dropout=0.1, + ) + self.text_projection = nn.Sequential( + nn.Linear(768, self.joint_embed_shape), + mlp_act_layer, + nn.Linear(self.joint_embed_shape, self.joint_embed_shape), + ) + elif text_cfg.model_type == "bart": + self.text_branch = BartModel.from_pretrained("facebook/bart-base") + self.text_transform = MLPLayers( + units=[ + self.joint_embed_shape, + self.joint_embed_shape, + self.joint_embed_shape, + ], + dropout=0.1, + ) + self.text_projection = nn.Sequential( + nn.Linear(768, self.joint_embed_shape), + mlp_act_layer, + nn.Linear(self.joint_embed_shape, self.joint_embed_shape), + ) + else: + logging.error(f"Model config for {text_cfg.model_type} not found") + raise RuntimeError(f"Model config for {text_cfg.model_type} not found.") + self.text_branch_type = text_cfg.model_type + # text branch parameters + + # audio branch parameters + self.audio_transform = MLPLayers( + units=[ + self.joint_embed_shape, + self.joint_embed_shape, + self.joint_embed_shape, + ], + dropout=0.1, + ) + + # below here is text branch parameters + + # ============================================================================================================ + self.audio_projection = nn.Sequential( + nn.Linear(embed_dim, self.joint_embed_shape), + mlp_act_layer, + nn.Linear(self.joint_embed_shape, self.joint_embed_shape), + ) + + self.logit_scale_a = nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) + self.logit_scale_t = nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) + self.register_buffer("attn_mask", self.build_attention_mask(), persistent=False) + + self.init_text_branch_parameters() + + def init_text_branch_parameters(self): + if self.text_branch_type == 
def build_attention_mask(self):
    """Return the additive causal mask for the text transformer.

    The result is a ``(context_length, context_length)`` tensor holding
    ``-inf`` strictly above the main diagonal and ``0`` elsewhere, so that
    adding it to the attention logits lets each position attend only to
    itself and to earlier positions.
    """
    size = self.context_length
    # PyTorch attention masks are additive: start from all -inf, then keep
    # -inf only above the diagonal (triu_ zeroes the diagonal and below).
    return torch.full((size, size), float("-inf")).triu_(1)
self.text_branch_type == "transformer": + text = text.to(device=device, non_blocking=True) + x = self.token_embedding(text) # [batch_size, n_ctx, d_model] + + x = x + self.positional_embedding + x = x.permute(1, 0, 2) # NLD -> LND + x = self.text_branch(x, attn_mask=self.attn_mask) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.ln_final(x) + + # x.shape = [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = self.text_projection(x[torch.arange(x.shape[0]), text.argmax(dim=-1)]) + elif self.text_branch_type == "bert": + # text = self.list_of_dict_of_tensor2dict_of_tensor(text, device) + # text = BatchEncoding(text) + x = self.text_branch( + input_ids=text["input_ids"].to(device=device, non_blocking=True), + attention_mask=text["attention_mask"].to( + device=device, non_blocking=True + ), + token_type_ids=text["token_type_ids"].to( + device=device, non_blocking=True + ), + )["pooler_output"] + x = self.text_projection(x) + elif self.text_branch_type == "roberta": + x = self.text_branch( + input_ids=text["input_ids"].to(device=device, non_blocking=True), + attention_mask=text["attention_mask"].to( + device=device, non_blocking=True + ), + )["pooler_output"] + x = self.text_projection(x) + elif self.text_branch_type == "bart": + x = torch.mean( + self.text_branch( + input_ids=text["input_ids"].to(device=device, non_blocking=True), + attention_mask=text["attention_mask"].to( + device=device, non_blocking=True + ), + )["encoder_last_hidden_state"], + axis=1, + ) + x = self.text_projection(x) + else: + logging.error(f"Model type {self.text_branch_type} not found") + raise RuntimeError(f"Model type {self.text_branch_type} not found.") + return x + + def forward(self, audio, text, device=None): + """Forward audio and text into the CLAP + + Parameters + ---------- + audio: torch.Tensor (batch_size, audio_length) + the time-domain audio input / the batch of mel_spec and longer list. 
def get_text_embedding(self, data):
    """Get the L2-normalised text embedding from the model.

    Parameters
    ----------
    data: mapping of str -> torch.Tensor
        The tokenised text inputs that ``encode_text`` indexes by key.
        Entries with fewer than two dimensions are treated as a single
        example and receive a leading batch dimension.
        The mapping passed in is NOT modified.

    Returns
    ----------
    text_embed: torch.Tensor
        a tensor of text_embeds (N, D), normalised along the last dimension
    """
    device = next(self.parameters()).device

    # Build a fresh dict instead of writing back into ``data``: the caller's
    # tensors must keep their original device and shape after this call.
    batch = {}
    for name in data:
        tensor = data[name].to(device)
        if tensor.dim() < 2:
            tensor = tensor.unsqueeze(0)
        batch[name] = tensor

    text_embeds = self.encode_text(batch, device=device)
    return F.normalize(text_embeds, dim=-1)
def audio_infer(self, audio, hopsize=None, device=None):
    """Forward one audio and produce the outputs of the audio branch.

    Parameters
    ----------
    audio: (audio_length)
        the time-domain audio input, notice that it must be only one input
    hopsize: int
        the hop between consecutive sliding windows (HTSAT only). When
        omitted it defaults to ``min(clip_samples, audio_length)``, i.e.
        non-overlapping windows.
        NOTE(review): the original code had no usable default here —
        confirm this is the intended one.
    device:
        forwarded unchanged to ``encode_audio``

    Returns
    ----------
    output_dict: {
        key: [n, (embedding_shape)] if "HTSAT" and the audio is longer
             than ``clip_samples`` (one row per window)
        or
        key: [(embedding_shape)] otherwise
        }
        one entry per key returned by the audio branch; empty for any
        other ``model_type``

    Raises
    ----------
    ValueError
        if ``audio`` is empty on the HTSAT path
    """

    assert not self.training, "the inference mode must be run at eval stage"

    def _drop_batch_dim(branch_out):
        # A single clip was wrapped in a batch of one; unwrap every tensor.
        return {
            name: value.squeeze(dim=0) if torch.is_tensor(value) else value
            for name, value in branch_out.items()
        }

    model_type = self.audio_cfg.model_type
    output_dict = {}
    if model_type == "PANN":
        audio_input = audio.unsqueeze(dim=0)
        output_dict = _drop_batch_dim(self.encode_audio(audio_input, device=device))
    elif model_type == "HTSAT":
        clip_samples = self.audio_cfg.clip_samples
        audio_len = len(audio)
        if audio_len == 0:
            raise ValueError("audio must contain at least one sample")

        # Tile short clips so they fill as much of one window as possible.
        repeats = clip_samples // audio_len
        if repeats > 1:
            audio = audio.repeat(repeats)
            audio_len = len(audio)

        if hopsize is None:
            hopsize = min(clip_samples, audio_len)

        if audio_len > clip_samples:
            # Sliding windows, plus one final window aligned to the end so
            # the tail of the signal is always covered.
            windows = [
                audio[pos : pos + clip_samples].clone()
                for pos in range(0, audio_len - clip_samples, hopsize)
            ]
            windows.append(audio[-clip_samples:].clone())
            audio_input = torch.stack(windows)
            output_dict = dict(self.encode_audio(audio_input, device=device))
        else:
            audio_input = audio.unsqueeze(dim=0)
            output_dict = _drop_batch_dim(
                self.encode_audio(audio_input, device=device)
            )

    return output_dict
def trace_model(model, batch_size=256, device=torch.device("cpu")):
    """Trace ``model`` with ``torch.jit.trace_module`` on dummy inputs.

    The model is switched to eval mode, then traced with an all-ones audio
    tensor of shape ``(batch_size, model.audio_cfg.audio_length)`` and an
    all-zeros int tensor of shape ``(batch_size, model.context_length)``.

    NOTE(review): the traced method list names ``encode_image`` and calls
    ``encode_text`` with a single argument. The ``CLAP`` class in this file
    defines no ``encode_image`` and its ``encode_text`` also requires
    ``device`` — this looks like a leftover from the CLIP original; confirm
    which model type this helper is meant for.
    """
    model.eval()
    audio_length = model.audio_cfg.audio_length

    dummy_audio = torch.ones((batch_size, audio_length), device=device)
    dummy_text = torch.zeros(
        (batch_size, model.context_length), dtype=torch.int, device=device
    )

    trace_inputs = {
        "forward": (dummy_audio, dummy_text),
        "encode_text": (dummy_text,),
        "encode_image": (dummy_audio,),
    }
    model = torch.jit.trace_module(model, inputs=trace_inputs)

    # Re-attach the audio length on the traced module.
    # NOTE(review): the original author was unsure why this is needed.
    model.audio_cfg.audio_length = audio_length
    return model
@@ +{ + "embed_dim": 2048, + "audio_cfg": { + "audio_length": 1024, + "clip_samples": 480000, + "mel_bins": 64, + "sample_rate": 48000, + "window_size": 1024, + "hop_size": 480, + "fmin": 50, + "fmax": 14000, + "class_num": 527, + "model_type": "HTSAT", + "model_name": "large" + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/HTSAT-tiny-win-1536.json b/picoaudio/audioldm/clap/open_clip/model_configs/HTSAT-tiny-win-1536.json new file mode 100644 index 0000000000000000000000000000000000000000..73e42990fe8361a0df502e7f93d29f19f58c9ecb --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/HTSAT-tiny-win-1536.json @@ -0,0 +1,23 @@ +{ + "embed_dim": 768, + "audio_cfg": { + "audio_length": 1024, + "clip_samples": 480000, + "mel_bins": 64, + "sample_rate": 48000, + "window_size": 1536, + "hop_size": 480, + "fmin": 50, + "fmax": 14000, + "class_num": 527, + "model_type": "HTSAT", + "model_name": "tiny" + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/HTSAT-tiny.json b/picoaudio/audioldm/clap/open_clip/model_configs/HTSAT-tiny.json new file mode 100644 index 0000000000000000000000000000000000000000..a6e7821163d9afa81c27345a1e472475b92af169 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/HTSAT-tiny.json @@ -0,0 +1,23 @@ +{ + "embed_dim": 768, + "audio_cfg": { + "audio_length": 1024, + "clip_samples": 480000, + "mel_bins": 64, + "sample_rate": 48000, + "window_size": 1024, + "hop_size": 480, + "fmin": 50, + "fmax": 14000, + "class_num": 527, + "model_type": "HTSAT", + "model_name": "tiny" + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of 
file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/PANN-10.json b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-10.json new file mode 100644 index 0000000000000000000000000000000000000000..954ddf62921aed7dde9c37ffffec98a2e96a4ee7 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-10.json @@ -0,0 +1,23 @@ +{ + "embed_dim": 1024, + "audio_cfg": { + "audio_length": 1024, + "clip_samples": 480000, + "mel_bins": 64, + "sample_rate": 48000, + "window_size": 1024, + "hop_size": 480, + "fmin": 50, + "fmax": 14000, + "class_num": 527, + "model_type": "PANN", + "model_name": "Cnn10" + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14-fmax-18k.json b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14-fmax-18k.json new file mode 100644 index 0000000000000000000000000000000000000000..b7989bc0cd95d0d39049b7524eba508b3e386439 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14-fmax-18k.json @@ -0,0 +1,23 @@ +{ + "embed_dim": 2048, + "audio_cfg": { + "audio_length": 1024, + "clip_samples": 480000, + "mel_bins": 64, + "sample_rate": 48000, + "window_size": 1024, + "hop_size": 480, + "fmin": 50, + "fmax": 18000, + "class_num": 527, + "model_type": "PANN", + "model_name": "Cnn14" + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14-fmax-8k-20s.json b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14-fmax-8k-20s.json new file mode 100644 index 0000000000000000000000000000000000000000..56bdb56bedc304ffa52d8bf5988cea2c1d82d14e --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14-fmax-8k-20s.json @@ -0,0 +1,23 @@ +{ + "embed_dim": 2048, + "audio_cfg": { + 
"audio_length": 1024, + "clip_samples": 960000, + "mel_bins": 64, + "sample_rate": 48000, + "window_size": 1024, + "hop_size": 360, + "fmin": 50, + "fmax": 8000, + "class_num": 527, + "model_type": "PANN", + "model_name": "Cnn14" + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14-tiny-transformer.json b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14-tiny-transformer.json new file mode 100644 index 0000000000000000000000000000000000000000..5756e3bebc97cc985f512cb081930fee4e49bec1 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14-tiny-transformer.json @@ -0,0 +1,23 @@ +{ + "embed_dim": 2048, + "audio_cfg": { + "audio_length": 1024, + "clip_samples": 480000, + "mel_bins": 64, + "sample_rate": 48000, + "window_size": 1024, + "hop_size": 480, + "fmin": 50, + "fmax": 14000, + "class_num": 527, + "model_type": "PANN", + "model_name": "Cnn14" + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 4 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14-win-1536.json b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14-win-1536.json new file mode 100644 index 0000000000000000000000000000000000000000..5a9e7e208b661619d5e26625e849da1adda8a475 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14-win-1536.json @@ -0,0 +1,23 @@ +{ + "embed_dim": 2048, + "audio_cfg": { + "audio_length": 1024, + "clip_samples": 480000, + "mel_bins": 64, + "sample_rate": 48000, + "window_size": 1536, + "hop_size": 480, + "fmin": 50, + "fmax": 14000, + "class_num": 527, + "model_type": "PANN", + "model_name": "Cnn14" + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git 
a/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14.json b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14.json new file mode 100644 index 0000000000000000000000000000000000000000..39a5134cde1d8c50f4758377c952ef22f07bab41 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-14.json @@ -0,0 +1,23 @@ +{ + "embed_dim": 2048, + "audio_cfg": { + "audio_length": 1024, + "clip_samples": 480000, + "mel_bins": 64, + "sample_rate": 48000, + "window_size": 1024, + "hop_size": 480, + "fmin": 50, + "fmax": 14000, + "class_num": 527, + "model_type": "PANN", + "model_name": "Cnn14" + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/PANN-6.json b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-6.json new file mode 100644 index 0000000000000000000000000000000000000000..21ebc344326de260c386ba77e0ad63cf9b04febf --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/PANN-6.json @@ -0,0 +1,23 @@ +{ + "embed_dim": 512, + "audio_cfg": { + "audio_length": 1024, + "clip_samples": 480000, + "mel_bins": 64, + "sample_rate": 48000, + "window_size": 1024, + "hop_size": 480, + "fmin": 50, + "fmax": 14000, + "class_num": 527, + "model_type": "PANN", + "model_name": "Cnn6" + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/RN101-quickgelu.json b/picoaudio/audioldm/clap/open_clip/model_configs/RN101-quickgelu.json new file mode 100644 index 0000000000000000000000000000000000000000..d0db2c161d13138788c4609d373b023b8454d624 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/RN101-quickgelu.json @@ -0,0 +1,22 @@ +{ + "embed_dim": 512, + "quick_gelu": true, + "vision_cfg": { + "image_size": 224, + "layers": [ + 3, + 4, + 
23, + 3 + ], + "width": 64, + "patch_size": null + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/RN101.json b/picoaudio/audioldm/clap/open_clip/model_configs/RN101.json new file mode 100644 index 0000000000000000000000000000000000000000..b88b4d3acbaa701c614ab0ea65fc88fcfe289c32 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/RN101.json @@ -0,0 +1,21 @@ +{ + "embed_dim": 512, + "vision_cfg": { + "image_size": 224, + "layers": [ + 3, + 4, + 23, + 3 + ], + "width": 64, + "patch_size": null + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/RN50-quickgelu.json b/picoaudio/audioldm/clap/open_clip/model_configs/RN50-quickgelu.json new file mode 100644 index 0000000000000000000000000000000000000000..8c2f91260cdeb043434dc1e893cce81d4ce7f0d1 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/RN50-quickgelu.json @@ -0,0 +1,22 @@ +{ + "embed_dim": 1024, + "quick_gelu": true, + "vision_cfg": { + "image_size": 224, + "layers": [ + 3, + 4, + 6, + 3 + ], + "width": 64, + "patch_size": null + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/RN50.json b/picoaudio/audioldm/clap/open_clip/model_configs/RN50.json new file mode 100644 index 0000000000000000000000000000000000000000..33aa884d54fee0076c33676831e49d5e1ffcb8f2 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/RN50.json @@ -0,0 +1,21 @@ +{ + "embed_dim": 1024, + "vision_cfg": { + "image_size": 224, + "layers": [ + 3, + 4, + 6, + 3 + ], + "width": 64, + "patch_size": null + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 
49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/RN50x16.json b/picoaudio/audioldm/clap/open_clip/model_configs/RN50x16.json new file mode 100644 index 0000000000000000000000000000000000000000..3161e1a2c9a839161e652a4d729c2cdc971161db --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/RN50x16.json @@ -0,0 +1,21 @@ +{ + "embed_dim": 768, + "vision_cfg": { + "image_size": 384, + "layers": [ + 6, + 8, + 18, + 8 + ], + "width": 96, + "patch_size": null + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 768, + "heads": 12, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/RN50x4.json b/picoaudio/audioldm/clap/open_clip/model_configs/RN50x4.json new file mode 100644 index 0000000000000000000000000000000000000000..e155237f8ce1026aaaeecc80751eabe6f329f0bb --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/RN50x4.json @@ -0,0 +1,21 @@ +{ + "embed_dim": 640, + "vision_cfg": { + "image_size": 288, + "layers": [ + 4, + 6, + 10, + 6 + ], + "width": 80, + "patch_size": null + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 640, + "heads": 10, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/ViT-B-16.json b/picoaudio/audioldm/clap/open_clip/model_configs/ViT-B-16.json new file mode 100644 index 0000000000000000000000000000000000000000..395eea77ec3907c0611531aba63459b193e67b9c --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/ViT-B-16.json @@ -0,0 +1,16 @@ +{ + "embed_dim": 512, + "vision_cfg": { + "image_size": 224, + "layers": 12, + "width": 768, + "patch_size": 16 + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git 
a/picoaudio/audioldm/clap/open_clip/model_configs/ViT-B-32-quickgelu.json b/picoaudio/audioldm/clap/open_clip/model_configs/ViT-B-32-quickgelu.json new file mode 100644 index 0000000000000000000000000000000000000000..ce6bd923593293ed50dfcfb28b73ca7403bcf3c5 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/ViT-B-32-quickgelu.json @@ -0,0 +1,17 @@ +{ + "embed_dim": 512, + "quick_gelu": true, + "vision_cfg": { + "image_size": 224, + "layers": 12, + "width": 768, + "patch_size": 32 + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/ViT-B-32.json b/picoaudio/audioldm/clap/open_clip/model_configs/ViT-B-32.json new file mode 100644 index 0000000000000000000000000000000000000000..07c8e28eb06fa1813ba932fe4eec668262d1c47f --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/ViT-B-32.json @@ -0,0 +1,16 @@ +{ + "embed_dim": 512, + "vision_cfg": { + "image_size": 224, + "layers": 12, + "width": 768, + "patch_size": 32 + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 512, + "heads": 8, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/model_configs/ViT-L-14.json b/picoaudio/audioldm/clap/open_clip/model_configs/ViT-L-14.json new file mode 100644 index 0000000000000000000000000000000000000000..d4a4bbb1dd4ed4edb317d3ace4f3ad13b211c241 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/model_configs/ViT-L-14.json @@ -0,0 +1,16 @@ +{ + "embed_dim": 768, + "vision_cfg": { + "image_size": 224, + "layers": 24, + "width": 1024, + "patch_size": 14 + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 768, + "heads": 12, + "layers": 12 + } +} \ No newline at end of file diff --git a/picoaudio/audioldm/clap/open_clip/openai.py b/picoaudio/audioldm/clap/open_clip/openai.py new file mode 100644 index 
0000000000000000000000000000000000000000..fcb624f54a8b9d2c4b11e3adb50c53c3261716d4 --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/openai.py @@ -0,0 +1,159 @@ +""" OpenAI pretrained model functions + +Adapted from https://github.com/openai/CLIP. Originally MIT License, Copyright (c) 2021 OpenAI. +""" + +import os +import warnings +from typing import Union, List + +import torch + +from .model import build_model_from_openai_state_dict +from .pretrained import ( + get_pretrained_url, + list_pretrained_tag_models, + download_pretrained, +) + +__all__ = ["list_openai_models", "load_openai_model"] + +CACHE_DIR = os.getenv("AUDIOLDM_CACHE_DIR", "~/.cache") + + + +def list_openai_models() -> List[str]: + """Returns the names of available CLIP models""" + return list_pretrained_tag_models("openai") + + +def load_openai_model( + name: str, + model_cfg, + device: Union[str, torch.device] = "cuda" if torch.cuda.is_available() else "cpu", + jit=True, + cache_dir=os.path.expanduser(f"{CACHE_DIR}/clip"), + enable_fusion: bool = False, + fusion_type: str = "None", +): + """Load a CLIP model, preserve its text pretrained part, and set in the CLAP model + + Parameters + ---------- + name : str + A model name listed by `clip.available_models()`, or the path to a model checkpoint containing the state_dict + device : Union[str, torch.device] + The device to put the loaded model + jit : bool + Whether to load the optimized JIT model (default) or more hackable non-JIT model. 
+ + Returns + ------- + model : torch.nn.Module + The CLAP model + preprocess : Callable[[PIL.Image], torch.Tensor] + A torchvision transform that converts a PIL image into a tensor that the returned model can take as its input + """ + if get_pretrained_url(name, "openai"): + model_path = download_pretrained( + get_pretrained_url(name, "openai"), root=cache_dir + ) + elif os.path.isfile(name): + model_path = name + else: + raise RuntimeError( + f"Model {name} not found; available models = {list_openai_models()}" + ) + + try: + # loading JIT archive + model = torch.jit.load(model_path, map_location=device if jit else "cpu").eval() + state_dict = None + except RuntimeError: + # loading saved state dict + if jit: + warnings.warn( + f"File {model_path} is not a JIT archive. Loading as a state dict instead" + ) + jit = False + state_dict = torch.load(model_path, map_location="cpu") + + if not jit: + try: + model = build_model_from_openai_state_dict( + state_dict or model.state_dict(), model_cfg, enable_fusion, fusion_type + ).to(device) + except KeyError: + sd = {k[7:]: v for k, v in state_dict["state_dict"].items()} + model = build_model_from_openai_state_dict( + sd, model_cfg, enable_fusion, fusion_type + ).to(device) + + if str(device) == "cpu": + model.float() + return model + + # patch the device names + device_holder = torch.jit.trace( + lambda: torch.ones([]).to(torch.device(device)), example_inputs=[] + ) + device_node = [ + n + for n in device_holder.graph.findAllNodes("prim::Constant") + if "Device" in repr(n) + ][-1] + + def patch_device(module): + try: + graphs = [module.graph] if hasattr(module, "graph") else [] + except RuntimeError: + graphs = [] + + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("prim::Constant"): + if "value" in node.attributeNames() and str(node["value"]).startswith( + "cuda" + ): + node.copyAttributes(device_node) + + model.apply(patch_device) + 
patch_device(model.encode_audio) + patch_device(model.encode_text) + + # patch dtype to float32 on CPU + if str(device) == "cpu": + float_holder = torch.jit.trace( + lambda: torch.ones([]).float(), example_inputs=[] + ) + float_input = list(float_holder.graph.findNode("aten::to").inputs())[1] + float_node = float_input.node() + + def patch_float(module): + try: + graphs = [module.graph] if hasattr(module, "graph") else [] + except RuntimeError: + graphs = [] + + if hasattr(module, "forward1"): + graphs.append(module.forward1.graph) + + for graph in graphs: + for node in graph.findAllNodes("aten::to"): + inputs = list(node.inputs()) + for i in [ + 1, + 2, + ]: # dtype can be the second or third argument to aten::to() + if inputs[i].node()["value"] == 5: + inputs[i].node().copyAttributes(float_node) + + model.apply(patch_float) + patch_float(model.encode_audio) + patch_float(model.encode_text) + model.float() + + model.audio_branch.audio_length = model.audio_cfg.audio_length + return model diff --git a/picoaudio/audioldm/clap/open_clip/pann_model.py b/picoaudio/audioldm/clap/open_clip/pann_model.py new file mode 100644 index 0000000000000000000000000000000000000000..0d9a8eb0bf897ad6ec04923361b01e5de433b2ef --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/pann_model.py @@ -0,0 +1,704 @@ +# PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition +# Reference from https://github.com/qiuqiangkong/audioset_tagging_cnn +# Some layers are re-designed for CLAP +import os + +os.environ["NUMBA_CACHE_DIR"] = "/tmp/" + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchlibrosa.stft import Spectrogram, LogmelFilterBank +from torchlibrosa.augmentation import SpecAugmentation + +from .utils import do_mixup, interpolate, pad_framewise_output +from .feature_fusion import iAFF, AFF, DAF + + +def init_layer(layer): + """Initialize a Linear or Convolutional layer.""" + nn.init.xavier_uniform_(layer.weight) + + if 
hasattr(layer, "bias"): + if layer.bias is not None: + layer.bias.data.fill_(0.0) + + +def init_bn(bn): + """Initialize a Batchnorm layer.""" + bn.bias.data.fill_(0.0) + bn.weight.data.fill_(1.0) + + +class ConvBlock(nn.Module): + def __init__(self, in_channels, out_channels): + + super(ConvBlock, self).__init__() + + self.conv1 = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=(3, 3), + stride=(1, 1), + padding=(1, 1), + bias=False, + ) + + self.conv2 = nn.Conv2d( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=(3, 3), + stride=(1, 1), + padding=(1, 1), + bias=False, + ) + + self.bn1 = nn.BatchNorm2d(out_channels) + self.bn2 = nn.BatchNorm2d(out_channels) + + self.init_weight() + + def init_weight(self): + init_layer(self.conv1) + init_layer(self.conv2) + init_bn(self.bn1) + init_bn(self.bn2) + + def forward(self, input, pool_size=(2, 2), pool_type="avg"): + + x = input + x = F.relu_(self.bn1(self.conv1(x))) + x = F.relu_(self.bn2(self.conv2(x))) + if pool_type == "max": + x = F.max_pool2d(x, kernel_size=pool_size) + elif pool_type == "avg": + x = F.avg_pool2d(x, kernel_size=pool_size) + elif pool_type == "avg+max": + x1 = F.avg_pool2d(x, kernel_size=pool_size) + x2 = F.max_pool2d(x, kernel_size=pool_size) + x = x1 + x2 + else: + raise Exception("Incorrect argument!") + + return x + + +class ConvBlock5x5(nn.Module): + def __init__(self, in_channels, out_channels): + + super(ConvBlock5x5, self).__init__() + + self.conv1 = nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=(5, 5), + stride=(1, 1), + padding=(2, 2), + bias=False, + ) + + self.bn1 = nn.BatchNorm2d(out_channels) + + self.init_weight() + + def init_weight(self): + init_layer(self.conv1) + init_bn(self.bn1) + + def forward(self, input, pool_size=(2, 2), pool_type="avg"): + + x = input + x = F.relu_(self.bn1(self.conv1(x))) + if pool_type == "max": + x = F.max_pool2d(x, kernel_size=pool_size) + elif pool_type == "avg": + 
x = F.avg_pool2d(x, kernel_size=pool_size) + elif pool_type == "avg+max": + x1 = F.avg_pool2d(x, kernel_size=pool_size) + x2 = F.max_pool2d(x, kernel_size=pool_size) + x = x1 + x2 + else: + raise Exception("Incorrect argument!") + + return x + + +class AttBlock(nn.Module): + def __init__(self, n_in, n_out, activation="linear", temperature=1.0): + super(AttBlock, self).__init__() + + self.activation = activation + self.temperature = temperature + self.att = nn.Conv1d( + in_channels=n_in, + out_channels=n_out, + kernel_size=1, + stride=1, + padding=0, + bias=True, + ) + self.cla = nn.Conv1d( + in_channels=n_in, + out_channels=n_out, + kernel_size=1, + stride=1, + padding=0, + bias=True, + ) + + self.bn_att = nn.BatchNorm1d(n_out) + self.init_weights() + + def init_weights(self): + init_layer(self.att) + init_layer(self.cla) + init_bn(self.bn_att) + + def forward(self, x): + # x: (n_samples, n_in, n_time) + norm_att = torch.softmax(torch.clamp(self.att(x), -10, 10), dim=-1) + cla = self.nonlinear_transform(self.cla(x)) + x = torch.sum(norm_att * cla, dim=2) + return x, norm_att, cla + + def nonlinear_transform(self, x): + if self.activation == "linear": + return x + elif self.activation == "sigmoid": + return torch.sigmoid(x) + + +class Cnn14(nn.Module): + def __init__( + self, + sample_rate, + window_size, + hop_size, + mel_bins, + fmin, + fmax, + classes_num, + enable_fusion=False, + fusion_type="None", + ): + + super(Cnn14, self).__init__() + + window = "hann" + center = True + pad_mode = "reflect" + ref = 1.0 + amin = 1e-10 + top_db = None + + self.enable_fusion = enable_fusion + self.fusion_type = fusion_type + + # Spectrogram extractor + self.spectrogram_extractor = Spectrogram( + n_fft=window_size, + hop_length=hop_size, + win_length=window_size, + window=window, + center=center, + pad_mode=pad_mode, + freeze_parameters=True, + ) + + # Logmel feature extractor + self.logmel_extractor = LogmelFilterBank( + sr=sample_rate, + n_fft=window_size, + n_mels=mel_bins, 
+ fmin=fmin, + fmax=fmax, + ref=ref, + amin=amin, + top_db=top_db, + freeze_parameters=True, + ) + + # Spec augmenter + self.spec_augmenter = SpecAugmentation( + time_drop_width=64, + time_stripes_num=2, + freq_drop_width=8, + freq_stripes_num=2, + ) + + self.bn0 = nn.BatchNorm2d(64) + + if (self.enable_fusion) and (self.fusion_type == "channel_map"): + self.conv_block1 = ConvBlock(in_channels=4, out_channels=64) + else: + self.conv_block1 = ConvBlock(in_channels=1, out_channels=64) + self.conv_block2 = ConvBlock(in_channels=64, out_channels=128) + self.conv_block3 = ConvBlock(in_channels=128, out_channels=256) + self.conv_block4 = ConvBlock(in_channels=256, out_channels=512) + self.conv_block5 = ConvBlock(in_channels=512, out_channels=1024) + self.conv_block6 = ConvBlock(in_channels=1024, out_channels=2048) + + self.fc1 = nn.Linear(2048, 2048, bias=True) + self.fc_audioset = nn.Linear(2048, classes_num, bias=True) + + if (self.enable_fusion) and ( + self.fusion_type in ["daf_1d", "aff_1d", "iaff_1d"] + ): + self.mel_conv1d = nn.Sequential( + nn.Conv1d(64, 64, kernel_size=5, stride=3, padding=2), + nn.BatchNorm1d(64), # No Relu + ) + if self.fusion_type == "daf_1d": + self.fusion_model = DAF() + elif self.fusion_type == "aff_1d": + self.fusion_model = AFF(channels=64, type="1D") + elif self.fusion_type == "iaff_1d": + self.fusion_model = iAFF(channels=64, type="1D") + + if (self.enable_fusion) and ( + self.fusion_type in ["daf_2d", "aff_2d", "iaff_2d"] + ): + self.mel_conv2d = nn.Sequential( + nn.Conv2d(1, 64, kernel_size=(5, 5), stride=(6, 2), padding=(2, 2)), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + ) + + if self.fusion_type == "daf_2d": + self.fusion_model = DAF() + elif self.fusion_type == "aff_2d": + self.fusion_model = AFF(channels=64, type="2D") + elif self.fusion_type == "iaff_2d": + self.fusion_model = iAFF(channels=64, type="2D") + self.init_weight() + + def init_weight(self): + init_bn(self.bn0) + init_layer(self.fc1) + 
init_layer(self.fc_audioset) + + def forward(self, input, mixup_lambda=None, device=None): + """ + Input: (batch_size, data_length)""" + + if self.enable_fusion and input["longer"].sum() == 0: + # if no audio is longer than 10s, then randomly select one audio to be longer + input["longer"][torch.randint(0, input["longer"].shape[0], (1,))] = True + + if not self.enable_fusion: + x = self.spectrogram_extractor( + input["waveform"].to(device=device, non_blocking=True) + ) # (batch_size, 1, time_steps, freq_bins) + x = self.logmel_extractor(x) # (batch_size, 1, time_steps, mel_bins) + + x = x.transpose(1, 3) + x = self.bn0(x) + x = x.transpose(1, 3) + else: + longer_list = input["longer"].to(device=device, non_blocking=True) + x = input["mel_fusion"].to(device=device, non_blocking=True) + longer_list_idx = torch.where(longer_list)[0] + x = x.transpose(1, 3) + x = self.bn0(x) + x = x.transpose(1, 3) + if self.fusion_type in ["daf_1d", "aff_1d", "iaff_1d"]: + new_x = x[:, 0:1, :, :].clone().contiguous() + # local processing + if len(longer_list_idx) > 0: + fusion_x_local = x[longer_list_idx, 1:, :, :].clone().contiguous() + FB, FC, FT, FF = fusion_x_local.size() + fusion_x_local = fusion_x_local.view(FB * FC, FT, FF) + fusion_x_local = torch.permute( + fusion_x_local, (0, 2, 1) + ).contiguous() + fusion_x_local = self.mel_conv1d(fusion_x_local) + fusion_x_local = fusion_x_local.view( + FB, FC, FF, fusion_x_local.size(-1) + ) + fusion_x_local = ( + torch.permute(fusion_x_local, (0, 2, 1, 3)) + .contiguous() + .flatten(2) + ) + if fusion_x_local.size(-1) < FT: + fusion_x_local = torch.cat( + [ + fusion_x_local, + torch.zeros( + (FB, FF, FT - fusion_x_local.size(-1)), + device=device, + ), + ], + dim=-1, + ) + else: + fusion_x_local = fusion_x_local[:, :, :FT] + # 1D fusion + new_x = new_x.squeeze(1).permute((0, 2, 1)).contiguous() + new_x[longer_list_idx] = self.fusion_model( + new_x[longer_list_idx], fusion_x_local + ) + x = new_x.permute((0, 2, 1)).contiguous()[:, None, 
:, :] + else: + x = new_x + elif self.fusion_type in ["daf_2d", "aff_2d", "iaff_2d", "channel_map"]: + x = x # no change + + if self.training: + x = self.spec_augmenter(x) + # Mixup on spectrogram + if self.training and mixup_lambda is not None: + x = do_mixup(x, mixup_lambda) + if (self.enable_fusion) and ( + self.fusion_type in ["daf_2d", "aff_2d", "iaff_2d"] + ): + global_x = x[:, 0:1, :, :] + + # global processing + B, C, H, W = global_x.shape + global_x = self.conv_block1(global_x, pool_size=(2, 2), pool_type="avg") + if len(longer_list_idx) > 0: + local_x = x[longer_list_idx, 1:, :, :].contiguous() + TH = global_x.size(-2) + # local processing + B, C, H, W = local_x.shape + local_x = local_x.view(B * C, 1, H, W) + local_x = self.mel_conv2d(local_x) + local_x = local_x.view( + B, C, local_x.size(1), local_x.size(2), local_x.size(3) + ) + local_x = local_x.permute((0, 2, 1, 3, 4)).contiguous().flatten(2, 3) + TB, TC, _, TW = local_x.size() + if local_x.size(-2) < TH: + local_x = torch.cat( + [ + local_x, + torch.zeros( + (TB, TC, TH - local_x.size(-2), TW), + device=global_x.device, + ), + ], + dim=-2, + ) + else: + local_x = local_x[:, :, :TH, :] + + global_x[longer_list_idx] = self.fusion_model( + global_x[longer_list_idx], local_x + ) + x = global_x + else: + x = self.conv_block1(x, pool_size=(2, 2), pool_type="avg") + + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv_block2(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv_block3(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv_block4(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv_block5(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv_block6(x, pool_size=(1, 1), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = torch.mean(x, dim=3) + + latent_x1 = 
F.max_pool1d(x, kernel_size=3, stride=1, padding=1) + latent_x2 = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1) + latent_x = latent_x1 + latent_x2 + latent_x = latent_x.transpose(1, 2) + latent_x = F.relu_(self.fc1(latent_x)) + latent_output = interpolate(latent_x, 32) + + (x1, _) = torch.max(x, dim=2) + x2 = torch.mean(x, dim=2) + x = x1 + x2 + x = F.dropout(x, p=0.5, training=self.training) + x = F.relu_(self.fc1(x)) + embedding = F.dropout(x, p=0.5, training=self.training) + clipwise_output = torch.sigmoid(self.fc_audioset(x)) + + output_dict = { + "clipwise_output": clipwise_output, + "embedding": embedding, + "fine_grained_embedding": latent_output, + } + return output_dict + + +class Cnn6(nn.Module): + def __init__( + self, + sample_rate, + window_size, + hop_size, + mel_bins, + fmin, + fmax, + classes_num, + enable_fusion=False, + fusion_type="None", + ): + + super(Cnn6, self).__init__() + + window = "hann" + center = True + pad_mode = "reflect" + ref = 1.0 + amin = 1e-10 + top_db = None + + self.enable_fusion = enable_fusion + self.fusion_type = fusion_type + + # Spectrogram extractor + self.spectrogram_extractor = Spectrogram( + n_fft=window_size, + hop_length=hop_size, + win_length=window_size, + window=window, + center=center, + pad_mode=pad_mode, + freeze_parameters=True, + ) + + # Logmel feature extractor + self.logmel_extractor = LogmelFilterBank( + sr=sample_rate, + n_fft=window_size, + n_mels=mel_bins, + fmin=fmin, + fmax=fmax, + ref=ref, + amin=amin, + top_db=top_db, + freeze_parameters=True, + ) + + # Spec augmenter + self.spec_augmenter = SpecAugmentation( + time_drop_width=64, + time_stripes_num=2, + freq_drop_width=8, + freq_stripes_num=2, + ) + + self.bn0 = nn.BatchNorm2d(64) + + self.conv_block1 = ConvBlock5x5(in_channels=1, out_channels=64) + self.conv_block2 = ConvBlock5x5(in_channels=64, out_channels=128) + self.conv_block3 = ConvBlock5x5(in_channels=128, out_channels=256) + self.conv_block4 = ConvBlock5x5(in_channels=256, 
out_channels=512) + + self.fc1 = nn.Linear(512, 512, bias=True) + self.fc_audioset = nn.Linear(512, classes_num, bias=True) + + self.init_weight() + + def init_weight(self): + init_bn(self.bn0) + init_layer(self.fc1) + init_layer(self.fc_audioset) + + def forward(self, input, mixup_lambda=None, device=None): + """ + Input: (batch_size, data_length)""" + + x = self.spectrogram_extractor(input) # (batch_size, 1, time_steps, freq_bins) + x = self.logmel_extractor(x) # (batch_size, 1, time_steps, mel_bins) + + x = x.transpose(1, 3) + x = self.bn0(x) + x = x.transpose(1, 3) + + if self.training: + x = self.spec_augmenter(x) + + # Mixup on spectrogram + if self.training and mixup_lambda is not None: + x = do_mixup(x, mixup_lambda) + + x = self.conv_block1(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv_block2(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv_block3(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv_block4(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = torch.mean(x, dim=3) + + latent_x1 = F.max_pool1d(x, kernel_size=3, stride=1, padding=1) + latent_x2 = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1) + latent_x = latent_x1 + latent_x2 + latent_x = latent_x.transpose(1, 2) + latent_x = F.relu_(self.fc1(latent_x)) + latent_output = interpolate(latent_x, 16) + + (x1, _) = torch.max(x, dim=2) + x2 = torch.mean(x, dim=2) + x = x1 + x2 + x = F.dropout(x, p=0.5, training=self.training) + x = F.relu_(self.fc1(x)) + embedding = F.dropout(x, p=0.5, training=self.training) + clipwise_output = torch.sigmoid(self.fc_audioset(x)) + + output_dict = { + "clipwise_output": clipwise_output, + "embedding": embedding, + "fine_grained_embedding": latent_output, + } + + return output_dict + + +class Cnn10(nn.Module): + def __init__( + self, + sample_rate, + 
window_size, + hop_size, + mel_bins, + fmin, + fmax, + classes_num, + enable_fusion=False, + fusion_type="None", + ): + + super(Cnn10, self).__init__() + + window = "hann" + center = True + pad_mode = "reflect" + ref = 1.0 + amin = 1e-10 + top_db = None + + self.enable_fusion = enable_fusion + self.fusion_type = fusion_type + + # Spectrogram extractor + self.spectrogram_extractor = Spectrogram( + n_fft=window_size, + hop_length=hop_size, + win_length=window_size, + window=window, + center=center, + pad_mode=pad_mode, + freeze_parameters=True, + ) + + # Logmel feature extractor + self.logmel_extractor = LogmelFilterBank( + sr=sample_rate, + n_fft=window_size, + n_mels=mel_bins, + fmin=fmin, + fmax=fmax, + ref=ref, + amin=amin, + top_db=top_db, + freeze_parameters=True, + ) + + # Spec augmenter + self.spec_augmenter = SpecAugmentation( + time_drop_width=64, + time_stripes_num=2, + freq_drop_width=8, + freq_stripes_num=2, + ) + + self.bn0 = nn.BatchNorm2d(64) + + self.conv_block1 = ConvBlock(in_channels=1, out_channels=64) + self.conv_block2 = ConvBlock(in_channels=64, out_channels=128) + self.conv_block3 = ConvBlock(in_channels=128, out_channels=256) + self.conv_block4 = ConvBlock(in_channels=256, out_channels=512) + self.conv_block5 = ConvBlock(in_channels=512, out_channels=1024) + + self.fc1 = nn.Linear(1024, 1024, bias=True) + self.fc_audioset = nn.Linear(1024, classes_num, bias=True) + + self.init_weight() + + def init_weight(self): + init_bn(self.bn0) + init_layer(self.fc1) + init_layer(self.fc_audioset) + + def forward(self, input, mixup_lambda=None, device=None): + """ + Input: (batch_size, data_length)""" + + x = self.spectrogram_extractor(input) # (batch_size, 1, time_steps, freq_bins) + x = self.logmel_extractor(x) # (batch_size, 1, time_steps, mel_bins) + + x = x.transpose(1, 3) + x = self.bn0(x) + x = x.transpose(1, 3) + + if self.training: + x = self.spec_augmenter(x) + + # Mixup on spectrogram + if self.training and mixup_lambda is not None: + x = 
do_mixup(x, mixup_lambda) + + x = self.conv_block1(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv_block2(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv_block3(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv_block4(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv_block5(x, pool_size=(2, 2), pool_type="avg") + x = F.dropout(x, p=0.2, training=self.training) + x = torch.mean(x, dim=3) + + latent_x1 = F.max_pool1d(x, kernel_size=3, stride=1, padding=1) + latent_x2 = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1) + latent_x = latent_x1 + latent_x2 + latent_x = latent_x.transpose(1, 2) + latent_x = F.relu_(self.fc1(latent_x)) + latent_output = interpolate(latent_x, 32) + + (x1, _) = torch.max(x, dim=2) + x2 = torch.mean(x, dim=2) + x = x1 + x2 + x = F.dropout(x, p=0.5, training=self.training) + x = F.relu_(self.fc1(x)) + embedding = F.dropout(x, p=0.5, training=self.training) + clipwise_output = torch.sigmoid(self.fc_audioset(x)) + + output_dict = { + "clipwise_output": clipwise_output, + "embedding": embedding, + "fine_grained_embedding": latent_output, + } + + return output_dict + + +def create_pann_model(audio_cfg, enable_fusion=False, fusion_type="None"): + try: + ModelProto = eval(audio_cfg.model_name) + model = ModelProto( + sample_rate=audio_cfg.sample_rate, + window_size=audio_cfg.window_size, + hop_size=audio_cfg.hop_size, + mel_bins=audio_cfg.mel_bins, + fmin=audio_cfg.fmin, + fmax=audio_cfg.fmax, + classes_num=audio_cfg.class_num, + enable_fusion=enable_fusion, + fusion_type=fusion_type, + ) + return model + except: + raise RuntimeError( + f"Import Model for {audio_cfg.model_name} not found, or the audio cfg parameters are not enough." 
+ ) diff --git a/picoaudio/audioldm/clap/open_clip/pretrained.py b/picoaudio/audioldm/clap/open_clip/pretrained.py new file mode 100644 index 0000000000000000000000000000000000000000..8ed2ae1732a28c4e98d1f3412157ef27054e41dc --- /dev/null +++ b/picoaudio/audioldm/clap/open_clip/pretrained.py @@ -0,0 +1,169 @@ +import hashlib +import os +import urllib +import warnings + +from tqdm import tqdm + +CACHE_DIR = os.getenv("AUDIOLDM_CACHE_DIR", "~/.cache") + +_RN50 = dict( + openai="https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt", + yfcc15m="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/rn50-quickgelu-yfcc15m-455df137.pt", + cc12m="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/rn50-quickgelu-cc12m-f000538c.pt", +) + +_RN50_quickgelu = dict( + openai="https://openaipublic.azureedge.net/clip/models/afeb0e10f9e5a86da6080e35cf09123aca3b358a0c3e3b6c78a7b63bc04b6762/RN50.pt", + yfcc15m="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/rn50-quickgelu-yfcc15m-455df137.pt", + cc12m="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/rn50-quickgelu-cc12m-f000538c.pt", +) + +_RN101 = dict( + openai="https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt", + yfcc15m="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/rn101-quickgelu-yfcc15m-3e04b30e.pt", +) + +_RN101_quickgelu = dict( + openai="https://openaipublic.azureedge.net/clip/models/8fa8567bab74a42d41c5915025a8e4538c3bdbe8804a470a72f30b0d94fab599/RN101.pt", + yfcc15m="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/rn101-quickgelu-yfcc15m-3e04b30e.pt", +) + +_RN50x4 = dict( + openai="https://openaipublic.azureedge.net/clip/models/7e526bd135e493cef0776de27d5f42653e6b4c8bf9e0f653bb11773263205fdd/RN50x4.pt", +) + +_RN50x16 = dict( + 
openai="https://openaipublic.azureedge.net/clip/models/52378b407f34354e150460fe41077663dd5b39c54cd0bfd2b27167a4a06ec9aa/RN50x16.pt", +) + +_RN50x64 = dict( + openai="https://openaipublic.azureedge.net/clip/models/be1cfb55d75a9666199fb2206c106743da0f6468c9d327f3e0d0a543a9919d9c/RN50x64.pt", +) + +_VITB32 = dict( + openai="https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + laion400m_e31="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt", + laion400m_e32="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e32-46683a32.pt", + laion400m_avg="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt", +) + +_VITB32_quickgelu = dict( + openai="https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt", + laion400m_e31="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt", + laion400m_e32="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e32-46683a32.pt", + laion400m_avg="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt", +) + +_VITB16 = dict( + openai="https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt", +) + +_VITL14 = dict( + openai="https://openaipublic.azureedge.net/clip/models/b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836/ViT-L-14.pt", +) + +_PRETRAINED = { + "RN50": _RN50, + "RN50-quickgelu": _RN50_quickgelu, + "RN101": _RN101, + "RN101-quickgelu": _RN101_quickgelu, + "RN50x4": _RN50x4, + "RN50x16": _RN50x16, + "ViT-B-32": _VITB32, + "ViT-B-32-quickgelu": 
def _sha256_of_file(path: str, chunk_size: int = 1 << 20) -> str:
    """Return the hex SHA256 digest of the file at *path*, reading it in chunks."""
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def download_pretrained(url: str, root: str = os.path.expanduser(f"{CACHE_DIR}/clip")):
    """Download a pretrained checkpoint into *root* and return its local path.

    Args:
        url: Location of the checkpoint. When the URL contains
            ``"openaipublic"``, its second-to-last path component is used as
            the expected SHA256 digest of the file.
        root: Directory the file is stored in; created if missing.

    Returns:
        Path of the downloaded (or already cached) file.

    Raises:
        RuntimeError: If the target path exists but is not a regular file, or
            if the freshly downloaded file does not match the expected digest.
    """
    # A bare `import urllib` does not guarantee that the `request` submodule
    # has been loaded, so import it explicitly before use.
    import urllib.request

    os.makedirs(root, exist_ok=True)
    filename = os.path.basename(url)

    # Only the OpenAI-hosted URLs carry a checksum; other sources are trusted as-is.
    expected_sha256 = url.split("/")[-2] if "openaipublic" in url else ""

    download_target = os.path.join(root, filename)

    if os.path.exists(download_target) and not os.path.isfile(download_target):
        raise RuntimeError(f"{download_target} exists and is not a regular file")

    if os.path.isfile(download_target):
        # Reuse the cached file when there is nothing to verify or it verifies.
        if not expected_sha256 or _sha256_of_file(download_target) == expected_sha256:
            return download_target
        warnings.warn(
            f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file"
        )

    with urllib.request.urlopen(url) as source, open(download_target, "wb") as output:
        # Servers may omit Content-Length; tqdm accepts total=None in that case.
        content_length = source.info().get("Content-Length")
        with tqdm(
            total=int(content_length) if content_length is not None else None,
            ncols=80,
            unit="iB",
            unit_scale=True,
        ) as loop:
            while True:
                buffer = source.read(8192)
                if not buffer:
                    break

                output.write(buffer)
                loop.update(len(buffer))

    if expected_sha256 and _sha256_of_file(download_target) != expected_sha256:
        raise RuntimeError(
            "Model has been downloaded but the SHA256 checksum does not match"
        )

    return download_target
    def lock(self, unlocked_groups: int = 0, freeze_bn_stats: bool = False):
        """Freeze the trunk's parameters, fully or up to a given layer group.

        Args:
            unlocked_groups (int): leave last n layer groups unlocked (default: 0)
            freeze_bn_stats (bool): if true, also pass the frozen part of the
                trunk through ``freeze_batch_norm_2d``.

        Raises:
            RuntimeError: if a partial freeze is requested but the installed
                timm does not provide ``group_parameters`` / ``group_modules``.
        """
        if not unlocked_groups:
            # lock full model
            for param in self.trunk.parameters():
                param.requires_grad = False
            if freeze_bn_stats:
                freeze_batch_norm_2d(self.trunk)
        else:
            # NOTE: partial freeze requires latest timm (master) branch and is subject to change
            try:
                # FIXME import here until API stable and in an official release
                from timm.models.helpers import group_parameters, group_modules
            except ImportError:
                raise RuntimeError(
                    "Please install latest timm `pip install git+https://github.com/rwightman/pytorch-image-models`"
                )
            matcher = self.trunk.group_matcher()
            # presumably maps layer-group id -> parameter names (the names are
            # resolved through `get_parameter` below) -- verify against timm
            gparams = group_parameters(self.trunk, matcher)
            max_layer_id = max(gparams.keys())
            # highest group id that is still frozen
            max_layer_id = max_layer_id - unlocked_groups
            for group_idx in range(max_layer_id + 1):
                group = gparams[group_idx]
                for param in group:
                    self.trunk.get_parameter(param).requires_grad = False
            if freeze_bn_stats:
                # presumably, with reverse=True this maps module name -> group id;
                # only the names of modules in frozen groups are kept -- TODO confirm
                gmodules = group_modules(self.trunk, matcher, reverse=True)
                gmodules = {k for k, v in gmodules.items() if v <= max_layer_id}
                freeze_batch_norm_2d(self.trunk, gmodules)
def get_pairs(word):
    """Collect every pair of adjacent symbols occurring in ``word``.

    ``word`` is an indexable sequence of symbols (each symbol may itself be a
    variable-length string). The result is a set, so repeated adjacent pairs
    appear once.
    """
    # Index first so an empty word fails with IndexError, exactly as before.
    leading = word[0]
    trailing = word[1:]
    return set(zip((leading, *trailing), trailing))
self.cache: + return self.cache[token] + word = tuple(token[:-1]) + (token[-1] + "",) + pairs = get_pairs(word) + + if not pairs: + return token + "" + + while True: + bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf"))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = " ".join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = "".join(self.byte_encoder[b] for b in token.encode("utf-8")) + bpe_tokens.extend( + self.encoder[bpe_token] for bpe_token in self.bpe(token).split(" ") + ) + return bpe_tokens + + def decode(self, tokens): + text = "".join([self.decoder[token] for token in tokens]) + text = ( + bytearray([self.byte_decoder[c] for c in text]) + .decode("utf-8", errors="replace") + .replace("", " ") + ) + return text + + +_tokenizer = SimpleTokenizer() + + +def tokenize( + texts: Union[str, List[str]], context_length: int = 77 +) -> torch.LongTensor: + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + context_length : int + The context length to use; all CLIP models use 77 as the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + if isinstance(texts, str): + texts = [texts] + + sot_token = 
def image_transform(
    image_size: int,
    is_train: bool,
    mean=(0.48145466, 0.4578275, 0.40821073),
    std=(0.26862954, 0.26130258, 0.27577711),
):
    """Assemble the image preprocessing pipeline for training or evaluation.

    Training uses a random resized crop (scale 0.9-1.0, bicubic); evaluation
    uses a bicubic resize followed by a center crop. Both variants then
    convert to RGB, turn the image into a tensor and normalize it with the
    given per-channel ``mean`` and ``std``.
    """
    if is_train:
        spatial_steps = [
            RandomResizedCrop(
                image_size,
                scale=(0.9, 1.0),
                interpolation=InterpolationMode.BICUBIC,
            )
        ]
    else:
        spatial_steps = [
            Resize(image_size, interpolation=InterpolationMode.BICUBIC),
            CenterCrop(image_size),
        ]

    # Steps shared by both modes, applied after the spatial ones.
    shared_steps = [_convert_to_rgb, ToTensor(), Normalize(mean=mean, std=std)]
    return Compose(spatial_steps + shared_steps)
pathlib + +# TODO: (yusong) this not a good place to store those information and does not scale. Need to be fixed later. +dataset_split = { + "audiocaps": ["train", "valid", "test"], + "audioset": ["balanced_train", "unbalanced_train", "eval"], + "BBCSoundEffects": ["train", "test"], + "Clotho": ["train", "test", "valid"], + "free_to_use_sounds": ["train", "test"], + "paramount_motion": ["train", "test"], + "sonniss_game_effects": ["train", "test"], + "wesoundeffects": ["train", "test"], + "MACS": ["train", "test"], + "freesound": ["train", "test"], + "FSD50K": ["train", "test", "valid"], + "fsd50k_class_label": ["train", "test", "valid"], + "esc50": ["train", "test"], + "audiostock": ["train", "test"], + "freesound_no_overlap_noesc50": ["train", "test"], + "epidemic_sound_effects": ["train", "test"], + "VGGSound": ["train", "test"], + "urbansound8k_class_label": ["train", "test"], + "audioset_t5": ["balanced_train", "unbalanced_train", "eval"], + "epidemic_sound_effects_t5": ["train", "test"], + "WavText5K": ["train", "test"], + "esc50_no_overlap": ["train", "test"], + "usd8k_no_overlap": ["train", "test"], + "fsd50k_200_class_label": ["train", "test", "valid"], +} + + +def freeze_batch_norm_2d(module, module_match={}, name=""): + """ + Converts all `BatchNorm2d` and `SyncBatchNorm` layers of provided module into `FrozenBatchNorm2d`. If `module` is + itself an instance of either `BatchNorm2d` or `SyncBatchNorm`, it is converted into `FrozenBatchNorm2d` and + returned. Otherwise, the module is walked recursively and submodules are converted in place. + + Args: + module (torch.nn.Module): Any PyTorch module. 
def exist(dataset_name, dataset_type):
    """
    Check if dataset exists

    Args:
        dataset_name: key looked up in the module-level ``dataset_split`` table.
        dataset_type: split name (e.g. "train") to look for under that dataset.

    Returns:
        True if ``dataset_type`` is listed for ``dataset_name``; False
        otherwise, including when ``dataset_name`` is not in the table at all
        (previously that case raised ``KeyError``).
    """
    return dataset_type in dataset_split.get(dataset_name, ())
def get_tar_path_from_txts(txt_path, islocal, proportion=1):
    """
    Get tar path from txt path

    Args:
        txt_path: path of a text file with one tar entry per line, or a
            list/tuple of such paths (results are concatenated in order).
        islocal: if true, the ``pipe:aws s3 cp s3://s-laion-audio/`` prefix of
            each entry is rewritten to ``/mnt/audio_clip/``; otherwise
            ``.tar`` is rewritten to ``.tar -``.
        proportion: when not 1, a random sample of
            ``int(proportion * number_of_entries)`` entries is returned
            (sampled per file).

    Returns:
        List of tar path strings.

    Raises:
        TypeError: if ``txt_path`` is neither a path nor a list/tuple of paths.
    """
    if isinstance(txt_path, (list, tuple)):
        # extend() keeps this linear; sum(list_of_lists, []) is quadratic.
        merged = []
        for single_path in txt_path:
            merged.extend(
                get_tar_path_from_txts(
                    single_path, islocal=islocal, proportion=proportion
                )
            )
        return merged

    if not isinstance(txt_path, (str, os.PathLike)):
        # Previously this fell through and failed with an unbound local name.
        raise TypeError(
            f"txt_path must be a path or a list/tuple of paths, got {type(txt_path).__name__}"
        )

    with open(txt_path) as f:
        entries = [line.split("\n")[0] for line in f]

    if islocal:
        lines = [
            entry.replace("pipe:aws s3 cp s3://s-laion-audio/", "/mnt/audio_clip/")
            for entry in entries
        ]
    else:
        lines = [entry.replace(".tar", ".tar -") for entry in entries]

    if proportion != 1:
        print("Sampling tars with proportion of {}".format(proportion))
        lines = random.sample(lines, int(proportion * len(lines)))
    return lines
def pad_framewise_output(framewise_output, frames_num):
    """Pad framewise_output to the same length as input frames. The pad value
    is the same as the value of the last frame.

    Args:
      framewise_output: (batch_size, frames_num, classes_num)
      frames_num: int, target number of frames after padding

    Returns:
      output: (batch_size, frames_num, classes_num)
    """
    # Padding tensor: the last frame repeated once per missing frame.
    pad = framewise_output[:, -1:, :].repeat(
        1, frames_num - framewise_output.shape[1], 1
    )

    # (batch_size, frames_num, classes_num)
    output = torch.cat((framewise_output, pad), dim=1)

    # The padded tensor was previously computed but never returned.
    return output
def save_p(obj, filename):
    """Pickle ``obj`` to ``filename`` and verify that it reads back unchanged.

    The object is written with the highest pickle protocol, loaded again and
    compared with the original via ``DeepDiff`` (string case ignored).

    Raises:
        AssertionError: if the reloaded object differs from ``obj``.
    """
    import pickle

    try:
        from deepdiff import DeepDiff
    except ImportError:
        # Only a missing package should trigger the install; a bare `except`
        # would also swallow KeyboardInterrupt/SystemExit.
        # NOTE(review): installing a dependency at call time is a side effect
        # worth removing; kept so existing environments behave as before.
        os.system("pip install deepdiff")
        from deepdiff import DeepDiff
    with open(filename, "wb") as file:
        pickle.dump(obj, file, protocol=pickle.HIGHEST_PROTOCOL)  # highest protocol
    with open(filename, "rb") as file:
        z = pickle.load(file)
    # Raised explicitly (same exception type as before) so the round-trip
    # check is not stripped when Python runs with -O.
    if DeepDiff(obj, z, ignore_string_case=True) != {}:
        raise AssertionError("there is something wrong with the saving process")
    return
def get_optimizer(params, lr, betas, eps, momentum, optimizer_name):
    """Construct the optimizer selected by ``optimizer_name`` (case-insensitive).

    ``"adamw"`` and ``"adam"`` receive ``lr``, ``betas`` and ``eps``;
    ``"sgd"`` receives ``lr`` and ``momentum``.

    Raises:
        ValueError: for any other optimizer name.
    """
    kind = optimizer_name.lower()

    if kind == "sgd":
        return optim.SGD(params, lr=lr, momentum=momentum)

    if kind in ("adamw", "adam"):
        adam_cls = optim.AdamW if kind == "adamw" else optim.Adam
        return adam_cls(params, lr=lr, betas=betas, eps=eps)

    raise ValueError("optimizer name is not correct")
def float32_to_int16(x):
    """Clip ``x`` to [-1.0, 1.0], scale by 32767 and cast to ``np.int16``."""
    bounded = np.clip(x, -1.0, 1.0)
    scaled = bounded * 32767.0
    return scaled.astype(np.int16)
ToyDataset(Dataset): +# def __init__(self, index_path, ipc, config, eval_mode=False): +# """Toy Dataset for testing the audioset input with text labels +# Parameters +# ---------- +# index_path: str +# the link to the h5 file of each audio +# idc: str +# the link to the npy file, the number of samples in each class +# config: dict +# the audio cfg file +# eval_model (bool): to indicate if the dataset is a testing dataset +# """ +# self.audio_cfg = config["audio_cfg"] +# self.text_cfg = config["text_cfg"] +# self.fp = h5py.File(index_path, "r") +# self.ipc = np.load(ipc, allow_pickle=True) +# self.total_size = len(self.fp["audio_name"]) +# self.classes_num = self.audio_cfg["class_num"] +# self.eval_mode = eval_mode + +# if not eval_mode: +# self.generate_queue() +# else: +# self.queue = [] +# for i in range(self.total_size): +# target = self.fp["target"][i] +# if np.sum(target) > 0: +# self.queue.append(i) +# self.total_size = len(self.queue) +# logging.info("total dataset size: %d" % (self.total_size)) +# logging.info("class num: %d" % (self.classes_num)) + +# def time_shifting(self, x): +# frame_num = len(x) +# shift_len = random.randint(0, frame_num - 1) +# new_sample = np.concatenate([x[shift_len:], x[:shift_len]], axis=0) +# return new_sample + +# def generate_queue(self): +# self.queue = [] +# while len(self.queue) < self.total_size: +# class_set = [*range(self.classes_num)] +# random.shuffle(class_set) +# self.queue += [ +# self.ipc[d][random.randint(0, len(self.ipc[d]) - 1)] for d in class_set +# ] +# self.queue = self.queue[: self.total_size] + +# logging.info("queue regenerated:%s" % (self.queue[-5:])) + +# def crop_wav(self, x): +# crop_size = self.audio_cfg["crop_size"] +# crop_pos = random.randint(0, len(x) - crop_size - 1) +# return x[crop_pos : crop_pos + crop_size] + +# def prompt_text(self, target): +# events = _AUDIOSET_MAP[np.where(target > 0)] +# event_text = "The sounds of " + ", ".join(events[:-1]) + " and " + events[-1] +# text = 
tokenize(event_text)[0] +# return text + +# def __getitem__(self, index): +# """Load waveform, text, and target of an audio clip + +# Parameters +# ---------- +# index: int +# the index number +# Return +# ------ +# output: dict { +# "hdf5_path": str, +# "index_in_hdf5": int, +# "audio_name": str, +# "waveform": list (audio_length,), +# "target": list (class_num, ), +# "text": torch.tensor (context_length,) +# } +# the output dictionary +# """ +# s_index = self.queue[index] + +# audio_name = self.fp["audio_name"][s_index].decode() +# # Hardcode here CHANGE +# hdf5_path = ( +# self.fp["hdf5_path"][s_index] +# .decode() +# .replace( +# "../workspace", +# "/home/la/kechen/Research/ke_zsasp/workspace", +# ) +# ) +# r_idx = self.fp["index_in_hdf5"][s_index] +# target = self.fp["target"][s_index].astype(np.float32) +# text = self.prompt_text(target) +# with h5py.File(hdf5_path, "r") as f: +# waveform = int16_to_float32(f["waveform"][r_idx])[ +# : self.audio_cfg["clip_samples"] +# ] +# assert ( +# len(waveform) == self.audio_cfg["clip_samples"] +# ), "The sample length is not match" +# # Time shift +# # if (self.config.enable_time_shift) and (not self.eval_mode): +# # waveform = self.time_shifting(waveform) +# # # Label Enhance +# # if (self.config.crop_size is not None) and (not self.eval_mode): +# # waveform = self.crop_wav(waveform) +# # # the label enhance rate is fixed 0.5 +# # if (self.config.enable_label_enhance) and (not self.eval_mode) and random.random() < 0.5: +# # kidx = np.where(target)[0] +# # for k in kidx: +# # for add_key in self.class_map[k][1]: +# # target[add_key] = 1.0 +# # if len(self.class_map[k][2]) > 0: +# # add_key = random.choice(self.class_map[k][2]) +# # target[add_key] = 1.0 + +# # missing the text input +# mel_spec = get_mel(torch.from_numpy(waveform), self.audio_cfg)[None, :, :] +# mel_spec = ( +# torch.cat( +# [mel_spec, mel_spec.clone(), mel_spec.clone(), mel_spec.clone()], dim=0 +# ) +# .cpu() +# .numpy() +# ) +# longer = 
class CsvDataset(Dataset):
    """Image/caption pairs listed in a delimited text file.

    The file is read with pandas; one column supplies the image paths and
    another the captions. Items are produced as
    ``(transforms(image), tokenized_caption)``.
    """

    def __init__(self, input_filename, transforms, img_key, caption_key, sep="\t"):
        logging.debug(f"Loading csv data from {input_filename}.")
        table = pd.read_csv(input_filename, sep=sep)

        # Keep plain Python lists so lookups in __getitem__ stay cheap.
        self.images = table[img_key].tolist()
        self.captions = table[caption_key].tolist()
        self.transforms = transforms
        logging.debug("Done loading data.")

    def __len__(self):
        return len(self.captions)

    def __getitem__(self, idx):
        picture = Image.open(str(self.images[idx]))
        images = self.transforms(picture)
        caption = str(self.captions[idx])
        texts = tokenize([caption])[0]
        return images, texts
os.path.exists(sizes_filename): + sizes = json.load(open(sizes_filename, "r")) + total_size = sum( + [int(sizes[os.path.basename(shard)]) for shard in shards_list] + ) + elif os.path.exists(len_filename): + # FIXME this used to be eval(open(...)) but that seemed rather unsafe + total_size = ast.literal_eval(open(len_filename, "r").read()) + else: + raise Exception( + "Cannot find sizes file for dataset. Please specify the path to the file." + ) + # total_size = None # num samples undefined + # some common dataset sizes (at time of authors last download) + # cc3m-train: 2905954 + # cc12m: 10968539 + # LAION-400m: 407332084 + num_shards = len(shards_list) + if isinstance(shards, list): + return sum(size_list), len(shards) + else: + return total_size, num_shards + + +def get_imagenet(args, preprocess_fns, split): + assert split in ["train", "val", "v2"] + is_train = split == "train" + preprocess_train, preprocess_val = preprocess_fns + + if split == "v2": + from imagenetv2_pytorch import ImageNetV2Dataset + + dataset = ImageNetV2Dataset(location=args.imagenet_v2, transform=preprocess_val) + else: + if is_train: + data_path = args.imagenet_train + preprocess_fn = preprocess_train + else: + data_path = args.imagenet_val + preprocess_fn = preprocess_val + assert data_path + + dataset = datasets.ImageFolder(data_path, transform=preprocess_fn) + + if is_train: + idxs = np.zeros(len(dataset.targets)) + target_array = np.array(dataset.targets) + k = 50 + for c in range(1000): + m = target_array == c + n = len(idxs[m]) + arr = np.zeros(n) + arr[:k] = 1 + np.random.shuffle(arr) + idxs[m] = arr + + idxs = idxs.astype("int") + sampler = SubsetRandomSampler(np.where(idxs)[0]) + else: + sampler = None + + dataloader = torch.utils.data.DataLoader( + dataset, + batch_size=args.batch_size, + num_workers=args.workers, + sampler=sampler, + ) + + return DataInfo(dataloader, sampler) + + +def count_samples(dataloader): + os.environ["WDS_EPOCH"] = "0" + n_elements, n_batches = 0, 0 + for 
def sample_prop(sizefile, inputs, proportion, is_local=True):
    """
    Sample a proportion of the data.

    sizefile: path (or, when not is_local, a URL fetched with wget into
        ./sizes.json) of a JSON file mapping shard file names to sample counts.
    inputs: list of shard paths; they are keyed by file name, so a later path
        with the same file name replaces an earlier one.
    proportion: fraction of the distinct shard file names to keep.
    is_local: whether sizefile can be opened directly.

    Returns a tuple (total sample count of the selection, number of selected
    shards, list of selected shard paths, dict file name -> sample count).
    """
    # file name -> directory
    file_path_dict = {}
    for path in inputs:
        directory, filename = os.path.split(path)
        file_path_dict[filename] = directory

    if not is_local:
        if os.path.exists("sizes.json"):
            os.remove("sizes.json")
        wget.download(sizefile, "sizes.json")
        sizefile = "sizes.json"
    with open(sizefile, "r", encoding="UTF-8") as f:
        load_dict = json.load(f)

    n_selected = int(len(file_path_dict) * proportion)
    # random.sample needs a sequence; a dict view raises TypeError on Python >= 3.11.
    subkeys = random.sample(list(file_path_dict), n_selected)

    sampled_size_dict = {k: load_dict[k] for k in subkeys}
    sampled_filepath_dict = {k: file_path_dict[k] for k in subkeys}
    return (
        sum(sampled_size_dict.values()),
        n_selected,
        [os.path.join(v, k) for k, v in sampled_filepath_dict.items()],
        sampled_size_dict,
    )
def get_audio_features(
    sample, audio_data, max_len, data_truncating, data_filling, audio_cfg
):
    """
    Fit a waveform to max_len and store the derived features in sample.

    sample: dict holding the data of the current sample; updated in place and returned.
    audio_data: 1-D tensor with the audio samples.
    max_len: target length of the waveform.
    data_truncating: "rand_trunc" or "fusion"; how over-long audio is handled.
    data_filling: "repeatpad", "pad" or "repeat"; how too-short audio is filled.
    audio_cfg: audio configuration dict (model_cfg['audio_cfg']).

    Writes sample["waveform"], sample["longer"] and, in fusion mode,
    sample["mel_fusion"] (a stack of four mel spectrograms).
    """
    with torch.no_grad():
        n_samples = len(audio_data)
        if n_samples > max_len:
            if data_truncating == "rand_trunc":
                longer = torch.tensor([True])
            elif data_truncating == "fusion":
                mel = get_mel(audio_data, audio_cfg)
                # the +1 comes from how the spectrogram frames are computed
                chunk_frames = max_len // audio_cfg["hop_size"] + 1
                total_frames = mel.shape[0]
                if chunk_frames == total_frames:
                    # corner case: audio longer than max_len but shorter than
                    # max_len + hop_size -> use the whole spectrogram four times
                    sample["mel_fusion"] = torch.stack([mel, mel, mel, mel], dim=0)
                    longer = torch.tensor([False])
                else:
                    # candidate start frames, split into front / middle / back
                    ranges = np.array_split(
                        list(range(0, total_frames - chunk_frames + 1)), 3
                    )
                    for part in (1, 2):
                        if len(ranges[part]) == 0:
                            # too short for this part: fall back to the first chunk
                            ranges[part] = [0]
                    # one random start per part, drawn in front/middle/back order
                    starts = [np.random.choice(candidates) for candidates in ranges]
                    chunks = [
                        mel[start : start + chunk_frames, :] for start in starts
                    ]
                    # globally shrunk view of the complete spectrogram
                    mel_shrink = torchvision.transforms.Resize(
                        size=[chunk_frames, 64]
                    )(mel[None])[0]
                    sample["mel_fusion"] = torch.stack(
                        [chunks[0], chunks[1], chunks[2], mel_shrink], dim=0
                    )
                    longer = torch.tensor([True])
            else:
                raise NotImplementedError(
                    f"data_truncating {data_truncating} not implemented"
                )
            # random crop to max_len (for compatibility)
            overflow = len(audio_data) - max_len
            idx = np.random.randint(0, overflow + 1)
            audio_data = audio_data[idx : idx + max_len]
        else:
            if n_samples < max_len:  # nothing to fill when the length already matches
                if data_filling in ("repeatpad", "pad"):
                    if data_filling == "repeatpad":
                        # repeat whole copies first, zero-pad the remainder below
                        audio_data = audio_data.repeat(int(max_len / n_samples))
                    audio_data = F.pad(
                        audio_data,
                        (0, max_len - len(audio_data)),
                        mode="constant",
                        value=0,
                    )
                elif data_filling == "repeat":
                    copies = int(max_len / n_samples) + 1
                    audio_data = audio_data.repeat(copies)[:max_len]
                else:
                    raise NotImplementedError(
                        f"data_filling {data_filling} not implemented"
                    )
            if data_truncating == "fusion":
                mel = get_mel(audio_data, audio_cfg)
                sample["mel_fusion"] = torch.stack([mel, mel, mel, mel], dim=0)
            longer = torch.tensor([False])

    sample["longer"] = longer
    sample["waveform"] = audio_data

    return sample
def collate_fn(batch):
    """
    Collate function for wdsdataloader.
    batch: a list of dict, each dict is a sample

    The type of each field in the first sample decides how that field is merged:
    dicts (e.g. tokenizer output) are merged per sub-key with torch.vstack,
    tensors are stacked, numpy arrays are stacked and converted to a tensor,
    anything else is gathered into a plain list.
    """
    collated = {}
    for field, exemplar in batch[0].items():
        column = [item[field] for item in batch]
        if isinstance(exemplar, dict):  # deal with bert tokenizer output
            collated[field] = {
                sub_key: torch.vstack([entry[sub_key] for entry in column])
                for sub_key in exemplar.keys()
            }
        elif isinstance(exemplar, torch.Tensor):
            collated[field] = torch.stack(column)
        elif isinstance(exemplar, np.ndarray):
            collated[field] = torch.tensor(np.stack(column))
        else:
            collated[field] = column
    return collated
+ ) + else: + num_samples = ( + args.val_num_samples or 0 + ) # eval will just exhaust the iterator if not specified + + pipeline = [wds.SimpleShardList(input_shards)] + # at this point we have an iterator over all the shards + # TODO: (yusong): add a if statement of distributed. If not, we don't need to split_by_node + if is_train or args.parallel_eval: + pipeline.extend( + [ + wds.detshuffle( + bufsize=_SHARD_SHUFFLE_SIZE, + initial=_SHARD_SHUFFLE_INITIAL, + seed=args.seed, + ), + wds.split_by_node, + wds.split_by_worker, + # at this point, we have an iterator over the shards assigned to each worker at each node + wds.tarfile_to_samples(handler=log_and_continue), + wds.shuffle( + bufsize=_SAMPLE_SHUFFLE_SIZE, + initial=_SAMPLE_SHUFFLE_INITIAL, + rng=random.Random(args.seed), + ), + # wds.repeatedly, # FIXME determine if this is beneficial + ] + ) + else: + pipeline.extend( + [ + wds.split_by_worker, + # at this point, we have an iterator over the shards assigned to each worker + wds.tarfile_to_samples(handler=log_and_continue), + ] + ) + pipeline.append( + wds.map( + partial( + preprocess, + audio_ext=audio_ext, + text_ext=text_ext, + max_len=max_len, + audio_cfg=model_cfg["audio_cfg"], + class_index_dict=copy.deepcopy(args.class_index_dict), + data_filling=args.data_filling, + data_truncating=args.data_truncating, + text_augment_selection=args.text_augment_selection, + ) + ), + ) + + pipeline.append( + wds.batched( + args.batch_size, + partial=not (is_train or args.parallel_eval), + collation_fn=collate_fn, + ) + ) + + dataset = wds.DataPipeline(*pipeline) + if is_train or args.parallel_eval: + # (yusong): Currently parallel evaluation will be not precise as we are repeat the last few samples. + # (yusong): See comments below. 
def wds_batch_list2dict(
    batch,
    keys=[
        "__url__",
        "__key__",
        "waveform",
        "text",
        "raw_text",
        "audio_name",
        "text_name",
        "audio_orig_sr",
    ],
):
    """
    Pair every element of batch with the field name at the same position in
    keys and return the result as a dictionary.

    batch and keys must have the same length (checked with an assert).
    """
    n_fields = len(batch)
    assert (
        len(keys) == n_fields
    ), "batch must have same number of keys as keys argument"
    # the default key list is only read, never modified
    return dict(zip(keys, batch))
def get_data(args, model_cfg):
    """Build the train/val ``DataInfo`` objects selected by ``args``.

    Loads the class-label index into ``args.class_index_dict``. For the
    "webdataset" type, it also resolves ``args.train_data`` / ``args.val_data``
    (and ``args.val_dataset_names``) from the dataset names. Returns a dict
    that has a "train" and/or "val" entry for each non-empty data spec.
    """
    args.class_index_dict = load_class_label(args.class_label_path)

    if args.datasetinfos is None:
        args.datasetinfos = ["train", "unbalanced_train", "balanced_train"]

    if args.dataset_type == "webdataset":
        use_local = not args.remotedata
        args.train_data = get_tar_path_from_dataset_name(
            args.datasetnames,
            args.datasetinfos,
            islocal=use_local,
            proportion=args.dataset_proportion,
            dataset_path=args.datasetpath,
            full_dataset=args.full_train_dataset,
        )

        # normalise the optional lists only after the train paths were resolved
        if args.full_train_dataset is None:
            args.full_train_dataset = []
        if args.exclude_eval_dataset is None:
            args.exclude_eval_dataset = []
        excluded_eval_datasets = args.full_train_dataset + args.exclude_eval_dataset

        if excluded_eval_datasets:
            val_dataset_names = [
                n for n in args.datasetnames if n not in excluded_eval_datasets
            ]
        else:
            val_dataset_names = args.datasetnames
        args.val_dataset_names = val_dataset_names
        args.val_data = get_tar_path_from_dataset_name(
            val_dataset_names,
            ["valid", "test", "eval"],
            islocal=use_local,
            proportion=1,
            dataset_path=args.datasetpath,
            full_dataset=None,
        )

    data = {}
    if args.train_data:
        build_train = get_dataset_fn(args.train_data, args.dataset_type)
        data["train"] = build_train(args, model_cfg, is_train=True)

    if args.val_data:
        build_val = get_dataset_fn(args.val_data, args.dataset_type)
        data["val"] = build_val(args, model_cfg, is_train=False)

    return data
def world_info_from_env():
    """Read ``(local_rank, global_rank, world_size)`` from the environment.

    For each value the launcher-specific variables are tried in a fixed order
    and the first one that is set wins; the defaults are 0, 0 and 1.
    """

    def _first_int(names, default):
        # value of the first variable in names that is set, else default
        for name in names:
            if name in os.environ:
                return int(os.environ[name])
        return default

    local_rank = _first_int(
        (
            "SLURM_LOCALID",
            "MPI_LOCALRANKID",
            "OMPI_COMM_WORLD_LOCAL_RANK",
            "LOCAL_RANK",
        ),
        0,
    )
    global_rank = _first_int(
        ("SLURM_PROCID", "PMI_RANK", "OMPI_COMM_WORLD_RANK", "RANK"), 0
    )
    world_size = _first_int(
        ("SLURM_NTASKS", "PMI_SIZE", "OMPI_COMM_WORLD_SIZE", "WORLD_SIZE"), 1
    )

    return local_rank, global_rank, world_size
+ args.distributed = False + args.world_size = 1 + args.rank = 0 # global rank + args.local_rank = 0 + if args.horovod: + assert hvd is not None, "Horovod is not installed" + hvd.init() + world_size = int(os.environ["OMPI_COMM_WORLD_SIZE"]) + world_rank = int(os.environ["OMPI_COMM_WORLD_RANK"]) + local_rank = int(os.environ["OMPI_COMM_WORLD_LOCAL_RANK"]) + args.local_rank = local_rank + args.rank = world_rank + args.world_size = world_size + # args.local_rank = int(hvd.local_rank()) + # args.rank = hvd.rank() + # args.world_size = hvd.size() + args.distributed = True + os.environ["LOCAL_RANK"] = str(args.local_rank) + os.environ["RANK"] = str(args.rank) + os.environ["WORLD_SIZE"] = str(args.world_size) + print( + f"Distributed training: local_rank={args.local_rank}, " + f"rank={args.rank}, world_size={args.world_size}, " + f"hostname={socket.gethostname()}, pid={os.getpid()}" + ) + elif is_using_distributed(): + if "SLURM_PROCID" in os.environ: + # DDP via SLURM + args.local_rank, args.rank, args.world_size = world_info_from_env() + # SLURM var -> torch.distributed vars in case needed + os.environ["LOCAL_RANK"] = str(args.local_rank) + os.environ["RANK"] = str(args.rank) + os.environ["WORLD_SIZE"] = str(args.world_size) + torch.distributed.init_process_group( + backend=args.dist_backend, + init_method=args.dist_url, + world_size=args.world_size, + rank=args.rank, + ) + elif "OMPI_COMM_WORLD_SIZE" in os.environ: # using Summit cluster + world_size = int(os.environ["OMPI_COMM_WORLD_SIZE"]) + world_rank = int(os.environ["OMPI_COMM_WORLD_RANK"]) + local_rank = int(os.environ["OMPI_COMM_WORLD_LOCAL_RANK"]) + args.local_rank = local_rank + args.rank = world_rank + args.world_size = world_size + torch.distributed.init_process_group( + backend=args.dist_backend, + init_method=args.dist_url, + world_size=args.world_size, + rank=args.rank, + ) + else: + # DDP via torchrun, torch.distributed.launch + args.local_rank, _, _ = world_info_from_env() + 
torch.distributed.init_process_group( + backend=args.dist_backend, init_method=args.dist_url + ) + args.world_size = torch.distributed.get_world_size() + args.rank = torch.distributed.get_rank() + args.distributed = True + print( + f"Distributed training: local_rank={args.local_rank}, " + f"rank={args.rank}, world_size={args.world_size}, " + f"hostname={socket.gethostname()}, pid={os.getpid()}" + ) + + if torch.cuda.is_available(): + if args.distributed and not args.no_set_device_rank: + device = "cuda:%d" % args.local_rank + else: + device = "cuda:0" + torch.cuda.set_device(device) + else: + device = "cpu" + args.device = device + device = torch.device(device) + return device diff --git a/picoaudio/audioldm/clap/training/imagenet_zeroshot_data.py b/picoaudio/audioldm/clap/training/imagenet_zeroshot_data.py new file mode 100644 index 0000000000000000000000000000000000000000..d32e55328d6799ccb8d61625f43abb80a33d6c17 --- /dev/null +++ b/picoaudio/audioldm/clap/training/imagenet_zeroshot_data.py @@ -0,0 +1,1088 @@ +# NOTE: This script is currently not supported for CLAP. 
+ +imagenet_classnames = [ + "tench", + "goldfish", + "great white shark", + "tiger shark", + "hammerhead shark", + "electric ray", + "stingray", + "rooster", + "hen", + "ostrich", + "brambling", + "goldfinch", + "house finch", + "junco", + "indigo bunting", + "American robin", + "bulbul", + "jay", + "magpie", + "chickadee", + "American dipper", + "kite (bird of prey)", + "bald eagle", + "vulture", + "great grey owl", + "fire salamander", + "smooth newt", + "newt", + "spotted salamander", + "axolotl", + "American bullfrog", + "tree frog", + "tailed frog", + "loggerhead sea turtle", + "leatherback sea turtle", + "mud turtle", + "terrapin", + "box turtle", + "banded gecko", + "green iguana", + "Carolina anole", + "desert grassland whiptail lizard", + "agama", + "frilled-necked lizard", + "alligator lizard", + "Gila monster", + "European green lizard", + "chameleon", + "Komodo dragon", + "Nile crocodile", + "American alligator", + "triceratops", + "worm snake", + "ring-necked snake", + "eastern hog-nosed snake", + "smooth green snake", + "kingsnake", + "garter snake", + "water snake", + "vine snake", + "night snake", + "boa constrictor", + "African rock python", + "Indian cobra", + "green mamba", + "sea snake", + "Saharan horned viper", + "eastern diamondback rattlesnake", + "sidewinder rattlesnake", + "trilobite", + "harvestman", + "scorpion", + "yellow garden spider", + "barn spider", + "European garden spider", + "southern black widow", + "tarantula", + "wolf spider", + "tick", + "centipede", + "black grouse", + "ptarmigan", + "ruffed grouse", + "prairie grouse", + "peafowl", + "quail", + "partridge", + "african grey parrot", + "macaw", + "sulphur-crested cockatoo", + "lorikeet", + "coucal", + "bee eater", + "hornbill", + "hummingbird", + "jacamar", + "toucan", + "duck", + "red-breasted merganser", + "goose", + "black swan", + "tusker", + "echidna", + "platypus", + "wallaby", + "koala", + "wombat", + "jellyfish", + "sea anemone", + "brain coral", + "flatworm", + 
"nematode", + "conch", + "snail", + "slug", + "sea slug", + "chiton", + "chambered nautilus", + "Dungeness crab", + "rock crab", + "fiddler crab", + "red king crab", + "American lobster", + "spiny lobster", + "crayfish", + "hermit crab", + "isopod", + "white stork", + "black stork", + "spoonbill", + "flamingo", + "little blue heron", + "great egret", + "bittern bird", + "crane bird", + "limpkin", + "common gallinule", + "American coot", + "bustard", + "ruddy turnstone", + "dunlin", + "common redshank", + "dowitcher", + "oystercatcher", + "pelican", + "king penguin", + "albatross", + "grey whale", + "killer whale", + "dugong", + "sea lion", + "Chihuahua", + "Japanese Chin", + "Maltese", + "Pekingese", + "Shih Tzu", + "King Charles Spaniel", + "Papillon", + "toy terrier", + "Rhodesian Ridgeback", + "Afghan Hound", + "Basset Hound", + "Beagle", + "Bloodhound", + "Bluetick Coonhound", + "Black and Tan Coonhound", + "Treeing Walker Coonhound", + "English foxhound", + "Redbone Coonhound", + "borzoi", + "Irish Wolfhound", + "Italian Greyhound", + "Whippet", + "Ibizan Hound", + "Norwegian Elkhound", + "Otterhound", + "Saluki", + "Scottish Deerhound", + "Weimaraner", + "Staffordshire Bull Terrier", + "American Staffordshire Terrier", + "Bedlington Terrier", + "Border Terrier", + "Kerry Blue Terrier", + "Irish Terrier", + "Norfolk Terrier", + "Norwich Terrier", + "Yorkshire Terrier", + "Wire Fox Terrier", + "Lakeland Terrier", + "Sealyham Terrier", + "Airedale Terrier", + "Cairn Terrier", + "Australian Terrier", + "Dandie Dinmont Terrier", + "Boston Terrier", + "Miniature Schnauzer", + "Giant Schnauzer", + "Standard Schnauzer", + "Scottish Terrier", + "Tibetan Terrier", + "Australian Silky Terrier", + "Soft-coated Wheaten Terrier", + "West Highland White Terrier", + "Lhasa Apso", + "Flat-Coated Retriever", + "Curly-coated Retriever", + "Golden Retriever", + "Labrador Retriever", + "Chesapeake Bay Retriever", + "German Shorthaired Pointer", + "Vizsla", + "English Setter", + 
"Irish Setter", + "Gordon Setter", + "Brittany dog", + "Clumber Spaniel", + "English Springer Spaniel", + "Welsh Springer Spaniel", + "Cocker Spaniel", + "Sussex Spaniel", + "Irish Water Spaniel", + "Kuvasz", + "Schipperke", + "Groenendael dog", + "Malinois", + "Briard", + "Australian Kelpie", + "Komondor", + "Old English Sheepdog", + "Shetland Sheepdog", + "collie", + "Border Collie", + "Bouvier des Flandres dog", + "Rottweiler", + "German Shepherd Dog", + "Dobermann", + "Miniature Pinscher", + "Greater Swiss Mountain Dog", + "Bernese Mountain Dog", + "Appenzeller Sennenhund", + "Entlebucher Sennenhund", + "Boxer", + "Bullmastiff", + "Tibetan Mastiff", + "French Bulldog", + "Great Dane", + "St. Bernard", + "husky", + "Alaskan Malamute", + "Siberian Husky", + "Dalmatian", + "Affenpinscher", + "Basenji", + "pug", + "Leonberger", + "Newfoundland dog", + "Great Pyrenees dog", + "Samoyed", + "Pomeranian", + "Chow Chow", + "Keeshond", + "brussels griffon", + "Pembroke Welsh Corgi", + "Cardigan Welsh Corgi", + "Toy Poodle", + "Miniature Poodle", + "Standard Poodle", + "Mexican hairless dog (xoloitzcuintli)", + "grey wolf", + "Alaskan tundra wolf", + "red wolf or maned wolf", + "coyote", + "dingo", + "dhole", + "African wild dog", + "hyena", + "red fox", + "kit fox", + "Arctic fox", + "grey fox", + "tabby cat", + "tiger cat", + "Persian cat", + "Siamese cat", + "Egyptian Mau", + "cougar", + "lynx", + "leopard", + "snow leopard", + "jaguar", + "lion", + "tiger", + "cheetah", + "brown bear", + "American black bear", + "polar bear", + "sloth bear", + "mongoose", + "meerkat", + "tiger beetle", + "ladybug", + "ground beetle", + "longhorn beetle", + "leaf beetle", + "dung beetle", + "rhinoceros beetle", + "weevil", + "fly", + "bee", + "ant", + "grasshopper", + "cricket insect", + "stick insect", + "cockroach", + "praying mantis", + "cicada", + "leafhopper", + "lacewing", + "dragonfly", + "damselfly", + "red admiral butterfly", + "ringlet butterfly", + "monarch butterfly", + 
"small white butterfly", + "sulphur butterfly", + "gossamer-winged butterfly", + "starfish", + "sea urchin", + "sea cucumber", + "cottontail rabbit", + "hare", + "Angora rabbit", + "hamster", + "porcupine", + "fox squirrel", + "marmot", + "beaver", + "guinea pig", + "common sorrel horse", + "zebra", + "pig", + "wild boar", + "warthog", + "hippopotamus", + "ox", + "water buffalo", + "bison", + "ram (adult male sheep)", + "bighorn sheep", + "Alpine ibex", + "hartebeest", + "impala (antelope)", + "gazelle", + "arabian camel", + "llama", + "weasel", + "mink", + "European polecat", + "black-footed ferret", + "otter", + "skunk", + "badger", + "armadillo", + "three-toed sloth", + "orangutan", + "gorilla", + "chimpanzee", + "gibbon", + "siamang", + "guenon", + "patas monkey", + "baboon", + "macaque", + "langur", + "black-and-white colobus", + "proboscis monkey", + "marmoset", + "white-headed capuchin", + "howler monkey", + "titi monkey", + "Geoffroy's spider monkey", + "common squirrel monkey", + "ring-tailed lemur", + "indri", + "Asian elephant", + "African bush elephant", + "red panda", + "giant panda", + "snoek fish", + "eel", + "silver salmon", + "rock beauty fish", + "clownfish", + "sturgeon", + "gar fish", + "lionfish", + "pufferfish", + "abacus", + "abaya", + "academic gown", + "accordion", + "acoustic guitar", + "aircraft carrier", + "airliner", + "airship", + "altar", + "ambulance", + "amphibious vehicle", + "analog clock", + "apiary", + "apron", + "trash can", + "assault rifle", + "backpack", + "bakery", + "balance beam", + "balloon", + "ballpoint pen", + "Band-Aid", + "banjo", + "baluster / handrail", + "barbell", + "barber chair", + "barbershop", + "barn", + "barometer", + "barrel", + "wheelbarrow", + "baseball", + "basketball", + "bassinet", + "bassoon", + "swimming cap", + "bath towel", + "bathtub", + "station wagon", + "lighthouse", + "beaker", + "military hat (bearskin or shako)", + "beer bottle", + "beer glass", + "bell tower", + "baby bib", + "tandem 
bicycle", + "bikini", + "ring binder", + "binoculars", + "birdhouse", + "boathouse", + "bobsleigh", + "bolo tie", + "poke bonnet", + "bookcase", + "bookstore", + "bottle cap", + "hunting bow", + "bow tie", + "brass memorial plaque", + "bra", + "breakwater", + "breastplate", + "broom", + "bucket", + "buckle", + "bulletproof vest", + "high-speed train", + "butcher shop", + "taxicab", + "cauldron", + "candle", + "cannon", + "canoe", + "can opener", + "cardigan", + "car mirror", + "carousel", + "tool kit", + "cardboard box / carton", + "car wheel", + "automated teller machine", + "cassette", + "cassette player", + "castle", + "catamaran", + "CD player", + "cello", + "mobile phone", + "chain", + "chain-link fence", + "chain mail", + "chainsaw", + "storage chest", + "chiffonier", + "bell or wind chime", + "china cabinet", + "Christmas stocking", + "church", + "movie theater", + "cleaver", + "cliff dwelling", + "cloak", + "clogs", + "cocktail shaker", + "coffee mug", + "coffeemaker", + "spiral or coil", + "combination lock", + "computer keyboard", + "candy store", + "container ship", + "convertible", + "corkscrew", + "cornet", + "cowboy boot", + "cowboy hat", + "cradle", + "construction crane", + "crash helmet", + "crate", + "infant bed", + "Crock Pot", + "croquet ball", + "crutch", + "cuirass", + "dam", + "desk", + "desktop computer", + "rotary dial telephone", + "diaper", + "digital clock", + "digital watch", + "dining table", + "dishcloth", + "dishwasher", + "disc brake", + "dock", + "dog sled", + "dome", + "doormat", + "drilling rig", + "drum", + "drumstick", + "dumbbell", + "Dutch oven", + "electric fan", + "electric guitar", + "electric locomotive", + "entertainment center", + "envelope", + "espresso machine", + "face powder", + "feather boa", + "filing cabinet", + "fireboat", + "fire truck", + "fire screen", + "flagpole", + "flute", + "folding chair", + "football helmet", + "forklift", + "fountain", + "fountain pen", + "four-poster bed", + "freight car", + "French 
horn", + "frying pan", + "fur coat", + "garbage truck", + "gas mask or respirator", + "gas pump", + "goblet", + "go-kart", + "golf ball", + "golf cart", + "gondola", + "gong", + "gown", + "grand piano", + "greenhouse", + "radiator grille", + "grocery store", + "guillotine", + "hair clip", + "hair spray", + "half-track", + "hammer", + "hamper", + "hair dryer", + "hand-held computer", + "handkerchief", + "hard disk drive", + "harmonica", + "harp", + "combine harvester", + "hatchet", + "holster", + "home theater", + "honeycomb", + "hook", + "hoop skirt", + "gymnastic horizontal bar", + "horse-drawn vehicle", + "hourglass", + "iPod", + "clothes iron", + "carved pumpkin", + "jeans", + "jeep", + "T-shirt", + "jigsaw puzzle", + "rickshaw", + "joystick", + "kimono", + "knee pad", + "knot", + "lab coat", + "ladle", + "lampshade", + "laptop computer", + "lawn mower", + "lens cap", + "letter opener", + "library", + "lifeboat", + "lighter", + "limousine", + "ocean liner", + "lipstick", + "slip-on shoe", + "lotion", + "music speaker", + "loupe magnifying glass", + "sawmill", + "magnetic compass", + "messenger bag", + "mailbox", + "tights", + "one-piece bathing suit", + "manhole cover", + "maraca", + "marimba", + "mask", + "matchstick", + "maypole", + "maze", + "measuring cup", + "medicine cabinet", + "megalith", + "microphone", + "microwave oven", + "military uniform", + "milk can", + "minibus", + "miniskirt", + "minivan", + "missile", + "mitten", + "mixing bowl", + "mobile home", + "ford model t", + "modem", + "monastery", + "monitor", + "moped", + "mortar and pestle", + "graduation cap", + "mosque", + "mosquito net", + "vespa", + "mountain bike", + "tent", + "computer mouse", + "mousetrap", + "moving van", + "muzzle", + "metal nail", + "neck brace", + "necklace", + "baby pacifier", + "notebook computer", + "obelisk", + "oboe", + "ocarina", + "odometer", + "oil filter", + "pipe organ", + "oscilloscope", + "overskirt", + "bullock cart", + "oxygen mask", + "product packet / 
packaging", + "paddle", + "paddle wheel", + "padlock", + "paintbrush", + "pajamas", + "palace", + "pan flute", + "paper towel", + "parachute", + "parallel bars", + "park bench", + "parking meter", + "railroad car", + "patio", + "payphone", + "pedestal", + "pencil case", + "pencil sharpener", + "perfume", + "Petri dish", + "photocopier", + "plectrum", + "Pickelhaube", + "picket fence", + "pickup truck", + "pier", + "piggy bank", + "pill bottle", + "pillow", + "ping-pong ball", + "pinwheel", + "pirate ship", + "drink pitcher", + "block plane", + "planetarium", + "plastic bag", + "plate rack", + "farm plow", + "plunger", + "Polaroid camera", + "pole", + "police van", + "poncho", + "pool table", + "soda bottle", + "plant pot", + "potter's wheel", + "power drill", + "prayer rug", + "printer", + "prison", + "missile", + "projector", + "hockey puck", + "punching bag", + "purse", + "quill", + "quilt", + "race car", + "racket", + "radiator", + "radio", + "radio telescope", + "rain barrel", + "recreational vehicle", + "fishing casting reel", + "reflex camera", + "refrigerator", + "remote control", + "restaurant", + "revolver", + "rifle", + "rocking chair", + "rotisserie", + "eraser", + "rugby ball", + "ruler measuring stick", + "sneaker", + "safe", + "safety pin", + "salt shaker", + "sandal", + "sarong", + "saxophone", + "scabbard", + "weighing scale", + "school bus", + "schooner", + "scoreboard", + "CRT monitor", + "screw", + "screwdriver", + "seat belt", + "sewing machine", + "shield", + "shoe store", + "shoji screen / room divider", + "shopping basket", + "shopping cart", + "shovel", + "shower cap", + "shower curtain", + "ski", + "balaclava ski mask", + "sleeping bag", + "slide rule", + "sliding door", + "slot machine", + "snorkel", + "snowmobile", + "snowplow", + "soap dispenser", + "soccer ball", + "sock", + "solar thermal collector", + "sombrero", + "soup bowl", + "keyboard space bar", + "space heater", + "space shuttle", + "spatula", + "motorboat", + "spider web", + 
"spindle", + "sports car", + "spotlight", + "stage", + "steam locomotive", + "through arch bridge", + "steel drum", + "stethoscope", + "scarf", + "stone wall", + "stopwatch", + "stove", + "strainer", + "tram", + "stretcher", + "couch", + "stupa", + "submarine", + "suit", + "sundial", + "sunglasses", + "sunglasses", + "sunscreen", + "suspension bridge", + "mop", + "sweatshirt", + "swim trunks / shorts", + "swing", + "electrical switch", + "syringe", + "table lamp", + "tank", + "tape player", + "teapot", + "teddy bear", + "television", + "tennis ball", + "thatched roof", + "front curtain", + "thimble", + "threshing machine", + "throne", + "tile roof", + "toaster", + "tobacco shop", + "toilet seat", + "torch", + "totem pole", + "tow truck", + "toy store", + "tractor", + "semi-trailer truck", + "tray", + "trench coat", + "tricycle", + "trimaran", + "tripod", + "triumphal arch", + "trolleybus", + "trombone", + "hot tub", + "turnstile", + "typewriter keyboard", + "umbrella", + "unicycle", + "upright piano", + "vacuum cleaner", + "vase", + "vaulted or arched ceiling", + "velvet fabric", + "vending machine", + "vestment", + "viaduct", + "violin", + "volleyball", + "waffle iron", + "wall clock", + "wallet", + "wardrobe", + "military aircraft", + "sink", + "washing machine", + "water bottle", + "water jug", + "water tower", + "whiskey jug", + "whistle", + "hair wig", + "window screen", + "window shade", + "Windsor tie", + "wine bottle", + "airplane wing", + "wok", + "wooden spoon", + "wool", + "split-rail fence", + "shipwreck", + "sailboat", + "yurt", + "website", + "comic book", + "crossword", + "traffic or street sign", + "traffic light", + "dust jacket", + "menu", + "plate", + "guacamole", + "consomme", + "hot pot", + "trifle", + "ice cream", + "popsicle", + "baguette", + "bagel", + "pretzel", + "cheeseburger", + "hot dog", + "mashed potatoes", + "cabbage", + "broccoli", + "cauliflower", + "zucchini", + "spaghetti squash", + "acorn squash", + "butternut squash", + 
"cucumber", + "artichoke", + "bell pepper", + "cardoon", + "mushroom", + "Granny Smith apple", + "strawberry", + "orange", + "lemon", + "fig", + "pineapple", + "banana", + "jackfruit", + "cherimoya (custard apple)", + "pomegranate", + "hay", + "carbonara", + "chocolate syrup", + "dough", + "meatloaf", + "pizza", + "pot pie", + "burrito", + "red wine", + "espresso", + "tea cup", + "eggnog", + "mountain", + "bubble", + "cliff", + "coral reef", + "geyser", + "lakeshore", + "promontory", + "sandbar", + "beach", + "valley", + "volcano", + "baseball player", + "bridegroom", + "scuba diver", + "rapeseed", + "daisy", + "yellow lady's slipper", + "corn", + "acorn", + "rose hip", + "horse chestnut seed", + "coral fungus", + "agaric", + "gyromitra", + "stinkhorn mushroom", + "earth star fungus", + "hen of the woods mushroom", + "bolete", + "corn cob", + "toilet paper", +] + + +openai_imagenet_template = [ + lambda c: f"a bad photo of a {c}.", + lambda c: f"a photo of many {c}.", + lambda c: f"a sculpture of a {c}.", + lambda c: f"a photo of the hard to see {c}.", + lambda c: f"a low resolution photo of the {c}.", + lambda c: f"a rendering of a {c}.", + lambda c: f"graffiti of a {c}.", + lambda c: f"a bad photo of the {c}.", + lambda c: f"a cropped photo of the {c}.", + lambda c: f"a tattoo of a {c}.", + lambda c: f"the embroidered {c}.", + lambda c: f"a photo of a hard to see {c}.", + lambda c: f"a bright photo of a {c}.", + lambda c: f"a photo of a clean {c}.", + lambda c: f"a photo of a dirty {c}.", + lambda c: f"a dark photo of the {c}.", + lambda c: f"a drawing of a {c}.", + lambda c: f"a photo of my {c}.", + lambda c: f"the plastic {c}.", + lambda c: f"a photo of the cool {c}.", + lambda c: f"a close-up photo of a {c}.", + lambda c: f"a black and white photo of the {c}.", + lambda c: f"a painting of the {c}.", + lambda c: f"a painting of a {c}.", + lambda c: f"a pixelated photo of the {c}.", + lambda c: f"a sculpture of the {c}.", + lambda c: f"a bright photo of the 
{c}.", + lambda c: f"a cropped photo of a {c}.", + lambda c: f"a plastic {c}.", + lambda c: f"a photo of the dirty {c}.", + lambda c: f"a jpeg corrupted photo of a {c}.", + lambda c: f"a blurry photo of the {c}.", + lambda c: f"a photo of the {c}.", + lambda c: f"a good photo of the {c}.", + lambda c: f"a rendering of the {c}.", + lambda c: f"a {c} in a video game.", + lambda c: f"a photo of one {c}.", + lambda c: f"a doodle of a {c}.", + lambda c: f"a close-up photo of the {c}.", + lambda c: f"a photo of a {c}.", + lambda c: f"the origami {c}.", + lambda c: f"the {c} in a video game.", + lambda c: f"a sketch of a {c}.", + lambda c: f"a doodle of the {c}.", + lambda c: f"a origami {c}.", + lambda c: f"a low resolution photo of a {c}.", + lambda c: f"the toy {c}.", + lambda c: f"a rendition of the {c}.", + lambda c: f"a photo of the clean {c}.", + lambda c: f"a photo of a large {c}.", + lambda c: f"a rendition of a {c}.", + lambda c: f"a photo of a nice {c}.", + lambda c: f"a photo of a weird {c}.", + lambda c: f"a blurry photo of a {c}.", + lambda c: f"a cartoon {c}.", + lambda c: f"art of a {c}.", + lambda c: f"a sketch of the {c}.", + lambda c: f"a embroidered {c}.", + lambda c: f"a pixelated photo of a {c}.", + lambda c: f"itap of the {c}.", + lambda c: f"a jpeg corrupted photo of the {c}.", + lambda c: f"a good photo of a {c}.", + lambda c: f"a plushie {c}.", + lambda c: f"a photo of the nice {c}.", + lambda c: f"a photo of the small {c}.", + lambda c: f"a photo of the weird {c}.", + lambda c: f"the cartoon {c}.", + lambda c: f"art of the {c}.", + lambda c: f"a drawing of the {c}.", + lambda c: f"a photo of the large {c}.", + lambda c: f"a black and white photo of a {c}.", + lambda c: f"the plushie {c}.", + lambda c: f"a dark photo of a {c}.", + lambda c: f"itap of a {c}.", + lambda c: f"graffiti of the {c}.", + lambda c: f"a toy {c}.", + lambda c: f"itap of my {c}.", + lambda c: f"a photo of a cool {c}.", + lambda c: f"a photo of a small {c}.", + lambda c: f"a 
def tokenizer(text):
    """Encode ``text`` with the module-level RoBERTa tokenizer.

    The input is padded/truncated to exactly 77 tokens and returned as
    PyTorch tensors (``return_tensors="pt"``).

    Args:
        text: A string or a list of strings, forwarded unchanged to
            ``tokenize``.

    Returns:
        dict: One entry per field produced by ``tokenize``, each tensor
        passed through ``squeeze(0)``. ``squeeze(0)`` removes the leading
        dimension only when its size is 1, so a batch of several strings
        keeps its batch dimension.
    """
    encode_options = dict(
        padding="max_length",
        truncation=True,
        max_length=77,
        return_tensors="pt",
    )
    encoded = tokenize(text, **encode_options)

    squeezed = {}
    for field, tensor in encoded.items():
        squeezed[field] = tensor.squeeze(0)
    return squeezed
batch size) + text_data = ["I love the contrastive learning", "I love the pretrain model"] + # tokenize for roberta, if you want to tokenize for another text encoder, please refer to data.py#L43-90 + text_data = tokenizer(text_data) + + text_embed = model.get_text_embedding(text_data) + print(text_embed.size()) + + +def infer_audio(): + + device = "cuda:0" if torch.cuda.is_available() else "cpu" + precision = "fp32" + amodel = "HTSAT-tiny" # or 'PANN-14' + tmodel = "roberta" # the best text encoder in our training + enable_fusion = False # False if you do not want to use the fusion model + fusion_type = "aff_2d" + pretrained = PRETRAINED_PATH + + model, model_cfg = create_model( + amodel, + tmodel, + pretrained, + precision=precision, + device=device, + enable_fusion=enable_fusion, + fusion_type=fusion_type, + ) + + # load the waveform of the shape (T,), should resample to 48000 + audio_waveform, sr = librosa.load(WAVE_48k_PATH, sr=48000) + # quantize + audio_waveform = int16_to_float32(float32_to_int16(audio_waveform)) + audio_waveform = torch.from_numpy(audio_waveform).float() + audio_dict = {} + + # the 'fusion' truncate mode can be changed to 'rand_trunc' if run in unfusion mode + import ipdb + + ipdb.set_trace() + audio_dict = get_audio_features( + audio_dict, + audio_waveform, + 480000, + data_truncating="fusion", + data_filling="repeatpad", + audio_cfg=model_cfg["audio_cfg"], + ) + # can send a list to the model, to process many audio tracks in one time (i.e. 
def setup_logging(log_file, level, include_host=False):
    """Configure the root logger with a console handler and an optional file handler.

    Args:
        log_file: Path of a log file to write to; any falsy value (``None``,
            ``""``) disables file logging.
        level: Logging level applied to the root logger and to every logger
            already registered with the logging manager.
        include_host: When true, the host name reported by
            ``socket.gethostname()`` is embedded in every record.

    Returns:
        None. Handlers are added to ``logging.root``; existing handlers are
        left in place, so calling this twice adds the handlers twice.
    """
    date_format = "%Y-%m-%d,%H:%M:%S"
    if not include_host:
        record_format = "%(asctime)s | %(levelname)s | %(message)s"
    else:
        import socket

        hostname = socket.gethostname()
        record_format = f"%(asctime)s | {hostname} | %(levelname)s | %(message)s"
    formatter = logging.Formatter(record_format, datefmt=date_format)

    root = logging.root
    root.setLevel(level)
    # Apply the same level to every logger that is already registered.
    for logger_name in list(root.manager.loggerDict):
        logging.getLogger(logger_name).setLevel(level)

    def attach(handler):
        # All handlers share one formatter so console and file output match.
        handler.setFormatter(formatter)
        root.addHandler(handler)

    # The console handler is attached before the file handler is created,
    # so it is already installed if opening the file fails.
    attach(logging.StreamHandler())
    if log_file:
        attach(logging.FileHandler(filename=log_file))
def maintain_ckpts(args, startidx, all_idx_len):
    """Shift ranked checkpoint files down by one slot, freeing slot ``startidx``.

    Every existing ``epoch_top_{i}.pt`` with ``startidx <= i < all_idx_len``
    in ``args.checkpoint_path`` is renamed to ``epoch_top_{i+1}.pt``. The file
    that ends up in slot ``all_idx_len`` is then deleted if present.

    Args:
        args: Object exposing ``checkpoint_path``, the checkpoint directory.
        startidx: First rank to shift; this slot is vacated.
        all_idx_len: Number of ranked slots that are kept.

    Returns:
        None.
    """
    ckpt_dir = args.checkpoint_path

    def slot_path(rank):
        return os.path.join(ckpt_dir, f"epoch_top_{rank}.pt")

    # Walk from the highest rank index downwards so each file moves into a
    # slot that has already been vacated.
    rank = all_idx_len - 1
    while rank >= startidx:
        current = slot_path(rank)
        if os.path.exists(current):
            os.rename(current, slot_path(rank + 1))
        rank -= 1

    # Whatever was pushed past the last kept slot is discarded.
    overflow = slot_path(all_idx_len)
    if os.path.exists(overflow):
        os.remove(overflow)
def is_pretrained_params(n):
    """Tell whether parameter name ``n`` belongs to the "pretrained" group.

    A name qualifies when it is exactly one of the two standalone parameters
    listed below, or when it starts with one of the listed ``clap_model``
    prefixes.

    Args:
        n: Parameter name as yielded by ``model.named_parameters()``.

    Returns:
        bool: ``True`` for a pretrained-group name, ``False`` otherwise.
    """
    exact_names = (
        "clap_model.positional_embedding",
        "clap_model.text_projection",
    )
    prefixes = (
        "clap_model.transformer",
        "clap_model.token_embedding",
        "clap_model.ln_final",
        "clap_model.logit_scale_t",
    )
    if n in exact_names:
        return True
    # str.startswith accepts a tuple and matches if any prefix matches.
    return n.startswith(prefixes)
p.requires_grad) and is_pretrained_params(n) + ] + rest_pretrained_params = [ + p + for n, p in named_parameters + if (include(n, p) and p.requires_grad) and is_pretrained_params(n) + ] + gain_or_bias_new_params = [ + p + for n, p in named_parameters + if (exclude(n, p) and p.requires_grad) + and (not is_pretrained_params(n)) + ] + rest_new_params = [ + p + for n, p in named_parameters + if (include(n, p) and p.requires_grad) + and (not is_pretrained_params(n)) + ] + + pretrained_params_optimizer = get_optimizer( + [ + {"params": gain_or_bias_pretrained_params, "weight_decay": 0.0}, + { + "params": rest_pretrained_params, + "weight_decay": args.wd_pretrained, + }, + ], + lr=args.lr_pretrained, + betas=(args.beta1_pretrained, args.beta2_pretrained), + eps=args.eps_pretrained, + momentum=args.momentum_pretrained, + optimizer_name=args.optimizer, + ) + pretrained_params_scheduler = cosine_lr( + pretrained_params_optimizer, + args.lr_pretrained, + args.warmup, + total_steps, + ) + + new_params_optimizer = get_optimizer( + [ + {"params": gain_or_bias_new_params, "weight_decay": 0.0}, + {"params": rest_new_params, "weight_decay": args.wd_new}, + ], + lr=args.lr_new, + betas=(args.beta1_new, args.beta2_new), + eps=args.eps_new, + momentum=args.momentum_new, + optimizer_name=args.optimizer, + ) + new_params_scheduler = cosine_lr( + new_params_optimizer, args.lr_new, args.warmup, total_steps + ) + + optimizer["text"] = pretrained_params_optimizer + optimizer["audio"] = new_params_optimizer + scheduler["text"] = pretrained_params_scheduler + scheduler["audio"] = new_params_scheduler + + if args.horovod: + pretrained_params_optimizer = hvd.DistributedOptimizer( + pretrained_params_optimizer, + named_parameters=model.named_parameters(), + ) + new_params_optimizer = hvd.DistributedOptimizer( + new_params_optimizer, named_parameters=model.named_parameters() + ) + hvd.broadcast_parameters(model.state_dict(), root_rank=0) + hvd.broadcast_optimizer_state( + 
pretrained_params_optimizer, root_rank=0 + ) + hvd.broadcast_optimizer_state(new_params_optimizer, root_rank=0) + else: + + optimizer["clap"] = get_optimizer( + [ + {"params": gain_or_bias_params, "weight_decay": 0.0}, + {"params": rest_params, "weight_decay": args.wd}, + ], + lr=args.lr, + betas=(args.beta1, args.beta2), + eps=args.eps, + momentum=args.momentum, + optimizer_name=args.optimizer, + ) + scheduler["clap"] = cosine_lr( + optimizer["clap"], args.lr, args.warmup, total_steps + ) + + if args.horovod: + optimizer["clap"] = hvd.DistributedOptimizer( + optimizer["clap"], named_parameters=model.named_parameters() + ) + hvd.broadcast_parameters(model.state_dict(), root_rank=0) + hvd.broadcast_optimizer_state(optimizer["clap"], root_rank=0) + + # linear probe optimizer + else: + lp_params = [ + p for n, p in named_parameters if (not in_clap(n, p)) and p.requires_grad + ] + lp_optim = get_optimizer( + lp_params, + lr=args.lp_lr, + betas=(args.beta1, args.beta2), + eps=args.eps, + momentum=0.9, + optimizer_name=args.optimizer, + ) + optimizer["lp"] = lp_optim + + return optimizer, scheduler, text_freeze_parameters + + +def main(): + args = parse_args() + + time.sleep(args.sleep) + + # sanitize model name for filesystem / uri use, easier if we don't use / in name as a rule? 
+ args.amodel = args.amodel.replace("/", "-") + # download sizes.json file + + # (yusong): the below two lines are for debug + # print("setting up faulthandler") + # faulthandler.register(10) + + random.seed(args.seed) + torch.manual_seed(args.seed) + torch.cuda.manual_seed(args.seed) + torch.cuda.manual_seed_all(args.seed) + np.random.seed(args.seed) + args.class_index_dict = load_class_label(args.class_label_path) + + # get the name of the experiments + if args.name is None: + args.name = "-".join( + [ + datetime.now().strftime("%Y_%m_%d-%H_%M_%S"), + f"linear_probe" f"model_{args.amodel}", + f"lr_{args.lr}", + f"b_{args.batch_size}", + f"j_{args.workers}", + f"p_{args.precision}", + ] + ) + + # discover initial world args early so we can log properly + args.distributed = False + args.local_rank, args.rank, args.world_size = world_info_from_env() + + if args.remotedata and is_master(args): + for dataset_name in args.datasetnames: + for split in dataset_split[dataset_name]: + if not os.path.exists(f"./json_files/{dataset_name}/{split}"): + os.makedirs(f"./json_files/{dataset_name}/{split}") + os.system( + f"aws s3 cp s3://s-laion-audio/webdataset_tar/{dataset_name}/{split}/sizes.json ./json_files/{dataset_name}/{split}/sizes.json" + ) + + args.log_path = None + if is_master(args, local=args.log_local): + log_base_path = os.path.join(args.logs, args.name) + os.makedirs(log_base_path, exist_ok=True) + log_filename = f"out-{args.rank}" if args.log_local else "out.log" + args.log_path = os.path.join(log_base_path, log_filename) + + # avoid log dir in same name: + postfix = 0 + while os.path.exists(args.log_path): + postfix += 1 + log_base_path_new = log_base_path + "-" + str(postfix) + os.makedirs(log_base_path_new, exist_ok=True) + log_filename = f"out-{args.rank}" if args.log_local else "out.log" + args.log_path = os.path.join(log_base_path_new, log_filename) + # print( + # "Error. Experiment already exists. Use --name {} to specify a new experiment." 
+ # ) + # return -1 + + # Set logger + args.log_level = logging.DEBUG if args.debug else logging.INFO + setup_logging(args.log_path, args.log_level) + + # fully initialize distributed device environment + device = init_distributed_device(args) + + args.wandb = "wandb" in args.report_to or "all" in args.report_to + args.tensorboard = "tensorboard" in args.report_to or "all" in args.report_to + if is_master(args): + args.tensorboard_path = ( + os.path.join(args.logs, args.name, "tensorboard") + if args.tensorboard + else "" + ) + args.checkpoint_path = os.path.join(args.logs, args.name, "checkpoints") + for dirname in [args.tensorboard_path, args.checkpoint_path]: + if dirname: + os.makedirs(dirname, exist_ok=True) + else: + args.tensorboard_path = "" + args.checkpoint_path = "" + + if args.copy_codebase: + copy_codebase(args) + + assert args.precision in ["amp", "fp16", "fp32"] + if args.precision == "fp16": + logging.warning( + "It is recommended to use AMP mixed-precision instead of FP16. " + "FP16 support needs further verification and tuning, especially for train." + ) + + if args.horovod: + logging.info( + f"Running in horovod mode with multiple processes / nodes. Device: {args.device}." + f"Process (global: {args.rank}, local {args.local_rank}), total {args.world_size}." + ) + elif args.distributed: + logging.info( + f"Running in distributed mode with multiple processes. Device: {args.device}." + f"Process (global: {args.rank}, local {args.local_rank}), total {args.world_size}." + ) + else: + logging.info(f"Running with a single process. 
Device {args.device}.") + + logging.info(f"openai cache dir: {os.path.expanduser(args.openai_model_cache_dir)}") + + # Create CLAP model + clap_model, clap_model_cfg = create_model( + args.amodel, + args.tmodel, + args.pretrained, + precision=args.precision, + device=device, + jit=args.torchscript, + force_quick_gelu=args.force_quick_gelu, + openai_model_cache_dir=os.path.expanduser(args.openai_model_cache_dir), + skip_params=False, + pretrained_audio=args.pretrained_audio, + pretrained_text=args.pretrained_text, + enable_fusion=args.enable_fusion, + fusion_type=args.fusion_type, + ) + + args.lp_out_ch = len(list(args.class_index_dict.keys())) + # Linear Probe + logging.info(f"linear probe using mlp: {args.lp_mlp}") + logging.info(f"linear probe using freeze: {args.lp_freeze}") + logging.info(f"linear probe act layer: {args.lp_act}") + logging.info(f"linear probe out ch: {args.lp_out_ch}") + logging.info(f"linear probe learning rate (if applicable): {args.lp_lr}") + logging.info(f"linear probe loss func: {args.lp_loss}") + logging.info(f"linear probe lp_metrics: {args.lp_metrics}") + + model = LinearProbe( + clap_model, + mlp=args.lp_mlp, + freeze=args.lp_freeze, + in_ch=512, + out_ch=args.lp_out_ch, + act=args.lp_act, + ) # in_ch is fixed (i.e., 512) + model = model.to(device) + + if args.horovod: + with torch.no_grad(): + for param in model.parameters(): + param.set_(param.contiguous()) + + if args.trace: + model = trace_model(model, batch_size=args.batch_size, device=device) + + if is_master(args): + logging.info("Linear Probe CLAP Model:") + logging.info(f"{str(clap_model)}") + logging.info("Params:") + params_file = os.path.join(args.logs, args.name, "params.txt") + with open(params_file, "w") as f: + for name in sorted(vars(args)): + val = getattr(args, name) + logging.info(f" {name}: {val}") + f.write(f"{name}: {val}\n") + + if args.distributed and not args.horovod: + if args.use_bn_sync: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + 
ddp_args = {} + if args.ddp_static_graph: + # this doesn't exist in older PyTorch, arg only added if enabled + ddp_args["static_graph"] = True + model = torch.nn.parallel.DistributedDataParallel( + model, device_ids=[device], find_unused_parameters=True, **ddp_args + ) + + data = get_data(args, clap_model_cfg) + assert len(data), "At least one train or eval dataset must be specified." + if args.trace: + assert "train" not in data, "Cannot train with traced model" + + optimizer, scheduler, text_freeze_parameters = config_lp_optimizer( + model, data, args + ) + + scaler = GradScaler() if args.precision == "amp" else None + + # optionally resume from a checkpoint + start_epoch = 0 + if args.resume is not None: + if os.path.isfile(args.resume): + checkpoint = torch.load(args.resume, map_location=device) + if "epoch" in checkpoint: + # resuming a train checkpoint w/ epoch and optimizer state + start_epoch = checkpoint["epoch"] + sd = checkpoint["state_dict"] + if not args.distributed and next(iter(sd.items()))[0].startswith( + "module" + ): + sd = {k[len("module.") :]: v for k, v in sd.items()} + model.load_state_dict(sd) + if args.split_opt: + if optimizer is not None: + for k, o_ in optimizer.items(): + o_.load_state_dict(checkpoint[k + "_" + "optimizer"]) + if optimizer is not None: + optimizer.load_state_dict(checkpoint["optimizer"]) + if scaler is not None and "scaler" in checkpoint: + scaler.load_state_dict(checkpoint["scaler"]) + logging.info( + f"=> resuming checkpoint '{args.resume}' (epoch {start_epoch})" + ) + else: + # loading a bare (model only) checkpoint for fine-tune or evaluation + model.load_state_dict(checkpoint) + logging.info( + f"=> loaded checkpoint '{args.resume}' (epoch {start_epoch})" + ) + if args.freeze_text: + print("Freeze Text!!!!") + for k in text_freeze_parameters: + k.requires_grad = False + else: + logging.info("=> no checkpoint found at '{}'".format(args.resume)) + + cudnn.benchmark = True + cudnn.deterministic = False + + # determine 
if this worker should save logs and checkpoints. only do so if it is rank == 0 + args.save_logs = args.logs and args.logs.lower() != "none" and is_master(args) + writer = None + if args.save_logs and args.tensorboard: + assert tensorboard is not None, "Please install tensorboard." + writer = tensorboard.SummaryWriter(args.tensorboard_path) + + if args.wandb and is_master(args): + assert wandb is not None, "Please install wandb." + logging.debug("Starting wandb.") + args.train_sz = data["train"].dataloader.num_samples + if args.val_data is not None: + args.val_sz = data["val"].dataloader.num_samples + # you will have to configure this for your project! + wandb.init( + project="clap", + notes=args.wandb_notes, + name=args.wandb_notes, + tags=[], + config=vars(args), + ) + if args.debug: + wandb.watch(model, log="all") + wandb.save(params_file) + logging.debug("Finished loading wandb.") + + if "train" not in data: + evaluate(model, data, start_epoch, args, writer) + return + elif start_epoch == 0 and "val" in data and not args.no_eval: + evaluate(model, data, 0, args, writer) + if args.save_top_performance: + current_top_k_ckpt_metrics = { + i: 0 for i in range(args.save_top_performance) + } # initialize the top-k metric for ckpts to 0 + + for epoch in range(start_epoch, args.epochs): + # freeze the text param after (include) args.freeze_text_after, this is -1 by default + if epoch == args.freeze_text_after: + print("Text pretrained parameters are freezed since this epoch.") + for k in text_freeze_parameters: + k.requires_grad = False + if is_master(args): + logging.info(f"Start epoch {epoch}") + + train_one_epoch(model, data, epoch, optimizer, scaler, scheduler, args, writer) + completed_epoch = epoch + 1 + + if ( + any(v in data for v in ("val", "imagenet-val", "imagenet-v2")) + and not args.no_eval + ): + metrics = evaluate(model, data, completed_epoch, args, writer) + if args.save_top_performance: + top_k_dataset = args.top_k_checkpoint_select_dataset + 
top_k_metric = args.top_k_checkpoint_select_metric + filtered_metrics = [ + v + for k, v in metrics.items() + if top_k_metric in k and top_k_dataset in k + ] # check all R@10 metrics (all dataset) and use it to update the ckpt + # Saving checkpoints. + if args.save_logs: + opt_dict = { + k + "_" + "optimizer": v.state_dict() for k, v in optimizer.items() + } + checkpoint_dict = { + "epoch": completed_epoch, + "name": args.name, + "state_dict": model.state_dict(), + } + checkpoint_dict.update(opt_dict) + if scaler is not None: + checkpoint_dict["scaler"] = scaler.state_dict() + + if completed_epoch == args.epochs or ( + args.save_frequency > 0 and (completed_epoch % args.save_frequency) == 0 + ): + torch.save( + checkpoint_dict, + os.path.join(args.checkpoint_path, f"epoch_{completed_epoch}.pt"), + ) + if args.save_most_recent: + torch.save( + checkpoint_dict, + os.path.join(args.checkpoint_path, f"epoch_latest.pt"), + ) + if args.save_top_performance and not args.no_eval: + update_top_k_performance( + filtered_metrics, + current_top_k_ckpt_metrics, + args, + checkpoint_dict, + bignumbetter=True, + ) + + if args.wandb and is_master(args): + wandb.finish() + + +def copy_codebase(args): + from shutil import copytree, ignore_patterns + + new_code_path = os.path.join(args.logs, args.name, "code") + if os.path.exists(new_code_path): + print( + f"Error. Experiment already exists at {new_code_path}. Use --name to specify a new experiment." 
class AverageMeter(object):
    """Track the most recent value and the weighted running average of a metric.

    Attributes:
        val: the last value passed to :meth:`update`.
        sum: the weighted sum of all values seen since the last reset.
        count: the total weight (number of samples) seen since the last reset.
        avg: ``sum / count``, or 0 while ``count`` is still 0.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear every statistic back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as having been observed ``n`` times.

        Args:
            val: the new measurement (e.g. a batch loss or a duration).
            n: the weight of the measurement, typically the batch size.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # An empty batch (n == 0) on a fresh meter used to divide by zero;
        # keep the average at 0 until at least one sample has been counted.
        self.avg = self.sum / self.count if self.count else 0


def unwrap_model(model):
    """Return ``model.module`` when the attribute exists, else ``model`` itself.

    Wrappers such as DistributedDataParallel expose the wrapped network as
    ``.module``; plain models are returned unchanged.
    """
    return getattr(model, "module", model)
num_batches_per_epoch = dataloader.num_batches + sample_digits = math.ceil(math.log(dataloader.num_samples + 1, 10)) + + # for toy dataset + if args.dataset_type == "toy": + dataloader.dataset.generate_queue() + + loss_m = AverageMeter() + batch_time_m = AverageMeter() + data_time_m = AverageMeter() + end = time.time() + + for i, batch in enumerate(dataloader): + step = num_batches_per_epoch * epoch + i + + if isinstance(scheduler, dict): + for s in scheduler.values(): + s(step) + else: + scheduler(step) + + audio = batch # contains mel_spec, wavform, and longer list + class_label = batch["class_label"] + # audio = audio.to(device=device, non_blocking=True) + class_label = class_label.to(device=device, non_blocking=True) + + if args.mixup: + # https://github.com/RetroCirce/HTS-Audio-Transformer/blob/main/utils.py#L146 + mix_lambda = torch.from_numpy( + get_mix_lambda(0.5, len(audio["waveform"])) + ).to(device) + class_label = do_mixup(class_label, mix_lambda) + else: + mix_lambda = None + + data_time_m.update(time.time() - end) + if isinstance(optimizer, dict): + for o_ in optimizer.values(): + o_.zero_grad() + else: + optimizer.zero_grad() + + with autocast(): + pred = model(audio, mix_lambda=mix_lambda, device=device) + total_loss = loss(pred, class_label) + + if isinstance(optimizer, dict): + if scaler is not None: + scaler.scale(total_loss).backward() + for o_ in optimizer.values(): + if args.horovod: + o_.synchronize() + scaler.unscale_(o_) + with o_.skip_synchronize(): + scaler.step(o_) + else: + scaler.step(o_) + scaler.update() + else: + total_loss.backward() + for o_ in optimizer.values(): + o_.step() + else: + if scaler is not None: + scaler.scale(total_loss).backward() + if args.horovod: + optimizer.synchronize() + scaler.unscale_(optimizer) + with optimizer.skip_synchronize(): + scaler.step(optimizer) + else: + scaler.step(optimizer) + scaler.update() + else: + total_loss.backward() + optimizer.step() + + # Note: we clamp to 4.6052 = ln(100), as in the 
original paper. + with torch.no_grad(): + unwrap_model(model).clap_model.logit_scale_a.clamp_(0, math.log(100)) + unwrap_model(model).clap_model.logit_scale_t.clamp_(0, math.log(100)) + + batch_time_m.update(time.time() - end) + end = time.time() + batch_count = i + 1 + + if is_master(args) and (i % 100 == 0 or batch_count == num_batches_per_epoch): + if isinstance(audio, dict): + batch_size = len(audio["waveform"]) + else: + batch_size = len(audio) + num_samples = batch_count * batch_size * args.world_size + samples_per_epoch = dataloader.num_samples + percent_complete = 100.0 * batch_count / num_batches_per_epoch + + # NOTE loss is coarsely sampled, just master node and per log update + loss_m.update(total_loss.item(), batch_size) + if isinstance(optimizer, dict): + logging.info( + f"Train Epoch: {epoch} [{num_samples:>{sample_digits}}/{samples_per_epoch} ({percent_complete:.0f}%)] " + f"Loss: {loss_m.val:#.5g} ({loss_m.avg:#.4g}) " + f"Data (t): {data_time_m.avg:.3f} " + f"Batch (t): {batch_time_m.avg:.3f} " + f"LR: {[o_.param_groups[0]['lr'] for o_ in optimizer.values()]}" + ) + log_data = { + "loss": loss_m.val, + "data_time": data_time_m.val, + "batch_time": batch_time_m.val, + "lr": [o_.param_groups[0]["lr"] for o_ in optimizer.values()], + } + else: + logging.info( + f"Train Epoch: {epoch} [{num_samples:>{sample_digits}}/{samples_per_epoch} ({percent_complete:.0f}%)] " + f"Loss: {loss_m.val:#.5g} ({loss_m.avg:#.4g}) " + f"Data (t): {data_time_m.avg:.3f} " + f"Batch (t): {batch_time_m.avg:.3f} " + f"LR: {optimizer.param_groups[0]['lr']:5f} " + ) + + # Save train loss / etc. 
Using non avg meter values as loggers have their own smoothing + log_data = { + "loss": loss_m.val, + "data_time": data_time_m.val, + "batch_time": batch_time_m.val, + "lr": optimizer.param_groups[0]["lr"], + } + for name, val in log_data.items(): + name = f"train{extra_suffix}/{name}" + if tb_writer is not None: + tb_writer.add_scalar(name, val, step) + if args.wandb: + assert wandb is not None, "Please install wandb." + wandb.log({name: val, "step": step}) + + # resetting batch / data time meters per log window + batch_time_m.reset() + data_time_m.reset() + # end for + + +def evaluate(model, data, epoch, args, tb_writer=None, extra_suffix=""): + metrics = {} + if not args.parallel_eval: + if not is_master(args): + return metrics + device = torch.device(args.device) + model.eval() + + # CHANGE + # zero_shot_metrics = zero_shot_eval(model, data, epoch, args) + # metrics.update(zero_shot_metrics) + if is_master(args): + print("Evaluating...") + metric_names = args.lp_metrics.split(",") + eval_tool = LPMetrics(metric_names=metric_names) + + autocast = torch.cuda.amp.autocast if args.precision == "amp" else suppress + if "val" in data and ( + args.val_frequency + and ((epoch % args.val_frequency) == 0 or epoch == args.epochs) + ): + if args.parallel_eval: + dataloader, sampler = data["val"].dataloader, data["val"].sampler + if args.distributed and sampler is not None: + sampler.set_epoch(epoch) + samples_per_val = dataloader.num_samples + else: + dataloader = data["val"].dataloader + num_samples = 0 + samples_per_val = dataloader.num_samples + + eval_info = {"pred": [], "target": []} + with torch.no_grad(): + for i, batch in enumerate(dataloader): + audio = batch # contains mel_spec, wavform, and longer list + class_label = batch["class_label"] + + # audio = audio.to(device=device, non_blocking=True) + class_label = class_label.to(device=device, non_blocking=True) + + with autocast(): + pred = model(audio, device=device) + if args.parallel_eval: + pred, class_label = 
lp_gather_features( + pred, class_label, args.world_size, args.horovod + ) + eval_info["pred"].append(pred) + eval_info["target"].append(class_label) + + num_samples += class_label.shape[0] + + if (i % 100) == 0: # and i != 0: + logging.info( + f"Eval Epoch: {epoch} [{num_samples} / {samples_per_val}]" + ) + + if is_master(args): + eval_info["pred"] = torch.cat(eval_info["pred"], 0).cpu() + eval_info["target"] = torch.cat(eval_info["target"], 0).cpu() + metric_dict = eval_tool.evaluate_mertics( + eval_info["pred"], eval_info["target"] + ) + metrics.update(metric_dict) + if "epoch" not in metrics.keys(): + metrics.update({"epoch": epoch}) + + if is_master(args): + if not metrics: + return metrics + + logging.info( + f"Eval Epoch: {epoch} " + + "\n".join( + ["\t".join([f"{m}: {round(metrics[m], 4):.4f}"]) for m in metrics] + ) + ) + if args.save_logs: + for name, val in metrics.items(): + if tb_writer is not None: + tb_writer.add_scalar(f"val{extra_suffix}/{name}", val, epoch) + + with open(os.path.join(args.checkpoint_path, "results.jsonl"), "a+") as f: + f.write(json.dumps(metrics)) + f.write("\n") + + if args.wandb: + assert wandb is not None, "Please install wandb." 
def maintain_ckpts(args, startidx, all_idx_len):
    """Shift ranked checkpoint files down one slot to free slot ``startidx``.

    Every existing ``epoch_top_{i}.pt`` with ``startidx <= i < all_idx_len`` in
    ``args.checkpoint_path`` is renamed to ``epoch_top_{i+1}.pt`` (highest rank
    first, so nothing is overwritten on the way), and the file that falls off
    the end (``epoch_top_{all_idx_len}.pt``) is deleted.

    Args:
        args: namespace providing ``checkpoint_path``.
        startidx: rank that is about to receive a new checkpoint.
        all_idx_len: number of ranked checkpoints that are kept.
    """

    def ckpt_path(rank):
        return os.path.join(args.checkpoint_path, f"epoch_top_{rank}.pt")

    for rank in reversed(range(startidx, all_idx_len)):
        if os.path.exists(ckpt_path(rank)):
            # os.replace overwrites a stale target on every platform, whereas
            # os.rename fails on Windows when the destination already exists.
            os.replace(ckpt_path(rank), ckpt_path(rank + 1))
    overflow = ckpt_path(all_idx_len)
    if os.path.exists(overflow):
        os.remove(overflow)
    return


def update_top_k_performance(
    new_metrics_inputs, current_top_k_ckpt_metrics, args, ckpt, bignumbetter=True
):
    """Insert the current epoch's score into the top-k table and save ``ckpt``.

    ``current_top_k_ckpt_metrics`` maps a rank key to the score of the
    checkpoint stored at that rank; keys are visited in sorted order, so the
    smallest key is the best rank. The table is updated in place. When the new
    score enters the table, lower-ranked ``epoch_top_{i}.pt`` files are shifted
    down and ``ckpt`` is written to the slot of the first rank that changed.

    Args:
        new_metrics_inputs: a scalar score, or a list/tuple/dict of scores that
            is reduced with ``np.mean``. NumPy scalars are accepted.
        current_top_k_ckpt_metrics: the rank -> score table (mutated in place).
        args: namespace providing ``checkpoint_path``.
        ckpt: the checkpoint object handed to ``torch.save``.
        bignumbetter: True when a larger score is better.

    Returns:
        ``(current_top_k_ckpt_metrics, score)`` where ``score`` is the reduced
        value as a Python float (NaN when there was nothing to rank).

    Raises:
        TypeError / ValueError: if the input cannot be converted to a float.
    """
    if isinstance(new_metrics_inputs, dict):
        new_metrics_inputs = list(new_metrics_inputs.values())
    if isinstance(new_metrics_inputs, (list, tuple)):
        if len(new_metrics_inputs) == 0:
            # No metric matched the selection filter this epoch: there is
            # nothing to rank, so leave the table and the files untouched.
            logging.warning("No metric available for top-k checkpoint selection.")
            return current_top_k_ckpt_metrics, float("nan")
        new_metrics_inputs = np.mean(new_metrics_inputs)

    # Coerce NumPy scalars such as np.float32 (not ``float`` subclasses) to a
    # plain float; previously they fell through every isinstance branch and
    # the function silently returned None without saving anything.
    new_metrics_inputs = float(new_metrics_inputs)
    if np.isnan(new_metrics_inputs):
        # NaN does not order consistently; never let it into the ranking.
        return current_top_k_ckpt_metrics, new_metrics_inputs

    sorted_keys = sorted(current_top_k_ckpt_metrics.keys())
    previous_values = sorted(
        current_top_k_ckpt_metrics.values(), reverse=bignumbetter
    )
    # Add the candidate, re-rank, and drop the worst entry to keep k scores.
    updated_values = sorted(
        previous_values + [new_metrics_inputs], reverse=bignumbetter
    )[:-1]

    if updated_values == previous_values:
        return current_top_k_ckpt_metrics, new_metrics_inputs

    first_changed_rank = None
    for rank, key in enumerate(sorted_keys):
        if current_top_k_ckpt_metrics[key] != updated_values[rank]:
            current_top_k_ckpt_metrics[key] = updated_values[rank]
            if first_changed_rank is None:
                first_changed_rank = rank

    if first_changed_rank is not None:
        # File slots are addressed by rank position, not by dictionary key, so
        # tables keyed 1..k work as well as tables keyed 0..k-1.
        maintain_ckpts(args, first_changed_rank, len(sorted_keys))
        torch.save(
            ckpt,
            os.path.join(args.checkpoint_path, f"epoch_top_{first_changed_rank}.pt"),
        )
    return current_top_k_ckpt_metrics, new_metrics_inputs
def main():
    """Entry point: parse arguments, build the model, then train and/or evaluate.

    The steps, in order: seed the RNGs, derive the experiment name, set up
    logging and the (optional) distributed environment, create the model, load
    the datasets, build the optimizer(s) and LR scheduler(s), optionally resume
    from ``args.resume``, then either run a single evaluation (no train split)
    or the epoch loop with periodic evaluation and checkpointing.

    Returns:
        ``-1`` when the experiment's log file already exists, otherwise ``None``.
    """
    args = parse_args()
    # sanitize model name for filesystem / uri use, easier if we don't use / in name as a rule?
    args.amodel = args.amodel.replace("/", "-")

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    if args.tmodel == "bert" or args.tmodel == "roberta" or args.tmodel == "bart":
        assert (
            args.pretrained == "" or args.pretrained is None
        ), "bert/roberta/bart text encoder does not support pretrained models."

    # get the name of the experiments
    if args.name is None:
        args.name = "-".join(
            [
                datetime.now().strftime("%Y_%m_%d-%H_%M_%S"),
                f"model_{args.amodel}",
                f"lr_{args.lr}",
                f"b_{args.batch_size}",
                f"j_{args.workers}",
                f"p_{args.precision}",
            ]
        )

    # discover initial world args early so we can log properly
    args.distributed = False
    args.local_rank, args.rank, args.world_size = world_info_from_env()

    # download the sizes.json file of every remote dataset split
    if args.remotedata and is_master(args):
        for dataset_name in args.datasetnames:
            for split in dataset_split[dataset_name]:
                if not os.path.exists(f"./json_files/{dataset_name}/{split}"):
                    os.makedirs(f"./json_files/{dataset_name}/{split}")
                os.system(
                    f"aws s3 cp s3://s-laion-audio/webdataset_tar/{dataset_name}/{split}/sizes.json ./json_files/{dataset_name}/{split}/sizes.json"
                )

    args.log_path = None
    if is_master(args, local=args.log_local):
        log_base_path = os.path.join(args.logs, args.name)
        os.makedirs(log_base_path, exist_ok=True)
        log_filename = f"out-{args.rank}" if args.log_local else "out.log"
        args.log_path = os.path.join(log_base_path, log_filename)
        if os.path.exists(args.log_path):
            # The old message printed a literal, never-formatted "{}".
            print(
                f"Error. Experiment already exists at {args.log_path}. Use --name to specify a new experiment."
            )
            return -1

    # Set logger
    args.log_level = logging.DEBUG if args.debug else logging.INFO
    setup_logging(args.log_path, args.log_level)

    # fully initialize distributed device environment
    device = init_distributed_device(args)

    args.wandb = "wandb" in args.report_to or "all" in args.report_to
    args.tensorboard = "tensorboard" in args.report_to or "all" in args.report_to
    if is_master(args):
        args.tensorboard_path = (
            os.path.join(args.logs, args.name, "tensorboard")
            if args.tensorboard
            else ""
        )
        args.checkpoint_path = os.path.join(args.logs, args.name, "checkpoints")
        for dirname in [args.tensorboard_path, args.checkpoint_path]:
            if dirname:
                os.makedirs(dirname, exist_ok=True)
    else:
        args.tensorboard_path = ""
        args.checkpoint_path = ""

    if args.copy_codebase:
        copy_codebase(args)

    assert args.precision in ["amp", "fp16", "fp32"]
    if args.precision == "fp16":
        logging.warning(
            "It is recommended to use AMP mixed-precision instead of FP16. "
            "FP16 support needs further verification and tuning, especially for train."
        )

    if args.horovod:
        logging.info(
            f"Running in horovod mode with multiple processes / nodes. Device: {args.device}."
            f"Process (global: {args.rank}, local {args.local_rank}), total {args.world_size}."
        )
    elif args.distributed:
        logging.info(
            f"Running in distributed mode with multiple processes. Device: {args.device}."
            f"Process (global: {args.rank}, local {args.local_rank}), total {args.world_size}."
        )
    else:
        logging.info(f"Running with a single process. Device {args.device}.")

    logging.info(f"openai cache dir: {os.path.expanduser(args.openai_model_cache_dir)}")

    model, model_cfg = create_model(
        args.amodel,
        args.tmodel,
        args.pretrained,
        precision=args.precision,
        device=device,
        jit=args.torchscript,
        force_quick_gelu=args.force_quick_gelu,
        openai_model_cache_dir=os.path.expanduser(args.openai_model_cache_dir),
        skip_params=True,
        pretrained_audio=args.pretrained_audio,
        pretrained_text=args.pretrained_text,
        enable_fusion=args.enable_fusion,
        fusion_type=args.fusion_type,
    )

    if args.horovod:
        with torch.no_grad():
            for param in model.parameters():
                param.set_(param.contiguous())

    if args.trace:
        model = trace_model(model, batch_size=args.batch_size, device=device)

    if is_master(args):
        logging.info("Model:")
        logging.info(f"{str(model)}")
        logging.info("Params:")
        params_file = os.path.join(args.logs, args.name, "params.txt")
        with open(params_file, "w") as f:
            for name in sorted(vars(args)):
                val = getattr(args, name)
                logging.info(f" {name}: {val}")
                f.write(f"{name}: {val}\n")

    if args.distributed and not args.horovod:
        if args.use_bn_sync:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        ddp_args = {}
        if args.ddp_static_graph:
            # this doesn't exist in older PyTorch, arg only added if enabled
            ddp_args["static_graph"] = True
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[device], find_unused_parameters=True, **ddp_args
        )

    data = get_data(args, model_cfg)
    assert len(data), "At least one train or eval dataset must be specified."
    if args.trace:
        assert "train" not in data, "Cannot train with traced model"

    def exclude(n, p):
        # parameters that must not receive weight decay
        return (
            p.ndim < 2
            or "bn" in n
            or "ln" in n
            or "bias" in n
            or "logit_scale" in n
        )

    def include(n, p):
        return not exclude(n, p)

    named_parameters = list(model.named_parameters())

    # freeze text encoder
    text_freeze_parameters = [p for n, p in named_parameters if "text_branch" in n]

    if args.freeze_text:
        print("Freeze Text!!!!")
        for k in text_freeze_parameters:
            k.requires_grad = False

    gain_or_bias_params = [
        p for n, p in named_parameters if exclude(n, p) and p.requires_grad
    ]
    rest_params = [p for n, p in named_parameters if include(n, p) and p.requires_grad]

    # set wd-related params to 0 if use adam optimizer
    if args.optimizer == "adam":
        args.wd = 0
        args.wd_pretrained = 0
        args.wd_new = 0

    if args.train_data is None:
        optimizer = None
        scheduler = None
    else:
        total_steps = data["train"].dataloader.num_batches * args.epochs

        if args.split_opt:
            # fall back to the shared hyper-parameter when a per-group one is unset
            for x in ["lr", "beta1", "beta2", "eps", "wd"]:
                for y in ["_new", "_pretrained"]:
                    if getattr(args, x + y) is None:
                        setattr(args, x + y, getattr(args, x))

            gain_or_bias_pretrained_params = [
                p
                for n, p in named_parameters
                if (exclude(n, p) and p.requires_grad) and is_pretrained_params(n)
            ]
            rest_pretrained_params = [
                p
                for n, p in named_parameters
                if (include(n, p) and p.requires_grad) and is_pretrained_params(n)
            ]
            gain_or_bias_new_params = [
                p
                for n, p in named_parameters
                if (exclude(n, p) and p.requires_grad) and (not is_pretrained_params(n))
            ]
            rest_new_params = [
                p
                for n, p in named_parameters
                if (include(n, p) and p.requires_grad) and (not is_pretrained_params(n))
            ]
            pretrained_params_optimizer = get_optimizer(
                [
                    {"params": gain_or_bias_pretrained_params, "weight_decay": 0.0},
                    {
                        "params": rest_pretrained_params,
                        "weight_decay": args.wd_pretrained,
                    },
                ],
                lr=args.lr_pretrained,
                betas=(args.beta1_pretrained, args.beta2_pretrained),
                eps=args.eps_pretrained,
                momentum=args.momentum_pretrained,
                optimizer_name=args.optimizer,
            )
            pretrained_params_scheduler = cosine_lr(
                pretrained_params_optimizer,
                args.lr_pretrained,
                args.warmup,
                total_steps,
            )
            new_params_optimizer = get_optimizer(
                [
                    {"params": gain_or_bias_new_params, "weight_decay": 0.0},
                    {"params": rest_new_params, "weight_decay": args.wd_new},
                ],
                lr=args.lr_new,
                betas=(args.beta1_new, args.beta2_new),
                eps=args.eps_new,
                momentum=args.momentum_new,
                optimizer_name=args.optimizer,
            )

            new_params_scheduler = cosine_lr(
                new_params_optimizer, args.lr_new, args.warmup, total_steps
            )

            if args.horovod:
                # Wrap before building the optimizer dict. Previously the dict
                # captured the unwrapped optimizers, so the horovod wrappers
                # were only used for the initial broadcast and never for
                # training. Schedulers are created before wrapping, exactly as
                # in the single-optimizer branch below.
                pretrained_params_optimizer = hvd.DistributedOptimizer(
                    pretrained_params_optimizer,
                    named_parameters=model.named_parameters(),
                )
                new_params_optimizer = hvd.DistributedOptimizer(
                    new_params_optimizer, named_parameters=model.named_parameters()
                )
                hvd.broadcast_parameters(model.state_dict(), root_rank=0)
                hvd.broadcast_optimizer_state(pretrained_params_optimizer, root_rank=0)
                hvd.broadcast_optimizer_state(new_params_optimizer, root_rank=0)

            optimizer = {
                "pretrained": pretrained_params_optimizer,
                "new": new_params_optimizer,
            }
            scheduler = {
                "pretrained": pretrained_params_scheduler,
                "new": new_params_scheduler,
            }
        else:
            optimizer = get_optimizer(
                [
                    {"params": gain_or_bias_params, "weight_decay": 0.0},
                    {"params": rest_params, "weight_decay": args.wd},
                ],
                lr=args.lr,
                betas=(args.beta1, args.beta2),
                eps=args.eps,
                momentum=args.momentum,
                optimizer_name=args.optimizer,
            )

            scheduler = cosine_lr(optimizer, args.lr, args.warmup, total_steps)

            if args.horovod:
                optimizer = hvd.DistributedOptimizer(
                    optimizer, named_parameters=model.named_parameters()
                )
                hvd.broadcast_parameters(model.state_dict(), root_rank=0)
                hvd.broadcast_optimizer_state(optimizer, root_rank=0)

    scaler = GradScaler() if args.precision == "amp" else None

    # optionally resume from a checkpoint
    start_epoch = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            # NOTE: torch.load unpickles the file; only resume from checkpoints
            # that come from a trusted source.
            checkpoint = torch.load(args.resume, map_location=device)
            if "epoch" in checkpoint:
                # resuming a train checkpoint w/ epoch and optimizer state
                start_epoch = checkpoint["epoch"]
                sd = checkpoint["state_dict"]
                if not args.distributed and next(iter(sd.items()))[0].startswith(
                    "module"
                ):
                    sd = {k[len("module.") :]: v for k, v in sd.items()}
                model.load_state_dict(sd)
                if optimizer is not None:
                    if args.split_opt:
                        # one state dict per optimizer, stored as "<key>_optimizer"
                        for k, o_ in optimizer.items():
                            o_.load_state_dict(checkpoint[k + "_" + "optimizer"])
                    else:
                        # Must be the else-branch: with split_opt `optimizer` is
                        # a dict and has no load_state_dict of its own.
                        optimizer.load_state_dict(checkpoint["optimizer"])
                if scaler is not None and "scaler" in checkpoint:
                    scaler.load_state_dict(checkpoint["scaler"])
                logging.info(
                    f"=> resuming checkpoint '{args.resume}' (epoch {start_epoch})"
                )
            else:
                # loading a bare (model only) checkpoint for fine-tune or evaluation
                model.load_state_dict(checkpoint)
                logging.info(
                    f"=> loaded checkpoint '{args.resume}' (epoch {start_epoch})"
                )
            if args.freeze_text:
                print("Freeze Text!!!!")
                for k in text_freeze_parameters:
                    k.requires_grad = False
        else:
            logging.info("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True
    cudnn.deterministic = False

    # determine if this worker should save logs and checkpoints. only do so if it is rank == 0
    args.save_logs = args.logs and args.logs.lower() != "none" and is_master(args)
    writer = None
    if args.save_logs and args.tensorboard:
        assert tensorboard is not None, "Please install tensorboard."
        writer = tensorboard.SummaryWriter(args.tensorboard_path)

    if args.wandb and is_master(args):
        assert wandb is not None, "Please install wandb."
        logging.debug("Starting wandb.")
        # eval-only runs have no train split; do not index it blindly
        if "train" in data:
            args.train_sz = data["train"].dataloader.num_samples
        if args.val_data is not None:
            args.val_sz = data["val"].dataloader.num_samples
        # you will have to configure this for your project!
        wandb.init(
            project="clap",
            notes=args.wandb_notes,
            name=args.wandb_notes,
            tags=[],
            config=vars(args),
        )
        if args.debug:
            wandb.watch(model, log="all")
        wandb.save(params_file)
        logging.debug("Finished loading wandb.")

    if "train" not in data:
        evaluate(model, data, start_epoch, args, writer)
        return
    elif start_epoch == 0 and "val" in data and not args.no_eval:
        evaluate(model, data, 0, args, writer)
    if args.save_top_performance:
        current_top_k_ckpt_metrics = {
            i: 0 for i in range(args.save_top_performance)
        }  # initialize the top-k metric for ckpts to 0

    for epoch in range(start_epoch, args.epochs):
        # freeze the text param after (include) args.freeze_text_after, this is -1 by default
        if epoch == args.freeze_text_after:
            print("Text pretrained parameters are freezed since this epoch.")
            for k in text_freeze_parameters:
                k.requires_grad = False
        if is_master(args):
            logging.info(f"Start epoch {epoch}")

        train_one_epoch(model, data, epoch, optimizer, scaler, scheduler, args, writer)
        completed_epoch = epoch + 1

        # Reset every epoch so a skipped evaluation can never reuse stale
        # metrics (or hit an undefined name) when ranking checkpoints below.
        filtered_metrics = None
        if (
            any(v in data for v in ("val", "imagenet-val", "imagenet-v2"))
            and not args.no_eval
        ):
            metrics = evaluate(model, data, completed_epoch, args, writer)
            if args.save_top_performance:
                top_k_dataset = args.top_k_checkpoint_select_dataset
                top_k_metric = args.top_k_checkpoint_select_metric
                filtered_metrics = [
                    v
                    for k, v in metrics.items()
                    if top_k_metric in k and top_k_dataset in k
                ]  # check all R@10 metrics (all dataset) and use it to update the ckpt
        # Saving checkpoints.
        if args.save_logs:
            if args.split_opt:
                opt_dict = {
                    k + "_" + "optimizer": v.state_dict() for k, v in optimizer.items()
                }
            else:
                opt_dict = {"optimizer": optimizer.state_dict()}
            checkpoint_dict = {
                "epoch": completed_epoch,
                "name": args.name,
                "state_dict": model.state_dict(),
            }
            checkpoint_dict.update(opt_dict)
            if scaler is not None:
                checkpoint_dict["scaler"] = scaler.state_dict()

            if completed_epoch == args.epochs or (
                args.save_frequency > 0 and (completed_epoch % args.save_frequency) == 0
            ):
                torch.save(
                    checkpoint_dict,
                    os.path.join(args.checkpoint_path, f"epoch_{completed_epoch}.pt"),
                )
            if args.save_most_recent:
                torch.save(
                    checkpoint_dict,
                    os.path.join(args.checkpoint_path, f"epoch_latest.pt"),
                )
            # Only rank the checkpoint when this epoch produced a selection metric.
            if args.save_top_performance and not args.no_eval and filtered_metrics:
                update_top_k_performance(
                    filtered_metrics,
                    current_top_k_ckpt_metrics,
                    args,
                    checkpoint_dict,
                    bignumbetter=True,
                )

    if args.wandb and is_master(args):
        wandb.finish()
def get_default_params(model_name):
    """Return the default optimizer hyper-parameters for ``model_name``.

    Values follow the CLIP paper (https://arxiv.org/pdf/2103.00020.pdf). Names
    containing "vit" (case-insensitive) use ``beta2=0.98`` and ``eps=1e-6``;
    every other model uses ``beta2=0.999`` and ``eps=1e-8``.

    Returns:
        A dict with the keys ``lr``, ``beta1``, ``beta2`` and ``eps``.
    """
    is_vit = "vit" in model_name.lower()
    defaults = {"lr": 5.0e-4, "beta1": 0.9}
    defaults["beta2"] = 0.98 if is_vit else 0.999
    defaults["eps"] = 1.0e-6 if is_vit else 1.0e-8
    return defaults
Set -1 to disable it", + ) + parser.add_argument( + "--train-ipc", + type=str, + default=None, + help="Path to npy file of the number of instance per class in training data", + ) + parser.add_argument( + "--val-ipc", + type=str, + default=None, + help="Path to npy file of the number of instance per class in validation data", + ) + parser.add_argument( + "--train-num-samples", + type=int, + default=None, + help="Number of samples in dataset. Required for webdataset if not available in info file.", + ) + parser.add_argument( + "--val-num-samples", + type=int, + default=None, + help="Number of samples in dataset. Useful for webdataset if not available in info file.", + ) + parser.add_argument( + "--dataset-type", + choices=["webdataset", "csv", "auto", "toy"], + default="auto", + help="Which type of dataset to process.", + ) + parser.add_argument( + "--csv-separator", + type=str, + default="\t", + help="For csv-like datasets, which separator to use.", + ) + parser.add_argument( + "--csv-img-key", + type=str, + default="filepath", + help="For csv-like datasets, the name of the key for the image paths.", + ) + parser.add_argument( + "--csv-caption-key", + type=str, + default="title", + help="For csv-like datasets, the name of the key for the captions.", + ) + parser.add_argument( + "--imagenet-val", + type=str, + default=None, + help="Path to imagenet val set for conducting zero shot evaluation.", + ) + parser.add_argument( + "--imagenet-v2", + type=str, + default=None, + help="Path to imagenet v2 for conducting zero shot evaluation.", + ) + parser.add_argument( + "--datasetnames", + nargs="+", + default=None, + help="If loading webdataset, spedify the dataset names to load. Can be some of these: Clotho, audioset, audiocaps, BBCSoundEffects", + ) + parser.add_argument( + "--full-train-dataset", + nargs="+", + default=None, + help="Which dataset will be trained with all the subsets. 
(train+test)", + ) + parser.add_argument( + "--exclude-eval-dataset", + nargs="+", + default=None, + help="Which dataset will be excluded with evaluation", + ) + parser.add_argument( + "--datasetinfos", + nargs="+", + default=None, + help="If loading webdataset, spedify the dataset types to load. Can be some of these: train, test, valid, unbalanced_train, balanced_train, eval", + ) + parser.add_argument( + "--dataset-proportion", + type=float, + default=1.0, + help="How much proportion of dataset we want to train.", + ) + parser.add_argument( + "--remotedata", + default=False, + action="store_true", + help="if the dataset is remote, set this flag", + ) + parser.add_argument( + "--class-label-path", + type=str, + default=None, + help="The path of the class label pickle or csv.", + ) + parser.add_argument( + "--datasetpath", + type=str, + default="/mnt/audio_clip/webdataset_tar", + help="The path to the dataset", + ) + parser.add_argument( + "--logs", + type=str, + default="./logs/", + help="Where to store tensorboard logs. Use None to avoid storing logs.", + ) + parser.add_argument( + "--log-local", + action="store_true", + default=False, + help="log files on local master, otherwise global master only.", + ) + parser.add_argument( + "--name", + type=str, + default=None, + help="Optional identifier for the experiment when storing logs. Otherwise use current time.", + ) + parser.add_argument( + "--workers", type=int, default=1, help="Number of workers per GPU." + ) + parser.add_argument( + "--batch-size", type=int, default=64, help="Batch size per GPU." + ) + parser.add_argument( + "--epochs", type=int, default=32, help="Number of epochs to train for." 
+ ) + parser.add_argument("--lr", type=float, default=None, help="Learning rate.") + parser.add_argument("--beta1", type=float, default=None, help="Adam beta 1.") + parser.add_argument("--beta2", type=float, default=None, help="Adam beta 2.") + parser.add_argument("--eps", type=float, default=None, help="Adam epsilon.") + parser.add_argument("--momentum", type=float, default=None, help="SGD epsilon.") + parser.add_argument("--wd", type=float, default=0.2, help="Weight decay.") + + parser.add_argument( + "--split-opt", + action="store_true", + default=False, + help="Use this flag to skip the learning rate decay.", + ) + parser.add_argument( + "--lr-pretrained", type=float, default=None, help="Learning rate for text." + ) + parser.add_argument( + "--beta1-pretrained", type=float, default=None, help="Adam beta 1 for text." + ) + parser.add_argument( + "--beta2-pretrained", type=float, default=None, help="Adam beta 2 for text." + ) + parser.add_argument( + "--eps-pretrained", type=float, default=None, help="Adam epsilon for text." + ) + parser.add_argument( + "--wd-pretrained", type=float, default=0.2, help="Weight decay for text." + ) + parser.add_argument( + "--momentum-pretrained", type=float, default=0.9, help="Momentum for text." + ) + parser.add_argument( + "--lr-new", type=float, default=None, help="Learning rate for audio." + ) + parser.add_argument( + "--beta1-new", type=float, default=None, help="Adam beta 1 for audio." + ) + parser.add_argument( + "--beta2-new", type=float, default=None, help="Adam beta 2 for audio." + ) + parser.add_argument( + "--eps-new", type=float, default=None, help="Adam epsilon for audio." + ) + parser.add_argument( + "--wd-new", type=float, default=0.2, help="Weight decay for audio." + ) + parser.add_argument( + "--momentum-new", type=float, default=0.9, help="Momentum for audio." + ) + parser.add_argument( + "--warmup", type=int, default=10000, help="Number of steps to warmup for." 
+ ) + parser.add_argument( + "--use-bn-sync", + default=False, + action="store_true", + help="Whether to use batch norm sync.", + ) + parser.add_argument( + "--skip-scheduler", + action="store_true", + default=False, + help="Use this flag to skip the learning rate decay.", + ) + parser.add_argument( + "--save-frequency", type=int, default=1, help="How often to save checkpoints." + ) + parser.add_argument( + "--save-top-performance", + type=int, + default=0, + help="Save the top x performance weights if the value >0", + ) + parser.add_argument( + "--save-most-recent", + action="store_true", + default=False, + help="Always save the most recent model trained to epoch_latest.pt.", + ) + parser.add_argument( + "--zeroshot-frequency", type=int, default=2, help="How often to run zero shot." + ) + parser.add_argument( + "--val-frequency", + type=int, + default=1, + help="How often to run evaluation with val data.", + ) + parser.add_argument( + "--resume", + default=None, + type=str, + help="path to latest checkpoint (default: none)", + ) + parser.add_argument( + "--precision", + choices=["amp", "fp16", "fp32"], + default="amp", + help="Floating point precision.", + ) + parser.add_argument( + "--amodel", + type=str, + default="RN50", + help="Name of the audio backbone to use.", + ) + parser.add_argument( + "--tmodel", + type=str, + default="transformer", + help="Name of the text backbone to use. 
Can be [transformer, bert, roberta, bart]", + ) + parser.add_argument( + "--pretrained-audio", + default="", + type=str, + help="Use a pretrained audio model weights for the audio encoder of CLAP", + ) + parser.add_argument( + "--pretrained-text", + default="", + type=str, + help="Use a pretrained text model weights for the text encoder of CLAP", + ) + parser.add_argument( + "--pretrained", + default="", + type=str, + help="Use a pretrained CLIP model weights with the specified tag or file path.", + ) + parser.add_argument( + "--pretrained-image", + default=False, + action="store_true", + help="Load imagenet pretrained weights for image tower backbone if available.", + ) + parser.add_argument( + "--lock-image", + default=False, + action="store_true", + help="Lock full image tower by disabling gradients.", + ) + parser.add_argument( + "--lock-image-unlocked-groups", + type=int, + default=0, + help="Leave last n image tower layer groups unlocked.", + ) + parser.add_argument( + "--lock-image-freeze-bn-stats", + default=False, + action="store_true", + help="Freeze BatchNorm running stats in image tower for any locked layers.", + ) + parser.add_argument( + "--local-loss", + default=False, + action="store_true", + help="calculate loss w/ local features @ global (instead of realizing full global @ global matrix)", + ) + parser.add_argument( + "--gather-with-grad", + default=False, + action="store_true", + help="enable full distributed gradient for feature gather", + ) + parser.add_argument( + "--force-quick-gelu", + default=False, + action="store_true", + help="Force use of QuickGELU activation for non-OpenAI transformer models.", + ) + parser.add_argument( + "--torchscript", + default=False, + action="store_true", + help="torch.jit.script the model, also uses jit version of OpenAI models if pretrained=='openai'", + ) + parser.add_argument( + "--trace", + default=False, + action="store_true", + help="torch.jit.trace the model for inference / eval only", + ) + # arguments 
for distributed training + parser.add_argument( + "--dist-url", + default="env://", + type=str, + help="url used to set up distributed training", + ) + parser.add_argument( + "--dist-backend", default="nccl", type=str, help="distributed backend" + ) + parser.add_argument( + "--report-to", + default="", + type=str, + help="Options are ['wandb', 'tensorboard', 'wandb,tensorboard']", + ) + parser.add_argument( + "--wandb-notes", default="", type=str, help="Notes if logging with wandb" + ) + parser.add_argument( + "--C", type=float, default=3.16, help="inverse regularizer for logistic reg." + ) + parser.add_argument( + "--debug", + default=False, + action="store_true", + help="If true, more information is logged.", + ) + parser.add_argument( + "--copy-codebase", + default=False, + action="store_true", + help="If true, we copy the entire base on the log diretory, and execute from there.", + ) + parser.add_argument( + "--horovod", + default=False, + action="store_true", + help="Use horovod for distributed training.", + ) + parser.add_argument( + "--ddp-static-graph", + default=False, + action="store_true", + help="Enable static graph optimization for DDP in PyTorch >= 1.11.", + ) + parser.add_argument( + "--no-set-device-rank", + default=False, + action="store_true", + help="Don't set device index from local rank (when CUDA_VISIBLE_DEVICES restricted to one per proc).", + ) + parser.add_argument("--seed", type=int, default=4242, help="Default random seed.") + + parser.add_argument( + "--top-k-checkpoint-select-dataset", + type=str, + default="all", + help="The dataset of selecting top-k checkpoint.", + ) + + # @R10, @R@5, @R1, mAP@10 + parser.add_argument( + "--top-k-checkpoint-select-metric", + type=str, + default="_R@10", + help="The metric for selecting top-k checkpoint.", + ) + parser.add_argument( + "--openai-model-cache-dir", + type=str, + default=f"{CACHE_DIR}/clip", + help="Directory to download OpenAI models.", + ) + parser.add_argument( + "--optimizer", + 
type=str, + default="adamw", + help="can be AdamW or SGD", + ) + parser.add_argument( + "--parallel-eval", + default=False, + action="store_true", + help="Eval in parallel (multi-GPU, multi-node).", + ) + + parser.add_argument( + "--no-eval", + default=False, + action="store_true", + help="Training without evaluation.", + ) + + parser.add_argument( + "--lp-mlp", + default=False, + action="store_true", + help="Linear Probe using MLP layer or not.", + ) + + parser.add_argument( + "--lp-freeze", + default=False, + action="store_true", + help="Linear Probe using Freeze CLAP or not", + ) + + parser.add_argument( + "--lp-act", + default="None", + type=str, + help="Options are ['relu','elu','prelu','softmax','sigmoid']", + ) + + parser.add_argument( + "--lp-loss", type=str, default="bce", help="Loss func of Linear Probe." + ) + + parser.add_argument( + "--lp-metrics", + type=str, + default="map,mauc,acc", + help="Metrics of Linear Probe.", + ) + + parser.add_argument( + "--lp-lr", type=float, default=1e-4, help="learning rate of linear probe" + ) + parser.add_argument( + "--kappa", + type=float, + default=0, + help="the kappa in the weighted contrastive loss, default is to turn off the weighted contrastive loss", + ) + + parser.add_argument( + "--data-filling", + type=str, + default="pad", + help="type of data filling when the audio length is shorter than the max length." + "Can be one of the following: repeat, repeatpad, pad", + ) + parser.add_argument( + "--data-truncating", + type=str, + default="rand_trunc", + help="type of data truncation when the audio length is longer than the max length." 
+ "Can be one of the following: rand_trunc, fusion", + ) + + parser.add_argument( + "--clap-mlploss", + default=False, + action="store_true", + help="Using MLP loss for CLAP model or not", + ) + + parser.add_argument( + "--wandb-id", + type=str, + default=None, + help="the id of wandb experiment to restore.", + ) + + parser.add_argument( + "--sleep", type=float, default=0, help="sleep n seconds before start training" + ) + + # variable length processing + parser.add_argument( + "--enable-fusion", + default=False, + action="store_true", + help="Enable feature funsion for variable-length data", + ) + + parser.add_argument( + "--fusion-type", + type=str, + default="None", + help="Type is among ['channel_map', 'daf_1d','aff_1d','iaff_1d','daf_2d','aff_2d','iaff_2d']", + ) + + parser.add_argument( + "--mixup", + default=False, + action="store_true", + help="Enable mixup in finetuning training.", + ) + parser.add_argument( + "--text-augment-selection", + type=str, + default=None, + help="For selecting levels of augmented text. Type is among ['all', 'augment_only', 'none']", + ) + + args = parser.parse_args() + + # If some params are not passed, we use the default values based on model name. 
def assign_learning_rate(optimizer, new_lr):
    """Write ``new_lr`` into the ``"lr"`` entry of every parameter group."""
    for group in optimizer.param_groups:
        group["lr"] = new_lr


def _warmup_lr(base_lr, warmup_length, step):
    """Return the linear warm-up learning rate for 0-based ``step``."""
    completed_steps = step + 1
    return base_lr * completed_steps / warmup_length


def cosine_lr(optimizer, base_lr, warmup_length, steps):
    """Build a step -> learning-rate callback: linear warm-up, then cosine decay.

    Args:
        optimizer: Object exposing ``param_groups``, an iterable of mappings
            that each carry an ``"lr"`` entry.
        base_lr: Learning rate reached at the end of warm-up.
        warmup_length: Number of warm-up steps.
        steps: Total number of steps; the cosine term spans
            ``steps - warmup_length`` steps.

    Returns:
        A function ``f(step)`` that computes the learning rate for ``step``,
        stores it in every parameter group of ``optimizer`` and returns it.
    """

    def _cosine_value(step):
        # Half-cosine from base_lr (at step == warmup_length) towards 0.
        elapsed = step - warmup_length
        span = steps - warmup_length
        return 0.5 * (1 + np.cos(np.pi * elapsed / span)) * base_lr

    def _lr_adjuster(step):
        in_warmup = step < warmup_length
        lr = _warmup_lr(base_lr, warmup_length, step) if in_warmup else _cosine_value(step)
        assign_learning_rate(optimizer, lr)
        return lr

    return _lr_adjuster
def unwrap_model(model):
    """Return ``model.module`` when that attribute exists, else ``model`` itself.

    This lets callers reach the underlying model whether or not it has been
    placed inside a wrapper object that exposes it as ``.module``.
    """
    return getattr(model, "module", model)
+ logit_scale_a=logit_scale_a, + logit_scale_t=logit_scale_t, + audio_features_mlp=audio_features_mlp, + text_features_mlp=text_features_mlp, + ) + else: + total_loss = loss( + audio_features=audio_features, + text_features=text_features, + logit_scale_a=logit_scale_a, + ) + if isinstance(optimizer, dict): + if scaler is not None: + scaler.scale(total_loss).backward() + for o_ in optimizer.values(): + if args.horovod: + o_.synchronize() + scaler.unscale_(o_) + with o_.skip_synchronize(): + scaler.step(o_) + else: + scaler.step(o_) + scaler.update() + else: + total_loss.backward() + for o_ in optimizer.values(): + o_.step() + else: + if scaler is not None: + scaler.scale(total_loss).backward() + if args.horovod: + optimizer.synchronize() + scaler.unscale_(optimizer) + with optimizer.skip_synchronize(): + scaler.step(optimizer) + else: + scaler.step(optimizer) + scaler.update() + else: + total_loss.backward() + optimizer.step() + + # Note: we clamp to 4.6052 = ln(100), as in the original paper. 
+ with torch.no_grad(): + unwrap_model(model).logit_scale_a.clamp_(0, math.log(100)) + if args.clap_mlploss: + unwrap_model(model).logit_scale_t.clamp_(0, math.log(100)) + + batch_time_m.update(time.time() - end) + end = time.time() + batch_count = i + 1 + if is_master(args) and (i % 100 == 0 or batch_count == num_batches_per_epoch): + if isinstance(audios, dict): + batch_size = len(audios["waveform"]) + else: + batch_size = len(audios) + num_samples = batch_count * batch_size * args.world_size + samples_per_epoch = dataloader.num_samples + percent_complete = 100.0 * batch_count / num_batches_per_epoch + + # NOTE loss is coarsely sampled, just master node and per log update + loss_m.update(total_loss.item(), batch_size) + logit_scale_scalar_a = logit_scale_a.item() + logit_scale_scalar_t = logit_scale_t.item() + if isinstance(optimizer, dict): + if args.clap_mlploss: + logging.info( + f"Train Epoch: {epoch} [{num_samples:>{sample_digits}}/{samples_per_epoch} ({percent_complete:.0f}%)] " + f"Loss: {loss_m.val:#.5g} ({loss_m.avg:#.4g}) " + f"Data (t): {data_time_m.avg:.3f} " + f"Batch (t): {batch_time_m.avg:.3f} " + f"LR: {[o_.param_groups[0]['lr'] for o_ in optimizer.values()]} " + f"Logit Scale Audio: {logit_scale_scalar_a:.3f}" + f"Logit Scale Text: {logit_scale_scalar_t:.3f}" + ) + log_data = { + "loss": loss_m.val, + "data_time": data_time_m.val, + "batch_time": batch_time_m.val, + "scale_audio": logit_scale_scalar_a, + "scale_text": logit_scale_scalar_t, + "lr": [o_.param_groups[0]["lr"] for o_ in optimizer.values()], + } + else: + logging.info( + f"Train Epoch: {epoch} [{num_samples:>{sample_digits}}/{samples_per_epoch} ({percent_complete:.0f}%)] " + f"Loss: {loss_m.val:#.5g} ({loss_m.avg:#.4g}) " + f"Data (t): {data_time_m.avg:.3f} " + f"Batch (t): {batch_time_m.avg:.3f} " + f"LR: {[o_.param_groups[0]['lr'] for o_ in optimizer.values()]} " + f"Logit Scale Audio: {logit_scale_scalar_a:.3f}" + ) + log_data = { + "loss": loss_m.val, + "data_time": 
data_time_m.val, + "batch_time": batch_time_m.val, + "scale_audio": logit_scale_scalar_a, + "lr": [o_.param_groups[0]["lr"] for o_ in optimizer.values()], + } + + else: + if args.clap_mlploss: + logging.info( + f"Train Epoch: {epoch} [{num_samples:>{sample_digits}}/{samples_per_epoch} ({percent_complete:.0f}%)] " + f"Loss: {loss_m.val:#.5g} ({loss_m.avg:#.4g}) " + f"Data (t): {data_time_m.avg:.3f} " + f"Batch (t): {batch_time_m.avg:.3f} " + f"LR: {optimizer.param_groups[0]['lr']:5f} " + f"Logit Scale Audio: {logit_scale_scalar_a:.3f}" + f"Logit Scale Text: {logit_scale_scalar_t:.3f}" + ) + + # Save train loss / etc. Using non avg meter values as loggers have their own smoothing + log_data = { + "loss": loss_m.val, + "data_time": data_time_m.val, + "batch_time": batch_time_m.val, + "scale_audio": logit_scale_scalar_a, + "scale_text": logit_scale_scalar_t, + "lr": optimizer.param_groups[0]["lr"], + } + else: + logging.info( + f"Train Epoch: {epoch} [{num_samples:>{sample_digits}}/{samples_per_epoch} ({percent_complete:.0f}%)] " + f"Loss: {loss_m.val:#.5g} ({loss_m.avg:#.4g}) " + f"Data (t): {data_time_m.avg:.3f} " + f"Batch (t): {batch_time_m.avg:.3f} " + f"LR: {optimizer.param_groups[0]['lr']:5f} " + f"Logit Scale Audio: {logit_scale_scalar_a:.3f}" + ) + + # Save train loss / etc. Using non avg meter values as loggers have their own smoothing + log_data = { + "loss": loss_m.val, + "data_time": data_time_m.val, + "batch_time": batch_time_m.val, + "scale_audio": logit_scale_scalar_a, + "lr": optimizer.param_groups[0]["lr"], + } + for name, val in log_data.items(): + name = "train/" + name + if tb_writer is not None: + tb_writer.add_scalar(name, val, step) + if args.wandb: + assert wandb is not None, "Please install wandb." 
+ wandb.log({name: val, "step": step}) + + # resetting batch / data time meters per log window + batch_time_m.reset() + data_time_m.reset() + # end for + + +def evaluate(model, data, epoch, args, tb_writer=None): + metrics = {} + if not args.parallel_eval: + if not is_master(args): + return metrics + device = torch.device(args.device) + model.eval() + + # CHANGE + # zero_shot_metrics = zero_shot_eval(model, data, epoch, args) + # metrics.update(zero_shot_metrics) + if is_master(args): + print("Evaluating...") + autocast = torch.cuda.amp.autocast if args.precision == "amp" else suppress + if args.val_dataset_names == ["Clotho", "audiocaps"]: + # if only clotho and audiocaps are used, then we will use a different evaluation function. + # This is because in the Clotho and audiocaps valid and test set, there are 5 text for 1 audio. + if args.parallel_eval: + # (yusong): just a hack here. Don't use parallel eval when evaluating only clotho and audiocaps. + raise NotImplementedError( + "Parallel evaluation not supported for eval only Clotho and audiocaps." 
+ ) + val_metrics_per_dataset = evaluate_clotho_audiocaps( + model, data, epoch, args, autocast, device, tb_writer + ) + for m in val_metrics_per_dataset.values(): + metrics.update(m) + if "epoch" not in metrics.keys(): + metrics.update({"epoch": epoch}) + metrics = select_top_metric_clotho_audiocaps( + metrics, val_metrics_per_dataset, args + ) + elif "val" in data and ( + args.val_frequency + and ((epoch % args.val_frequency) == 0 or epoch == args.epochs) + ): + dataloader = data["val"].dataloader + num_samples = 0 + samples_per_val = dataloader.num_samples + + # FIXME this does not scale past small eval datasets + # all_audio_features @ all_text_features will blow up memory and compute very quickly + eval_info = {} + if args.clap_mlploss: + eval_info["all"] = { + "cumulative_loss": 0.0, + "num_samples": 0, + "all_audio_features": [], + "all_text_features": [], + "all_audio_features_mlp": [], + "all_text_features_mlp": [], + } # cumulative_loss = 0.0 + else: + eval_info["all"] = { + "cumulative_loss": 0.0, + "num_samples": 0, + "all_audio_features": [], + "all_text_features": [], + } # cumu + # all_audio_features, all_text_features, all_audio_features_mlp, all_text_features_mlp = [], [], [], [] + with torch.no_grad(): + for i, batch in enumerate(dataloader): + audios = batch # contains mel_spec, wavform, and longer list + texts = batch["text"] + # audios = audios.to(device=device, non_blocking=True) + + all_names = list( + set(["-".join(b.split("/")[-3:-1]) for b in batch["__url__"]]) + ) + for name in all_names: + if name not in eval_info.keys(): + if args.clap_mlploss: + eval_info[name] = { + "cumulative_loss": 0.0, + "num_samples": 0, + "all_audio_features": [], + "all_text_features": [], + "all_audio_features_mlp": [], + "all_text_features_mlp": [], + } + else: + eval_info[name] = { + "cumulative_loss": 0.0, + "num_samples": 0, + "all_audio_features": [], + "all_text_features": [], + } + with autocast(): + ( + audio_features, + text_features, + 
audio_features_mlp, + text_features_mlp, + logit_scale_a, + logit_scale_t, + ) = model(audios, texts, device) + + if args.parallel_eval: + # multi-GPU eval + if args.clap_mlploss: + ( + audio_features, + text_features, + audio_features_mlp, + text_features_mlp, + ) = gather_features( + audio_features=audio_features, + text_features=text_features, + audio_features_mlp=audio_features_mlp, + text_features_mlp=text_features_mlp, + local_loss=False, + gather_with_grad=False, + rank=args.rank, + world_size=args.world_size, + use_horovod=args.horovod, + mlp_loss=args.clap_mlploss, + ) + else: + (audio_features, text_features,) = gather_features( + audio_features=audio_features, + text_features=text_features, + local_loss=False, + gather_with_grad=False, + rank=args.rank, + world_size=args.world_size, + use_horovod=args.horovod, + mlp_loss=args.clap_mlploss, + ) + + if is_master(args): + num_samples += audio_features.shape[0] + for n in [*all_names, "all"]: + if n == "all": + eval_info[n]["all_audio_features"].append( + audio_features.cpu() + ) + eval_info[n]["all_text_features"].append( + text_features.cpu() + ) + if args.clap_mlploss: + eval_info[n]["all_audio_features_mlp"].append( + audio_features_mlp.cpu() + ) + eval_info[n]["all_text_features_mlp"].append( + text_features_mlp.cpu() + ) + else: + idx = np.where( + np.array( + [ + "-".join(b.split("/")[-3:-1]) + for b in batch["__url__"] + ] + ) + == n + )[0] + eval_info[n]["all_audio_features"].append( + audio_features.cpu().index_select( + 0, torch.tensor(idx).long() + ) + ) + eval_info[n]["all_text_features"].append( + text_features.cpu().index_select( + 0, torch.tensor(idx).long() + ) + ) + if args.clap_mlploss: + eval_info[n]["all_audio_features_mlp"].append( + audio_features_mlp.cpu().index_select( + 0, torch.tensor(idx).long() + ) + ) + eval_info[n]["all_text_features_mlp"].append( + text_features_mlp.cpu().index_select( + 0, torch.tensor(idx).long() + ) + ) + # print(f'eval step {i}') # (yusong): for debug + + 
# cumulative_loss += total_loss * batch_size + # num_samples += batch_size + if is_master(args) and (i % 100) == 0: # and i != 0: + logging.info( + f"Eval Epoch: {epoch} [{num_samples} / {samples_per_val}]" + ) + if is_master(args): + val_metrics_per_dataset = {} + for n in eval_info.keys(): + if args.clap_mlploss: + metrics_single_dataset = get_metrics( + audio_features=torch.cat( + eval_info[n]["all_audio_features"] + ), + text_features=torch.cat(eval_info[n]["all_text_features"]), + logit_scale_a=logit_scale_a.cpu(), + audio_features_mlp=torch.cat( + eval_info[n]["all_audio_features_mlp"] + ), + text_features_mlp=torch.cat( + eval_info[n]["all_text_features_mlp"] + ), + logit_scale_t=logit_scale_t.cpu(), + mlp_loss=args.clap_mlploss, + ) + else: + metrics_single_dataset = get_metrics( + audio_features=torch.cat( + eval_info[n]["all_audio_features"] + ), + text_features=torch.cat(eval_info[n]["all_text_features"]), + logit_scale_a=logit_scale_a.cpu(), + mlp_loss=args.clap_mlploss, + ) + val_metrics_per_dataset[n] = { + n + "/" + k: v for k, v in metrics_single_dataset.items() + } + metrics.update(val_metrics_per_dataset[n]) + if "epoch" not in metrics.keys(): + metrics.update({"epoch": epoch}) + if is_master(args): + if not metrics: + return metrics + + logging.info( + f"Eval Epoch: {epoch} " + + "\n".join( + [ + "\t".join([f"{k}: {round(v, 4):.4f}" for k, v in m.items()]) + for m in val_metrics_per_dataset.values() + ] + ) + ) + + if args.save_logs: + for name, val in metrics.items(): + if tb_writer is not None: + tb_writer.add_scalar(f"val/{name}", val, epoch) + + with open(os.path.join(args.checkpoint_path, "results.jsonl"), "a+") as f: + f.write(json.dumps(metrics)) + f.write("\n") + + if args.wandb: + assert wandb is not None, "Please install wandb." 
def get_metrics(
    audio_features,
    text_features,
    logit_scale_a,
    audio_features_mlp=None,
    text_features_mlp=None,
    logit_scale_t=None,
    mlp_loss=False,
):
    """Compute the contrastive loss and retrieval metrics for paired features.

    Row ``i`` of the audio features is treated as the ground-truth match of
    row ``i`` of the text features (labels are ``arange(num_samples)``).
    Assumes the feature tensors are 2-D ``(num_samples, dim)`` with the same
    number of rows -- TODO confirm against callers.

    Args:
        audio_features: Audio embeddings.
        text_features: Text embeddings.
        logit_scale_a: Scale applied to the audio-side similarity matrix.
        audio_features_mlp: Audio embeddings from the MLP head; required
            when ``mlp_loss`` is true.
        text_features_mlp: Text embeddings from the MLP head; required when
            ``mlp_loss`` is true.
        logit_scale_t: Scale applied to the text-side similarity matrix;
            required when ``mlp_loss`` is true.
        mlp_loss: If true, use the four-term (2x2) cross-entropy loss and
            rank on the average of the two similarity matrices; otherwise
            use the two-term loss on a single similarity matrix.

    Returns:
        A dict with ``"cumulative_loss"`` (mean cross-entropy as a float),
        ``"num_samples"``, and, for each direction ``audio_to_text`` and
        ``text_to_audio``: ``_mean_rank`` and ``_median_rank`` (1-based),
        ``_R@1``, ``_R@5``, ``_R@10`` and ``_mAP@10``.

    Raises:
        ValueError: If ``mlp_loss`` is true but one of the MLP inputs is
            ``None``.
    """
    if mlp_loss:
        required = {
            "audio_features_mlp": audio_features_mlp,
            "text_features_mlp": text_features_mlp,
            "logit_scale_t": logit_scale_t,
        }
        missing = [arg for arg, value in required.items() if value is None]
        if missing:
            raise ValueError(
                "mlp_loss=True requires " + ", ".join(missing) + " to be provided"
            )

        # Audio-to-text and text-to-audio similarity matrices for both heads.
        a_logits_per_audio = (
            (logit_scale_a * audio_features @ text_features_mlp.t()).detach().cpu()
        )
        t_logits_per_audio = (
            (logit_scale_t * audio_features_mlp @ text_features.t()).detach().cpu()
        )
        a_logits_per_text = a_logits_per_audio.t()
        t_logits_per_text = t_logits_per_audio.t()

        # Four-term (2x2) combined cross-entropy loss.
        loss_terms = [
            a_logits_per_audio,
            a_logits_per_text,
            t_logits_per_audio,
            t_logits_per_text,
        ]
        logits = {
            "audio_to_text": (a_logits_per_audio + t_logits_per_audio) / 2,
            "text_to_audio": (a_logits_per_text + t_logits_per_text) / 2,
        }
    else:
        logits_per_audio = (
            (logit_scale_a * audio_features @ text_features.t()).detach().cpu()
        )
        logits_per_text = logits_per_audio.t()

        # Two-term symmetric cross-entropy loss.
        loss_terms = [logits_per_audio, logits_per_text]
        logits = {"audio_to_text": logits_per_audio, "text_to_audio": logits_per_text}

    labels = torch.arange(audio_features.shape[0]).long()
    total_loss = sum(F.cross_entropy(term, labels) for term in loss_terms) / len(
        loss_terms
    )

    metrics = {
        "cumulative_loss": total_loss.item(),
        "num_samples": audio_features.shape[0],
    }

    # Column vector so that `ranking == ground_truth` broadcasts per row.
    ground_truth = torch.arange(len(text_features)).view(-1, 1)

    for name, logit in logits.items():
        ranking = torch.argsort(logit, descending=True)
        # 0-based position of the matching item in each row's ranking.
        # (yusong) this line is slow because it uses single thread
        preds = torch.where(ranking == ground_truth)[1].numpy()
        metrics[f"{name}_mean_rank"] = preds.mean() + 1
        metrics[f"{name}_median_rank"] = np.floor(np.median(preds)) + 1
        for k in [1, 5, 10]:
            metrics[f"{name}_R@{k}"] = np.mean(preds < k)
        # map@10: reciprocal rank when the match is within the top 10, else 0.
        metrics[f"{name}_mAP@10"] = np.mean(np.where(preds < 10, 1 / (preds + 1), 0.0))

    return metrics
def _dataset_names(urls):
    """Map each sample URL to a dataset id built from the two path components
    directly above the file name, joined with ``-``."""
    return ["-".join(url.split("/")[-3:-1]) for url in urls]


def _audio_to_text_stats(logits_per_audio, num_captions=5, cutoff=10):
    """Return per-audio best caption rank and AP@``cutoff``.

    ``logits_per_audio`` is ``[num_samples, num_samples * num_captions]``; the
    captions of audio ``d`` occupy columns ``d * num_captions`` up to
    ``(d + 1) * num_captions``.
    """
    best_ranks = []
    average_precisions = []
    for d in range(logits_per_audio.shape[0]):
        ranking = torch.argsort(logits_per_audio[d, :], descending=True)
        ground_truth = torch.arange(
            d * num_captions, (d + 1) * num_captions
        ).view(-1, 1)
        # 0-based rank of each of this audio's captions, in caption order.
        caption_ranks = torch.where(ranking.unsqueeze(0) == ground_truth)[1]
        best_ranks.append(int(caption_ranks.min()))
        # The ranks MUST be sorted before pairing them with 1, 2, 3, ...:
        # precision at the i-th hit is i / rank_i only for ascending ranks.
        hits = np.sort(caption_ranks[caption_ranks < cutoff].detach().cpu().numpy())
        # Dividing by num_captions counts captions ranked >= cutoff as 0.
        average_precisions.append(
            np.sum(np.arange(1, len(hits) + 1) / (hits + 1)) / num_captions
        )
    return np.array(best_ranks), average_precisions


def evaluate_clotho_audiocaps(
    model, data, epoch, args, autocast, device, tb_writer=None
):
    """
    Adapted from https://github.com/XinhaoMei/audio-text_retrieval/blob/main/tools/utils.py.
    1. for text-to-audio retrieval, do 5 times and average the results
    2. for R@1, R@5, R@10 in audio-to-text retrieval, take the best rank among 5 text
    3. for map@10 in audio-to-text retrieval:
        3.1: sort the rank of 5 text
        3.2: exclude the rank >=10 (0-index)
        3.3: compute the map regarding the remaining ranks: np.mean(np.arange(1, len(ranks)+1) / ranks).
        (3.3) That is, take the top ranks of 5 text that is < 10, and assign the descending number as ground truth.
        (3.3) E.g.: the ground truth of first rank of the 5 text should be 1, the second rank should be 2, etc.

    Every audio clip is expected to come with exactly 5 captions.
    ``epoch`` and ``tb_writer`` are accepted but not used here.

    Returns a dict mapping each dataset name to a dict of metrics whose keys
    are prefixed with ``"<dataset name>/"``.
    """
    # TODO: (yusong) only support single GPU evaluation and only support non-mlp case for now.
    num_captions = 5
    dataloader = data["val"].dataloader
    with torch.no_grad():
        eval_info = {}
        for batch in dataloader:
            audios = batch  # contains mel_spec, wavform, and longer list

            # each item in the list has 5 texts
            if args.tmodel == "transformer":
                from open_clip import tokenize

                texts = torch.cat([tokenize(t) for t in batch["full_text"]])
            else:
                from .data import tokenizer

                tokenized = [
                    tokenizer(t) for t in batch["full_text"]
                ]  # 5 texts for each audio
                texts = {
                    k: torch.cat([t[k] for t in tokenized]) for k in tokenized[0]
                }  # 5 x batch

            # Dataset id of every sample in the batch, computed once.
            names = np.array(_dataset_names(batch["__url__"]))
            batch_datasets = list(dict.fromkeys(names.tolist()))
            for name in batch_datasets:
                # we will not use mlp outputs even if args.clap_mlploss=True
                eval_info.setdefault(
                    name, {"all_audio_features": [], "all_text_features": []}
                )

            with autocast():
                audio_features = model(audios, None, device)
                text_features = model(None, texts, device)
                audio_features = F.normalize(audio_features, dim=-1)
                text_features = F.normalize(text_features, dim=-1)

            audio_cpu = audio_features.cpu()
            # Group the 5 captions of each audio so that one index selects
            # all of them at once: (5 * num_samples, dim) -> (num_samples, 5, dim).
            text_cpu = text_features.cpu().reshape(
                [-1, num_captions, text_features.shape[1]]
            )
            for name in batch_datasets:
                idx = torch.as_tensor(np.where(names == name)[0]).long()
                eval_info[name]["all_audio_features"].append(
                    audio_cpu.index_select(0, idx)
                )
                eval_info[name]["all_text_features"].append(
                    text_cpu.index_select(0, idx).reshape(
                        [-1, text_features.shape[1]]
                    )
                )

        val_metrics_all = {}

        for n in eval_info:
            logit_scale_a, _ = model(None, None, device)
            logit_scale_a = logit_scale_a.cpu()

            audio_features = torch.cat(eval_info[n]["all_audio_features"], dim=0)
            text_features = torch.cat(eval_info[n]["all_text_features"], dim=0)

            # logits_per_audio shape: [num_samples, num_samples*5]
            # logits_per_text shape: [num_samples*5, num_samples]
            logits_per_audio = (
                (logit_scale_a * audio_features @ text_features.t()).detach().cpu()
            )
            logits_per_text = logits_per_audio.t()

            logging.info(
                f"dataset {n}, logits_per_audio shape: {logits_per_audio.shape}, "
                f"logits_per_text shape: {logits_per_text.shape}"
            )

            metrics = {}
            num_samples = audio_features.shape[0]
            metrics["num_samples"] = num_samples

            # (yusong) the following code is very important, please double-check:
            # audio_view[:, :, d] and text_view[:, d, :] each pick the d-th of
            # the 5 captions of every audio.
            audio_view = logits_per_audio.reshape(
                num_samples, num_samples, num_captions
            )
            text_view = logits_per_text.reshape(
                num_samples, num_captions, num_samples
            )
            labels = torch.arange(num_samples).long()
            audio_to_text_loss = torch.stack(
                [
                    F.cross_entropy(audio_view[:, :, d], labels)
                    for d in range(num_captions)
                ]
            ).mean()
            text_to_audio_loss = torch.stack(
                [
                    F.cross_entropy(text_view[:, d, :], labels)
                    for d in range(num_captions)
                ]
            ).mean()
            total_loss = (audio_to_text_loss + text_to_audio_loss) / 2
            metrics["cumulative_loss"] = total_loss.item()

            # text to audio: do 5 times
            pred_text = []
            for d in range(num_captions):
                logit = text_view[:, d, :]
                ground_truth = torch.arange(len(logit)).view(-1, 1)
                ranking = torch.argsort(
                    logit, descending=True
                )  # [num_samples, num_samples]
                preds = torch.where(ranking == ground_truth)[1]
                pred_text.append(preds.detach().cpu().numpy())
            pred_text_concat = np.concatenate(pred_text, axis=0)  # [5*num_samples]
            metrics["text_to_audio_mean_rank"] = pred_text_concat.mean() + 1
            metrics["text_to_audio_median_rank"] = (
                np.floor(np.median(pred_text_concat)) + 1
            )
            for k in (1, 5, 10):
                metrics[f"text_to_audio_R@{k}"] = np.mean(pred_text_concat < k)
            # map@10
            metrics["text_to_audio_mAP@10"] = np.mean(
                np.where(pred_text_concat < 10, 1 / (pred_text_concat + 1), 0.0)
            )

            # audio to text: take the best result
            # for audio to text map 10, sort and assign descending ground truth.
            # see https://github.com/XinhaoMei/audio-text_retrieval/blob/main/tools/utils.py#L103
            best_ranks, map_all = _audio_to_text_stats(
                logits_per_audio, num_captions=num_captions, cutoff=10
            )
            metrics["audio_to_text_mAP@10"] = np.mean(map_all)
            for k in (1, 5, 10):
                metrics[f"audio_to_text_R@{k}"] = np.mean(best_ranks < k)

            val_metrics_all[n] = {n + "/" + k: v for k, v in metrics.items()}
    return val_metrics_all


def calculate_selection_performance_clotho_audiocaps(val_metrics_per_dataset):
    """
    Calculate performance for Clotho+AudioCaps for model selection.

    The score of one dataset is the mean of its audio-to-text and
    text-to-audio mAP@10; the result is the mean of those scores over all
    datasets in ``val_metrics_per_dataset``.
    """
    selection_performance_all = [
        (
            dataset_metrics[f"{n}/audio_to_text_mAP@10"]
            + dataset_metrics[f"{n}/text_to_audio_mAP@10"]
        )
        / 2
        for n, dataset_metrics in val_metrics_per_dataset.items()
    ]
    return np.mean(selection_performance_all)


def select_top_metric_clotho_audiocaps(metrics, val_metrics_per_dataset, args):
    """Merge the best-so-far validation metrics into ``metrics``.

    Args:
        metrics: dict, key: metric name, value: metric value. Must contain
            ``"epoch"``. Updated in place and returned.
        val_metrics_per_dataset: dict, key: dataset name, value: dict of
            ``"<dataset>/<metric>"`` -> value.
        args: object used as storage for the running best
            (``args.top_selection_performance`` and ``args.top_metric``).

    When the current selection performance is the first one seen or strictly
    better than the stored one, the current per-dataset metrics are recorded
    under ``"<dataset>-top/<metric>"`` together with the score and epoch;
    otherwise the previously stored best metrics are merged in instead.
    """
    # Hack: use args to save the top performance
    selection_performance = calculate_selection_performance_clotho_audiocaps(
        val_metrics_per_dataset
    )
    is_new_best = (
        not hasattr(args, "top_selection_performance")
        or selection_performance > args.top_selection_performance
    )
    if not is_new_best:
        metrics.update(args.top_metric)
        return metrics

    metric_update = {}
    for dataset_metrics in val_metrics_per_dataset.values():
        for k, value in dataset_metrics.items():
            parts = k.split("/")
            metric_update[parts[0] + "-top" + "/" + parts[1]] = value
    metric_update["top_selection_performance"] = selection_performance
    metric_update["top-selection-epoch"] = metrics["epoch"]
    metrics.update(metric_update)
    args.top_metric = metric_update
    args.top_selection_performance = selection_performance
    return metrics
def zero_shot_classifier(model, classnames, templates, args):
    """Build zero-shot classification weights from class-name prompts.

    For every class name, each callable in ``templates`` is applied to it, the
    resulting prompts are tokenized and encoded with the model's text encoder,
    and the L2-normalised embeddings are averaged and re-normalised.

    Returns a tensor with one column per class, placed on ``args.device``.
    """
    with torch.no_grad():
        zeroshot_weights = []
        # Distributed (non-horovod) training wraps the model; unwrap it once.
        text_encoder = (
            model.module if args.distributed and not args.horovod else model
        )
        for classname in tqdm(classnames):
            texts = [template(classname) for template in templates]  # format with class
            texts = tokenize(texts).to(args.device)  # tokenize
            class_embeddings = text_encoder.encode_text(texts)
            class_embedding = F.normalize(class_embeddings, dim=-1).mean(dim=0)
            class_embedding /= class_embedding.norm()
            zeroshot_weights.append(class_embedding)
        zeroshot_weights = torch.stack(zeroshot_weights, dim=1).to(args.device)
    return zeroshot_weights


def accuracy(output, target, topk=(1,)):
    """Count top-k hits.

    Args:
        output: ``[batch, num_classes]`` scores.
        target: ``[batch]`` ground-truth class indices.
        topk: iterable of k values.

    Returns:
        list of floats: for each k, the NUMBER (not the fraction) of samples
        whose target is among the k highest-scoring classes.
    """
    pred = output.topk(max(topk), 1, True, True)[1].t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    # ``.item()`` avoids converting a 1-element ndarray with ``float()``,
    # which recent NumPy versions deprecate.
    return [float(correct[:k].sum().item()) for k in topk]


def run(model, classifier, dataloader, args):
    """Evaluate top-1 / top-5 zero-shot accuracy over ``dataloader``.

    Returns ``(top1, top5)`` as fractions of all evaluated images, or
    ``(0.0, 0.0)`` when the dataloader yields nothing.
    """
    autocast = torch.cuda.amp.autocast if args.precision == "amp" else suppress
    image_encoder = model.module if args.distributed and not args.horovod else model
    with torch.no_grad():
        top1, top5, n = 0.0, 0.0, 0.0
        for images, target in tqdm(dataloader, unit_scale=args.batch_size):
            images = images.to(args.device)
            target = target.to(args.device)

            with autocast():
                # predict
                image_features = image_encoder.encode_image(images)
                image_features = F.normalize(image_features, dim=-1)
                logits = 100.0 * image_features @ classifier

            # measure accuracy
            acc1, acc5 = accuracy(logits, target, topk=(1, 5))
            top1 += acc1
            top5 += acc5
            n += images.size(0)

    if n == 0:
        # An empty dataloader would otherwise raise ZeroDivisionError.
        logging.warning("Zero-shot evaluation received an empty dataloader.")
        return 0.0, 0.0
    return top1 / n, top5 / n


def zero_shot_eval(model, data, epoch, args):
    """Run zero-shot ImageNet evaluation when it is due for ``epoch``.

    Returns ``{}`` when neither ``"imagenet-val"`` nor ``"imagenet-v2"`` is in
    ``data``, when ``args.zeroshot_frequency`` is 0, or when ``epoch`` is
    neither a multiple of that frequency nor the final epoch. Otherwise
    returns top-1/top-5 accuracy for every ImageNet split present in ``data``.
    """
    if "imagenet-val" not in data and "imagenet-v2" not in data:
        return {}
    if args.zeroshot_frequency == 0:
        return {}
    if (epoch % args.zeroshot_frequency) != 0 and epoch != args.epochs:
        return {}

    logging.info("Starting zero-shot imagenet.")

    logging.info("Building zero-shot classifier")
    classifier = zero_shot_classifier(
        model, imagenet_classnames, openai_imagenet_template, args
    )

    logging.info("Using classifier")
    results = {}
    # (key in ``data``, prefix of the reported metric names)
    for split, prefix in (("imagenet-val", "imagenet"), ("imagenet-v2", "imagenetv2")):
        if split in data:
            top1, top5 = run(model, classifier, data[split].dataloader, args)
            results[f"{prefix}-zeroshot-val-top1"] = top1
            results[f"{prefix}-zeroshot-val-top5"] = top5

    logging.info("Finished zero-shot imagenet.")

    return results


class AttrDict(dict):
    """Dictionary whose items are also readable and writable as attributes.

    The instance ``__dict__`` is the dict itself, so ``d.key`` and ``d["key"]``
    always refer to the same entry.
    """

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self
LRELU_SLOPE = 0.1


def init_weights(m, mean=0.0, std=0.01):
    """Re-initialise the weight of any module whose class name contains
    ``"Conv"`` from a normal distribution; other modules are left untouched."""
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        m.weight.data.normal_(mean, std)


def get_padding(kernel_size, dilation=1):
    """Return the padding that keeps the length unchanged for a stride-1
    convolution with the given (odd) kernel size and dilation."""
    return int((kernel_size * dilation - dilation) / 2)


def _same_length_conv(channels, kernel_size, dilation):
    """Weight-normalised, length-preserving ``channels -> channels`` Conv1d."""
    return weight_norm(
        Conv1d(
            channels,
            channels,
            kernel_size,
            1,
            dilation=dilation,
            padding=get_padding(kernel_size, dilation),
        )
    )


class ResBlock(torch.nn.Module):
    """Residual block of three (dilated conv, plain conv) pairs.

    Args:
        h: hyper-parameter object; stored but not otherwise used here.
        channels: number of input and output channels.
        kernel_size: kernel size of every convolution.
        dilation: sequence whose first three entries are the dilations of the
            first convolution of each pair.
    """

    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)):
        super(ResBlock, self).__init__()
        self.h = h
        # Exactly three stages, as before (a shorter sequence raises IndexError).
        dilations = tuple(dilation[i] for i in range(3))

        self.convs1 = nn.ModuleList(
            [_same_length_conv(channels, kernel_size, d) for d in dilations]
        )
        self.convs1.apply(init_weights)

        self.convs2 = nn.ModuleList(
            [_same_length_conv(channels, kernel_size, 1) for _ in dilations]
        )
        self.convs2.apply(init_weights)

    def forward(self, x):
        """Apply each conv pair with leaky-ReLU pre-activations and a skip connection."""
        for c1, c2 in zip(self.convs1, self.convs2):
            xt = F.leaky_relu(x, LRELU_SLOPE)
            xt = c1(xt)
            xt = F.leaky_relu(xt, LRELU_SLOPE)
            xt = c2(xt)
            x = xt + x
        return x

    def remove_weight_norm(self):
        """Strip the weight-norm re-parametrisation from every convolution."""
        for conv in self.convs1:
            remove_weight_norm(conv)
        for conv in self.convs2:
            remove_weight_norm(conv)


class Generator(torch.nn.Module):
    """Stack of transposed-conv upsamplers, each followed by parallel ResBlocks.

    ``h`` must provide ``num_mels``, ``upsample_initial_channel``,
    ``upsample_rates``, ``upsample_kernel_sizes``, ``resblock_kernel_sizes``
    and ``resblock_dilation_sizes``. The channel count is halved at every
    upsampling stage.
    """

    def __init__(self, h):
        super(Generator, self).__init__()
        self.h = h
        self.num_kernels = len(h.resblock_kernel_sizes)
        self.num_upsamples = len(h.upsample_rates)
        self.conv_pre = weight_norm(
            Conv1d(h.num_mels, h.upsample_initial_channel, 7, 1, padding=3)
        )
        resblock = ResBlock

        self.ups = nn.ModuleList()
        for i, (u, k) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)):
            self.ups.append(
                weight_norm(
                    ConvTranspose1d(
                        h.upsample_initial_channel // (2**i),
                        h.upsample_initial_channel // (2 ** (i + 1)),
                        k,
                        u,
                        padding=(k - u) // 2,
                    )
                )
            )

        self.resblocks = nn.ModuleList()
        # Defined up front so conv_post can still be built when there are no
        # upsampling stages.
        ch = h.upsample_initial_channel
        for i in range(len(self.ups)):
            ch = h.upsample_initial_channel // (2 ** (i + 1))
            for k, d in zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes):
                self.resblocks.append(resblock(h, ch, k, d))

        self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
        self.ups.apply(init_weights)
        self.conv_post.apply(init_weights)

    def forward(self, x):
        """Map ``[batch, num_mels, frames]`` features to ``[batch, 1, samples]``
        values in ``[-1, 1]``."""
        x = self.conv_pre(x)
        for i in range(self.num_upsamples):
            x = F.leaky_relu(x, LRELU_SLOPE)
            x = self.ups[i](x)
            # Average the outputs of this stage's parallel residual blocks.
            xs = None
            for j in range(self.num_kernels):
                if xs is None:
                    xs = self.resblocks[i * self.num_kernels + j](x)
                else:
                    xs += self.resblocks[i * self.num_kernels + j](x)
            x = xs / self.num_kernels
        # NOTE(review): this activation uses the default negative slope rather
        # than LRELU_SLOPE; left untouched because changing it would alter the
        # output of existing checkpoints - confirm it is intentional.
        x = F.leaky_relu(x)
        x = self.conv_post(x)
        x = torch.tanh(x)

        return x

    def remove_weight_norm(self):
        """Strip weight norm from every layer of the generator."""
        # print("Removing weight norm...")
        for up in self.ups:
            remove_weight_norm(up)
        for block in self.resblocks:
            block.remove_weight_norm()
        remove_weight_norm(self.conv_pre)
        remove_weight_norm(self.conv_post)
# Hyper-parameters of the HiFi-GAN generator used by ``get_vocoder``.
HIFIGAN_16K_64 = {
    "resblock": "1",
    "num_gpus": 6,
    "batch_size": 16,
    "learning_rate": 0.0002,
    "adam_b1": 0.8,
    "adam_b2": 0.99,
    "lr_decay": 0.999,
    "seed": 1234,
    "upsample_rates": [5, 4, 2, 2, 2],
    "upsample_kernel_sizes": [16, 16, 8, 4, 4],
    "upsample_initial_channel": 1024,
    "resblock_kernel_sizes": [3, 7, 11],
    "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
    "segment_size": 8192,
    "num_mels": 64,
    "num_freq": 1025,
    "n_fft": 1024,
    "hop_size": 160,
    "win_size": 1024,
    "sampling_rate": 16000,
    "fmin": 0,
    "fmax": 8000,
    "fmax_for_loss": None,
    "num_workers": 4,
    "dist_config": {
        "dist_backend": "nccl",
        "dist_url": "tcp://localhost:54321",
        "world_size": 1,
    },
}


def get_available_checkpoint_keys(model, ckpt):
    """Return the checkpoint entries that can be loaded into ``model``.

    Args:
        model: module whose ``state_dict()`` defines the expected keys/shapes.
        ckpt: path of a checkpoint holding its weights under ``"state_dict"``.

    Returns:
        dict with only those checkpoint tensors whose key exists in the model
        and whose shape matches; every skipped key is printed.
    """
    print("==> Attemp to reload from %s" % ckpt)
    # Load onto the CPU so a checkpoint saved from a GPU can also be inspected
    # on a machine without one.
    state_dict = torch.load(ckpt, map_location="cpu")["state_dict"]
    current_state_dict = model.state_dict()
    new_state_dict = {}
    for k, value in state_dict.items():
        if k in current_state_dict and current_state_dict[k].size() == value.size():
            new_state_dict[k] = value
        else:
            print("==> WARNING: Skipping %s" % k)
    print(
        "%s out of %s keys are matched"
        % (len(new_state_dict.keys()), len(state_dict.keys()))
    )
    return new_state_dict


def get_param_num(model):
    """Return the total number of elements over all parameters of ``model``."""
    return sum(param.numel() for param in model.parameters())


def get_vocoder(config, device):
    """Build the HiFi-GAN generator in eval mode on ``device``.

    NOTE: the ``config`` argument is ignored; the generator is always built
    from ``HIFIGAN_16K_64``. No checkpoint is loaded here.
    """
    config = hifigan.AttrDict(HIFIGAN_16K_64)
    vocoder = hifigan.Generator(config)
    vocoder.eval()
    vocoder.remove_weight_norm()
    vocoder.to(device)
    return vocoder


def vocoder_infer(mels, vocoder, lengths=None):
    """Run the vocoder and return 16-bit PCM audio.

    Args:
        mels: input passed to ``vocoder`` unchanged.
        vocoder: module whose output has a singleton dimension 1
            (``[batch, 1, samples]``) with values nominally in ``[-1, 1]``.
        lengths: optional number of samples to keep per waveform.

    Returns:
        ``int16`` ndarray of shape ``[batch, samples]``.
    """
    vocoder.eval()
    with torch.no_grad():
        wavs = vocoder(mels).squeeze(1)

    # A sample of exactly +1.0 scales to 32768, one above the int16 maximum,
    # and would wrap around to -32768; clip before converting.
    wavs = np.clip(wavs.cpu().numpy() * 32768, -32768, 32767).astype("int16")

    if lengths is not None:
        wavs = wavs[:, :lengths]

    return wavs
def exists(val):
    """Return True when ``val`` is not ``None``."""
    return val is not None


def uniq(arr):
    """Return the distinct elements of ``arr`` (as dict keys, first-seen order)."""
    return {el: True for el in arr}.keys()


def default(val, d):
    """Return ``val`` unless it is ``None``; otherwise ``d``, calling it first
    when it is a plain Python function."""
    if exists(val):
        return val
    return d() if isfunction(d) else d


def max_neg_value(t):
    """Return the most negative finite value of ``t``'s dtype."""
    return -torch.finfo(t.dtype).max


def init_(tensor):
    """Fill ``tensor`` in place from U(-1/sqrt(dim), 1/sqrt(dim)), where
    ``dim`` is its last dimension, and return it."""
    dim = tensor.shape[-1]
    std = 1 / math.sqrt(dim)
    # Without no_grad the in-place fill raises for leaf tensors that require
    # grad (e.g. nn.Parameter).
    with torch.no_grad():
        tensor.uniform_(-std, std)
    return tensor


# feedforward
class GEGLU(nn.Module):
    """Gated linear unit with a GELU gate: one projection produces both the
    value and the gate, and the output is ``value * gelu(gate)``."""

    def __init__(self, dim_in, dim_out):
        super().__init__()
        self.proj = nn.Linear(dim_in, dim_out * 2)

    def forward(self, x):
        x, gate = self.proj(x).chunk(2, dim=-1)
        return x * F.gelu(gate)


class FeedForward(nn.Module):
    """Two-layer feed-forward network applied on the last dimension.

    Args:
        dim: input size.
        dim_out: output size; defaults to ``dim``.
        mult: the hidden size is ``int(dim * mult)``.
        glu: use a ``GEGLU`` input projection instead of Linear + GELU.
        dropout: dropout probability between the two projections.
    """

    def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0):
        super().__init__()
        inner_dim = int(dim * mult)
        dim_out = default(dim_out, dim)
        project_in = (
            nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU())
            if not glu
            else GEGLU(dim, inner_dim)
        )

        self.net = nn.Sequential(
            project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)
        )

    def forward(self, x):
        return self.net(x)


def zero_module(module):
    """
    Zero out the parameters of a module and return it.
    """
    for p in module.parameters():
        p.detach().zero_()
    return module


def Normalize(in_channels, num_groups=32):
    """Return an affine ``GroupNorm`` over ``in_channels`` channels (eps=1e-6).

    ``in_channels`` must be divisible by ``num_groups`` (32 by default).
    """
    return torch.nn.GroupNorm(
        num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True
    )


class LinearAttention(nn.Module):
    """Linear-complexity attention over the positions of a 4-D feature map.

    The keys are soft-maxed over positions and combined with the values into
    a per-head context before being applied to the queries, so no
    position-by-position attention matrix is formed.
    """

    def __init__(self, dim, heads=4, dim_head=32):
        super().__init__()
        self.heads = heads
        hidden_dim = dim_head * heads
        self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False)
        self.to_out = nn.Conv2d(hidden_dim, dim, 1)

    def forward(self, x):
        b, c, h, w = x.shape
        qkv = self.to_qkv(x)
        q, k, v = rearrange(
            qkv, "b (qkv heads c) h w -> qkv b heads c (h w)", heads=self.heads, qkv=3
        )
        k = k.softmax(dim=-1)
        context = torch.einsum("bhdn,bhen->bhde", k, v)
        out = torch.einsum("bhde,bhdn->bhen", context, q)
        out = rearrange(
            out, "b heads c (h w) -> b (heads c) h w", heads=self.heads, h=h, w=w
        )
        return self.to_out(out)
class SpatialSelfAttention(nn.Module):
    """Single-head self-attention over the spatial positions of a feature map,
    added back to the input as a residual."""

    def __init__(self, in_channels):
        super().__init__()
        self.in_channels = in_channels

        self.norm = Normalize(in_channels)
        self.q = torch.nn.Conv2d(
            in_channels, in_channels, kernel_size=1, stride=1, padding=0
        )
        self.k = torch.nn.Conv2d(
            in_channels, in_channels, kernel_size=1, stride=1, padding=0
        )
        self.v = torch.nn.Conv2d(
            in_channels, in_channels, kernel_size=1, stride=1, padding=0
        )
        self.proj_out = torch.nn.Conv2d(
            in_channels, in_channels, kernel_size=1, stride=1, padding=0
        )

    def forward(self, x):
        """Return ``x`` plus the attention output; ``x`` is ``[b, c, h, w]``."""
        h_ = x
        h_ = self.norm(h_)
        q = self.q(h_)
        k = self.k(h_)
        v = self.v(h_)

        # compute attention
        b, c, h, w = q.shape
        q = rearrange(q, "b c h w -> b (h w) c")
        k = rearrange(k, "b c h w -> b c (h w)")
        w_ = torch.einsum("bij,bjk->bik", q, k)

        w_ = w_ * (int(c) ** (-0.5))
        w_ = torch.nn.functional.softmax(w_, dim=2)

        # attend to values
        v = rearrange(v, "b c h w -> b c (h w)")
        w_ = rearrange(w_, "b i j -> b j i")
        h_ = torch.einsum("bij,bjk->bik", v, w_)
        h_ = rearrange(h_, "b c (h w) -> b c h w", h=h)
        h_ = self.proj_out(h_)

        return x + h_


class CrossAttention(nn.Module):
    """
    ### Cross Attention Layer
    This falls-back to self-attention when conditional embeddings are not specified.
    """

    # Class-wide switch; flash attention is used only when this is True AND the
    # optional `flash_attn` package could be imported.
    # use_flash_attention: bool = True
    use_flash_attention: bool = False

    def __init__(
        self,
        query_dim,
        context_dim=None,
        heads=8,
        dim_head=64,
        dropout=0.0,
        is_inplace: bool = True,
    ):
        """
        :param query_dim: is the input embedding size
        :param context_dim: is the size of the conditional embeddings
            (defaults to `query_dim`, i.e. self-attention)
        :param heads: is the number of attention heads
        :param dim_head: is the size of a attention head
        :param dropout: is the dropout probability applied after the output projection
        :param is_inplace: specifies whether to perform the attention softmax computation inplace to
            save memory
        """
        super().__init__()

        self.is_inplace = is_inplace
        self.n_heads = heads
        self.d_head = dim_head

        # Attention scaling factor
        self.scale = dim_head**-0.5

        # The normal self-attention layer
        if context_dim is None:
            context_dim = query_dim

        # Query, key and value mappings
        d_attn = dim_head * heads
        self.to_q = nn.Linear(query_dim, d_attn, bias=False)
        self.to_k = nn.Linear(context_dim, d_attn, bias=False)
        self.to_v = nn.Linear(context_dim, d_attn, bias=False)

        # Final linear layer
        self.to_out = nn.Sequential(nn.Linear(d_attn, query_dim), nn.Dropout(dropout))

        # Setup [flash attention](https://github.com/HazyResearch/flash-attention).
        # Flash attention is only used if it's installed
        # and `CrossAttention.use_flash_attention` is set to `True`.
        try:
            # You can install flash attention by cloning their Github repo,
            # [https://github.com/HazyResearch/flash-attention](https://github.com/HazyResearch/flash-attention)
            # and then running `python setup.py install`
            from flash_attn.flash_attention import FlashAttention

            self.flash = FlashAttention()
            # Set the scale for scaled dot-product attention.
            self.flash.softmax_scale = self.scale
        # Set to `None` if it's not installed
        except ImportError:
            self.flash = None

    def forward(self, x, context=None, mask=None):
        """
        :param x: are the input embeddings of shape `[batch_size, height * width, query_dim]`
        :param context: is the conditional embeddings of shape `[batch_size, n_cond, context_dim]`
        :param mask: is accepted for interface compatibility but is NOT applied
        """

        # If `context` is `None` we perform self attention
        has_cond = context is not None
        if not has_cond:
            context = x

        # Get query, key and value vectors
        q = self.to_q(x)
        k = self.to_k(context)
        v = self.to_v(context)

        # Use flash attention if it's available and the head size is less than or equal to `128`
        if (
            CrossAttention.use_flash_attention
            and self.flash is not None
            and not has_cond
            and self.d_head <= 128
        ):
            return self.flash_attention(q, k, v)
        # Otherwise, fallback to normal attention
        return self.normal_attention(q, k, v)

    def flash_attention(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor):
        """
        #### Flash Attention
        :param q: are the query vectors before splitting heads, of shape `[batch_size, seq, d_attn]`
        :param k: are the key vectors before splitting heads, of shape `[batch_size, seq, d_attn]`
        :param v: are the value vectors before splitting heads, of shape `[batch_size, seq, d_attn]`
        """

        # Get batch size and number of elements along sequence axis (`width * height`)
        batch_size, seq_len, _ = q.shape

        # Stack `q`, `k`, `v` vectors for flash attention, to get a single tensor of
        # shape `[batch_size, seq_len, 3, n_heads * d_head]`
        qkv = torch.stack((q, k, v), dim=2)
        # Split the heads
        qkv = qkv.view(batch_size, seq_len, 3, self.n_heads, self.d_head)

        # Flash attention works for head sizes `32`, `64` and `128`, so we have to pad the heads to
        # fit this size.
        if self.d_head <= 32:
            pad = 32 - self.d_head
        elif self.d_head <= 64:
            pad = 64 - self.d_head
        elif self.d_head <= 128:
            pad = 128 - self.d_head
        else:
            raise ValueError(f"Head size {self.d_head} too large for Flash Attention")

        # Pad the heads
        if pad:
            qkv = torch.cat(
                (qkv, qkv.new_zeros(batch_size, seq_len, 3, self.n_heads, pad)), dim=-1
            )

        # Compute attention
        # $$\underset{seq}{softmax}\Bigg(\frac{Q K^\top}{\sqrt{d_{key}}}\Bigg)V$$
        # This gives a tensor of shape `[batch_size, seq_len, n_heads, d_padded]`
        # TODO here I add the dtype changing
        out, _ = self.flash(qkv.type(torch.float16))
        # Truncate the extra head size
        out = out[:, :, :, : self.d_head].float()
        # Reshape to `[batch_size, seq_len, n_heads * d_head]`
        out = out.reshape(batch_size, seq_len, self.n_heads * self.d_head)

        # Map to `[batch_size, height * width, query_dim]` with a linear layer
        return self.to_out(out)

    def normal_attention(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor):
        """
        #### Normal Attention

        :param q: are the query vectors before splitting heads, of shape `[batch_size, seq, d_attn]`
        :param k: are the key vectors before splitting heads, of shape `[batch_size, n_cond, d_attn]`
        :param v: are the value vectors before splitting heads, of shape `[batch_size, n_cond, d_attn]`
        """

        # Split them to heads of shape `[batch_size, seq_len, n_heads, d_head]`
        q = q.view(*q.shape[:2], self.n_heads, -1)
        k = k.view(*k.shape[:2], self.n_heads, -1)
        v = v.view(*v.shape[:2], self.n_heads, -1)

        # Calculate attention $\frac{Q K^\top}{\sqrt{d_{key}}}$
        attn = torch.einsum("bihd,bjhd->bhij", q, k) * self.scale

        # Compute softmax
        # $$\underset{seq}{softmax}\Bigg(\frac{Q K^\top}{\sqrt{d_{key}}}\Bigg)$$
        if self.is_inplace:
            # Soft-max the two halves of the batch one after the other and
            # write the result back into `attn`.
            half = attn.shape[0] // 2
            attn[half:] = attn[half:].softmax(dim=-1)
            attn[:half] = attn[:half].softmax(dim=-1)
        else:
            attn = attn.softmax(dim=-1)

        # Compute attention output
        # $$\underset{seq}{softmax}\Bigg(\frac{Q K^\top}{\sqrt{d_{key}}}\Bigg)V$$
        out = torch.einsum("bhij,bjhd->bihd", attn, v)
        # Reshape to `[batch_size, height * width, n_heads * d_head]`
        out = out.reshape(*out.shape[:2], -1)
        # Map to `[batch_size, height * width, query_dim]` with a linear layer
        return self.to_out(out)
class BasicTransformerBlock(nn.Module):
    """Pre-norm transformer block: self-attention, then cross-attention, then
    a feed-forward layer, each wrapped in a residual connection."""

    def __init__(
        self,
        dim,
        n_heads,
        d_head,
        dropout=0.0,
        context_dim=None,
        gated_ff=True,
        checkpoint=True,
    ):
        super().__init__()
        # First attention never receives a context, so it is a self-attention.
        self.attn1 = CrossAttention(
            query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout
        )
        self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff)
        # Second attention attends to `context`; it is a self-attention when
        # no context is given.
        self.attn2 = CrossAttention(
            query_dim=dim,
            context_dim=context_dim,
            heads=n_heads,
            dim_head=d_head,
            dropout=dropout,
        )
        self.norm1 = nn.LayerNorm(dim)
        self.norm2 = nn.LayerNorm(dim)
        self.norm3 = nn.LayerNorm(dim)
        self.checkpoint = checkpoint

    def forward(self, x, context=None):
        """Run ``_forward`` through the ``checkpoint`` helper, passing along
        the ``self.checkpoint`` flag."""
        inputs = (x,) if context is None else (x, context)
        return checkpoint(self._forward, inputs, self.parameters(), self.checkpoint)

    def _forward(self, x, context=None):
        attended = self.attn1(self.norm1(x))
        x = attended + x
        conditioned = self.attn2(self.norm2(x), context=context)
        x = conditioned + x
        transformed = self.ff(self.norm3(x))
        return transformed + x


class SpatialTransformer(nn.Module):
    """
    Transformer block for image-like data.
    The input is normalised, projected to the inner width and flattened to
    (batch, positions, channels); the transformer blocks are applied; the
    result is reshaped to an image, projected back and added to the input.
    """

    def __init__(
        self,
        in_channels,
        n_heads,
        d_head,
        depth=1,
        dropout=0.0,
        context_dim=None,
        no_context=False,
    ):
        super().__init__()

        # `no_context` forces every block into pure self-attention.
        block_context_dim = None if no_context else context_dim

        self.in_channels = in_channels
        inner_dim = n_heads * d_head
        self.norm = Normalize(in_channels)

        self.proj_in = nn.Conv2d(
            in_channels, inner_dim, kernel_size=1, stride=1, padding=0
        )

        blocks = []
        for _ in range(depth):
            blocks.append(
                BasicTransformerBlock(
                    inner_dim,
                    n_heads,
                    d_head,
                    dropout=dropout,
                    context_dim=block_context_dim,
                )
            )
        self.transformer_blocks = nn.ModuleList(blocks)

        # The output projection starts at zero.
        self.proj_out = zero_module(
            nn.Conv2d(inner_dim, in_channels, kernel_size=1, stride=1, padding=0)
        )

    def forward(self, x, context=None):
        # note: if no context is given, cross-attention defaults to self-attention
        height, width = x.shape[2], x.shape[3]
        residual = x
        hidden = self.proj_in(self.norm(x))
        hidden = rearrange(hidden, "b c h w -> b (h w) c")
        for block in self.transformer_blocks:
            hidden = block(hidden, context=context)
        hidden = rearrange(hidden, "b (h w) c -> b c h w", h=height, w=width)
        return self.proj_out(hidden) + residual
attr.to(torch.device("cuda")) + setattr(self, name, attr) + + def make_schedule( + self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0.0, verbose=True + ): + self.ddim_timesteps = make_ddim_timesteps( + ddim_discr_method=ddim_discretize, + num_ddim_timesteps=ddim_num_steps, + num_ddpm_timesteps=self.ddpm_num_timesteps, + verbose=verbose, + ) + alphas_cumprod = self.model.alphas_cumprod + assert ( + alphas_cumprod.shape[0] == self.ddpm_num_timesteps + ), "alphas have to be defined for each timestep" + to_torch = lambda x: x.clone().detach().to(torch.float32).to(self.model.device) + + self.register_buffer("betas", to_torch(self.model.betas)) + self.register_buffer("alphas_cumprod", to_torch(alphas_cumprod)) + self.register_buffer( + "alphas_cumprod_prev", to_torch(self.model.alphas_cumprod_prev) + ) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer( + "sqrt_alphas_cumprod", to_torch(np.sqrt(alphas_cumprod.cpu())) + ) + self.register_buffer( + "sqrt_one_minus_alphas_cumprod", + to_torch(np.sqrt(1.0 - alphas_cumprod.cpu())), + ) + self.register_buffer( + "log_one_minus_alphas_cumprod", to_torch(np.log(1.0 - alphas_cumprod.cpu())) + ) + self.register_buffer( + "sqrt_recip_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod.cpu())) + ) + self.register_buffer( + "sqrt_recipm1_alphas_cumprod", + to_torch(np.sqrt(1.0 / alphas_cumprod.cpu() - 1)), + ) + + # ddim sampling parameters + ddim_sigmas, ddim_alphas, ddim_alphas_prev = make_ddim_sampling_parameters( + alphacums=alphas_cumprod.cpu(), + ddim_timesteps=self.ddim_timesteps, + eta=ddim_eta, + verbose=verbose, + ) + self.register_buffer("ddim_sigmas", ddim_sigmas) + self.register_buffer("ddim_alphas", ddim_alphas) + self.register_buffer("ddim_alphas_prev", ddim_alphas_prev) + self.register_buffer("ddim_sqrt_one_minus_alphas", np.sqrt(1.0 - ddim_alphas)) + sigmas_for_original_sampling_steps = ddim_eta * torch.sqrt( + (1 - self.alphas_cumprod_prev) + / (1 - self.alphas_cumprod) + 
@torch.no_grad()
def sample(
    self,
    S,
    batch_size,
    shape,
    conditioning=None,
    callback=None,
    normals_sequence=None,
    img_callback=None,
    quantize_x0=False,
    eta=0.0,
    mask=None,
    x0=None,
    temperature=1.0,
    noise_dropout=0.0,
    score_corrector=None,
    corrector_kwargs=None,
    verbose=True,
    x_T=None,
    log_every_t=100,
    unconditional_guidance_scale=1.0,
    unconditional_conditioning=None,
    # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ...
    **kwargs,
):
    """Build the DDIM schedule for ``S`` steps and run ``ddim_sampling``.

    :param S: number of DDIM steps passed to ``make_schedule``.
    :param batch_size: number of samples; becomes the leading output dimension.
    :param shape: ``(C, H, W)`` of a single sample.
    :param conditioning: tensor, or dict of tensors, whose first dimension is
        compared with ``batch_size``; a mismatch only prints a warning.
    :param eta: forwarded to ``make_schedule`` as ``ddim_eta``.
    :return: the ``(samples, intermediates)`` pair from ``ddim_sampling``.

    ``normals_sequence`` and ``**kwargs`` are accepted but not used here; all
    other arguments are forwarded to ``ddim_sampling`` unchanged.
    """
    if conditioning is not None:
        # For a dict, the first entry stands in for the whole conditioning.
        if isinstance(conditioning, dict):
            reference = conditioning[list(conditioning)[0]]
        else:
            reference = conditioning
        n_cond = reference.shape[0]
        if n_cond != batch_size:
            print(f"Warning: Got {n_cond} conditionings but batch-size is {batch_size}")

    self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose)

    channels, height, width = shape
    full_shape = (batch_size, channels, height, width)
    sampling_options = dict(
        callback=callback,
        img_callback=img_callback,
        quantize_denoised=quantize_x0,
        mask=mask,
        x0=x0,
        ddim_use_original_steps=False,
        noise_dropout=noise_dropout,
        temperature=temperature,
        score_corrector=score_corrector,
        corrector_kwargs=corrector_kwargs,
        x_T=x_T,
        log_every_t=log_every_t,
        unconditional_guidance_scale=unconditional_guidance_scale,
        unconditional_conditioning=unconditional_conditioning,
    )
    samples, intermediates = self.ddim_sampling(
        conditioning, full_shape, **sampling_options
    )
    return samples, intermediates
noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + ): + device = self.model.betas.device + b = shape[0] + if x_T is None: + img = torch.randn(shape, device=device) + else: + img = x_T + + if timesteps is None: + timesteps = ( + self.ddpm_num_timesteps + if ddim_use_original_steps + else self.ddim_timesteps + ) + elif timesteps is not None and not ddim_use_original_steps: + subset_end = ( + int( + min(timesteps / self.ddim_timesteps.shape[0], 1) + * self.ddim_timesteps.shape[0] + ) + - 1 + ) + timesteps = self.ddim_timesteps[:subset_end] + + intermediates = {"x_inter": [img], "pred_x0": [img]} + time_range = ( + reversed(range(0, timesteps)) + if ddim_use_original_steps + else np.flip(timesteps) + ) + total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0] + # print(f"Running DDIM Sampling with {total_steps} timesteps") + + # iterator = gr.Progress().tqdm(time_range, desc="DDIM Sampler", total=total_steps) + iterator = tqdm(time_range, desc="DDIM Sampler", total=total_steps, leave=False) + + for i, step in enumerate(iterator): + index = total_steps - i - 1 + ts = torch.full((b,), step, device=device, dtype=torch.long) + if mask is not None: + assert x0 is not None + img_orig = self.model.q_sample( + x0, ts + ) # TODO deterministic forward pass? 
@torch.no_grad()
def stochastic_encode(self, x0, t, use_original_steps=False, noise=None):
    """Noise ``x0`` to step ``t`` in one shot.

    Fast, but does not allow for exact reconstruction. ``t`` is used as an
    index to gather the matching coefficients from the schedule.

    :param x0: tensor to be noised.
    :param t: indices into the coefficient tables.
    :param use_original_steps: read the full-length tables registered by
        ``make_schedule`` instead of the DDIM sub-sampled ones.
    :param noise: noise to mix in; drawn with ``torch.randn_like(x0)`` when
        omitted.
    :return: ``signal_coef[t] * x0 + noise_coef[t] * noise``.
    """
    if use_original_steps:
        signal_table = self.sqrt_alphas_cumprod
        noise_table = self.sqrt_one_minus_alphas_cumprod
    else:
        signal_table = torch.sqrt(self.ddim_alphas)
        noise_table = self.ddim_sqrt_one_minus_alphas

    if noise is None:
        noise = torch.randn_like(x0)

    scaled_signal = extract_into_tensor(signal_table, t, x0.shape) * x0
    scaled_noise = extract_into_tensor(noise_table, t, x0.shape) * noise
    return scaled_signal + scaled_noise
@torch.no_grad()
def p_sample_ddim(
    self,
    x,
    c,
    t,
    index,
    repeat_noise=False,
    use_original_steps=False,
    quantize_denoised=False,
    temperature=1.0,
    noise_dropout=0.0,
    score_corrector=None,
    corrector_kwargs=None,
    unconditional_guidance_scale=1.0,
    unconditional_conditioning=None,
):
    """Perform one DDIM update step.

    :param x: current noisy sample; the coefficient broadcasting below uses
        shape ``(b, 1, 1, 1)``, so a 4-D tensor is expected.
    :param c: conditioning passed to ``self.model.apply_model``.
    :param t: timestep tensor passed to the model.
    :param index: position in the coefficient tables for this step.
    :param repeat_noise: forwarded to ``noise_like``.
    :param use_original_steps: use the full-length schedule instead of the
        DDIM sub-sampled one.
    :param quantize_denoised: pass the predicted x_0 through
        ``self.model.first_stage_model.quantize``.
    :param temperature: multiplier on the injected noise.
    :param noise_dropout: dropout probability applied to the injected noise.
    :param score_corrector: optional object whose ``modify_score`` adjusts the
        noise prediction (only valid for the "eps" parameterization).
    :param corrector_kwargs: extra keyword arguments for ``modify_score``;
        ``None`` is treated as no extra arguments.
    :param unconditional_guidance_scale: classifier-free guidance weight.
    :param unconditional_conditioning: conditioning for the unconditional
        branch; guidance is skipped when this is ``None``.
    :return: ``(x_prev, pred_x0)``.
    """
    b, *_, device = *x.shape, x.device

    if unconditional_conditioning is None or unconditional_guidance_scale == 1.0:
        e_t = self.model.apply_model(x, t, c)
    else:
        # Evaluate the unconditional and conditional branches in one batch.
        x_in = torch.cat([x] * 2)
        t_in = torch.cat([t] * 2)
        c_in = torch.cat([unconditional_conditioning, c])
        e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2)
        # scale == 1: conditional prediction only
        # scale == 0: unconditional prediction only
        # scale  > 1: extrapolate past the conditional prediction, away
        #             from the unconditional one
        e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond)

    if score_corrector is not None:
        assert self.model.parameterization == "eps"
        e_t = score_corrector.modify_score(
            self.model, e_t, x, t, c, **(corrector_kwargs or {})
        )

    if use_original_steps:
        alphas = self.model.alphas_cumprod
        alphas_prev = self.model.alphas_cumprod_prev
        sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod
        # make_schedule registers this buffer on the sampler, not on the
        # model, so it must be read from `self`.
        sigmas = self.ddim_sigmas_for_original_num_steps
    else:
        alphas = self.ddim_alphas
        alphas_prev = self.ddim_alphas_prev
        sqrt_one_minus_alphas = self.ddim_sqrt_one_minus_alphas
        sigmas = self.ddim_sigmas

    # select parameters corresponding to the currently considered timestep
    a_t = torch.full((b, 1, 1, 1), alphas[index], device=device)
    a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device)
    sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device)
    sqrt_one_minus_at = torch.full(
        (b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device
    )

    # current prediction for x_0
    pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
    if quantize_denoised:
        pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0)
    # direction pointing to x_t
    dir_xt = (1.0 - a_prev - sigma_t**2).sqrt() * e_t
    noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature
    if noise_dropout > 0.0:
        noise = torch.nn.functional.dropout(noise, p=noise_dropout)
    x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise
    return x_prev, pred_x0
class DiffusionWrapper(nn.Module):
    """Route conditioning inputs to the wrapped diffusion model.

    ``conditioning_key`` selects how the conditioning lists handed to
    ``forward`` are combined with ``x`` before the model is called.

    :param diff_model_config: config handed to ``instantiate_from_config`` to
        build the wrapped model.
    :param conditioning_key: one of ``None``, ``"concat"``, ``"crossattn"``,
        ``"hybrid"``, ``"adm"`` or ``"film"``.
    """

    def __init__(self, diff_model_config, conditioning_key):
        super().__init__()
        self.diffusion_model = instantiate_from_config(diff_model_config)
        self.conditioning_key = conditioning_key
        assert self.conditioning_key in [
            None,
            "concat",
            "crossattn",
            "hybrid",
            "adm",
            "film",
        ]

    def forward(
        self, x, t, c_concat: list = None, c_crossattn: list = None, c_film: list = None
    ):
        """Call the wrapped model with conditioning arranged per ``conditioning_key``.

        :param x: model input; made contiguous before use.
        :param t: timesteps; made contiguous before use.
        :param c_concat: tensors concatenated to ``x`` along dim 1
            ("concat" and "hybrid").
        :param c_crossattn: tensors concatenated along dim 1 and passed as
            ``context`` ("crossattn" and "hybrid"); for "adm" only the first
            entry is used and passed as ``y``.
        :param c_film: for "film", the first entry is squeezed on dim 1 and
            passed as ``y``.
        :raises NotImplementedError: for any other ``conditioning_key``.
        """
        x = x.contiguous()
        t = t.contiguous()
        mode = self.conditioning_key
        net = self.diffusion_model

        if mode is None:
            return net(x, t)
        if mode == "concat":
            return net(torch.cat([x] + c_concat, dim=1), t)
        if mode == "crossattn":
            return net(x, t, context=torch.cat(c_crossattn, 1))
        if mode == "hybrid":
            stacked_input = torch.cat([x] + c_concat, dim=1)
            joined_context = torch.cat(c_crossattn, 1)
            return net(stacked_input, t, context=joined_context)
        if mode == "film":
            # The condition is assumed to be a single global token, so the
            # token axis is dropped before it is passed as `y`.
            return net(x, t, y=c_film[0].squeeze(1))
        if mode == "adm":
            return net(x, t, y=c_crossattn[0])
        raise NotImplementedError()
+ unet_config, + timesteps=1000, + beta_schedule="linear", + loss_type="l2", + ckpt_path=None, + ignore_keys=[], + load_only_unet=False, + monitor="val/loss", + use_ema=True, + first_stage_key="image", + latent_t_size=256, + latent_f_size=16, + channels=3, + log_every_t=100, + clip_denoised=True, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, + given_betas=None, + original_elbo_weight=0.0, + v_posterior=0.0, # weight for choosing posterior variance as sigma = (1-v) * beta_tilde + v * beta + l_simple_weight=1.0, + conditioning_key=None, + parameterization="eps", # all assuming fixed variance schedules + scheduler_config=None, + use_positional_encodings=False, + learn_logvar=False, + logvar_init=0.0, + ): + super().__init__() + assert parameterization in [ + "eps", + "x0", + ], 'currently only supporting "eps" and "x0"' + self.parameterization = parameterization + self.state = None + # print(f"{self.__class__.__name__}: Running in {self.parameterization}-prediction mode") + self.cond_stage_model = None + self.clip_denoised = clip_denoised + self.log_every_t = log_every_t + self.first_stage_key = first_stage_key + + self.latent_t_size = latent_t_size + self.latent_f_size = latent_f_size + + self.channels = channels + self.use_positional_encodings = use_positional_encodings + self.model = DiffusionWrapper(unet_config, conditioning_key) + count_params(self.model, verbose=True) + self.use_ema = use_ema + if self.use_ema: + self.model_ema = LitEma(self.model) + # print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.") + + self.use_scheduler = scheduler_config is not None + if self.use_scheduler: + self.scheduler_config = scheduler_config + + self.v_posterior = v_posterior + self.original_elbo_weight = original_elbo_weight + self.l_simple_weight = l_simple_weight + + if monitor is not None: + self.monitor = monitor + + self.register_schedule( + given_betas=given_betas, + beta_schedule=beta_schedule, + timesteps=timesteps, + linear_start=linear_start, + 
def get_log_dir(self):
    """Return the directory used for logging.

    If none of ``logger_save_dir``, ``logger_project`` and ``logger_version``
    has been set (all three are ``None``), the path is built from the attached
    logger's ``save_dir``, ``_project`` and ``version``. Otherwise the three
    stored values are joined.
    """
    stored = (self.logger_save_dir, self.logger_project, self.logger_version)
    if all(part is None for part in stored):
        active_logger = self.logger
        return os.path.join(
            active_logger.save_dir, active_logger._project, active_logger.version
        )
    return os.path.join(*stored)
self.num_timesteps = int(timesteps) + self.linear_start = linear_start + self.linear_end = linear_end + assert ( + alphas_cumprod.shape[0] == self.num_timesteps + ), "alphas have to be defined for each timestep" + + to_torch = partial(torch.tensor, dtype=torch.float32) + + self.register_buffer("betas", to_torch(betas)) + self.register_buffer("alphas_cumprod", to_torch(alphas_cumprod)) + self.register_buffer("alphas_cumprod_prev", to_torch(alphas_cumprod_prev)) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer("sqrt_alphas_cumprod", to_torch(np.sqrt(alphas_cumprod))) + self.register_buffer( + "sqrt_one_minus_alphas_cumprod", to_torch(np.sqrt(1.0 - alphas_cumprod)) + ) + self.register_buffer( + "log_one_minus_alphas_cumprod", to_torch(np.log(1.0 - alphas_cumprod)) + ) + self.register_buffer( + "sqrt_recip_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod)) + ) + self.register_buffer( + "sqrt_recipm1_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod - 1)) + ) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + posterior_variance = (1 - self.v_posterior) * betas * ( + 1.0 - alphas_cumprod_prev + ) / (1.0 - alphas_cumprod) + self.v_posterior * betas + # above: equal to 1. / (1. / (1. 
@contextmanager
def ema_scope(self, context=None):
    """Temporarily run ``self.model`` with its EMA weights.

    When ``use_ema`` is set, the current parameters are stored and the EMA
    values are copied into the model on entry; on exit the stored parameters
    are restored, even if the body raised. Nothing happens when ``use_ema``
    is false.

    :param context: optional label; currently unused (the log statements that
        printed it are disabled).
    """
    if self.use_ema:
        ema = self.model_ema
        ema.store(self.model.parameters())
        ema.copy_to(self.model)
    try:
        yield
    finally:
        # Re-read the flag so exit mirrors the object's current setting.
        if self.use_ema:
            self.model_ema.restore(self.model.parameters())
def p_mean_variance(self, x, t, clip_denoised: bool):
    """Compute the reverse-step mean and variance for ``x`` at step ``t``.

    The model output is turned into an estimate of the clean sample: directly
    for the "x0" parameterization, via ``predict_start_from_noise`` for "eps".
    The estimate is optionally clamped in place to [-1, 1] and then passed to
    ``q_posterior``.

    :param x: current noisy sample.
    :param t: timesteps passed to the model and to ``q_posterior``.
    :param clip_denoised: clamp the clean-sample estimate to [-1, 1].
    :return: ``(model_mean, posterior_variance, posterior_log_variance)``.
    """
    prediction = self.model(x, t)
    mode = self.parameterization
    if mode == "eps":
        clean_estimate = self.predict_start_from_noise(x, t=t, noise=prediction)
    elif mode == "x0":
        clean_estimate = prediction

    if clip_denoised:
        clean_estimate.clamp_(-1.0, 1.0)

    mean, variance, log_variance = self.q_posterior(
        x_start=clean_estimate, x_t=x, t=t
    )
    return mean, variance, log_variance
@torch.no_grad()
def sample(self, batch_size=16, return_intermediates=False):
    """Draw ``batch_size`` samples by running the full reverse loop.

    The sample shape is ``(batch_size, self.channels, self.latent_t_size,
    self.latent_f_size)``.

    :param batch_size: number of samples to generate.
    :param return_intermediates: forwarded to ``p_sample_loop``; when true it
        also returns the intermediate states it collected.
    :return: whatever ``p_sample_loop`` returns for the computed shape.
    """
    # `channels` must be bound before the shape tuple is built; the previous
    # ordering raised UnboundLocalError on every call.
    channels = self.channels
    shape = (batch_size, channels, self.latent_t_size, self.latent_f_size)
    return self.p_sample_loop(shape, return_intermediates=return_intermediates)
class LitEma(nn.Module):
    """Exponential moving average of a model's trainable parameters.

    A shadow copy of every parameter with ``requires_grad`` is kept as a
    buffer. Calling the module with the model updates the shadows;
    ``copy_to`` writes them into a model, and ``store`` / ``restore`` let the
    caller swap the original parameters back afterwards.

    :param model: module whose trainable parameters are tracked.
    :param decay: upper bound of the EMA decay, within [0, 1].
    :param use_num_upates: when true, an update counter starts at 0 and the
        effective decay is ``min(decay, (1 + n) / (10 + n))``; when false the
        counter is -1 and ``decay`` is always used.
    :raises ValueError: if ``decay`` is below 0 or above 1.
    """

    def __init__(self, model, decay=0.9999, use_num_upates=True):
        super().__init__()
        if decay > 1.0 or decay < 0.0:
            raise ValueError("Decay must be between 0 and 1")

        # model parameter name -> shadow buffer name
        self.m_name2s_name = {}
        self.register_buffer("decay", torch.tensor(decay, dtype=torch.float32))
        initial_count = 0 if use_num_upates else -1
        self.register_buffer(
            "num_updates", torch.tensor(initial_count, dtype=torch.int)
        )

        for param_name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            # remove as '.'-character is not allowed in buffers
            buffer_name = param_name.replace(".", "")
            self.m_name2s_name[param_name] = buffer_name
            self.register_buffer(buffer_name, param.clone().detach().data)

        self.collected_params = []

    def forward(self, model):
        """Move every shadow towards the matching parameter of ``model``."""
        decay = self.decay

        if self.num_updates >= 0:
            self.num_updates += 1
            # Smaller decay during the first updates so the average warms up.
            decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates))

        one_minus_decay = 1.0 - decay

        with torch.no_grad():
            live_params = dict(model.named_parameters())
            shadows = dict(self.named_buffers())

            for param_name, param in live_params.items():
                if not param.requires_grad:
                    assert param_name not in self.m_name2s_name
                    continue
                shadow = shadows[self.m_name2s_name[param_name]].type_as(param)
                shadow.sub_(one_minus_decay * (shadow - param))

    def copy_to(self, model):
        """Overwrite the trainable parameters of ``model`` with the shadows."""
        live_params = dict(model.named_parameters())
        shadows = dict(self.named_buffers())
        for param_name, param in live_params.items():
            if not param.requires_grad:
                assert param_name not in self.m_name2s_name
                continue
            param.data.copy_(shadows[self.m_name2s_name[param_name]].data)

    def store(self, parameters):
        """
        Save the current parameters for restoring later.
        Args:
          parameters: Iterable of `torch.nn.Parameter`; the parameters to be
            temporarily stored.
        """
        self.collected_params = [p.clone() for p in parameters]

    def restore(self, parameters):
        """
        Restore the parameters stored with the `store` method.
        Useful to validate the model with EMA parameters without affecting the
        original optimization process. Store the parameters before the
        `copy_to` method. After validation (or model saving), use this to
        restore the former parameters.
        Args:
          parameters: Iterable of `torch.nn.Parameter`; the parameters to be
            updated with the stored parameters.
        """
        for saved, target in zip(self.collected_params, parameters):
            target.data.copy_(saved.data)
class TimestepEmbedSequential(nn.Sequential, TimestepBlock):
    """
    Sequential container that hands each child the extra input it accepts:
    timestep embeddings for ``TimestepBlock`` children, the conditioning
    context for ``SpatialTransformer`` children, and nothing extra otherwise.
    """

    def forward(self, x, emb, context=None):
        hidden = x
        for module in self:
            if isinstance(module, TimestepBlock):
                extra = (emb,)
            elif isinstance(module, SpatialTransformer):
                extra = (context,)
            else:
                extra = ()
            hidden = module(hidden, *extra)
        return hidden
class TransposedUpsample(nn.Module):
    """Learned 2x upsampling without padding.

    Wraps a single ``nn.ConvTranspose2d`` with stride 2 and no padding
    argument.

    :param channels: number of input channels.
    :param out_channels: number of output channels; falls back to
        ``channels`` when not given.
    :param ks: kernel size of the transposed convolution.
    """

    def __init__(self, channels, out_channels=None, ks=5):
        super().__init__()
        self.channels = channels
        self.out_channels = out_channels if out_channels else channels

        self.up = nn.ConvTranspose2d(
            in_channels=self.channels,
            out_channels=self.out_channels,
            kernel_size=ks,
            stride=2,
        )

    def forward(self, x):
        """Return ``x`` upsampled by the transposed convolution."""
        upsampled = self.up(x)
        return upsampled
class ResBlock(TimestepBlock):
    """
    A residual block that can optionally change the number of channels.

    The input passes through ``in_layers``, is combined with a projection of
    the timestep embedding, passes through ``out_layers`` and is added to a
    skip path derived from the input.

    :param channels: the number of input channels.
    :param emb_channels: the number of timestep embedding channels.
    :param dropout: the rate of dropout.
    :param out_channels: if specified, the number of out channels.
    :param use_conv: if True and out_channels is specified, use a spatial
        convolution instead of a smaller 1x1 convolution to change the
        channels in the skip connection.
    :param use_scale_shift_norm: if True, the embedding is projected to twice
        ``out_channels`` and split into a scale and a shift that modulate the
        normalised features; otherwise the embedding is simply added.
    :param dims: determines if the signal is 1D, 2D, or 3D.
    :param use_checkpoint: if True, use gradient checkpointing on this module.
    :param up: if True, use this block for upsampling.
    :param down: if True, use this block for downsampling.
    """

    def __init__(
        self,
        channels,
        emb_channels,
        dropout,
        out_channels=None,
        use_conv=False,
        use_scale_shift_norm=False,
        dims=2,
        use_checkpoint=False,
        up=False,
        down=False,
    ):
        super().__init__()
        self.channels = channels
        self.emb_channels = emb_channels
        self.dropout = dropout
        self.out_channels = out_channels or channels
        self.use_conv = use_conv
        self.use_checkpoint = use_checkpoint
        self.use_scale_shift_norm = use_scale_shift_norm

        # norm -> SiLU -> 3-wide conv that maps channels to out_channels
        self.in_layers = nn.Sequential(
            normalization(channels),
            nn.SiLU(),
            conv_nd(dims, channels, self.out_channels, 3, padding=1),
        )

        self.updown = up or down

        # Resampling is applied to both the hidden path (h) and the skip
        # path (x); `up` takes precedence when both flags are set.
        if up:
            self.h_upd = Upsample(channels, False, dims)
            self.x_upd = Upsample(channels, False, dims)
        elif down:
            self.h_upd = Downsample(channels, False, dims)
            self.x_upd = Downsample(channels, False, dims)
        else:
            self.h_upd = self.x_upd = nn.Identity()

        # Projects the timestep embedding to out_channels, or to twice that
        # when it has to supply both a scale and a shift.
        self.emb_layers = nn.Sequential(
            nn.SiLU(),
            linear(
                emb_channels,
                2 * self.out_channels if use_scale_shift_norm else self.out_channels,
            ),
        )
        # NOTE(review): zero_module presumably zero-initialises the final
        # conv -- confirm in util.
        self.out_layers = nn.Sequential(
            normalization(self.out_channels),
            nn.SiLU(),
            nn.Dropout(p=dropout),
            zero_module(
                conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)
            ),
        )

        # The skip path only needs a convolution when the channel count changes.
        if self.out_channels == channels:
            self.skip_connection = nn.Identity()
        elif use_conv:
            self.skip_connection = conv_nd(
                dims, channels, self.out_channels, 3, padding=1
            )
        else:
            self.skip_connection = conv_nd(dims, channels, self.out_channels, 1)

    def forward(self, x, emb):
        """
        Apply the block to a Tensor, conditioned on a timestep embedding.
        :param x: an [N x C x ...] Tensor of features.
        :param emb: an [N x emb_channels] Tensor of timestep embeddings.
        :return: an [N x C x ...] Tensor of outputs.
        """
        return checkpoint(
            self._forward, (x, emb), self.parameters(), self.use_checkpoint
        )

    def _forward(self, x, emb):
        """Compute the block output; called by ``forward`` via ``checkpoint``."""
        if self.updown:
            # Resample between the norm/activation and the conv, and resample
            # the skip input the same way.
            in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1]
            h = in_rest(x)
            h = self.h_upd(h)
            x = self.x_upd(x)
            h = in_conv(h)
        else:
            h = self.in_layers(x)
        emb_out = self.emb_layers(emb).type(h.dtype)
        # Append trailing singleton axes until emb_out has as many dims as h.
        while len(emb_out.shape) < len(h.shape):
            emb_out = emb_out[..., None]
        if self.use_scale_shift_norm:
            # Modulate the normalised features, then run the rest of out_layers.
            out_norm, out_rest = self.out_layers[0], self.out_layers[1:]
            scale, shift = th.chunk(emb_out, 2, dim=1)
            h = out_norm(h) * (1 + scale) + shift
            h = out_rest(h)
        else:
            h = h + emb_out
            h = self.out_layers(h)
        return self.skip_connection(x) + h
+ """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + use_new_attention_order=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert ( + channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.norm = normalization(channels) + self.qkv = conv_nd(1, channels, channels * 3, 1) + if use_new_attention_order: + # split qkv before split heads + self.attention = QKVAttention(self.num_heads) + else: + # split heads before split qkv + self.attention = QKVAttentionLegacy(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x): + return checkpoint( + self._forward, (x,), self.parameters(), True + ) # TODO: check checkpoint usage, is True # TODO: fix the .half call!!! + # return pt_checkpoint(self._forward, x) # pytorch + + def _forward(self, x): + b, c, *spatial = x.shape + x = x.reshape(b, c, -1).contiguous() + qkv = self.qkv(self.norm(x)).contiguous() + h = self.attention(qkv).contiguous() + h = self.proj_out(h).contiguous() + return (x + h).reshape(b, c, *spatial).contiguous() + + +def count_flops_attn(model, _x, y): + """ + A counter for the `thop` package to count the operations in an + attention operation. + Meant to be used like: + macs, params = thop.profile( + model, + inputs=(inputs, timestamps), + custom_ops={QKVAttention: QKVAttention.count_flops}, + ) + """ + b, c, *spatial = y[0].shape + num_spatial = int(np.prod(spatial)) + # We perform two matmuls with the same number of ops. + # The first computes the weight matrix, the second computes + # the combination of the value vectors. 
+ matmul_ops = 2 * b * (num_spatial**2) * c + model.total_ops += th.DoubleTensor([matmul_ops]) + + +class QKVAttentionLegacy(nn.Module): + """ + A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = ( + qkv.reshape(bs * self.n_heads, ch * 3, length).contiguous().split(ch, dim=1) + ) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = th.einsum( + "bct,bcs->bts", q * scale, k * scale + ) # More stable with f16 than dividing afterwards + weight = th.softmax(weight.float(), dim=-1).type(weight.dtype) + a = th.einsum("bts,bcs->bct", weight, v) + return a.reshape(bs, -1, length).contiguous() + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVAttention(nn.Module): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. 
+ """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, dim=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = th.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length), + (k * scale).view(bs * self.n_heads, ch, length), + ) # More stable with f16 than dividing afterwards + weight = th.softmax(weight.float(), dim=-1).type(weight.dtype) + a = th.einsum( + "bts,bcs->bct", + weight, + v.reshape(bs * self.n_heads, ch, length).contiguous(), + ) + return a.reshape(bs, -1, length).contiguous() + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class UNetModel(nn.Module): + """ + The full UNet model with attention and timestep embedding. + :param in_channels: channels in the input Tensor. + :param model_channels: base channel count for the model. + :param out_channels: channels in the output Tensor. + :param num_res_blocks: number of residual blocks per downsample. + :param attention_resolutions: a collection of downsample rates at which + attention will take place. May be a set, list, or tuple. + For example, if this contains 4, then at 4x downsampling, attention + will be used. + :param dropout: the dropout probability. + :param channel_mult: channel multiplier for each level of the UNet. + :param conv_resample: if True, use learned convolutions for upsampling and + downsampling. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param num_classes: if specified (as an int), then this model will be + class-conditional with `num_classes` classes. + :param use_checkpoint: use gradient checkpointing to reduce memory usage. + :param num_heads: the number of attention heads in each attention layer. + :param num_heads_channels: if specified, ignore num_heads and instead use + a fixed channel width per attention head. + :param num_heads_upsample: works with num_heads to set a different number + of heads for upsampling. 
Deprecated. + :param use_scale_shift_norm: use a FiLM-like conditioning mechanism. + :param resblock_updown: use residual blocks for up/downsampling. + :param use_new_attention_order: use a different attention pattern for potentially + increased efficiency. + """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + num_classes=None, + extra_film_condition_dim=None, + use_checkpoint=False, + use_fp16=False, + num_heads=-1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + extra_film_use_concat=False, # If true, concatenate extrafilm condition with time embedding, else addition + resblock_updown=False, + use_new_attention_order=False, + use_spatial_transformer=False, # custom transformer support + transformer_depth=1, # custom transformer support + context_dim=None, # custom transformer support + n_embed=None, # custom support for prediction of discrete ids into codebook of first stage vq model + legacy=True, + ): + super().__init__() + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + if num_heads == -1: + assert ( + num_head_channels != -1 + ), "Either num_heads or num_head_channels has to be set" + + if num_head_channels == -1: + assert ( + num_heads != -1 + ), "Either num_heads or num_head_channels has to be set" + + self.image_size = image_size + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.num_classes = num_classes + self.extra_film_condition_dim = extra_film_condition_dim + self.use_checkpoint = use_checkpoint + self.dtype = th.float16 if use_fp16 else th.float32 + self.num_heads = num_heads + 
self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + self.predict_codebook_ids = n_embed is not None + self.extra_film_use_concat = extra_film_use_concat + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + nn.SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + assert not ( + self.num_classes is not None and self.extra_film_condition_dim is not None + ), "As for the condition of theh UNet model, you can only set using class label or an extra embedding vector (such as from CLAP). You cannot set both num_classes and extra_film_condition_dim." + + if self.num_classes is not None: + self.label_emb = nn.Embedding(num_classes, time_embed_dim) + + self.use_extra_film_by_concat = ( + self.extra_film_condition_dim is not None and self.extra_film_use_concat + ) + self.use_extra_film_by_addition = ( + self.extra_film_condition_dim is not None and not self.extra_film_use_concat + ) + + if self.extra_film_condition_dim is not None: + self.film_emb = nn.Linear(self.extra_film_condition_dim, time_embed_dim) + # print("+ Use extra condition on UNet channel using Film. Extra condition dimension is %s. " % self.extra_film_condition_dim) + # if(self.use_extra_film_by_concat): + # print("\t By concatenation with time embedding") + # elif(self.use_extra_film_by_concat): + # print("\t By addition with time embedding") + + if use_spatial_transformer and ( + self.use_extra_film_by_concat or self.use_extra_film_by_addition + ): + # print("+ Spatial transformer will only be used as self-attention. Because you have choose to use film as your global condition.") + spatial_transformer_no_context = True + else: + spatial_transformer_no_context = False + + if use_spatial_transformer and not spatial_transformer_no_context: + assert ( + context_dim is not None + ), "Fool!! You forgot to include the dimension of your cross-attention conditioning..." 
+ + if context_dim is not None and not spatial_transformer_no_context: + assert ( + use_spatial_transformer + ), "Fool!! You forgot to use the spatial transformer for your cross-attention conditioning..." + from omegaconf.listconfig import ListConfig + + if type(context_dim) == ListConfig: + context_dim = list(context_dim) + + self.input_blocks = nn.ModuleList( + [ + TimestepEmbedSequential( + conv_nd(dims, in_channels, model_channels, 3, padding=1) + ) + ] + ) + self._feature_size = model_channels + input_block_chans = [model_channels] + ch = model_channels + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim + if (not self.use_extra_film_by_concat) + else time_embed_dim * 2, + dropout, + out_channels=mult * model_channels, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = mult * model_channels + if ds in attention_resolutions: + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + dim_head = ( + ch // num_heads + if use_spatial_transformer + else num_head_channels + ) + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) + if not use_spatial_transformer + else SpatialTransformer( + ch, + num_heads, + dim_head, + depth=transformer_depth, + context_dim=context_dim, + no_context=spatial_transformer_no_context, + ) + ) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim + if (not self.use_extra_film_by_concat) + else time_embed_dim * 2, + dropout, + out_channels=out_ch, + dims=dims, + 
use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) + if resblock_updown + else Downsample( + ch, conv_resample, dims=dims, out_channels=out_ch + ) + ) + ) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ch // num_heads if use_spatial_transformer else num_head_channels + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim + if (not self.use_extra_film_by_concat) + else time_embed_dim * 2, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) + if not use_spatial_transformer + else SpatialTransformer( + ch, + num_heads, + dim_head, + depth=transformer_depth, + context_dim=context_dim, + no_context=spatial_transformer_no_context, + ), + ResBlock( + ch, + time_embed_dim + if (not self.use_extra_film_by_concat) + else time_embed_dim * 2, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + + self.output_blocks = nn.ModuleList([]) + for level, mult in list(enumerate(channel_mult))[::-1]: + for i in range(num_res_blocks + 1): + ich = input_block_chans.pop() + layers = [ + ResBlock( + ch + ich, + time_embed_dim + if (not self.use_extra_film_by_concat) + else time_embed_dim * 2, + dropout, + out_channels=model_channels * mult, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = model_channels * mult + if ds in attention_resolutions: + if num_head_channels == -1: + dim_head = ch // num_heads + else: + num_heads = ch // num_head_channels + 
dim_head = num_head_channels + if legacy: + # num_heads = 1 + dim_head = ( + ch // num_heads + if use_spatial_transformer + else num_head_channels + ) + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads_upsample, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) + if not use_spatial_transformer + else SpatialTransformer( + ch, + num_heads, + dim_head, + depth=transformer_depth, + context_dim=context_dim, + no_context=spatial_transformer_no_context, + ) + ) + if level and i == num_res_blocks: + out_ch = ch + layers.append( + ResBlock( + ch, + time_embed_dim + if (not self.use_extra_film_by_concat) + else time_embed_dim * 2, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + up=True, + ) + if resblock_updown + else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch) + ) + ds //= 2 + self.output_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + + self.out = nn.Sequential( + normalization(ch), + nn.SiLU(), + zero_module(conv_nd(dims, model_channels, out_channels, 3, padding=1)), + ) + if self.predict_codebook_ids: + self.id_predictor = nn.Sequential( + normalization(ch), + conv_nd(dims, model_channels, n_embed, 1), + # nn.LogSoftmax(dim=1) # change to cross_entropy and produce non-normalized logits + ) + + self.shape_reported = False + + def convert_to_fp16(self): + """ + Convert the torso of the model to float16. + """ + self.input_blocks.apply(convert_module_to_f16) + self.middle_block.apply(convert_module_to_f16) + self.output_blocks.apply(convert_module_to_f16) + + def convert_to_fp32(self): + """ + Convert the torso of the model to float32. 
+ """ + self.input_blocks.apply(convert_module_to_f32) + self.middle_block.apply(convert_module_to_f32) + self.output_blocks.apply(convert_module_to_f32) + + def forward(self, x, timesteps=None, context=None, y=None, **kwargs): + """ + Apply the model to an input batch. + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :param context: conditioning plugged in via crossattn + :param y: an [N] Tensor of labels, if class-conditional. an [N, extra_film_condition_dim] Tensor if film-embed conditional + :return: an [N x C x ...] Tensor of outputs. + """ + if not self.shape_reported: + # print("The shape of UNet input is", x.size()) + self.shape_reported = True + + assert (y is not None) == ( + self.num_classes is not None or self.extra_film_condition_dim is not None + ), "must specify y if and only if the model is class-conditional or film embedding conditional" + hs = [] + t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False) + emb = self.time_embed(t_emb) + + if self.num_classes is not None: + assert y.shape == (x.shape[0],) + emb = emb + self.label_emb(y) + + if self.use_extra_film_by_addition: + emb = emb + self.film_emb(y) + elif self.use_extra_film_by_concat: + emb = th.cat([emb, self.film_emb(y)], dim=-1) + + h = x.type(self.dtype) + for module in self.input_blocks: + h = module(h, emb, context) + hs.append(h) + h = self.middle_block(h, emb, context) + for module in self.output_blocks: + h = th.cat([h, hs.pop()], dim=1) + h = module(h, emb, context) + h = h.type(x.dtype) + if self.predict_codebook_ids: + return self.id_predictor(h) + else: + return self.out(h) + + +class EncoderUNetModel(nn.Module): + """ + The half UNet model with attention and timestep embedding. + For usage, see UNet. 
+ """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + pool="adaptive", + *args, + **kwargs, + ): + super().__init__() + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.use_checkpoint = use_checkpoint + self.dtype = th.float16 if use_fp16 else th.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + nn.SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + self.input_blocks = nn.ModuleList( + [ + TimestepEmbedSequential( + conv_nd(dims, in_channels, model_channels, 3, padding=1) + ) + ] + ) + self._feature_size = model_channels + input_block_chans = [model_channels] + ch = model_channels + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=mult * model_channels, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = mult * model_channels + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + 
use_new_attention_order=use_new_attention_order, + ) + ) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) + if resblock_updown + else Downsample( + ch, conv_resample, dims=dims, out_channels=out_ch + ) + ) + ) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + self.pool = pool + if pool == "adaptive": + self.out = nn.Sequential( + normalization(ch), + nn.SiLU(), + nn.AdaptiveAvgPool2d((1, 1)), + zero_module(conv_nd(dims, ch, out_channels, 1)), + nn.Flatten(), + ) + elif pool == "attention": + assert num_head_channels != -1 + self.out = nn.Sequential( + normalization(ch), + nn.SiLU(), + AttentionPool2d( + (image_size // ds), ch, num_head_channels, out_channels + ), + ) + elif pool == "spatial": + self.out = nn.Sequential( + nn.Linear(self._feature_size, 2048), + nn.ReLU(), + nn.Linear(2048, self.out_channels), + ) + elif pool == "spatial_v2": + self.out = nn.Sequential( + nn.Linear(self._feature_size, 2048), + normalization(2048), + nn.SiLU(), + nn.Linear(2048, self.out_channels), + ) + else: + raise NotImplementedError(f"Unexpected {pool} pooling") + + 
def convert_to_fp16(self): + """ + Convert the torso of the model to float16. + """ + self.input_blocks.apply(convert_module_to_f16) + self.middle_block.apply(convert_module_to_f16) + + def convert_to_fp32(self): + """ + Convert the torso of the model to float32. + """ + self.input_blocks.apply(convert_module_to_f32) + self.middle_block.apply(convert_module_to_f32) + + def forward(self, x, timesteps): + """ + Apply the model to an input batch. + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :return: an [N x K] Tensor of outputs. + """ + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) + + results = [] + h = x.type(self.dtype) + for module in self.input_blocks: + h = module(h, emb) + if self.pool.startswith("spatial"): + results.append(h.type(x.dtype).mean(dim=(2, 3))) + h = self.middle_block(h, emb) + if self.pool.startswith("spatial"): + results.append(h.type(x.dtype).mean(dim=(2, 3))) + h = th.cat(results, axis=-1) + return self.out(h) + else: + h = h.type(x.dtype) + return self.out(h) diff --git a/picoaudio/audioldm/latent_diffusion/util.py b/picoaudio/audioldm/latent_diffusion/util.py new file mode 100644 index 0000000000000000000000000000000000000000..8b289f6aa7f22a070870d8a706f944dc8547e936 --- /dev/null +++ b/picoaudio/audioldm/latent_diffusion/util.py @@ -0,0 +1,295 @@ +# adopted from +# https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py +# and +# https://github.com/lucidrains/denoising-diffusion-pytorch/blob/7706bdfc6f527f58d33f84b7b522e61e6e3164b3/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py +# and +# https://github.com/openai/guided-diffusion/blob/0ba878e517b276c45d1195eb29f6f5f72659a05b/guided_diffusion/nn.py +# +# thanks! 
+ + +import os +import math +import torch +import torch.nn as nn +import numpy as np +from einops import repeat + +from audioldm.utils import instantiate_from_config + + +def make_beta_schedule( + schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3 +): + if schedule == "linear": + betas = ( + torch.linspace( + linear_start**0.5, linear_end**0.5, n_timestep, dtype=torch.float64 + ) + ** 2 + ) + + elif schedule == "cosine": + timesteps = ( + torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s + ) + alphas = timesteps / (1 + cosine_s) * np.pi / 2 + alphas = torch.cos(alphas).pow(2) + alphas = alphas / alphas[0] + betas = 1 - alphas[1:] / alphas[:-1] + betas = np.clip(betas, a_min=0, a_max=0.999) + + elif schedule == "sqrt_linear": + betas = torch.linspace( + linear_start, linear_end, n_timestep, dtype=torch.float64 + ) + elif schedule == "sqrt": + betas = ( + torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) + ** 0.5 + ) + else: + raise ValueError(f"schedule '{schedule}' unknown.") + return betas.numpy() + + +def make_ddim_timesteps( + ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps, verbose=True +): + if ddim_discr_method == "uniform": + c = num_ddpm_timesteps // num_ddim_timesteps + ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) + elif ddim_discr_method == "quad": + ddim_timesteps = ( + (np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2 + ).astype(int) + else: + raise NotImplementedError( + f'There is no ddim discretization method called "{ddim_discr_method}"' + ) + + # assert ddim_timesteps.shape[0] == num_ddim_timesteps + # add one to get the final alpha values right (the ones from first scale to data during sampling) + steps_out = ddim_timesteps + 1 + if verbose: + print(f"Selected timesteps for ddim sampler: {steps_out}") + return steps_out + + +def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbose=True): + # select alphas 
for computing the variance schedule + alphas = alphacums[ddim_timesteps] + alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist()) + + # according the the formula provided in https://arxiv.org/abs/2010.02502 + sigmas = eta * np.sqrt( + (1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev) + ) + if verbose: + print( + f"Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}" + ) + print( + f"For the chosen value of eta, which is {eta}, " + f"this results in the following sigma_t schedule for ddim sampler {sigmas}" + ) + return sigmas, alphas, alphas_prev + + +def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, + which defines the cumulative product of (1-beta) over time from t = [0,1]. + :param num_diffusion_timesteps: the number of betas to produce. + :param alpha_bar: a lambda that takes an argument t from 0 to 1 and + produces the cumulative product of (1-beta) up to that + part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas) + + +def extract_into_tensor(a, t, x_shape): + b, *_ = t.shape + out = a.gather(-1, t).contiguous() + return out.reshape(b, *((1,) * (len(x_shape) - 1))).contiguous() + + +def checkpoint(func, inputs, params, flag): + """ + Evaluate a function without caching intermediate activations, allowing for + reduced memory at the expense of extra compute in the backward pass. + :param func: the function to evaluate. + :param inputs: the argument sequence to pass to `func`. + :param params: a sequence of parameters `func` depends on but does not + explicitly take as arguments. 
+ :param flag: if False, disable gradient checkpointing. + """ + if flag: + args = tuple(inputs) + tuple(params) + return CheckpointFunction.apply(func, len(inputs), *args) + else: + return func(*inputs) + + +class CheckpointFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, run_function, length, *args): + ctx.run_function = run_function + ctx.input_tensors = list(args[:length]) + ctx.input_params = list(args[length:]) + + with torch.no_grad(): + output_tensors = ctx.run_function(*ctx.input_tensors) + return output_tensors + + @staticmethod + def backward(ctx, *output_grads): + ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors] + with torch.enable_grad(): + # Fixes a bug where the first op in run_function modifies the + # Tensor storage in place, which is not allowed for detach()'d + # Tensors. + shallow_copies = [x.view_as(x) for x in ctx.input_tensors] + output_tensors = ctx.run_function(*shallow_copies) + input_grads = torch.autograd.grad( + output_tensors, + ctx.input_tensors + ctx.input_params, + output_grads, + allow_unused=True, + ) + del ctx.input_tensors + del ctx.input_params + del output_tensors + return (None, None) + input_grads + + +def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False): + """ + Create sinusoidal timestep embeddings. + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. 
+ """ + if not repeat_only: + half = dim // 2 + freqs = torch.exp( + -math.log(max_period) + * torch.arange(start=0, end=half, dtype=torch.float32) + / half + ).to(device=timesteps.device) + args = timesteps[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat( + [embedding, torch.zeros_like(embedding[:, :1])], dim=-1 + ) + else: + embedding = repeat(timesteps, "b -> b d", d=dim) + return embedding + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def scale_module(module, scale): + """ + Scale the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().mul_(scale) + return module + + +def mean_flat(tensor): + """ + Take the mean over all non-batch dimensions. + """ + return tensor.mean(dim=list(range(1, len(tensor.shape)))) + + +def normalization(channels): + """ + Make a standard normalization layer. + :param channels: number of input channels. + :return: an nn.Module for normalization. + """ + return GroupNorm32(32, channels) + + +# PyTorch 1.7 has SiLU, but we support PyTorch 1.5. +class SiLU(nn.Module): + def forward(self, x): + return x * torch.sigmoid(x) + + +class GroupNorm32(nn.GroupNorm): + def forward(self, x): + return super().forward(x.float()).type(x.dtype) + + +def conv_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D convolution module. + """ + if dims == 1: + return nn.Conv1d(*args, **kwargs) + elif dims == 2: + return nn.Conv2d(*args, **kwargs) + elif dims == 3: + return nn.Conv3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def linear(*args, **kwargs): + """ + Create a linear module. + """ + return nn.Linear(*args, **kwargs) + + +def avg_pool_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D average pooling module. 
+ """ + if dims == 1: + return nn.AvgPool1d(*args, **kwargs) + elif dims == 2: + return nn.AvgPool2d(*args, **kwargs) + elif dims == 3: + return nn.AvgPool3d(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +class HybridConditioner(nn.Module): + def __init__(self, c_concat_config, c_crossattn_config): + super().__init__() + self.concat_conditioner = instantiate_from_config(c_concat_config) + self.crossattn_conditioner = instantiate_from_config(c_crossattn_config) + + def forward(self, c_concat, c_crossattn): + c_concat = self.concat_conditioner(c_concat) + c_crossattn = self.crossattn_conditioner(c_crossattn) + return {"c_concat": [c_concat], "c_crossattn": [c_crossattn]} + + +def noise_like(shape, device, repeat=False): + repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat( + shape[0], *((1,) * (len(shape) - 1)) + ) + noise = lambda: torch.randn(shape, device=device) + return repeat_noise() if repeat else noise() diff --git a/picoaudio/audioldm/ldm.py b/picoaudio/audioldm/ldm.py new file mode 100644 index 0000000000000000000000000000000000000000..e0179fd5a506052ac9db22bd37f3db6b910aded5 --- /dev/null +++ b/picoaudio/audioldm/ldm.py @@ -0,0 +1,818 @@ +import os + +import torch +import numpy as np +from tqdm import tqdm +from audioldm.utils import default, instantiate_from_config, save_wave +from audioldm.latent_diffusion.ddpm import DDPM +from audioldm.variational_autoencoder.distributions import DiagonalGaussianDistribution +from audioldm.latent_diffusion.util import noise_like +from audioldm.latent_diffusion.ddim import DDIMSampler +import os + + +def disabled_train(self, mode=True): + """Overwrite model.train with this function to make sure train/eval mode + does not change anymore.""" + return self + + +class LatentDiffusion(DDPM): + """main class""" + + def __init__( + self, + device="cuda", + first_stage_config=None, + cond_stage_config=None, + num_timesteps_cond=None, + cond_stage_key="image", + 
cond_stage_trainable=False, + concat_mode=True, + cond_stage_forward=None, + conditioning_key=None, + scale_factor=1.0, + scale_by_std=False, + base_learning_rate=None, + *args, + **kwargs, + ): + self.device = device + self.learning_rate = base_learning_rate + self.num_timesteps_cond = default(num_timesteps_cond, 1) + self.scale_by_std = scale_by_std + assert self.num_timesteps_cond <= kwargs["timesteps"] + # for backwards compatibility after implementation of DiffusionWrapper + if conditioning_key is None: + conditioning_key = "concat" if concat_mode else "crossattn" + if cond_stage_config == "__is_unconditional__": + conditioning_key = None + ckpt_path = kwargs.pop("ckpt_path", None) + ignore_keys = kwargs.pop("ignore_keys", []) + super().__init__(conditioning_key=conditioning_key, *args, **kwargs) + self.concat_mode = concat_mode + self.cond_stage_trainable = cond_stage_trainable + self.cond_stage_key = cond_stage_key + self.cond_stage_key_orig = cond_stage_key + try: + self.num_downs = len(first_stage_config.params.ddconfig.ch_mult) - 1 + except: + self.num_downs = 0 + if not scale_by_std: + self.scale_factor = scale_factor + else: + self.register_buffer("scale_factor", torch.tensor(scale_factor)) + self.instantiate_first_stage(first_stage_config) + self.instantiate_cond_stage(cond_stage_config) + self.cond_stage_forward = cond_stage_forward + self.clip_denoised = False + + def make_cond_schedule( + self, + ): + self.cond_ids = torch.full( + size=(self.num_timesteps,), + fill_value=self.num_timesteps - 1, + dtype=torch.long, + ) + ids = torch.round( + torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond) + ).long() + self.cond_ids[: self.num_timesteps_cond] = ids + + def register_schedule( + self, + given_betas=None, + beta_schedule="linear", + timesteps=1000, + linear_start=1e-4, + linear_end=2e-2, + cosine_s=8e-3, + ): + super().register_schedule( + given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s + ) + + 
self.shorten_cond_schedule = self.num_timesteps_cond > 1 + if self.shorten_cond_schedule: + self.make_cond_schedule() + + def instantiate_first_stage(self, config): + model = instantiate_from_config(config) + self.first_stage_model = model.eval() + self.first_stage_model.train = disabled_train + for param in self.first_stage_model.parameters(): + param.requires_grad = False + + def instantiate_cond_stage(self, config): + if not self.cond_stage_trainable: + if config == "__is_first_stage__": + print("Using first stage also as cond stage.") + self.cond_stage_model = self.first_stage_model + elif config == "__is_unconditional__": + print(f"Training {self.__class__.__name__} as an unconditional model.") + self.cond_stage_model = None + # self.be_unconditional = True + else: + model = instantiate_from_config(config) + self.cond_stage_model = model.eval() + self.cond_stage_model.train = disabled_train + for param in self.cond_stage_model.parameters(): + param.requires_grad = False + else: + assert config != "__is_first_stage__" + assert config != "__is_unconditional__" + model = instantiate_from_config(config) + self.cond_stage_model = model + self.cond_stage_model = self.cond_stage_model.to(self.device) + + def get_first_stage_encoding(self, encoder_posterior): + if isinstance(encoder_posterior, DiagonalGaussianDistribution): + z = encoder_posterior.sample() + elif isinstance(encoder_posterior, torch.Tensor): + z = encoder_posterior + else: + raise NotImplementedError( + f"encoder_posterior of type '{type(encoder_posterior)}' not yet implemented" + ) + return self.scale_factor * z + + def get_learned_conditioning(self, c): + if self.cond_stage_forward is None: + if hasattr(self.cond_stage_model, "encode") and callable( + self.cond_stage_model.encode + ): + c = self.cond_stage_model.encode(c) + if isinstance(c, DiagonalGaussianDistribution): + c = c.mode() + else: + # Text input is list + if type(c) == list and len(c) == 1: + c = self.cond_stage_model([c[0], c[0]]) + c = 
c[0:1] + else: + c = self.cond_stage_model(c) + else: + assert hasattr(self.cond_stage_model, self.cond_stage_forward) + c = getattr(self.cond_stage_model, self.cond_stage_forward)(c) + return c + + @torch.no_grad() + def get_input( + self, + batch, + k, + return_first_stage_encode=True, + return_first_stage_outputs=False, + force_c_encode=False, + cond_key=None, + return_original_cond=False, + bs=None, + ): + x = super().get_input(batch, k) + + if bs is not None: + x = x[:bs] + + x = x.to(self.device) + + if return_first_stage_encode: + encoder_posterior = self.encode_first_stage(x) + z = self.get_first_stage_encoding(encoder_posterior).detach() + else: + z = None + + if self.model.conditioning_key is not None: + if cond_key is None: + cond_key = self.cond_stage_key + if cond_key != self.first_stage_key: + if cond_key in ["caption", "coordinates_bbox"]: + xc = batch[cond_key] + elif cond_key == "class_label": + xc = batch + else: + # [bs, 1, 527] + xc = super().get_input(batch, cond_key) + if type(xc) == torch.Tensor: + xc = xc.to(self.device) + else: + xc = x + if not self.cond_stage_trainable or force_c_encode: + if isinstance(xc, dict) or isinstance(xc, list): + c = self.get_learned_conditioning(xc) + else: + c = self.get_learned_conditioning(xc.to(self.device)) + else: + c = xc + + if bs is not None: + c = c[:bs] + + else: + c = None + xc = None + if self.use_positional_encodings: + pos_x, pos_y = self.compute_latent_shifts(batch) + c = {"pos_x": pos_x, "pos_y": pos_y} + out = [z, c] + if return_first_stage_outputs: + xrec = self.decode_first_stage(z) + out.extend([x, xrec]) + if return_original_cond: + out.append(xc) + return out + + @torch.no_grad() + def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False): + if predict_cids: + if z.dim() == 4: + z = torch.argmax(z.exp(), dim=1).long() + z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None) + z = rearrange(z, "b h w c -> b c h w").contiguous() + + z = 1.0 / 
self.scale_factor * z + return self.first_stage_model.decode(z) + + def mel_spectrogram_to_waveform(self, mel): + # Mel: [bs, 1, t-steps, fbins] + if len(mel.size()) == 4: + mel = mel.squeeze(1) + mel = mel.permute(0, 2, 1) + waveform = self.first_stage_model.vocoder(mel) + waveform = waveform.cpu().detach().numpy() + return waveform + + @torch.no_grad() + def encode_first_stage(self, x): + return self.first_stage_model.encode(x) + + def apply_model(self, x_noisy, t, cond, return_ids=False): + + if isinstance(cond, dict): + # hybrid case, cond is exptected to be a dict + pass + else: + if not isinstance(cond, list): + cond = [cond] + if self.model.conditioning_key == "concat": + key = "c_concat" + elif self.model.conditioning_key == "crossattn": + key = "c_crossattn" + else: + key = "c_film" + + cond = {key: cond} + + x_recon = self.model(x_noisy, t, **cond) + + if isinstance(x_recon, tuple) and not return_ids: + return x_recon[0] + else: + return x_recon + + def p_mean_variance( + self, + x, + c, + t, + clip_denoised: bool, + return_codebook_ids=False, + quantize_denoised=False, + return_x0=False, + score_corrector=None, + corrector_kwargs=None, + ): + t_in = t + model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids) + + if score_corrector is not None: + assert self.parameterization == "eps" + model_out = score_corrector.modify_score( + self, model_out, x, t, c, **corrector_kwargs + ) + + if return_codebook_ids: + model_out, logits = model_out + + if self.parameterization == "eps": + x_recon = self.predict_start_from_noise(x, t=t, noise=model_out) + elif self.parameterization == "x0": + x_recon = model_out + else: + raise NotImplementedError() + + if clip_denoised: + x_recon.clamp_(-1.0, 1.0) + if quantize_denoised: + x_recon, _, [_, _, indices] = self.first_stage_model.quantize(x_recon) + model_mean, posterior_variance, posterior_log_variance = self.q_posterior( + x_start=x_recon, x_t=x, t=t + ) + if return_codebook_ids: + return model_mean, 
posterior_variance, posterior_log_variance, logits + elif return_x0: + return model_mean, posterior_variance, posterior_log_variance, x_recon + else: + return model_mean, posterior_variance, posterior_log_variance + + @torch.no_grad() + def p_sample( + self, + x, + c, + t, + clip_denoised=False, + repeat_noise=False, + return_codebook_ids=False, + quantize_denoised=False, + return_x0=False, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + ): + b, *_, device = *x.shape, x.device + outputs = self.p_mean_variance( + x=x, + c=c, + t=t, + clip_denoised=clip_denoised, + return_codebook_ids=return_codebook_ids, + quantize_denoised=quantize_denoised, + return_x0=return_x0, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + ) + if return_codebook_ids: + raise DeprecationWarning("Support dropped.") + model_mean, _, model_log_variance, logits = outputs + elif return_x0: + model_mean, _, model_log_variance, x0 = outputs + else: + model_mean, _, model_log_variance = outputs + + noise = noise_like(x.shape, device, repeat_noise) * temperature + if noise_dropout > 0.0: + noise = torch.nn.functional.dropout(noise, p=noise_dropout) + # no noise when t == 0 + nonzero_mask = ( + (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1))).contiguous() + ) + + if return_codebook_ids: + return model_mean + nonzero_mask * ( + 0.5 * model_log_variance + ).exp() * noise, logits.argmax(dim=1) + if return_x0: + return ( + model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, + x0, + ) + else: + return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise + + @torch.no_grad() + def progressive_denoising( + self, + cond, + shape, + verbose=True, + callback=None, + quantize_denoised=False, + img_callback=None, + mask=None, + x0=None, + temperature=1.0, + noise_dropout=0.0, + score_corrector=None, + corrector_kwargs=None, + batch_size=None, + x_T=None, + start_T=None, + log_every_t=None, + ): + if 
not log_every_t: + log_every_t = self.log_every_t + timesteps = self.num_timesteps + if batch_size is not None: + b = batch_size if batch_size is not None else shape[0] + shape = [batch_size] + list(shape) + else: + b = batch_size = shape[0] + if x_T is None: + img = torch.randn(shape, device=self.device) + else: + img = x_T + intermediates = [] + if cond is not None: + if isinstance(cond, dict): + cond = { + key: cond[key][:batch_size] + if not isinstance(cond[key], list) + else list(map(lambda x: x[:batch_size], cond[key])) + for key in cond + } + else: + cond = ( + [c[:batch_size] for c in cond] + if isinstance(cond, list) + else cond[:batch_size] + ) + + if start_T is not None: + timesteps = min(timesteps, start_T) + iterator = ( + tqdm( + reversed(range(0, timesteps)), + desc="Progressive Generation", + total=timesteps, + ) + if verbose + else reversed(range(0, timesteps)) + ) + if type(temperature) == float: + temperature = [temperature] * timesteps + + for i in iterator: + ts = torch.full((b,), i, device=self.device, dtype=torch.long) + if self.shorten_cond_schedule: + assert self.model.conditioning_key != "hybrid" + tc = self.cond_ids[ts].to(cond.device) + cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond)) + + img, x0_partial = self.p_sample( + img, + cond, + ts, + clip_denoised=self.clip_denoised, + quantize_denoised=quantize_denoised, + return_x0=True, + temperature=temperature[i], + noise_dropout=noise_dropout, + score_corrector=score_corrector, + corrector_kwargs=corrector_kwargs, + ) + if mask is not None: + assert x0 is not None + img_orig = self.q_sample(x0, ts) + img = img_orig * mask + (1.0 - mask) * img + + if i % log_every_t == 0 or i == timesteps - 1: + intermediates.append(x0_partial) + if callback: + callback(i) + if img_callback: + img_callback(img, i) + return img, intermediates + + @torch.no_grad() + def p_sample_loop( + self, + cond, + shape, + return_intermediates=False, + x_T=None, + verbose=True, + callback=None, + 
timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + img_callback=None, + start_T=None, + log_every_t=None, + ): + + if not log_every_t: + log_every_t = self.log_every_t + device = self.betas.device + b = shape[0] + if x_T is None: + img = torch.randn(shape, device=device) + else: + img = x_T + + intermediates = [img] + if timesteps is None: + timesteps = self.num_timesteps + + if start_T is not None: + timesteps = min(timesteps, start_T) + iterator = ( + tqdm(reversed(range(0, timesteps)), desc="Sampling t", total=timesteps) + if verbose + else reversed(range(0, timesteps)) + ) + + if mask is not None: + assert x0 is not None + assert x0.shape[2:3] == mask.shape[2:3] # spatial size has to match + + for i in iterator: + ts = torch.full((b,), i, device=device, dtype=torch.long) + if self.shorten_cond_schedule: + assert self.model.conditioning_key != "hybrid" + tc = self.cond_ids[ts].to(cond.device) + cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond)) + + img = self.p_sample( + img, + cond, + ts, + clip_denoised=self.clip_denoised, + quantize_denoised=quantize_denoised, + ) + if mask is not None: + img_orig = self.q_sample(x0, ts) + img = img_orig * mask + (1.0 - mask) * img + + if i % log_every_t == 0 or i == timesteps - 1: + intermediates.append(img) + if callback: + callback(i) + if img_callback: + img_callback(img, i) + + if return_intermediates: + return img, intermediates + return img + + @torch.no_grad() + def sample( + self, + cond, + batch_size=16, + return_intermediates=False, + x_T=None, + verbose=True, + timesteps=None, + quantize_denoised=False, + mask=None, + x0=None, + shape=None, + **kwargs, + ): + if shape is None: + shape = (batch_size, self.channels, self.latent_t_size, self.latent_f_size) + if cond is not None: + if isinstance(cond, dict): + cond = { + key: cond[key][:batch_size] + if not isinstance(cond[key], list) + else list(map(lambda x: x[:batch_size], cond[key])) + for key in cond + } + else: + cond = ( + 
[c[:batch_size] for c in cond] + if isinstance(cond, list) + else cond[:batch_size] + ) + return self.p_sample_loop( + cond, + shape, + return_intermediates=return_intermediates, + x_T=x_T, + verbose=verbose, + timesteps=timesteps, + quantize_denoised=quantize_denoised, + mask=mask, + x0=x0, + **kwargs, + ) + + @torch.no_grad() + def sample_log( + self, + cond, + batch_size, + ddim, + ddim_steps, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + use_plms=False, + mask=None, + **kwargs, + ): + + if mask is not None: + shape = (self.channels, mask.size()[-2], mask.size()[-1]) + else: + shape = (self.channels, self.latent_t_size, self.latent_f_size) + + intermediate = None + if ddim and not use_plms: + # print("Use ddim sampler") + + ddim_sampler = DDIMSampler(self) + samples, intermediates = ddim_sampler.sample( + ddim_steps, + batch_size, + shape, + cond, + verbose=False, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + mask=mask, + **kwargs, + ) + + else: + # print("Use DDPM sampler") + samples, intermediates = self.sample( + cond=cond, + batch_size=batch_size, + return_intermediates=True, + unconditional_guidance_scale=unconditional_guidance_scale, + mask=mask, + unconditional_conditioning=unconditional_conditioning, + **kwargs, + ) + + return samples, intermediate + + @torch.no_grad() + def generate_sample( + self, + batchs, + ddim_steps=200, + ddim_eta=1.0, + x_T=None, + n_candidate_gen_per_text=1, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + name="waveform", + use_plms=False, + save=False, + **kwargs, + ): + # Generate n_candidate_gen_per_text times and select the best + # Batch: audio, text, fnames + assert x_T is None + try: + batchs = iter(batchs) + except TypeError: + raise ValueError("The first input argument should be an iterable object") + + if use_plms: + assert ddim_steps is not None + use_ddim = ddim_steps is not None + # 
waveform_save_path = os.path.join(self.get_log_dir(), name) + # os.makedirs(waveform_save_path, exist_ok=True) + # print("Waveform save path: ", waveform_save_path) + + with self.ema_scope("Generate"): + for batch in batchs: + z, c = self.get_input( + batch, + self.first_stage_key, + cond_key=self.cond_stage_key, + return_first_stage_outputs=False, + force_c_encode=True, + return_original_cond=False, + bs=None, + ) + text = super().get_input(batch, "text") + + # Generate multiple samples + batch_size = z.shape[0] * n_candidate_gen_per_text + c = torch.cat([c] * n_candidate_gen_per_text, dim=0) + text = text * n_candidate_gen_per_text + + if unconditional_guidance_scale != 1.0: + unconditional_conditioning = ( + self.cond_stage_model.get_unconditional_condition(batch_size) + ) + + samples, _ = self.sample_log( + cond=c, + batch_size=batch_size, + x_T=x_T, + ddim=use_ddim, + ddim_steps=ddim_steps, + eta=ddim_eta, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + use_plms=use_plms, + ) + + if(torch.max(torch.abs(samples)) > 1e2): + samples = torch.clip(samples, min=-10, max=10) + + mel = self.decode_first_stage(samples) + + waveform = self.mel_spectrogram_to_waveform(mel) + + if waveform.shape[0] > 1: + similarity = self.cond_stage_model.cos_similarity( + torch.FloatTensor(waveform).squeeze(1), text + ) + + best_index = [] + for i in range(z.shape[0]): + candidates = similarity[i :: z.shape[0]] + max_index = torch.argmax(candidates).item() + best_index.append(i + max_index * z.shape[0]) + + waveform = waveform[best_index] + # print("Similarity between generated audio and text", similarity) + # print("Choose the following indexes:", best_index) + + return waveform + + @torch.no_grad() + def generate_sample_masked( + self, + batchs, + ddim_steps=200, + ddim_eta=1.0, + x_T=None, + n_candidate_gen_per_text=1, + unconditional_guidance_scale=1.0, + unconditional_conditioning=None, + name="waveform", + 
use_plms=False, + time_mask_ratio_start_and_end=(0.25, 0.75), + freq_mask_ratio_start_and_end=(0.75, 1.0), + save=False, + **kwargs, + ): + # Generate n_candidate_gen_per_text times and select the best + # Batch: audio, text, fnames + assert x_T is None + try: + batchs = iter(batchs) + except TypeError: + raise ValueError("The first input argument should be an iterable object") + + if use_plms: + assert ddim_steps is not None + use_ddim = ddim_steps is not None + # waveform_save_path = os.path.join(self.get_log_dir(), name) + # os.makedirs(waveform_save_path, exist_ok=True) + # print("Waveform save path: ", waveform_save_path) + + with self.ema_scope("Generate"): + for batch in batchs: + z, c = self.get_input( + batch, + self.first_stage_key, + cond_key=self.cond_stage_key, + return_first_stage_outputs=False, + force_c_encode=True, + return_original_cond=False, + bs=None, + ) + text = super().get_input(batch, "text") + + # Generate multiple samples + batch_size = z.shape[0] * n_candidate_gen_per_text + + _, h, w = z.shape[0], z.shape[2], z.shape[3] + + mask = torch.ones(batch_size, h, w).to(self.device) + + mask[:, int(h * time_mask_ratio_start_and_end[0]) : int(h * time_mask_ratio_start_and_end[1]), :] = 0 + mask[:, :, int(w * freq_mask_ratio_start_and_end[0]) : int(w * freq_mask_ratio_start_and_end[1])] = 0 + mask = mask[:, None, ...] 
+ + c = torch.cat([c] * n_candidate_gen_per_text, dim=0) + text = text * n_candidate_gen_per_text + + if unconditional_guidance_scale != 1.0: + unconditional_conditioning = ( + self.cond_stage_model.get_unconditional_condition(batch_size) + ) + + samples, _ = self.sample_log( + cond=c, + batch_size=batch_size, + x_T=x_T, + ddim=use_ddim, + ddim_steps=ddim_steps, + eta=ddim_eta, + unconditional_guidance_scale=unconditional_guidance_scale, + unconditional_conditioning=unconditional_conditioning, + use_plms=use_plms, mask=mask, x0=torch.cat([z] * n_candidate_gen_per_text) + ) + + mel = self.decode_first_stage(samples) + + waveform = self.mel_spectrogram_to_waveform(mel) + + if waveform.shape[0] > 1: + similarity = self.cond_stage_model.cos_similarity( + torch.FloatTensor(waveform).squeeze(1), text + ) + + best_index = [] + for i in range(z.shape[0]): + candidates = similarity[i :: z.shape[0]] + max_index = torch.argmax(candidates).item() + best_index.append(i + max_index * z.shape[0]) + + waveform = waveform[best_index] + # print("Similarity between generated audio and text", similarity) + # print("Choose the following indexes:", best_index) + + return waveform \ No newline at end of file diff --git a/picoaudio/audioldm/pipeline.py b/picoaudio/audioldm/pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..b08e1f77206483025ce027588c2dea1de78ae26c --- /dev/null +++ b/picoaudio/audioldm/pipeline.py @@ -0,0 +1,301 @@ +import os + +import argparse +import yaml +import torch +from torch import autocast +from tqdm import tqdm, trange + +from audioldm import LatentDiffusion, seed_everything +from audioldm.utils import default_audioldm_config, get_duration, get_bit_depth, get_metadata, download_checkpoint +from audioldm.audio import wav_to_fbank, TacotronSTFT, read_wav_file +from audioldm.latent_diffusion.ddim import DDIMSampler +from einops import repeat +import os + +def make_batch_for_text_to_audio(text, waveform=None, fbank=None, batchsize=1): + 
text = [text] * batchsize + if batchsize < 1: + print("Warning: Batchsize must be at least 1. Batchsize is set to .") + + if(fbank is None): + fbank = torch.zeros((batchsize, 1024, 64)) # Not used, here to keep the code format + else: + fbank = torch.FloatTensor(fbank) + fbank = fbank.expand(batchsize, 1024, 64) + assert fbank.size(0) == batchsize + + stft = torch.zeros((batchsize, 1024, 512)) # Not used + + if(waveform is None): + waveform = torch.zeros((batchsize, 160000)) # Not used + else: + waveform = torch.FloatTensor(waveform) + waveform = waveform.expand(batchsize, -1) + assert waveform.size(0) == batchsize + + fname = [""] * batchsize # Not used + + batch = ( + fbank, + stft, + None, + fname, + waveform, + text, + ) + return batch + +def round_up_duration(duration): + return int(round(duration/2.5) + 1) * 2.5 + +def build_model( + ckpt_path=None, + config=None, + model_name="audioldm-s-full" +): + print("Load AudioLDM: %s", model_name) + + if(ckpt_path is None): + ckpt_path = get_metadata()[model_name]["path"] + + if(not os.path.exists(ckpt_path)): + download_checkpoint(model_name) + + if torch.cuda.is_available(): + device = torch.device("cuda:0") + else: + device = torch.device("cpu") + + if config is not None: + assert type(config) is str + config = yaml.load(open(config, "r"), Loader=yaml.FullLoader) + else: + config = default_audioldm_config(model_name) + + # Use text as condition instead of using waveform during training + config["model"]["params"]["device"] = device + config["model"]["params"]["cond_stage_key"] = "text" + + # No normalization here + latent_diffusion = LatentDiffusion(**config["model"]["params"]) + + resume_from_checkpoint = ckpt_path + + checkpoint = torch.load(resume_from_checkpoint, map_location=device) + latent_diffusion.load_state_dict(checkpoint["state_dict"]) + + latent_diffusion.eval() + latent_diffusion = latent_diffusion.to(device) + + latent_diffusion.cond_stage_model.embed_mode = "text" + return latent_diffusion + +def 
duration_to_latent_t_size(duration): + return int(duration * 25.6) + +def set_cond_audio(latent_diffusion): + latent_diffusion.cond_stage_key = "waveform" + latent_diffusion.cond_stage_model.embed_mode="audio" + return latent_diffusion + +def set_cond_text(latent_diffusion): + latent_diffusion.cond_stage_key = "text" + latent_diffusion.cond_stage_model.embed_mode="text" + return latent_diffusion + +def text_to_audio( + latent_diffusion, + text, + original_audio_file_path = None, + seed=42, + ddim_steps=200, + duration=10, + batchsize=1, + guidance_scale=2.5, + n_candidate_gen_per_text=3, + config=None, +): + seed_everything(int(seed)) + waveform = None + if(original_audio_file_path is not None): + waveform = read_wav_file(original_audio_file_path, int(duration * 102.4) * 160) + + batch = make_batch_for_text_to_audio(text, waveform=waveform, batchsize=batchsize) + + latent_diffusion.latent_t_size = duration_to_latent_t_size(duration) + + if(waveform is not None): + print("Generate audio that has similar content as %s" % original_audio_file_path) + latent_diffusion = set_cond_audio(latent_diffusion) + else: + print("Generate audio using text %s" % text) + latent_diffusion = set_cond_text(latent_diffusion) + + with torch.no_grad(): + waveform = latent_diffusion.generate_sample( + [batch], + unconditional_guidance_scale=guidance_scale, + ddim_steps=ddim_steps, + n_candidate_gen_per_text=n_candidate_gen_per_text, + duration=duration, + ) + return waveform + +def style_transfer( + latent_diffusion, + text, + original_audio_file_path, + transfer_strength, + seed=42, + duration=10, + batchsize=1, + guidance_scale=2.5, + ddim_steps=200, + config=None, +): + if torch.cuda.is_available(): + device = torch.device("cuda:0") + else: + device = torch.device("cpu") + + assert original_audio_file_path is not None, "You need to provide the original audio file path" + + audio_file_duration = get_duration(original_audio_file_path) + + assert get_bit_depth(original_audio_file_path) == 
16, "The bit depth of the original audio file %s must be 16" % original_audio_file_path + + # if(duration > 20): + # print("Warning: The duration of the audio file %s must be less than 20 seconds. Longer duration will result in Nan in model output (we are still debugging that); Automatically set duration to 20 seconds") + # duration = 20 + + if(duration >= audio_file_duration): + print("Warning: Duration you specified %s-seconds must equal or smaller than the audio file duration %ss" % (duration, audio_file_duration)) + duration = round_up_duration(audio_file_duration) + print("Set new duration as %s-seconds" % duration) + + # duration = round_up_duration(duration) + + latent_diffusion = set_cond_text(latent_diffusion) + + if config is not None: + assert type(config) is str + config = yaml.load(open(config, "r"), Loader=yaml.FullLoader) + else: + config = default_audioldm_config() + + seed_everything(int(seed)) + # latent_diffusion.latent_t_size = duration_to_latent_t_size(duration) + latent_diffusion.cond_stage_model.embed_mode = "text" + + fn_STFT = TacotronSTFT( + config["preprocessing"]["stft"]["filter_length"], + config["preprocessing"]["stft"]["hop_length"], + config["preprocessing"]["stft"]["win_length"], + config["preprocessing"]["mel"]["n_mel_channels"], + config["preprocessing"]["audio"]["sampling_rate"], + config["preprocessing"]["mel"]["mel_fmin"], + config["preprocessing"]["mel"]["mel_fmax"], + ) + + mel, _, _ = wav_to_fbank( + original_audio_file_path, target_length=int(duration * 102.4), fn_STFT=fn_STFT + ) + mel = mel.unsqueeze(0).unsqueeze(0).to(device) + mel = repeat(mel, "1 ... 
-> b ...", b=batchsize) + init_latent = latent_diffusion.get_first_stage_encoding( + latent_diffusion.encode_first_stage(mel) + ) # move to latent space, encode and sample + if(torch.max(torch.abs(init_latent)) > 1e2): + init_latent = torch.clip(init_latent, min=-10, max=10) + sampler = DDIMSampler(latent_diffusion) + sampler.make_schedule(ddim_num_steps=ddim_steps, ddim_eta=1.0, verbose=False) + + t_enc = int(transfer_strength * ddim_steps) + prompts = text + + with torch.no_grad(): + with autocast("cuda"): + with latent_diffusion.ema_scope(): + uc = None + if guidance_scale != 1.0: + uc = latent_diffusion.cond_stage_model.get_unconditional_condition( + batchsize + ) + + c = latent_diffusion.get_learned_conditioning([prompts] * batchsize) + z_enc = sampler.stochastic_encode( + init_latent, torch.tensor([t_enc] * batchsize).to(device) + ) + samples = sampler.decode( + z_enc, + c, + t_enc, + unconditional_guidance_scale=guidance_scale, + unconditional_conditioning=uc, + ) + # x_samples = latent_diffusion.decode_first_stage(samples) # Will result in Nan in output + # print(torch.sum(torch.isnan(samples))) + x_samples = latent_diffusion.decode_first_stage(samples) + # print(x_samples) + x_samples = latent_diffusion.decode_first_stage(samples[:,:,:-3,:]) + # print(x_samples) + waveform = latent_diffusion.first_stage_model.decode_to_waveform( + x_samples + ) + + return waveform + +def super_resolution_and_inpainting( + latent_diffusion, + text, + original_audio_file_path = None, + seed=42, + ddim_steps=200, + duration=None, + batchsize=1, + guidance_scale=2.5, + n_candidate_gen_per_text=3, + time_mask_ratio_start_and_end=(0.10, 0.15), # regenerate the 10% to 15% of the time steps in the spectrogram + # time_mask_ratio_start_and_end=(1.0, 1.0), # no inpainting + # freq_mask_ratio_start_and_end=(0.75, 1.0), # regenerate the higher 75% to 100% mel bins + freq_mask_ratio_start_and_end=(1.0, 1.0), # no super-resolution + config=None, +): + seed_everything(int(seed)) + if 
config is not None: + assert type(config) is str + config = yaml.load(open(config, "r"), Loader=yaml.FullLoader) + else: + config = default_audioldm_config() + fn_STFT = TacotronSTFT( + config["preprocessing"]["stft"]["filter_length"], + config["preprocessing"]["stft"]["hop_length"], + config["preprocessing"]["stft"]["win_length"], + config["preprocessing"]["mel"]["n_mel_channels"], + config["preprocessing"]["audio"]["sampling_rate"], + config["preprocessing"]["mel"]["mel_fmin"], + config["preprocessing"]["mel"]["mel_fmax"], + ) + + # waveform = read_wav_file(original_audio_file_path, None) + mel, _, _ = wav_to_fbank( + original_audio_file_path, target_length=int(duration * 102.4), fn_STFT=fn_STFT + ) + + batch = make_batch_for_text_to_audio(text, fbank=mel[None,...], batchsize=batchsize) + + # latent_diffusion.latent_t_size = duration_to_latent_t_size(duration) + latent_diffusion = set_cond_text(latent_diffusion) + + with torch.no_grad(): + waveform = latent_diffusion.generate_sample_masked( + [batch], + unconditional_guidance_scale=guidance_scale, + ddim_steps=ddim_steps, + n_candidate_gen_per_text=n_candidate_gen_per_text, + duration=duration, + time_mask_ratio_start_and_end=time_mask_ratio_start_and_end, + freq_mask_ratio_start_and_end=freq_mask_ratio_start_and_end + ) + return waveform \ No newline at end of file diff --git a/picoaudio/audioldm/utils.py b/picoaudio/audioldm/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5401b29d4366774233f1bf4a9e7fcb7ce214187e --- /dev/null +++ b/picoaudio/audioldm/utils.py @@ -0,0 +1,281 @@ +import contextlib +import importlib + +from inspect import isfunction +import os +import soundfile as sf +import time +import wave + +import urllib.request +import progressbar + +CACHE_DIR = os.getenv( + "AUDIOLDM_CACHE_DIR", + os.path.join(os.path.expanduser("~"), ".cache/audioldm")) + +def get_duration(fname): + with contextlib.closing(wave.open(fname, 'r')) as f: + frames = f.getnframes() + rate = 
def seed_everything(seed):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to ``random``, ``numpy.random`` and
            ``torch`` (CPU and all CUDA devices). It is also written to the
            ``PYTHONHASHSEED`` environment variable.
    """
    import os
    import random

    import numpy as np
    import torch

    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also seed every other visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN benchmarking picks algorithms by timing them, which can differ
    # between runs; it must be off for the deterministic flag to be meaningful.
    torch.backends.cudnn.benchmark = False
basic_config = { + "wave_file_save_path": "./output", + "id": { + "version": "v1", + "name": "default", + "root": "/mnt/fast/nobackup/users/hl01486/projects/general_audio_generation/AudioLDM-python/config/default/latent_diffusion.yaml", + }, + "preprocessing": { + "audio": {"sampling_rate": 16000, "max_wav_value": 32768}, + "stft": {"filter_length": 1024, "hop_length": 160, "win_length": 1024}, + "mel": { + "n_mel_channels": 64, + "mel_fmin": 0, + "mel_fmax": 8000, + "freqm": 0, + "timem": 0, + "blur": False, + "mean": -4.63, + "std": 2.74, + "target_length": 1024, + }, + }, + "model": { + "device": "cuda", + "target": "audioldm.pipline.LatentDiffusion", + "params": { + "base_learning_rate": 5e-06, + "linear_start": 0.0015, + "linear_end": 0.0195, + "num_timesteps_cond": 1, + "log_every_t": 200, + "timesteps": 1000, + "first_stage_key": "fbank", + "cond_stage_key": "waveform", + "latent_t_size": 256, + "latent_f_size": 16, + "channels": 8, + "cond_stage_trainable": True, + "conditioning_key": "film", + "monitor": "val/loss_simple_ema", + "scale_by_std": True, + "unet_config": { + "target": "audioldm.latent_diffusion.openaimodel.UNetModel", + "params": { + "image_size": 64, + "extra_film_condition_dim": 512, + "extra_film_use_concat": True, + "in_channels": 8, + "out_channels": 8, + "model_channels": 128, + "attention_resolutions": [8, 4, 2], + "num_res_blocks": 2, + "channel_mult": [1, 2, 3, 5], + "num_head_channels": 32, + "use_spatial_transformer": True, + }, + }, + "first_stage_config": { + "base_learning_rate": 4.5e-05, + "target": "audioldm.variational_autoencoder.autoencoder.AutoencoderKL", + "params": { + "monitor": "val/rec_loss", + "image_key": "fbank", + "subband": 1, + "embed_dim": 8, + "time_shuffle": 1, + "ddconfig": { + "double_z": True, + "z_channels": 8, + "resolution": 256, + "downsample_time": False, + "in_channels": 1, + "out_ch": 1, + "ch": 128, + "ch_mult": [1, 2, 4], + "num_res_blocks": 2, + "attn_resolutions": [], + "dropout": 0.0, + }, + }, 
def get_metadata():
    """Return the checkpoint registry: name -> {"path": ..., "url": ...}.

    ``path`` is ``<CACHE_DIR>/<name>.ckpt`` and ``url`` is the download
    location for that checkpoint.
    """
    # (checkpoint name, download URL) in registry order.
    sources = (
        (
            "audioldm-s-full",
            "https://zenodo.org/record/7600541/files/audioldm-s-full?download=1",
        ),
        (
            "audioldm-l-full",
            "https://zenodo.org/record/7698295/files/audioldm-full-l.ckpt?download=1",
        ),
        (
            "audioldm-s-full-v2",
            "https://zenodo.org/record/7698295/files/audioldm-full-s-v2.ckpt?download=1",
        ),
        (
            "audioldm-m-text-ft",
            "https://zenodo.org/record/7813012/files/audioldm-m-text-ft.ckpt?download=1",
        ),
        (
            "audioldm-s-text-ft",
            "https://zenodo.org/record/7813012/files/audioldm-s-text-ft.ckpt?download=1",
        ),
        (
            "audioldm-m-full",
            "https://zenodo.org/record/7813012/files/audioldm-m-full.ckpt?download=1",
        ),
    )
    registry = {}
    for model_name, download_url in sources:
        registry[model_name] = {
            "path": os.path.join(CACHE_DIR, model_name + ".ckpt"),
            "url": download_url,
        }
    return registry
def download_checkpoint(checkpoint_name="audioldm-s-full"):
    """Download a checkpoint into the cache unless a usable copy is present.

    A download is triggered when the target file is missing or smaller than
    ``2 * 10**9`` bytes (treated as an incomplete download).

    Args:
        checkpoint_name: Key of the checkpoint in ``get_metadata()``.

    Raises:
        KeyError: If ``checkpoint_name`` is not a known checkpoint.
    """
    meta = get_metadata()
    if checkpoint_name not in meta:
        print("The model name you provided is not supported. Please use one of the following: ", meta.keys())
        # Fail here with a clear message instead of falling through to an
        # incidental bare KeyError on the first lookup below.
        raise KeyError(
            f"Unsupported checkpoint name {checkpoint_name!r}; "
            f"expected one of {sorted(meta)}"
        )

    ckpt_path = meta[checkpoint_name]["path"]
    ckpt_url = meta[checkpoint_name]["url"]

    if not os.path.exists(ckpt_path) or os.path.getsize(ckpt_path) < 2*10**9:
        os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)
        print(f"Downloading the main structure of {checkpoint_name} into {os.path.dirname(ckpt_path)}")

        urllib.request.urlretrieve(ckpt_url, ckpt_path, MyProgressBar())
        print(
            "Weights downloaded in: {} Size: {}".format(
                ckpt_path,
                os.path.getsize(ckpt_path),
            )
        )
class AutoencoderKL(nn.Module):
    """Autoencoder with a diagonal-Gaussian latent and an attached vocoder.

    ``encode`` maps an input to a ``DiagonalGaussianDistribution``; ``decode``
    maps a latent back through the decoder; ``decode_to_waveform`` feeds a
    decoded tensor to the vocoder.

    Args:
        ddconfig: Keyword arguments for both ``Encoder`` and ``Decoder``; must
            contain ``"z_channels"``.
        lossconfig: Accepted but not used by this class.
        image_key: Input kind; the subband split/merge helpers are only active
            when this is ``"stft"`` and ``subband > 1``.
        embed_dim: Number of latent channels.
        time_shuffle: Stored on the instance; not otherwise used here.
        subband: Number of frequency subbands (converted with ``int``).
        ckpt_path: Accepted but not used by this class.
        reload_from_ckpt: Stored on the instance; not otherwise used here.
        ignore_keys: Accepted but not used by this class.
        colorize_nlabels: Accepted but not used by this class.
        monitor: If not ``None``, stored as ``self.monitor``.
        base_learning_rate: Accepted but not used by this class.
        scale_factor: Multiplier applied to latents in
            ``get_first_stage_encoding`` and undone in ``decode_first_stage``.
    """

    def __init__(
        self,
        ddconfig=None,
        lossconfig=None,
        image_key="fbank",
        embed_dim=None,
        time_shuffle=1,
        subband=1,
        ckpt_path=None,
        reload_from_ckpt=None,
        ignore_keys=[],
        colorize_nlabels=None,
        monitor=None,
        base_learning_rate=1e-5,
        scale_factor=1
    ):
        super().__init__()

        self.encoder = Encoder(**ddconfig)
        self.decoder = Decoder(**ddconfig)

        # Read by freq_split_subband / freq_merge_subband; previously never
        # assigned, which raised AttributeError whenever subband > 1.
        self.image_key = image_key
        self.subband = int(subband)

        if self.subband > 1:
            print("Use subband decomposition %s" % self.subband)

        self.quant_conv = torch.nn.Conv2d(2 * ddconfig["z_channels"], 2 * embed_dim, 1)
        self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)

        self.vocoder = get_vocoder(None, "cpu")
        self.embed_dim = embed_dim

        if monitor is not None:
            self.monitor = monitor

        self.time_shuffle = time_shuffle
        self.reload_from_ckpt = reload_from_ckpt
        self.reloaded = False
        self.mean, self.std = None, None

        self.scale_factor = scale_factor

        # Read by forward() to print the latent size once; previously never
        # assigned, so forward() raised AttributeError on its first call.
        self.flag_first_run = True

    def encode(self, x):
        """Encode ``x`` and return the posterior distribution over latents."""
        # x = self.time_shuffle_operation(x)
        x = self.freq_split_subband(x)
        h = self.encoder(x)
        moments = self.quant_conv(h)
        posterior = DiagonalGaussianDistribution(moments)
        return posterior

    def decode(self, z):
        """Decode latent ``z`` and merge subbands back if they were split."""
        z = self.post_quant_conv(z)
        dec = self.decoder(z)
        dec = self.freq_merge_subband(dec)
        return dec

    def decode_to_waveform(self, dec):
        """Run the vocoder on a decoded tensor.

        Dimension 1 is squeezed and the last two axes are swapped before the
        tensor is passed to ``vocoder_infer``.
        """
        dec = dec.squeeze(1).permute(0, 2, 1)
        wav_reconstruction = vocoder_infer(dec, self.vocoder)
        return wav_reconstruction

    def forward(self, input, sample_posterior=True):
        """Encode then decode ``input``.

        Args:
            input: Tensor passed to ``encode``.
            sample_posterior: Sample from the posterior when true, otherwise
                use its mode.

        Returns:
            Tuple ``(reconstruction, posterior)``.
        """
        posterior = self.encode(input)
        if sample_posterior:
            z = posterior.sample()
        else:
            z = posterior.mode()

        if self.flag_first_run:
            print("Latent size: ", z.size())
            self.flag_first_run = False

        dec = self.decode(z)

        return dec, posterior

    def freq_split_subband(self, fbank):
        """Split the last axis into ``subband`` channels (``"stft"`` inputs only).

        Returns ``fbank`` unchanged when ``subband == 1`` or the input kind is
        not ``"stft"``.
        """
        if self.subband == 1 or self.image_key != "stft":
            return fbank

        bs, ch, tstep, fbins = fbank.size()

        assert fbank.size(-1) % self.subband == 0
        assert ch == 1

        return (
            fbank.squeeze(1)
            .reshape(bs, tstep, self.subband, fbins // self.subband)
            .permute(0, 2, 1, 3)
        )

    def freq_merge_subband(self, subband_fbank):
        """Inverse of ``freq_split_subband``: fold subband channels back."""
        if self.subband == 1 or self.image_key != "stft":
            return subband_fbank
        assert subband_fbank.size(1) == self.subband  # Channel dimension
        bs, sub_ch, tstep, fbins = subband_fbank.size()
        return subband_fbank.permute(0, 2, 1, 3).reshape(bs, tstep, -1).unsqueeze(1)

    def device(self):
        """Return the device of the first parameter of this module."""
        return next(self.parameters()).device

    @torch.no_grad()
    def encode_first_stage(self, x):
        """Gradient-free alias of ``encode``."""
        return self.encode(x)

    @torch.no_grad()
    def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False):
        """Undo ``scale_factor`` on ``z`` and decode it without gradients."""
        if predict_cids:
            # NOTE(review): this branch uses self.first_stage_model and
            # rearrange, neither of which is defined in this class - it looks
            # like a leftover from a quantized model; confirm before using.
            if z.dim() == 4:
                z = torch.argmax(z.exp(), dim=1).long()
            z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None)
            z = rearrange(z, "b h w c -> b c h w").contiguous()

        z = 1.0 / self.scale_factor * z
        return self.decode(z)

    def get_first_stage_encoding(self, encoder_posterior):
        """Turn an encoder output into a scaled latent tensor.

        Args:
            encoder_posterior: A ``DiagonalGaussianDistribution`` (sampled) or
                a tensor (used as-is).

        Raises:
            NotImplementedError: For any other input type.
        """
        if isinstance(encoder_posterior, DiagonalGaussianDistribution):
            z = encoder_posterior.sample()
        elif isinstance(encoder_posterior, torch.Tensor):
            z = encoder_posterior
        else:
            raise NotImplementedError(
                f"encoder_posterior of type '{type(encoder_posterior)}' not yet implemented"
            )
        return self.scale_factor * z
class DiagonalGaussianDistribution(object):
    """Gaussian with diagonal covariance, parameterised by mean and log-variance.

    ``parameters`` is split in two halves along dim 1: the first half is the
    mean, the second the log-variance (clamped to ``[-30, 20]``). With
    ``deterministic=True`` the standard deviation and variance are zero.
    """

    def __init__(self, parameters, deterministic=False):
        self.parameters = parameters
        self.deterministic = deterministic

        mean, raw_logvar = torch.chunk(parameters, 2, dim=1)
        self.mean = mean
        self.logvar = torch.clamp(raw_logvar, -30.0, 20.0)

        if self.deterministic:
            # std and var intentionally share one zero tensor.
            zeros = torch.zeros_like(self.mean).to(device=self.parameters.device)
            self.var = self.std = zeros
        else:
            self.std = torch.exp(0.5 * self.logvar)
            self.var = torch.exp(self.logvar)

    def sample(self):
        """Draw ``mean + std * eps`` with ``eps`` standard normal."""
        noise = torch.randn(self.mean.shape).to(device=self.parameters.device)
        return self.mean + self.std * noise

    def kl(self, other=None):
        """KL divergence to ``other`` (standard normal when ``None``).

        The per-element terms are averaged over dims 1, 2 and 3. Returns a
        one-element zero tensor for a deterministic distribution.
        """
        if self.deterministic:
            return torch.Tensor([0.0])

        if other is None:
            terms = torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar
        else:
            terms = (
                torch.pow(self.mean - other.mean, 2) / other.var
                + self.var / other.var
                - 1.0
                - self.logvar
                + other.logvar
            )
        return 0.5 * torch.mean(terms, dim=[1, 2, 3])

    def nll(self, sample, dims=[1, 2, 3]):
        """Negative log-likelihood of ``sample``, summed over ``dims``.

        Returns a one-element zero tensor for a deterministic distribution.
        """
        if self.deterministic:
            return torch.Tensor([0.0])
        logtwopi = np.log(2.0 * np.pi)
        squared_error = torch.pow(sample - self.mean, 2)
        return 0.5 * torch.sum(
            logtwopi + self.logvar + squared_error / self.var,
            dim=dims,
        )

    def mode(self):
        """Return the mean, which is the mode of a Gaussian."""
        return self.mean
def get_timestep_embedding(timesteps, embedding_dim):
    """Build sinusoidal timestep embeddings.

    The first ``embedding_dim // 2`` columns are sines and the next
    ``embedding_dim // 2`` columns are cosines of ``timesteps`` scaled by
    geometrically spaced frequencies from 1 down to 1/10000. For an odd
    ``embedding_dim`` one zero column is appended.

    This matches the implementation in tensor2tensor, but differs slightly
    from the description in Section 3.5 of "Attention Is All You Need".

    Args:
        timesteps: 1-D tensor of timesteps.
        embedding_dim: Width of the returned embedding.

    Returns:
        Float tensor of shape ``(len(timesteps), embedding_dim)`` on the
        device of ``timesteps``.
    """
    assert len(timesteps.shape) == 1

    half_dim = embedding_dim // 2
    # With a single frequency (half_dim == 1) the original divisor
    # ``half_dim - 1`` is zero. Clamping it to 1 is harmless: the only
    # frequency index is 0, so the frequency is exp(0) == 1 regardless.
    log_step = math.log(10000) / max(half_dim - 1, 1)
    freqs = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -log_step)
    freqs = freqs.to(device=timesteps.device)
    angles = timesteps.float()[:, None] * freqs[None, :]
    emb = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)
    if embedding_dim % 2 == 1:  # zero pad
        emb = torch.nn.functional.pad(emb, (0, 1, 0, 0))
    return emb
class DownsampleTimeStride4(nn.Module):
    """Downsample by 4 along dim 2 and by 2 along dim 3.

    With ``with_conv`` a learned 5x5 convolution with stride ``(4, 2)`` is
    used (after padding one zero at the end of each of the last two axes);
    otherwise average pooling with a ``(4, 2)`` window is applied.
    """

    def __init__(self, in_channels, with_conv):
        super().__init__()
        self.with_conv = with_conv
        if not self.with_conv:
            return
        # Do time downsampling here
        # no asymmetric padding in torch conv, must do it ourselves
        self.conv = torch.nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=5,
            stride=(4, 2),
            padding=1,
        )

    def forward(self, x):
        functional = torch.nn.functional
        if not self.with_conv:
            return functional.avg_pool2d(x, kernel_size=(4, 2), stride=(4, 2))
        # Manual asymmetric zero padding: one extra element at the end of
        # each of the last two axes.
        padded = functional.pad(x, (0, 1, 0, 1), mode="constant", value=0)
        return self.conv(padded)
class AttnBlock(nn.Module):
    """Single-head self-attention over all spatial positions, with a residual.

    The input is normalised, projected to queries, keys and values with 1x1
    convolutions, attended over the flattened ``h * w`` positions and
    projected back; the result is added to the input.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.in_channels = in_channels

        def pointwise_conv():
            # 1x1 convolution that keeps the channel count.
            return torch.nn.Conv2d(
                in_channels, in_channels, kernel_size=1, stride=1, padding=0
            )

        self.norm = Normalize(in_channels)
        self.q = pointwise_conv()
        self.k = pointwise_conv()
        self.v = pointwise_conv()
        self.proj_out = pointwise_conv()

    def forward(self, x):
        normed = self.norm(x)
        query = self.q(normed)
        key = self.k(normed)
        value = self.v(normed)

        # compute attention
        b, c, h, w = query.shape
        positions = h * w
        query = query.reshape(b, c, positions).contiguous()
        query = query.permute(0, 2, 1).contiguous()  # b,hw,c
        key = key.reshape(b, c, positions).contiguous()  # b,c,hw
        # scores[b,i,j] = sum_c query[b,i,c] * key[b,c,j]
        scores = torch.bmm(query, key).contiguous()  # b,hw,hw
        scores = scores * (int(c) ** (-0.5))
        weights = torch.nn.functional.softmax(scores, dim=2)

        # attend to values
        value = value.reshape(b, c, positions).contiguous()
        # transpose so the first hw axis indexes keys, the second queries
        weights = weights.permute(0, 2, 1).contiguous()
        # attended[b,c,j] = sum_i value[b,c,i] * weights[b,i,j]
        attended = torch.bmm(value, weights).contiguous()
        attended = attended.reshape(b, c, h, w).contiguous()

        return x + self.proj_out(attended)
class Model(nn.Module):
    """U-Net style network with skip connections and optional timestep input.

    The down path stores every intermediate activation; the up path pops them
    in reverse order and concatenates each with the running activation along
    the channel axis before every residual block.
    """

    def __init__(
        self,
        *,
        ch,
        out_ch,
        ch_mult=(1, 2, 4, 8),
        num_res_blocks,
        attn_resolutions,
        dropout=0.0,
        resamp_with_conv=True,
        in_channels,
        resolution,
        use_timestep=True,
        use_linear_attn=False,
        attn_type="vanilla",
    ):
        super().__init__()
        if use_linear_attn:
            attn_type = "linear"
        self.ch = ch
        self.temb_ch = self.ch * 4
        self.num_resolutions = len(ch_mult)
        self.num_res_blocks = num_res_blocks
        self.resolution = resolution
        self.in_channels = in_channels
        self.use_timestep = use_timestep

        def res_block(n_in, n_out):
            # All residual blocks share the embedding width and dropout.
            return ResnetBlock(
                in_channels=n_in,
                out_channels=n_out,
                temb_channels=self.temb_ch,
                dropout=dropout,
            )

        if self.use_timestep:
            # timestep embedding
            self.temb = nn.Module()
            self.temb.dense = nn.ModuleList(
                [
                    torch.nn.Linear(self.ch, self.temb_ch),
                    torch.nn.Linear(self.temb_ch, self.temb_ch),
                ]
            )

        # downsampling
        self.conv_in = torch.nn.Conv2d(
            in_channels, self.ch, kernel_size=3, stride=1, padding=1
        )

        curr_res = resolution
        in_ch_mult = (1,) + tuple(ch_mult)
        last_level = self.num_resolutions - 1
        self.down = nn.ModuleList()
        for i_level in range(self.num_resolutions):
            level_blocks = nn.ModuleList()
            level_attn = nn.ModuleList()
            block_in = ch * in_ch_mult[i_level]
            block_out = ch * ch_mult[i_level]
            for _ in range(self.num_res_blocks):
                level_blocks.append(res_block(block_in, block_out))
                block_in = block_out
                if curr_res in attn_resolutions:
                    level_attn.append(make_attn(block_in, attn_type=attn_type))
            stage = nn.Module()
            stage.block = level_blocks
            stage.attn = level_attn
            if i_level != last_level:
                stage.downsample = Downsample(block_in, resamp_with_conv)
                curr_res = curr_res // 2
            self.down.append(stage)

        # middle
        self.mid = nn.Module()
        self.mid.block_1 = res_block(block_in, block_in)
        self.mid.attn_1 = make_attn(block_in, attn_type=attn_type)
        self.mid.block_2 = res_block(block_in, block_in)

        # upsampling
        self.up = nn.ModuleList()
        for i_level in reversed(range(self.num_resolutions)):
            level_blocks = nn.ModuleList()
            level_attn = nn.ModuleList()
            block_out = ch * ch_mult[i_level]
            for i_block in range(self.num_res_blocks + 1):
                # The last block of a level consumes the skip that entered
                # this level on the way down, which has the previous width.
                if i_block == self.num_res_blocks:
                    skip_in = ch * in_ch_mult[i_level]
                else:
                    skip_in = ch * ch_mult[i_level]
                level_blocks.append(res_block(block_in + skip_in, block_out))
                block_in = block_out
                if curr_res in attn_resolutions:
                    level_attn.append(make_attn(block_in, attn_type=attn_type))
            stage = nn.Module()
            stage.block = level_blocks
            stage.attn = level_attn
            if i_level != 0:
                stage.upsample = Upsample(block_in, resamp_with_conv)
                curr_res = curr_res * 2
            self.up.insert(0, stage)  # prepend to get consistent order

        # end
        self.norm_out = Normalize(block_in)
        self.conv_out = torch.nn.Conv2d(
            block_in, out_ch, kernel_size=3, stride=1, padding=1
        )

    def forward(self, x, t=None, context=None):
        """Run the network.

        Args:
            x: Input tensor.
            t: Timesteps; required when the model was built with
                ``use_timestep=True``.
            context: Optional tensor concatenated to ``x`` along dim 1.
        """
        # assert x.shape[2] == x.shape[3] == self.resolution
        if context is not None:
            # assume aligned context, cat along channel axis
            x = torch.cat((x, context), dim=1)

        temb = None
        if self.use_timestep:
            # timestep embedding
            assert t is not None
            first_dense, second_dense = self.temb.dense[0], self.temb.dense[1]
            temb = first_dense(get_timestep_embedding(t, self.ch))
            temb = second_dense(nonlinearity(temb))

        # downsampling
        skips = [self.conv_in(x)]
        for i_level in range(self.num_resolutions):
            stage = self.down[i_level]
            for i_block in range(self.num_res_blocks):
                h = stage.block[i_block](skips[-1], temb)
                if len(stage.attn) > 0:
                    h = stage.attn[i_block](h)
                skips.append(h)
            if i_level != self.num_resolutions - 1:
                skips.append(stage.downsample(skips[-1]))

        # middle
        h = self.mid.block_1(skips[-1], temb)
        h = self.mid.attn_1(h)
        h = self.mid.block_2(h, temb)

        # upsampling
        for i_level in reversed(range(self.num_resolutions)):
            stage = self.up[i_level]
            for i_block in range(self.num_res_blocks + 1):
                merged = torch.cat([h, skips.pop()], dim=1)
                h = stage.block[i_block](merged, temb)
                if len(stage.attn) > 0:
                    h = stage.attn[i_block](h)
            if i_level != 0:
                h = stage.upsample(h)

        # end
        h = nonlinearity(self.norm_out(h))
        return self.conv_out(h)

    def get_last_layer(self):
        """Return the weight tensor of the output convolution."""
        return self.conv_out.weight
class Decoder(nn.Module):
    """Convolutional decoder mapping a latent ``z`` back to the output space.

    Structure: input convolution, two residual blocks around an attention
    block, then one stage per entry of ``ch_mult`` (deepest first), each with
    ``num_res_blocks + 1`` residual blocks and, except for level 0, an
    upsampling layer.

    Args:
        ch: Base channel count.
        out_ch: Channels of the output convolution.
        ch_mult: Per-level channel multipliers.
        num_res_blocks: Residual blocks per level (one extra is added).
        attn_resolutions: Resolutions at which attention blocks are inserted.
        dropout: Dropout passed to every residual block.
        resamp_with_conv: Whether upsampling layers include a convolution.
        in_channels: Stored on the instance; not otherwise used here.
        resolution: Nominal full resolution, used to track ``curr_res``.
        z_channels: Channels of the latent input.
        give_pre_end: Return the activation before the final norm/conv.
        tanh_out: Apply ``tanh`` to the output.
        use_linear_attn: Force ``attn_type`` to ``"linear"``.
        downsample_time_stride4_levels: Levels whose matching upsampling uses
            ``UpsampleTimeStride4``.
        attn_type: Attention kind forwarded to ``make_attn``.
        **ignorekwargs: Extra configuration keys, ignored.
    """

    def __init__(
        self,
        *,
        ch,
        out_ch,
        ch_mult=(1, 2, 4, 8),
        num_res_blocks,
        attn_resolutions,
        dropout=0.0,
        resamp_with_conv=True,
        in_channels,
        resolution,
        z_channels,
        give_pre_end=False,
        tanh_out=False,
        use_linear_attn=False,
        downsample_time_stride4_levels=[],
        attn_type="vanilla",
        **ignorekwargs,
    ):
        super().__init__()
        if use_linear_attn:
            attn_type = "linear"
        self.ch = ch
        self.temb_ch = 0
        self.num_resolutions = len(ch_mult)
        self.num_res_blocks = num_res_blocks
        self.resolution = resolution
        self.in_channels = in_channels
        self.give_pre_end = give_pre_end
        self.tanh_out = tanh_out
        # Copy so instances never alias the shared default list (or a list
        # owned by the caller) through this attribute.
        self.downsample_time_stride4_levels = list(downsample_time_stride4_levels)

        if len(self.downsample_time_stride4_levels) > 0:
            assert max(self.downsample_time_stride4_levels) < self.num_resolutions, (
                "The level to perform downsample 4 operation need to be smaller than the total resolution number %s"
                % str(self.num_resolutions)
            )

        # compute in_ch_mult, block_in and curr_res at lowest res
        in_ch_mult = (1,) + tuple(ch_mult)
        block_in = ch * ch_mult[self.num_resolutions - 1]
        curr_res = resolution // 2 ** (self.num_resolutions - 1)
        self.z_shape = (1, z_channels, curr_res, curr_res)

        # z to block_in
        self.conv_in = torch.nn.Conv2d(
            z_channels, block_in, kernel_size=3, stride=1, padding=1
        )

        # middle
        self.mid = nn.Module()
        self.mid.block_1 = ResnetBlock(
            in_channels=block_in,
            out_channels=block_in,
            temb_channels=self.temb_ch,
            dropout=dropout,
        )
        self.mid.attn_1 = make_attn(block_in, attn_type=attn_type)
        self.mid.block_2 = ResnetBlock(
            in_channels=block_in,
            out_channels=block_in,
            temb_channels=self.temb_ch,
            dropout=dropout,
        )

        # upsampling
        self.up = nn.ModuleList()
        for i_level in reversed(range(self.num_resolutions)):
            block = nn.ModuleList()
            attn = nn.ModuleList()
            block_out = ch * ch_mult[i_level]
            for i_block in range(self.num_res_blocks + 1):
                block.append(
                    ResnetBlock(
                        in_channels=block_in,
                        out_channels=block_out,
                        temb_channels=self.temb_ch,
                        dropout=dropout,
                    )
                )
                block_in = block_out
                if curr_res in attn_resolutions:
                    attn.append(make_attn(block_in, attn_type=attn_type))
            up = nn.Module()
            up.block = block
            up.attn = attn
            if i_level != 0:
                # The upsample leaving level i undoes the downsample that
                # left level i - 1, hence the ``i_level - 1`` lookup.
                if i_level - 1 in self.downsample_time_stride4_levels:
                    up.upsample = UpsampleTimeStride4(block_in, resamp_with_conv)
                else:
                    up.upsample = Upsample(block_in, resamp_with_conv)
                curr_res = curr_res * 2
            self.up.insert(0, up)  # prepend to get consistent order

        # end
        self.norm_out = Normalize(block_in)
        self.conv_out = torch.nn.Conv2d(
            block_in, out_ch, kernel_size=3, stride=1, padding=1
        )

    def forward(self, z):
        """Decode latent ``z``.

        Returns the pre-output activation when ``give_pre_end`` is set,
        otherwise the output of the final convolution (passed through
        ``tanh`` when ``tanh_out`` is set).
        """
        # assert z.shape[1:] == self.z_shape[1:]
        self.last_z_shape = z.shape

        # No timestep embedding is used by the decoder.
        temb = None

        # z to block_in
        h = self.conv_in(z)

        # middle
        h = self.mid.block_1(h, temb)
        h = self.mid.attn_1(h)
        h = self.mid.block_2(h, temb)

        # upsampling
        for i_level in reversed(range(self.num_resolutions)):
            for i_block in range(self.num_res_blocks + 1):
                h = self.up[i_level].block[i_block](h, temb)
                if len(self.up[i_level].attn) > 0:
                    h = self.up[i_level].attn[i_block](h)
            if i_level != 0:
                h = self.up[i_level].upsample(h)

        # end
        if self.give_pre_end:
            return h

        h = self.norm_out(h)
        h = nonlinearity(h)
        h = self.conv_out(h)
        if self.tanh_out:
            h = torch.tanh(h)
        return h
Normalize(in_channels) + self.conv_out = torch.nn.Conv2d( + in_channels, out_channels, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, x): + for i, layer in enumerate(self.model): + if i in [1, 2, 3]: + x = layer(x, None) + else: + x = layer(x) + + h = self.norm_out(x) + h = nonlinearity(h) + x = self.conv_out(h) + return x + + +class UpsampleDecoder(nn.Module): + def __init__( + self, + in_channels, + out_channels, + ch, + num_res_blocks, + resolution, + ch_mult=(2, 2), + dropout=0.0, + ): + super().__init__() + # upsampling + self.temb_ch = 0 + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + block_in = in_channels + curr_res = resolution // 2 ** (self.num_resolutions - 1) + self.res_blocks = nn.ModuleList() + self.upsample_blocks = nn.ModuleList() + for i_level in range(self.num_resolutions): + res_block = [] + block_out = ch * ch_mult[i_level] + for i_block in range(self.num_res_blocks + 1): + res_block.append( + ResnetBlock( + in_channels=block_in, + out_channels=block_out, + temb_channels=self.temb_ch, + dropout=dropout, + ) + ) + block_in = block_out + self.res_blocks.append(nn.ModuleList(res_block)) + if i_level != self.num_resolutions - 1: + self.upsample_blocks.append(Upsample(block_in, True)) + curr_res = curr_res * 2 + + # end + self.norm_out = Normalize(block_in) + self.conv_out = torch.nn.Conv2d( + block_in, out_channels, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, x): + # upsampling + h = x + for k, i_level in enumerate(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + h = self.res_blocks[i_level][i_block](h, None) + if i_level != self.num_resolutions - 1: + h = self.upsample_blocks[k](h) + h = self.norm_out(h) + h = nonlinearity(h) + h = self.conv_out(h) + return h + + +class LatentRescaler(nn.Module): + def __init__(self, factor, in_channels, mid_channels, out_channels, depth=2): + super().__init__() + # residual block, interpolate, residual block + self.factor 
= factor + self.conv_in = nn.Conv2d( + in_channels, mid_channels, kernel_size=3, stride=1, padding=1 + ) + self.res_block1 = nn.ModuleList( + [ + ResnetBlock( + in_channels=mid_channels, + out_channels=mid_channels, + temb_channels=0, + dropout=0.0, + ) + for _ in range(depth) + ] + ) + self.attn = AttnBlock(mid_channels) + self.res_block2 = nn.ModuleList( + [ + ResnetBlock( + in_channels=mid_channels, + out_channels=mid_channels, + temb_channels=0, + dropout=0.0, + ) + for _ in range(depth) + ] + ) + + self.conv_out = nn.Conv2d( + mid_channels, + out_channels, + kernel_size=1, + ) + + def forward(self, x): + x = self.conv_in(x) + for block in self.res_block1: + x = block(x, None) + x = torch.nn.functional.interpolate( + x, + size=( + int(round(x.shape[2] * self.factor)), + int(round(x.shape[3] * self.factor)), + ), + ) + x = self.attn(x).contiguous() + for block in self.res_block2: + x = block(x, None) + x = self.conv_out(x) + return x + + +class MergedRescaleEncoder(nn.Module): + def __init__( + self, + in_channels, + ch, + resolution, + out_ch, + num_res_blocks, + attn_resolutions, + dropout=0.0, + resamp_with_conv=True, + ch_mult=(1, 2, 4, 8), + rescale_factor=1.0, + rescale_module_depth=1, + ): + super().__init__() + intermediate_chn = ch * ch_mult[-1] + self.encoder = Encoder( + in_channels=in_channels, + num_res_blocks=num_res_blocks, + ch=ch, + ch_mult=ch_mult, + z_channels=intermediate_chn, + double_z=False, + resolution=resolution, + attn_resolutions=attn_resolutions, + dropout=dropout, + resamp_with_conv=resamp_with_conv, + out_ch=None, + ) + self.rescaler = LatentRescaler( + factor=rescale_factor, + in_channels=intermediate_chn, + mid_channels=intermediate_chn, + out_channels=out_ch, + depth=rescale_module_depth, + ) + + def forward(self, x): + x = self.encoder(x) + x = self.rescaler(x) + return x + + +class MergedRescaleDecoder(nn.Module): + def __init__( + self, + z_channels, + out_ch, + resolution, + num_res_blocks, + attn_resolutions, + ch, + 
ch_mult=(1, 2, 4, 8), + dropout=0.0, + resamp_with_conv=True, + rescale_factor=1.0, + rescale_module_depth=1, + ): + super().__init__() + tmp_chn = z_channels * ch_mult[-1] + self.decoder = Decoder( + out_ch=out_ch, + z_channels=tmp_chn, + attn_resolutions=attn_resolutions, + dropout=dropout, + resamp_with_conv=resamp_with_conv, + in_channels=None, + num_res_blocks=num_res_blocks, + ch_mult=ch_mult, + resolution=resolution, + ch=ch, + ) + self.rescaler = LatentRescaler( + factor=rescale_factor, + in_channels=z_channels, + mid_channels=tmp_chn, + out_channels=tmp_chn, + depth=rescale_module_depth, + ) + + def forward(self, x): + x = self.rescaler(x) + x = self.decoder(x) + return x + + +class Upsampler(nn.Module): + def __init__(self, in_size, out_size, in_channels, out_channels, ch_mult=2): + super().__init__() + assert out_size >= in_size + num_blocks = int(np.log2(out_size // in_size)) + 1 + factor_up = 1.0 + (out_size % in_size) + print( + f"Building {self.__class__.__name__} with in_size: {in_size} --> out_size {out_size} and factor {factor_up}" + ) + self.rescaler = LatentRescaler( + factor=factor_up, + in_channels=in_channels, + mid_channels=2 * in_channels, + out_channels=in_channels, + ) + self.decoder = Decoder( + out_ch=out_channels, + resolution=out_size, + z_channels=in_channels, + num_res_blocks=2, + attn_resolutions=[], + in_channels=None, + ch=in_channels, + ch_mult=[ch_mult for _ in range(num_blocks)], + ) + + def forward(self, x): + x = self.rescaler(x) + x = self.decoder(x) + return x + + +class Resize(nn.Module): + def __init__(self, in_channels=None, learned=False, mode="bilinear"): + super().__init__() + self.with_conv = learned + self.mode = mode + if self.with_conv: + print( + f"Note: {self.__class__.__name} uses learned downsampling and will ignore the fixed {mode} mode" + ) + raise NotImplementedError() + assert in_channels is not None + # no asymmetric padding in torch conv, must do it ourselves + self.conv = torch.nn.Conv2d( + 
in_channels, in_channels, kernel_size=4, stride=2, padding=1 + ) + + def forward(self, x, scale_factor=1.0): + if scale_factor == 1.0: + return x + else: + x = torch.nn.functional.interpolate( + x, mode=self.mode, align_corners=False, scale_factor=scale_factor + ) + return x + + +class FirstStagePostProcessor(nn.Module): + def __init__( + self, + ch_mult: list, + in_channels, + pretrained_model: nn.Module = None, + reshape=False, + n_channels=None, + dropout=0.0, + pretrained_config=None, + ): + super().__init__() + if pretrained_config is None: + assert ( + pretrained_model is not None + ), 'Either "pretrained_model" or "pretrained_config" must not be None' + self.pretrained_model = pretrained_model + else: + assert ( + pretrained_config is not None + ), 'Either "pretrained_model" or "pretrained_config" must not be None' + self.instantiate_pretrained(pretrained_config) + + self.do_reshape = reshape + + if n_channels is None: + n_channels = self.pretrained_model.encoder.ch + + self.proj_norm = Normalize(in_channels, num_groups=in_channels // 2) + self.proj = nn.Conv2d( + in_channels, n_channels, kernel_size=3, stride=1, padding=1 + ) + + blocks = [] + downs = [] + ch_in = n_channels + for m in ch_mult: + blocks.append( + ResnetBlock( + in_channels=ch_in, out_channels=m * n_channels, dropout=dropout + ) + ) + ch_in = m * n_channels + downs.append(Downsample(ch_in, with_conv=False)) + + self.model = nn.ModuleList(blocks) + self.downsampler = nn.ModuleList(downs) + + def instantiate_pretrained(self, config): + model = instantiate_from_config(config) + self.pretrained_model = model.eval() + # self.pretrained_model.train = False + for param in self.pretrained_model.parameters(): + param.requires_grad = False + + @torch.no_grad() + def encode_with_pretrained(self, x): + c = self.pretrained_model.encode(x) + if isinstance(c, DiagonalGaussianDistribution): + c = c.mode() + return c + + def forward(self, x): + z_fs = self.encode_with_pretrained(x) + z = self.proj_norm(z_fs) 
+ z = self.proj(z) + z = nonlinearity(z) + + for submodel, downmodel in zip(self.model, self.downsampler): + z = submodel(z, temb=None) + z = downmodel(z) + + if self.do_reshape: + z = rearrange(z, "b c h w -> b (h w) c") + return z diff --git a/picoaudio/data/filter_data.py b/picoaudio/data/filter_data.py new file mode 100644 index 0000000000000000000000000000000000000000..0eb226cace5e44234b3ae4f2108a3ec817d1a38c --- /dev/null +++ b/picoaudio/data/filter_data.py @@ -0,0 +1,24 @@ +def get_event_list(): + event_list = [ + "burping_belching", # 0 + "car_horn_honking", # + "cat_meowing", # + "cow_mooing", # + "dog_barking", # + "door_knocking", # + "door_slamming", # + "explosion", # + "gunshot", # 8 + "sheep_goat_bleating", # + "sneeze", # + "spraying", # + "thump_thud", # + "train_horn", # + "tapping_clicking_clanking", # + "woman_laughing", # + "duck_quacking", # 16 + "whistling", # + + ] + return event_list + diff --git a/picoaudio/data/meta_data/test-frequency-control_onoffFromGpt_multi-event.json b/picoaudio/data/meta_data/test-frequency-control_onoffFromGpt_multi-event.json new file mode 100644 index 0000000000000000000000000000000000000000..de9e98acac39a0935f7baee5baa9a6b98eb8cd8d --- /dev/null +++ b/picoaudio/data/meta_data/test-frequency-control_onoffFromGpt_multi-event.json @@ -0,0 +1,200 @@ +{"filepath": "data/multi_event_test/syn_1.wav", "onoffCaption": "cat meowing at 0.5-2.0, 3.0-4.5 and whistling at 5.0-6.5 and explosion at 7.0-8.0, 8.5-9.5", "frequencyCaption": "cat meowing two times and whistling one times and explosion two times"} +{"filepath": "data/multi_event_test/syn_6.wav", "onoffCaption": "whistling at 2.0-6.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_8.wav", "onoffCaption": "cow mooing at 1.954-4.954, 6.219-9.219", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_11.wav", "onoffCaption": "burping belching at 0.0-2.0, 2.5-4.5 and dog barking at 5.0-7.0", 
"frequencyCaption": "burping belching two times and dog barking one times"} +{"filepath": "data/multi_event_test/syn_16.wav", "onoffCaption": "duck quacking at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_18.wav", "onoffCaption": "door knocking at 0.138-2.518, 3.708-6.088 and door slamming at 2.798-4.798", "frequencyCaption": "door knocking two times and door slamming one times"} +{"filepath": "data/multi_event_test/syn_21.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_26.wav", "onoffCaption": "whistling at 0.2-4.2", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_28.wav", "onoffCaption": "cow mooing at 0.0-1.0 and spraying at 1.0-2.0", "frequencyCaption": "cow mooing one times and spraying one times"} +{"filepath": "data/multi_event_test/syn_32.wav", "onoffCaption": "duck quacking at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_35.wav", "onoffCaption": "car horn honking at 0.5-2.5, 3.0-5.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_test/syn_43.wav", "onoffCaption": "dog barking at 0.0-2.0, 2.5-4.5 and burping belching at 5.0-7.0, 7.5-9.5 and explosion at 4.8-7.8", "frequencyCaption": "dog barking two times and burping belching two times and explosion one times"} +{"filepath": "data/multi_event_test/syn_44.wav", "onoffCaption": "sneeze at 0.5-1.5", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_50.wav", "onoffCaption": "car horn honking at 0.0-2.0, 3.0-5.0 and sneeze at 6.0-7.0 and train horn at 8.0-10.0", "frequencyCaption": "car horn honking two times and sneeze one times and train horn one times"} +{"filepath": "data/multi_event_test/syn_57.wav", "onoffCaption": "dog barking at 0.0-2.0, 3.0-5.0 and cow mooing at 6.0-9.0", 
"frequencyCaption": "dog barking two times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_59.wav", "onoffCaption": "door slamming at 0.0-1.0 and explosion at 1.5-4.5, 5.0-8.0", "frequencyCaption": "door slamming one times and explosion two times"} +{"filepath": "data/multi_event_test/syn_60.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_test/syn_67.wav", "onoffCaption": "whistling at 0.204-5.379", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_69.wav", "onoffCaption": "door knocking at 0-1, 2-3, 4-5", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_73.wav", "onoffCaption": "door knocking at 0-1, 1-2, 2-3 and sneeze at 3-4, 4-5", "frequencyCaption": "door knocking three times and sneeze two times"} +{"filepath": "data/multi_event_test/syn_74.wav", "onoffCaption": "spraying at 0.5-1.0, 1.5-2.0 and gunshot at 3.0-4.0, 5.0-6.0, 7.0-8.0", "frequencyCaption": "spraying two times and gunshot three times"} +{"filepath": "data/multi_event_test/syn_82.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_91.wav", "onoffCaption": "gunshot at 0-1, 2-3", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_test/syn_96.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_98.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_test/syn_101.wav", "onoffCaption": "dog barking at 0.464-2.464", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_test/syn_106.wav", "onoffCaption": "burping belching at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "burping belching three times"} +{"filepath": 
"data/multi_event_test/syn_108.wav", "onoffCaption": "sneeze at 0.5-1.5", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_112.wav", "onoffCaption": "woman laughing at 0.004-2.372, 3.672-6.653", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_115.wav", "onoffCaption": "duck quacking at 0.3-2.3 and tapping clicking clanking at 2.5-5.5, 6.0-9.0", "frequencyCaption": "duck quacking one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_122.wav", "onoffCaption": "door knocking at 0-1, 3-4, 6-7", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_125.wav", "onoffCaption": "cow mooing at 1.5-4.5, 5.5-8.5", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_131.wav", "onoffCaption": "whistling at 0-1, 2-3 and cat meowing at 1-2", "frequencyCaption": "whistling two times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_136.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 3.5-5.5 and whistling at 6.0-8.0, 8.5-9.5 and woman laughing at 2.0-4.0", "frequencyCaption": "sheep goat bleating two times and whistling two times and woman laughing one times"} +{"filepath": "data/multi_event_test/syn_138.wav", "onoffCaption": "gunshot at 0.0-1.0 and tapping clicking clanking at 1.5-5.0", "frequencyCaption": "gunshot one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_140.wav", "onoffCaption": "door knocking at 0.00-2.00, 3.00-5.00, 6.00-8.00", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_147.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_149.wav", "onoffCaption": "car horn honking at 0.0-2.0, 3.0-5.0 and spraying at 5.5-6.0, 7.0-7.5", "frequencyCaption": "car horn honking two times and spraying two 
times"} +{"filepath": "data/multi_event_test/syn_153.wav", "onoffCaption": "cat meowing at 0-1.0", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_test/syn_154.wav", "onoffCaption": "cat meowing at 0.5-1.5 and door knocking at 2-3.5", "frequencyCaption": "cat meowing one times and door knocking one times"} +{"filepath": "data/multi_event_test/syn_163.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3, 4-5", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_test/syn_164.wav", "onoffCaption": "whistling at 0.204-5.379, 7.724-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_test/syn_170.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_177.wav", "onoffCaption": "thump thud at 0-1 and cow mooing at 1-2", "frequencyCaption": "thump thud one times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_179.wav", "onoffCaption": "cow mooing at 1.954-4.602, 5.719-8.729", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_181.wav", "onoffCaption": "cow mooing at 1.0-3.0, 4.0-6.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_186.wav", "onoffCaption": "gunshot at 0.0-1.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_test/syn_188.wav", "onoffCaption": "gunshot at 0-1, 1-2 and duck quacking at 2-3", "frequencyCaption": "gunshot two times and duck quacking one times"} +{"filepath": "data/multi_event_test/syn_192.wav", "onoffCaption": "spraying at 0.0-1.0, 2.0-3.0", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_test/syn_195.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_test/syn_3.wav", "onoffCaption": "tapping clicking clanking at 0.5-3.0, 
4.0-7.5", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_4.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_13.wav", "onoffCaption": "duck quacking at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_14.wav", "onoffCaption": "sneeze at 0.38-1.38", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_23.wav", "onoffCaption": "sneeze at 0.5-1.5, 2.5-3.5", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_24.wav", "onoffCaption": "woman laughing at 2.782-5.368", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_test/syn_30.wav", "onoffCaption": "burping belching at 0.871-3.871, 4.871-7.871", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_37.wav", "onoffCaption": "thump thud at 0.0-1.5, 5.0-6.5 and door knocking at 1.5-3.5, 6.5-8.5 and burping belching at 3.5-4.5", "frequencyCaption": "thump thud two times and door knocking two times and burping belching one times"} +{"filepath": "data/multi_event_test/syn_39.wav", "onoffCaption": "train horn at 0.0-2.0, 2.5-4.5", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_test/syn_41.wav", "onoffCaption": "thump thud at 0.0-1.0", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_test/syn_48.wav", "onoffCaption": "cat meowing at 0-1, 1-2, 2-3", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_test/syn_52.wav", "onoffCaption": "gunshot at 0.0-1.0 and duck quacking at 1.5-2.5 and tapping clicking clanking at 3.0-4.0", "frequencyCaption": "gunshot one times and duck quacking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_55.wav", "onoffCaption": 
"sneeze at 1.3-2.403, 4.759-6.442", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_62.wav", "onoffCaption": "woman laughing at 0.004-2.372, 3.672-6.653", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_65.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.1-4.1", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_test/syn_71.wav", "onoffCaption": "door slamming at 0.0-1.0, 2.0-3.0 and whistling at 4.0-8.0", "frequencyCaption": "door slamming two times and whistling one times"} +{"filepath": "data/multi_event_test/syn_76.wav", "onoffCaption": "dog barking at 0.464-2.464", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_test/syn_78.wav", "onoffCaption": "explosion at 0.0-2.0, 2.5-4.5 and duck quacking at 5.0-7.0, 7.5-9.5", "frequencyCaption": "explosion two times and duck quacking two times"} +{"filepath": "data/multi_event_test/syn_80.wav", "onoffCaption": "door slamming at 0.0-1.0 and sheep goat bleating at 2.0-4.0", "frequencyCaption": "door slamming one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_85.wav", "onoffCaption": "door knocking at 2.047-4.422", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_test/syn_87.wav", "onoffCaption": "explosion at 1.773-4.034, 5.15-7.411", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_89.wav", "onoffCaption": "car horn honking at 0.0-2.0 and cat meowing at 2.5-4.0", "frequencyCaption": "car horn honking one times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_93.wav", "onoffCaption": "dog barking at 0-2, 2-4", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_94.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.5-4.5, 5.0-7.0 and thump thud at 1.0-3.0, 4.0-6.0 and sheep goat bleating at 2.0-4.0, 7.0-9.0", "frequencyCaption": "gunshot three times and thump 
thud two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_103.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_104.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_test/syn_110.wav", "onoffCaption": "train horn at 0-1 and duck quacking at 1-2 and cow mooing at 2-3", "frequencyCaption": "train horn one times and duck quacking one times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_117.wav", "onoffCaption": "sheep goat bleating at 1.0-3.0, 4.5-6.5", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_119.wav", "onoffCaption": "train horn at 0.0-2.0 and door knocking at 2.5-4.5, 5.0-7.0", "frequencyCaption": "train horn one times and door knocking two times"} +{"filepath": "data/multi_event_test/syn_120.wav", "onoffCaption": "burping belching at 0.871-2.871, 3.871-5.871", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_127.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_129.wav", "onoffCaption": "door knocking at 0-1, 1-2, 2-3", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_133.wav", "onoffCaption": "duck quacking at 2.203-4.203, 5.361-7.361", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_test/syn_134.wav", "onoffCaption": "car horn honking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_test/syn_142.wav", "onoffCaption": "sneeze at 0.5-1.5, 2.0-3.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_145.wav", "onoffCaption": "door knocking at 0.002-2.092, 2.842-5.601 and whistling at 1.9-10.0", "frequencyCaption": "door 
knocking two times and whistling one times"} +{"filepath": "data/multi_event_test/syn_151.wav", "onoffCaption": "dog barking at 0.121-2.121, 3.824-5.824, 7.767-9.767", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_156.wav", "onoffCaption": "car horn honking at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_test/syn_158.wav", "onoffCaption": "tapping clicking clanking at 1.5-4.5, 5.5-8.5", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_161.wav", "onoffCaption": "spraying at 0-1, 2-3", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_test/syn_166.wav", "onoffCaption": "woman laughing at 1.672-3.955", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_test/syn_168.wav", "onoffCaption": "sheep goat bleating at 0.56-2.56", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_172.wav", "onoffCaption": "door knocking at 0-1, 1-2, 2-3", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_175.wav", "onoffCaption": "cow mooing at 0-3 and spraying at 3-6", "frequencyCaption": "cow mooing one times and spraying one times"} +{"filepath": "data/multi_event_test/syn_183.wav", "onoffCaption": "explosion at 0.0-2.0, 2.1-4.1", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_184.wav", "onoffCaption": "sheep goat bleating at 0-1", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_190.wav", "onoffCaption": "whistling at 0.0-1.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_197.wav", "onoffCaption": "tapping clicking clanking at 0.032-2.032, 2.532-4.532", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_199.wav", "onoffCaption": 
"duck quacking at 0.0-2.0 and cat meowing at 2.5-4.5", "frequencyCaption": "duck quacking one times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_200.wav", "onoffCaption": "explosion at 1.0-3.0, 4.0-6.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_2.wav", "onoffCaption": "door knocking at 0.0-1.0", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_test/syn_5.wav", "onoffCaption": "burping belching at 0.359-2.774", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_test/syn_12.wav", "onoffCaption": "sheep goat bleating at 0.0-2.0 and sneeze at 2.5-3.5", "frequencyCaption": "sheep goat bleating one times and sneeze one times"} +{"filepath": "data/multi_event_test/syn_15.wav", "onoffCaption": "tapping clicking clanking at 2.992-6.432", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_22.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_25.wav", "onoffCaption": "burping belching at 0.871-3.871, 4.391-7.391", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_31.wav", "onoffCaption": "woman laughing at 0-1, 2-3", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_36.wav", "onoffCaption": "door slamming at 0.5-1.5, 2.0-3.0, 3.5-4.5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_38.wav", "onoffCaption": "cat meowing at 0-1, 2-3, 4-5", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_test/syn_40.wav", "onoffCaption": "door knocking at 0.138-2.518, 3.708-6.088 and cow mooing at 6.91-9.447", "frequencyCaption": "door knocking two times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_46.wav", "onoffCaption": "door slamming at 1.145-2.085, 3.545-4.463", 
"frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_47.wav", "onoffCaption": "spraying at 0.0-1.0, 2.0-3.0, 4.0-5.0 and cow mooing at 6.0-8.0, 8.5-10.0", "frequencyCaption": "spraying three times and cow mooing two times"} +{"filepath": "data/multi_event_test/syn_49.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 3.0-5.0 and tapping clicking clanking at 0.0-4.0, 5.5-9.5", "frequencyCaption": "sheep goat bleating two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_51.wav", "onoffCaption": "train horn at 0.873-4.633, 5.147-8.907", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_test/syn_53.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_54.wav", "onoffCaption": "train horn at 0.0-2.0, 2.5-4.5", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_test/syn_63.wav", "onoffCaption": "train horn at 0-1 and cat meowing at 2-3 and dog barking at 4-5", "frequencyCaption": "train horn one times and cat meowing one times and dog barking one times"} +{"filepath": "data/multi_event_test/syn_64.wav", "onoffCaption": "sheep goat bleating at 0.56-2.56", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_70.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_77.wav", "onoffCaption": "cow mooing at 0.0-3.0", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_test/syn_79.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_81.wav", "onoffCaption": "gunshot at 0.0-2.0, 3.0-5.0, 6.0-8.0", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_86.wav", "onoffCaption": "whistling at 0-1 and woman 
laughing at 1-3, 3-5", "frequencyCaption": "whistling one times and woman laughing two times"} +{"filepath": "data/multi_event_test/syn_88.wav", "onoffCaption": "sheep goat bleating at 1.0-3.0, 4.0-6.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_92.wav", "onoffCaption": "door slamming at 0.0-1.0, 2.0-3.0, 4.0-5.0 and tapping clicking clanking at 6.0-7.0", "frequencyCaption": "door slamming three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_95.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_102.wav", "onoffCaption": "door knocking at 1.973-5.029, 6.285-9.132", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_test/syn_105.wav", "onoffCaption": "train horn at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_test/syn_111.wav", "onoffCaption": "whistling at 0.204-2.79, 4.0-6.586 and door slamming at 7.0-8.0", "frequencyCaption": "whistling two times and door slamming one times"} +{"filepath": "data/multi_event_test/syn_116.wav", "onoffCaption": "burping belching at 1.0-3.0, 4.0-6.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_118.wav", "onoffCaption": "sneeze at 0.0-1.0", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_121.wav", "onoffCaption": "car horn honking at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_test/syn_123.wav", "onoffCaption": "sheep goat bleating at 0.65-2.65, 3.65-5.65", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_126.wav", "onoffCaption": "sneeze at 0.373-2.332 and car horn honking at 1.03-5.542, 6.081-10.0", "frequencyCaption": "sneeze one times and car horn honking two times"} 
+{"filepath": "data/multi_event_test/syn_128.wav", "onoffCaption": "sheep goat bleating at 1.0-3.0 and door knocking at 3.5-5.5, 6.0-8.0", "frequencyCaption": "sheep goat bleating one times and door knocking two times"} +{"filepath": "data/multi_event_test/syn_132.wav", "onoffCaption": "sheep goat bleating at 0.0-2.0 and spraying at 2.5-3.0, 4.0-4.5, 5.5-6.0 and duck quacking at 6.5-7.5, 8.0-9.0, 9.5-10.0", "frequencyCaption": "sheep goat bleating one times and spraying three times and duck quacking three times"} +{"filepath": "data/multi_event_test/syn_135.wav", "onoffCaption": "tapping clicking clanking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_139.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_test/syn_143.wav", "onoffCaption": "spraying at 0.0-1.0 and explosion at 1.5-4.5", "frequencyCaption": "spraying one times and explosion one times"} +{"filepath": "data/multi_event_test/syn_144.wav", "onoffCaption": "duck quacking at 0-1, 2-3, 4-5", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_150.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_157.wav", "onoffCaption": "train horn at 0-3.5", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_test/syn_159.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_160.wav", "onoffCaption": "spraying at 0.0-1.0 and whistling at 1.0-3.0", "frequencyCaption": "spraying one times and whistling one times"} +{"filepath": "data/multi_event_test/syn_167.wav", "onoffCaption": "burping belching at 0.0-2.0, 2.5-4.5 and gunshot at 5.0-7.0", "frequencyCaption": "burping belching two times and gunshot one 
times"} +{"filepath": "data/multi_event_test/syn_169.wav", "onoffCaption": "sneeze at 0.373-2.332, 3.255-5.716", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_173.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3, 4-5", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_test/syn_174.wav", "onoffCaption": "dog barking at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_176.wav", "onoffCaption": "woman laughing at 1.625-3.98, 4.735-6.981", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_182.wav", "onoffCaption": "cow mooing at 0.0-3.0 and gunshot at 4.0-5.0", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_185.wav", "onoffCaption": "spraying at 0.22-0.847 and door knocking at 2.797-5.334", "frequencyCaption": "spraying one times and door knocking one times"} +{"filepath": "data/multi_event_test/syn_189.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_191.wav", "onoffCaption": "burping belching at 0.0-2.0, 2.5-4.5", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_193.wav", "onoffCaption": "cow mooing at 1.0-3.0, 4.0-6.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_196.wav", "onoffCaption": "spraying at 0.0-1.0", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_test/syn_198.wav", "onoffCaption": "gunshot at 0.0-2.0, 3.0-5.0, 6.0-8.0", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_7.wav", "onoffCaption": "spraying at 0.0-1.0 and burping belching at 1.5-2.5", "frequencyCaption": "spraying one times and burping belching one times"} +{"filepath": "data/multi_event_test/syn_9.wav", 
"onoffCaption": "cow mooing at 0.0-3.0", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_test/syn_10.wav", "onoffCaption": "door knocking at 2-4, 5-7", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_test/syn_17.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_19.wav", "onoffCaption": "gunshot at 0.0-1.0 and spraying at 1.5-2.5", "frequencyCaption": "gunshot one times and spraying one times"} +{"filepath": "data/multi_event_test/syn_20.wav", "onoffCaption": "tapping clicking clanking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_27.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_29.wav", "onoffCaption": "tapping clicking clanking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_33.wav", "onoffCaption": "dog barking at 0.0-2.0, 2.5-4.5 and car horn honking at 5.0-7.0", "frequencyCaption": "dog barking two times and car horn honking one times"} +{"filepath": "data/multi_event_test/syn_34.wav", "onoffCaption": "sheep goat bleating at 1.575-3.575", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_42.wav", "onoffCaption": "tapping clicking clanking at 0.0-2.0, 2.5-4.5", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_45.wav", "onoffCaption": "cat meowing at 0.5-1.5 and train horn at 2.0-6.0", "frequencyCaption": "cat meowing one times and train horn one times"} +{"filepath": "data/multi_event_test/syn_56.wav", "onoffCaption": "tapping clicking clanking at 0.961-4.401, 6.37-9.81", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_58.wav", 
"onoffCaption": "door slamming at 0.355-2.581", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_test/syn_61.wav", "onoffCaption": "explosion at 0.5-3.5, 4.0-7.0 and train horn at 7.5-10.0 and woman laughing at 1.0-4.0", "frequencyCaption": "explosion two times and train horn one times and woman laughing one times"} +{"filepath": "data/multi_event_test/syn_66.wav", "onoffCaption": "sheep goat bleating at 0.56-2.56", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_68.wav", "onoffCaption": "car horn honking at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_test/syn_72.wav", "onoffCaption": "spraying at 0.0-0.6, 1.0-1.6 and thump thud at 2.0-3.6 and dog barking at 4.0-6.0", "frequencyCaption": "spraying two times and thump thud one times and dog barking one times"} +{"filepath": "data/multi_event_test/syn_75.wav", "onoffCaption": "explosion at 0.5-2.5, 2.501-4.501", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_83.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_84.wav", "onoffCaption": "burping belching at 0.871-3.871, 4.642-7.642", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_90.wav", "onoffCaption": "gunshot at 0.2-1.2", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_test/syn_97.wav", "onoffCaption": "cat meowing at 0.5-1.5", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_test/syn_99.wav", "onoffCaption": "duck quacking at 0.0-2.0, 2.0-4.0", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_test/syn_100.wav", "onoffCaption": "cat meowing at 0.0-2.0 and sheep goat bleating at 3.0-5.0, 6.0-8.0, 9.0-10.0", "frequencyCaption": "cat meowing one times and sheep goat bleating three 
times"} +{"filepath": "data/multi_event_test/syn_107.wav", "onoffCaption": "spraying at 0.0-1.5, 2.0-3.5 and dog barking at 4.0-6.0, 7.0-9.0 and tapping clicking clanking at 1.6-3.1, 3.6-5.1", "frequencyCaption": "spraying two times and dog barking two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_109.wav", "onoffCaption": "cow mooing at 0.0-3.0 and gunshot at 3.5-4.5", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_113.wav", "onoffCaption": "whistling at 0.742-5.917 and tapping clicking clanking at 2.992-6.432", "frequencyCaption": "whistling one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_114.wav", "onoffCaption": "car horn honking at 0-2 and door knocking at 2-4", "frequencyCaption": "car horn honking one times and door knocking one times"} +{"filepath": "data/multi_event_test/syn_124.wav", "onoffCaption": "gunshot at 0.0-2.0, 3.0-5.0", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_test/syn_130.wav", "onoffCaption": "dog barking at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_137.wav", "onoffCaption": "door knocking at 0-1 and cow mooing at 2-3 and gunshot at 4-5", "frequencyCaption": "door knocking one times and cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_141.wav", "onoffCaption": "sneeze at 0.33-1.403, 2.759-3.832", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_146.wav", "onoffCaption": "sneeze at 0.0-1.0, 2.0-3.0 and cat meowing at 4.0-5.0", "frequencyCaption": "sneeze two times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_148.wav", "onoffCaption": "duck quacking at 0-1, 2-3 and cow mooing at 4-5", "frequencyCaption": "duck quacking two times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_152.wav", 
"onoffCaption": "tapping clicking clanking at 0.0-1.0, 1.5-2.5 and train horn at 3.0-7.0", "frequencyCaption": "tapping clicking clanking two times and train horn one times"} +{"filepath": "data/multi_event_test/syn_155.wav", "onoffCaption": "tapping clicking clanking at 0.0-1.0 and gunshot at 2.0-3.0 and cat meowing at 4.0-5.0", "frequencyCaption": "tapping clicking clanking one times and gunshot one times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_162.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_165.wav", "onoffCaption": "thump thud at 0.0-1.5, 2.0-3.5 and whistling at 4.0-7.0", "frequencyCaption": "thump thud two times and whistling one times"} +{"filepath": "data/multi_event_test/syn_171.wav", "onoffCaption": "spraying at 0.0-0.5, 1.5-2.0, 3.0-3.5 and thump thud at 4.0-5.0 and sheep goat bleating at 5.5-6.5, 7.0-8.0", "frequencyCaption": "spraying three times and thump thud one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_178.wav", "onoffCaption": "door slamming at 0.355-2.581 and woman laughing at 0.964-3.319", "frequencyCaption": "door slamming one times and woman laughing one times"} +{"filepath": "data/multi_event_test/syn_180.wav", "onoffCaption": "spraying at 0.0-1.0 and cow mooing at 2.0-5.0", "frequencyCaption": "spraying one times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_187.wav", "onoffCaption": "sneeze at 1.3-2.403, 4.759-6.442", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_194.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} diff --git a/picoaudio/data/meta_data/test-frequency-control_onoffFromGpt_single-event.json b/picoaudio/data/meta_data/test-frequency-control_onoffFromGpt_single-event.json new file mode 100644 index 
0000000000000000000000000000000000000000..9310b20b6657974a4e9a39f0ddf9c0ab7c252c6f --- /dev/null +++ b/picoaudio/data/meta_data/test-frequency-control_onoffFromGpt_single-event.json @@ -0,0 +1,400 @@ +{"filepath": "data/single_event_multi_identity_test/syn_1.wav", "onoffCaption": "cat meowing at 1.674-5.019", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_6.wav", "onoffCaption": "tapping clicking clanking at 0.536-3.976", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_8.wav", "onoffCaption": "door slamming at 0-1", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_11.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_16.wav", "onoffCaption": "thump thud at 0-1", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_multi_identity_test/syn_18.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_21.wav", "onoffCaption": "sheep goat bleating at 0.56-2.56", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_26.wav", "onoffCaption": "tapping clicking clanking at 0.536-3.976", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_28.wav", "onoffCaption": "sneeze at 0-1, 2-3", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_32.wav", "onoffCaption": "cow mooing at 0-3.309", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_35.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two 
times"} +{"filepath": "data/single_event_multi_identity_test/syn_43.wav", "onoffCaption": "thump thud at 0-1, 2-3", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_44.wav", "onoffCaption": "burping belching at 0-1, 2-3, 4-5", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_multi_identity_test/syn_50.wav", "onoffCaption": "car horn honking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_57.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_59.wav", "onoffCaption": "woman laughing at 2.0-4.5, 5.0-7.5", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_60.wav", "onoffCaption": "cat meowing at 1-2, 3-4", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_67.wav", "onoffCaption": "cow mooing at 0-3", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_69.wav", "onoffCaption": "burping belching at 2.0-3.0", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_multi_identity_test/syn_73.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_74.wav", "onoffCaption": "cat meowing at 0-1", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_82.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-8.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_91.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_96.wav", "onoffCaption": "cat meowing at 0-1, 
2-3, 4-5", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_98.wav", "onoffCaption": "woman laughing at 0.0-2.0, 2.5-4.5", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_101.wav", "onoffCaption": "burping belching at 0.871-1.871, 2.871-3.871", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_106.wav", "onoffCaption": "gunshot at 0-1", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_108.wav", "onoffCaption": "cat meowing at 0.0-2.0, 2.5-4.5", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_112.wav", "onoffCaption": "train horn at 0-1", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_115.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 3.0-5.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_122.wav", "onoffCaption": "tapping clicking clanking at 1.0-4.0, 5.0-8.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_125.wav", "onoffCaption": "car horn honking at 0.0-2.0, 3.0-5.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_131.wav", "onoffCaption": "cow mooing at 1.954-4.602, 6.719-9.729", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_136.wav", "onoffCaption": "tapping clicking clanking at 1-3, 6-8", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_138.wav", "onoffCaption": "burping belching at 0-1, 2-3, 4-5", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_multi_identity_test/syn_140.wav", 
"onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_147.wav", "onoffCaption": "burping belching at 0.5-2.5, 3.5-5.5", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_149.wav", "onoffCaption": "gunshot at 0-1", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_153.wav", "onoffCaption": "cow mooing at 0-1, 2-3", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_154.wav", "onoffCaption": "train horn at 0-1, 2-3", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_multi_identity_test/syn_163.wav", "onoffCaption": "cow mooing at 1.954-4.602, 6.719-9.729", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_164.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_170.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_177.wav", "onoffCaption": "door knocking at 1-2, 3-4", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_179.wav", "onoffCaption": "gunshot at 0.0-2.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_181.wav", "onoffCaption": "door knocking at 0-1.0", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_186.wav", "onoffCaption": "sneeze at 0.3-1.3, 2.3-3.3", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_188.wav", "onoffCaption": "explosion at 0-2", "frequencyCaption": "explosion one times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_192.wav", "onoffCaption": "cat meowing at 0.0-2.0, 2.5-4.5", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_195.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_3.wav", "onoffCaption": "burping belching at 0.5-2.5, 3.0-5.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_4.wav", "onoffCaption": "cat meowing at 0-1", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_13.wav", "onoffCaption": "tapping clicking clanking at 0.032-1.032, 2.032-3.032, 4.032-5.032", "frequencyCaption": "tapping clicking clanking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_14.wav", "onoffCaption": "tapping clicking clanking at 0-1, 2-3", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_23.wav", "onoffCaption": "cow mooing at 1.954-4.602, 6.719-9.729", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_24.wav", "onoffCaption": "thump thud at 1-2, 3-4", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_30.wav", "onoffCaption": "explosion at 0-1, 2-3", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_37.wav", "onoffCaption": "tapping clicking clanking at 0.5-2.5, 3-5", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_39.wav", "onoffCaption": "burping belching at 0.5-2.5, 3.0-5.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_41.wav", "onoffCaption": "car horn honking at 1-2, 3-4", "frequencyCaption": "car horn 
honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_48.wav", "onoffCaption": "train horn at 0-1", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_52.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_55.wav", "onoffCaption": "spraying at 0-1, 2-3, 4-5", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_62.wav", "onoffCaption": "woman laughing at 2.782-5.368, 6.831-8.912", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_65.wav", "onoffCaption": "tapping clicking clanking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_71.wav", "onoffCaption": "train horn at 0-1", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_76.wav", "onoffCaption": "door knocking at 0-1", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_78.wav", "onoffCaption": "door knocking at 1-2, 3-4", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_80.wav", "onoffCaption": "car horn honking at 0-1", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_85.wav", "onoffCaption": "gunshot at 0.0-2.0, 3.0-5.0, 6.0-8.0", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_multi_identity_test/syn_87.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_89.wav", "onoffCaption": "door knocking at 1.973-5.029, 6.285-9.132", "frequencyCaption": "door knocking two times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_93.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_94.wav", "onoffCaption": "burping belching at 0.871-2.871, 5.218-7.218", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_103.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_104.wav", "onoffCaption": "thump thud at 0-2, 3-5", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_110.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_117.wav", "onoffCaption": "tapping clicking clanking at 0-2", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_119.wav", "onoffCaption": "duck quacking at 0.235-2.235, 3.085-5.085", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_120.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_127.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_129.wav", "onoffCaption": "burping belching at 0.5-1.5, 2.5-3.5", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_133.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_134.wav", "onoffCaption": "spraying at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "spraying three times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_142.wav", "onoffCaption": "cow mooing at 1-2, 3-4", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_145.wav", "onoffCaption": "cow mooing at 0.0-2.0, 3.0-5.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_151.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_156.wav", "onoffCaption": "thump thud at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "thump thud three times"} +{"filepath": "data/single_event_multi_identity_test/syn_158.wav", "onoffCaption": "burping belching at 1.5-3.5", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_multi_identity_test/syn_161.wav", "onoffCaption": "car horn honking at 1.5-3.5, 4.0-6.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_166.wav", "onoffCaption": "burping belching at 0-1, 1-2, 2-3", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_multi_identity_test/syn_168.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_multi_identity_test/syn_172.wav", "onoffCaption": "woman laughing at 0.0-2.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_175.wav", "onoffCaption": "spraying at 0-1", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_multi_identity_test/syn_183.wav", "onoffCaption": "woman laughing at 2.782-5.368, 6.831-8.912", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_184.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_190.wav", "onoffCaption": "explosion at 1.773-4.034, 5.15-7.411", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_197.wav", "onoffCaption": "car horn honking at 1.817-4.404, 5.85-8.437", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_199.wav", "onoffCaption": "car horn honking at 0.664-3.129", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_200.wav", "onoffCaption": "train horn at 0-2", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_2.wav", "onoffCaption": "cat meowing at 0.5-2.5", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_5.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_12.wav", "onoffCaption": "tapping clicking clanking at 0.536-3.976", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_15.wav", "onoffCaption": "explosion at 1.773-4.034, 5.15-7.411", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_22.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3, 4-5", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/single_event_multi_identity_test/syn_25.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.1-4.1", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_31.wav", "onoffCaption": "sneeze at 0.0-1.0", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_multi_identity_test/syn_36.wav", "onoffCaption": "sheep goat bleating at 0-1, 1-2", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_38.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_40.wav", "onoffCaption": "woman laughing at 0.0-2.0, 2.5-4.5", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_46.wav", "onoffCaption": "tapping clicking clanking at 1.5-5.0", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_47.wav", "onoffCaption": "cow mooing at 0.0-2.0, 3.0-5.0, 6.0-8.0", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_49.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_51.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_53.wav", "onoffCaption": "whistling at 0-1, 2-3", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_54.wav", "onoffCaption": "cow mooing at 1.954-6.383, 7.52-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_63.wav", "onoffCaption": "explosion at 0.0-3.0", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_64.wav", "onoffCaption": "whistling at 0-1, 2-3", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_70.wav", "onoffCaption": "dog barking at 1-2", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_77.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_79.wav", "onoffCaption": "train horn at 0-2.5", "frequencyCaption": "train horn one 
times"} +{"filepath": "data/single_event_multi_identity_test/syn_81.wav", "onoffCaption": "tapping clicking clanking at 0-1", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_86.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.1-4.1", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_88.wav", "onoffCaption": "car horn honking at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_92.wav", "onoffCaption": "door slamming at 0-1, 2-4", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_95.wav", "onoffCaption": "woman laughing at 1.5-3.5, 4.0-6.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_102.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_105.wav", "onoffCaption": "door slamming at 0.355-2.581", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_111.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_116.wav", "onoffCaption": "sheep goat bleating at 0-1", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_118.wav", "onoffCaption": "sheep goat bleating at 0.5-1.5", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_121.wav", "onoffCaption": "cat meowing at 0-1", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_123.wav", "onoffCaption": "sheep goat bleating at 0.0-2.0, 3.0-5.0", "frequencyCaption": "sheep goat bleating two times"} 
+{"filepath": "data/single_event_multi_identity_test/syn_126.wav", "onoffCaption": "burping belching at 0.5-2.5, 3.0-5.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_128.wav", "onoffCaption": "train horn at 0-1", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_132.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_135.wav", "onoffCaption": "whistling at 0-1, 2-3", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_139.wav", "onoffCaption": "spraying at 0-1, 1-2", "frequencyCaption": "spraying two times"} +{"filepath": "data/single_event_multi_identity_test/syn_143.wav", "onoffCaption": "door knocking at 0.645-2.772, 3.875-6.782, 7.405-9.692", "frequencyCaption": "door knocking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_144.wav", "onoffCaption": "spraying at 0-1, 2-3, 4-5", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_150.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_157.wav", "onoffCaption": "explosion at 0.5-1.5, 2-3", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_159.wav", "onoffCaption": "sneeze at 0.5-1.5, 2.5-3.5", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_160.wav", "onoffCaption": "woman laughing at 0.0-2.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_167.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_169.wav", "onoffCaption": 
"gunshot at 0.0-2.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_173.wav", "onoffCaption": "explosion at 0-3", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_174.wav", "onoffCaption": "duck quacking at 0.2-2.2, 3.2-5.2", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_176.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.5-4.5", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_182.wav", "onoffCaption": "car horn honking at 0.653-3.872", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_185.wav", "onoffCaption": "dog barking at 0-2, 3-5", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_189.wav", "onoffCaption": "burping belching at 0-1, 2-3", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_191.wav", "onoffCaption": "tapping clicking clanking at 0.0-4.0", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_193.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_196.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_198.wav", "onoffCaption": "sheep goat bleating at 0.56-2.56", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_7.wav", "onoffCaption": "door slamming at 0-2, 2-4", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_9.wav", "onoffCaption": "sneeze at 0.5-1.5, 2.0-3.0", "frequencyCaption": "sneeze two times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_10.wav", "onoffCaption": "door slamming at 0.0-1.0", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_17.wav", "onoffCaption": "gunshot at 0.0-2.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_19.wav", "onoffCaption": "thump thud at 1.9-4.5, 5.5-8.1", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_20.wav", "onoffCaption": "dog barking at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_27.wav", "onoffCaption": "whistling at 0-1, 2-3", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_29.wav", "onoffCaption": "woman laughing at 0-1", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_33.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_34.wav", "onoffCaption": "dog barking at 0-1, 1.5-2.5, 3-4", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_42.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_45.wav", "onoffCaption": "woman laughing at 0-1", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_56.wav", "onoffCaption": "cat meowing at 0-1", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_58.wav", "onoffCaption": "spraying at 0.5-1.5", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_multi_identity_test/syn_61.wav", "onoffCaption": "sheep goat bleating at 0.8-2.8, 
3.8-5.8", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_66.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_68.wav", "onoffCaption": "door slamming at 0.355-2.581", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_72.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_75.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_multi_identity_test/syn_83.wav", "onoffCaption": "spraying at 0-1, 2-3, 4-5", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_84.wav", "onoffCaption": "burping belching at 0-3", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_multi_identity_test/syn_90.wav", "onoffCaption": "whistling at 1-2", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_97.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_99.wav", "onoffCaption": "gunshot at 0.5-2.5, 3.0-5.0", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_100.wav", "onoffCaption": "gunshot at 0-1, 2-3", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_107.wav", "onoffCaption": "cat meowing at 0-1, 2-3, 4-5", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_109.wav", "onoffCaption": "cat meowing at 0-1, 2-3, 4-5", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_113.wav", 
"onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_114.wav", "onoffCaption": "explosion at 0.0-3.0", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_124.wav", "onoffCaption": "woman laughing at 0-1, 1-2", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_130.wav", "onoffCaption": "gunshot at 0.0-2.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_137.wav", "onoffCaption": "train horn at 0-1, 2-3, 4-5", "frequencyCaption": "train horn three times"} +{"filepath": "data/single_event_multi_identity_test/syn_141.wav", "onoffCaption": "woman laughing at 0.5-3.5", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_146.wav", "onoffCaption": "sneeze at 0.8-1.8", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_multi_identity_test/syn_148.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_152.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_155.wav", "onoffCaption": "spraying at 0.033-1.519", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_multi_identity_test/syn_162.wav", "onoffCaption": "explosion at 0-1, 2-3", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_165.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_171.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_178.wav", "onoffCaption": "tapping clicking clanking at 1-3, 4-6", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_180.wav", "onoffCaption": "cow mooing at 0-3", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_187.wav", "onoffCaption": "explosion at 1.5-4.5", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_194.wav", "onoffCaption": "gunshot at 0-1", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_11.wav", "onoffCaption": "door knocking at 1-2, 3-4", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_16.wav", "onoffCaption": "burping belching at 0.5-3.5", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_18.wav", "onoffCaption": "burping belching at 1.0-2.0", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_21.wav", "onoffCaption": "burping belching at 0.5-1.5", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_26.wav", "onoffCaption": "spraying at 0-1", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_28.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_32.wav", "onoffCaption": "gunshot at 0-1, 2-3, 4-5", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_single_identity_test/syn_35.wav", "onoffCaption": "woman laughing at 0-2, 3-5", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_43.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", 
"frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_44.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_50.wav", "onoffCaption": "door knocking at 0-1, 2-3", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_57.wav", "onoffCaption": "train horn at 0-1", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_59.wav", "onoffCaption": "gunshot at 0-1, 2-3, 4-5", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_single_identity_test/syn_60.wav", "onoffCaption": "cow mooing at 0.0-2.0, 2.5-4.5", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_67.wav", "onoffCaption": "cow mooing at 1.0-3.0, 4.0-6.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_69.wav", "onoffCaption": "burping belching at 0.0-2.0, 2.1-4.1", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_73.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_74.wav", "onoffCaption": "cow mooing at 0-2, 3-5", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_82.wav", "onoffCaption": "woman laughing at 2.0-6.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_85.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_91.wav", "onoffCaption": "tapping clicking clanking at 1.0-4.0", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": 
"data/single_event_single_identity_test/syn_96.wav", "onoffCaption": "door knocking at 0-1", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_98.wav", "onoffCaption": "door slamming at 0.355-2.581", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_single_identity_test/syn_101.wav", "onoffCaption": "spraying at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_106.wav", "onoffCaption": "spraying at 0.0-1.0", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_108.wav", "onoffCaption": "gunshot at 0.2-2.2, 3.2-5.2", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_112.wav", "onoffCaption": "burping belching at 0.871-2.871, 3.871-5.871", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_115.wav", "onoffCaption": "explosion at 0-1", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_122.wav", "onoffCaption": "tapping clicking clanking at 1.0-5.0", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_125.wav", "onoffCaption": "explosion at 1.0-3.0, 3.5-5.5", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_single_identity_test/syn_131.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_single_identity_test/syn_136.wav", "onoffCaption": "car horn honking at 0-1", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_single_identity_test/syn_138.wav", "onoffCaption": "explosion at 0-1", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_140.wav", 
"onoffCaption": "train horn at 0-1, 1-2", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_single_identity_test/syn_147.wav", "onoffCaption": "explosion at 0-1, 2-3, 4-5", "frequencyCaption": "explosion three times"} +{"filepath": "data/single_event_single_identity_test/syn_149.wav", "onoffCaption": "spraying at 0.1-1.1, 1.2-2.2, 3.3-4.3", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_153.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_154.wav", "onoffCaption": "explosion at 0.0-1.0", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_163.wav", "onoffCaption": "sneeze at 0-1", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_164.wav", "onoffCaption": "sneeze at 0-1", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_170.wav", "onoffCaption": "burping belching at 0.5-2.5", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_175.wav", "onoffCaption": "explosion at 2.941-5.813", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_177.wav", "onoffCaption": "door knocking at 0-1, 2-3", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_179.wav", "onoffCaption": "explosion at 0.0-4.0", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_181.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_186.wav", "onoffCaption": "sheep goat bleating at 0.0-2.0, 2.5-4.5", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": 
"data/single_event_single_identity_test/syn_188.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_190.wav", "onoffCaption": "woman laughing at 0-1, 2-3", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_192.wav", "onoffCaption": "door knocking at 1-2, 3-4", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_195.wav", "onoffCaption": "cow mooing at 1.5-3.5, 4.0-6.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_13.wav", "onoffCaption": "tapping clicking clanking at 0.536-3.976", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_14.wav", "onoffCaption": "woman laughing at 0-2", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_23.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_24.wav", "onoffCaption": "dog barking at 0.311-2.711", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_30.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_37.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_39.wav", "onoffCaption": "whistling at 0-1, 2-3", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_single_identity_test/syn_41.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 2.75-4.75", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_46.wav", "onoffCaption": "car 
horn honking at 1.0-3.0, 4.0-6.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_single_identity_test/syn_48.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_single_identity_test/syn_52.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_55.wav", "onoffCaption": "gunshot at 0.0-2.0, 3.0-5.0", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_62.wav", "onoffCaption": "burping belching at 1.5-3.5, 4.0-6.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_65.wav", "onoffCaption": "sheep goat bleating at 1.0-3.0, 4.0-6.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_71.wav", "onoffCaption": "tapping clicking clanking at 0.5-3.0", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_76.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 3.0-5.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_78.wav", "onoffCaption": "train horn at 0-1, 2-3", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_single_identity_test/syn_80.wav", "onoffCaption": "whistling at 0-1, 2-3", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_single_identity_test/syn_87.wav", "onoffCaption": "car horn honking at 1.817-4.12, 6.106-8.453", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_single_identity_test/syn_89.wav", "onoffCaption": "train horn at 0-1", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_93.wav", "onoffCaption": 
"dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_94.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_103.wav", "onoffCaption": "gunshot at 0.0-2.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_104.wav", "onoffCaption": "cat meowing at 0-1, 2-3, 4-5", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_110.wav", "onoffCaption": "whistling at 0-1", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_117.wav", "onoffCaption": "cat meowing at 1.0-3.0, 4.0-6.0", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_119.wav", "onoffCaption": "car horn honking at 0.0-2.0", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_single_identity_test/syn_120.wav", "onoffCaption": "door knocking at 0-1", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_127.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 3-5", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_129.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3, 4-5", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/single_event_single_identity_test/syn_133.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.5-4.5", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_134.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_142.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog 
barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_145.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_151.wav", "onoffCaption": "burping belching at 1-3", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_156.wav", "onoffCaption": "cow mooing at 0-3", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_single_identity_test/syn_158.wav", "onoffCaption": "door knocking at 0-1", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_161.wav", "onoffCaption": "spraying at 0-1, 2-3, 4-5", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_166.wav", "onoffCaption": "tapping clicking clanking at 0.032-3.472, 4.758-7.489", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_168.wav", "onoffCaption": "explosion at 2.941-5.813", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_172.wav", "onoffCaption": "gunshot at 0.0-2.0", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_183.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_184.wav", "onoffCaption": "spraying at 0-1", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_197.wav", "onoffCaption": "sheep goat bleating at 0.0-2.0, 3.0-5.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_199.wav", "onoffCaption": "dog barking at 0-2, 2-4", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_200.wav", 
"onoffCaption": "thump thud at 2.224-5.891, 7.389-9.889", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_single_identity_test/syn_12.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_15.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_22.wav", "onoffCaption": "whistling at 2.603-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_25.wav", "onoffCaption": "explosion at 0.0-2.0, 2.5-4.5", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_single_identity_test/syn_31.wav", "onoffCaption": "gunshot at 0.0-2.0, 2.5-4.5, 5.0-7.0", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_single_identity_test/syn_36.wav", "onoffCaption": "dog barking at 0.5-1.5", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_38.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_40.wav", "onoffCaption": "sheep goat bleating at 0-1", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_single_identity_test/syn_47.wav", "onoffCaption": "door slamming at 0-1, 2-3, 4-5", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_single_identity_test/syn_49.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_51.wav", "onoffCaption": "cat meowing at 0.0-1.0, 2.0-3.0, 4.0-5.0", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_53.wav", "onoffCaption": "cat meowing at 0-2", 
"frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_54.wav", "onoffCaption": "gunshot at 0-1, 2-3", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_63.wav", "onoffCaption": "door slamming at 0.355-2.581", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_single_identity_test/syn_64.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3, 4-5", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/single_event_single_identity_test/syn_70.wav", "onoffCaption": "sneeze at 1.3-2.403, 4.759-6.442", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_single_identity_test/syn_77.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_79.wav", "onoffCaption": "tapping clicking clanking at 0.536-3.976", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_81.wav", "onoffCaption": "spraying at 0-1", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_86.wav", "onoffCaption": "door knocking at 1-2", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_88.wav", "onoffCaption": "cow mooing at 1.0-3.0, 4.0-6.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_92.wav", "onoffCaption": "train horn at 0.0-2.0, 2.5-4.5", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_single_identity_test/syn_95.wav", "onoffCaption": "thump thud at 0.0-1.0", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_single_identity_test/syn_102.wav", "onoffCaption": "thump thud at 0-1, 2-3", "frequencyCaption": "thump thud two times"} +{"filepath": 
"data/single_event_single_identity_test/syn_105.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_111.wav", "onoffCaption": "door knocking at 0-1, 2-3", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_116.wav", "onoffCaption": "gunshot at 0-1, 2-3", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_118.wav", "onoffCaption": "cat meowing at 0-3", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_121.wav", "onoffCaption": "door knocking at 1.155-5.305", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_126.wav", "onoffCaption": "sheep goat bleating at 0.5-2.5, 3.0-5.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_128.wav", "onoffCaption": "tapping clicking clanking at 0-1", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_132.wav", "onoffCaption": "cat meowing at 0-1, 2-3, 4-5", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_135.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_139.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_single_identity_test/syn_143.wav", "onoffCaption": "cat meowing at 0-1", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_144.wav", "onoffCaption": "cow mooing at 1.954-4.602, 6.719-9.729", "frequencyCaption": "cow mooing two times"} +{"filepath": 
"data/single_event_single_identity_test/syn_150.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_157.wav", "onoffCaption": "sneeze at 0-1", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_159.wav", "onoffCaption": "sneeze at 0-1", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_160.wav", "onoffCaption": "tapping clicking clanking at 1.5-3.5, 5-7", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_167.wav", "onoffCaption": "cat meowing at 0-1, 2-3", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_169.wav", "onoffCaption": "train horn at 0-3.5", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_173.wav", "onoffCaption": "thump thud at 0-1", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_single_identity_test/syn_174.wav", "onoffCaption": "cat meowing at 0-1.2, 2-3.2, 4-5.2", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_182.wav", "onoffCaption": "door knocking at 1-3, 4-6", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_185.wav", "onoffCaption": "gunshot at 0.0-2.0, 3.0-5.0", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_189.wav", "onoffCaption": "door knocking at 2.047-4.422", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_191.wav", "onoffCaption": "cow mooing at 0-3", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_single_identity_test/syn_196.wav", "onoffCaption": "door knocking at 0-1, 2-3, 4-5", "frequencyCaption": 
"door knocking three times"} +{"filepath": "data/single_event_single_identity_test/syn_198.wav", "onoffCaption": "explosion at 0.0-1.0", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_10.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_17.wav", "onoffCaption": "burping belching at 0.871-5.871, 7.218-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_19.wav", "onoffCaption": "cat meowing at 0-1, 2-3", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_20.wav", "onoffCaption": "tapping clicking clanking at 1.851-5.291, 7.569-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_27.wav", "onoffCaption": "spraying at 0-1, 2-3, 4-5", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_29.wav", "onoffCaption": "tapping clicking clanking at 1-3, 4-6", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_33.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_34.wav", "onoffCaption": "burping belching at 0-1, 2-3", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_42.wav", "onoffCaption": "dog barking at 2.579-4.579", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_45.wav", "onoffCaption": "cat meowing at 0-1", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_56.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": 
"data/single_event_single_identity_test/syn_58.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_single_identity_test/syn_61.wav", "onoffCaption": "spraying at 0.0-1.0", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_66.wav", "onoffCaption": "cat meowing at 0-1, 2-3", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_68.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_72.wav", "onoffCaption": "sneeze at 1.3-2.403, 4.759-6.442", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_single_identity_test/syn_75.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_83.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_84.wav", "onoffCaption": "woman laughing at 0-2, 2-4", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_90.wav", "onoffCaption": "dog barking at 0-1, 2-3, 4-5", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_97.wav", "onoffCaption": "gunshot at 0-1", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_99.wav", "onoffCaption": "door knocking at 1-3, 4-6", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_100.wav", "onoffCaption": "burping belching at 0.871-2.871, 3.891-5.891", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_107.wav", "onoffCaption": 
"woman laughing at 0-2, 5-7", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_109.wav", "onoffCaption": "cat meowing at 0.0-2.0, 3.0-5.0, 6.0-8.0", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_113.wav", "onoffCaption": "duck quacking at 0-1", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_114.wav", "onoffCaption": "duck quacking at 0-1, 2-3", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_single_identity_test/syn_123.wav", "onoffCaption": "woman laughing at 2.777-6.165", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_124.wav", "onoffCaption": "door slamming at 0.145-1.085, 2.545-4.463", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_130.wav", "onoffCaption": "duck quacking at 0.0-2.0, 3.0-5.0, 6.0-8.0", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/single_event_single_identity_test/syn_137.wav", "onoffCaption": "dog barking at 0.5-2.5, 3.0-5.0", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_141.wav", "onoffCaption": "woman laughing at 2.782-5.368", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_146.wav", "onoffCaption": "dog barking at 0-1, 2-3", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_148.wav", "onoffCaption": "thump thud at 2-3", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_single_identity_test/syn_152.wav", "onoffCaption": "sheep goat bleating at 0-1, 2-3", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_155.wav", "onoffCaption": "woman laughing at 
0.5-2.5, 3.0-5.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_162.wav", "onoffCaption": "door knocking at 0-1, 1-2", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_165.wav", "onoffCaption": "door slamming at 0.355-2.581", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_single_identity_test/syn_171.wav", "onoffCaption": "woman laughing at 2.672-5.672", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_176.wav", "onoffCaption": "burping belching at 0.5-3.5, 4.5-7.5", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_178.wav", "onoffCaption": "sheep goat bleating at 1-3, 4-7", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_180.wav", "onoffCaption": "cow mooing at 0-3", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_single_identity_test/syn_187.wav", "onoffCaption": "gunshot at 0.5-2.5, 3.0-5.0", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_193.wav", "onoffCaption": "tapping clicking clanking at 1.851-5.291, 7.569-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_194.wav", "onoffCaption": "train horn at 0-3", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_1.wav", "onoffCaption": "cat meowing at 1.0-3.0, 4.0-6.0", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_2.wav", "onoffCaption": "cat meowing at 0.5-1.5, 2.5-3.5", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_3.wav", "onoffCaption": "burping belching at 0-1, 2-3, 4-5", 
"frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_single_identity_test/syn_4.wav", "onoffCaption": "car horn honking at 0.664-3.129, 4.357-7.014", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_single_identity_test/syn_5.wav", "onoffCaption": "dog barking at 0.0-2.0", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_6.wav", "onoffCaption": "explosion at 0-1", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_7.wav", "onoffCaption": "dog barking at 0-1", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_8.wav", "onoffCaption": "burping belching at 2.861-8.462", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_9.wav", "onoffCaption": "burping belching at 0.569-4.438", "frequencyCaption": "burping belching one times"} diff --git a/picoaudio/data/meta_data/test-onoff-control_multi-event.json b/picoaudio/data/meta_data/test-onoff-control_multi-event.json new file mode 100644 index 0000000000000000000000000000000000000000..7c57e688ed2805ae7ac07be7b61dc10e52d3b82e --- /dev/null +++ b/picoaudio/data/meta_data/test-onoff-control_multi-event.json @@ -0,0 +1,200 @@ +{"filepath": "data/multi_event_test/syn_1.wav", "onoffCaption": "cat meowing at 0.393-1.783, 3.975-5.365 and whistling at 0.861-5.455 and explosion at 2.089-4.841, 5.738-8.538", "frequencyCaption": "cat meowing two times and whistling one times and explosion two times"} +{"filepath": "data/multi_event_test/syn_6.wav", "onoffCaption": "whistling at 2.093-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_8.wav", "onoffCaption": "cow mooing at 1.177-3.977, 5.15-7.774", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_11.wav", "onoffCaption": "burping belching 
at 1.039-3.198, 4.085-6.244 and dog barking at 3.119-5.119", "frequencyCaption": "burping belching two times and dog barking one times"} +{"filepath": "data/multi_event_test/syn_16.wav", "onoffCaption": "duck quacking at 0.799-2.799, 3.634-5.634, 6.976-8.976", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_18.wav", "onoffCaption": "door knocking at 1.225-3.352, 5.173-7.3 and door slamming at 5.439-7.678", "frequencyCaption": "door knocking two times and door slamming one times"} +{"filepath": "data/multi_event_test/syn_21.wav", "onoffCaption": "dog barking at 2.947-4.947, 6.186-8.186", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_26.wav", "onoffCaption": "whistling at 2.848-7.442", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_28.wav", "onoffCaption": "cow mooing at 2.639-5.263 and spraying at 8.565-9.667", "frequencyCaption": "cow mooing one times and spraying one times"} +{"filepath": "data/multi_event_test/syn_32.wav", "onoffCaption": "duck quacking at 0.039-2.039, 3.171-5.171, 5.938-7.938", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_35.wav", "onoffCaption": "car horn honking at 2.31-5.271, 5.91-8.871", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_test/syn_43.wav", "onoffCaption": "dog barking at 2.157-4.157, 5.953-7.953 and burping belching at 2.431-5.388, 6.452-8.611 and explosion at 4.8-7.552", "frequencyCaption": "dog barking two times and burping belching two times and explosion one times"} +{"filepath": "data/multi_event_test/syn_44.wav", "onoffCaption": "sneeze at 2.638-6.791", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_50.wav", "onoffCaption": "car horn honking at 0.874-3.835, 4.429-7.39 and sneeze at 1.814-5.167 and train horn at 2.818-7.898", "frequencyCaption": "car horn honking two times and sneeze one times 
and train horn one times"} +{"filepath": "data/multi_event_test/syn_57.wav", "onoffCaption": "dog barking at 3.007-5.007, 6.103-8.103 and cow mooing at 3.017-5.641", "frequencyCaption": "dog barking two times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_59.wav", "onoffCaption": "door slamming at 0.035-2.274 and explosion at 3.857-6.609, 7.377-10.0", "frequencyCaption": "door slamming one times and explosion two times"} +{"filepath": "data/multi_event_test/syn_60.wav", "onoffCaption": "train horn at 0.062-3.062", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_test/syn_67.wav", "onoffCaption": "whistling at 1.616-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_69.wav", "onoffCaption": "door knocking at 0.237-2.801, 4.117-6.681, 7.378-9.942", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_73.wav", "onoffCaption": "door knocking at 0.045-2.172, 2.718-5.282, 6.027-8.591 and sneeze at 2.92-6.273, 6.847-9.032", "frequencyCaption": "door knocking three times and sneeze two times"} +{"filepath": "data/multi_event_test/syn_74.wav", "onoffCaption": "spraying at 0.38-1.176, 3.06-3.856 and gunshot at 1.729-3.729, 4.367-6.367, 7.031-9.031", "frequencyCaption": "spraying two times and gunshot three times"} +{"filepath": "data/multi_event_test/syn_82.wav", "onoffCaption": "dog barking at 0.497-2.497, 4.187-6.187", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_91.wav", "onoffCaption": "gunshot at 0.501-2.501, 3.148-5.148", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_test/syn_96.wav", "onoffCaption": "door slamming at 0.154-2.393, 3.23-4.641, 5.232-7.471", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_98.wav", "onoffCaption": "thump thud at 1.835-4.135, 6.505-9.18", "frequencyCaption": "thump thud two times"} +{"filepath": 
"data/multi_event_test/syn_101.wav", "onoffCaption": "dog barking at 0.681-2.681", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_test/syn_106.wav", "onoffCaption": "burping belching at 0.093-3.05, 3.962-6.121, 7.309-9.468", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_test/syn_108.wav", "onoffCaption": "sneeze at 3.287-7.44", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_112.wav", "onoffCaption": "woman laughing at 1.823-4.587, 6.243-9.007", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_115.wav", "onoffCaption": "duck quacking at 0.044-1.862 and tapping clicking clanking at 0.436-3.876, 5.547-7.6", "frequencyCaption": "duck quacking one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_122.wav", "onoffCaption": "door knocking at 1.266-3.83, 4.854-7.418, 7.929-10.0", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_125.wav", "onoffCaption": "cow mooing at 2.954-5.754, 6.384-9.008", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_131.wav", "onoffCaption": "whistling at 0.666-5.26, 5.984-8.335 and cat meowing at 0.904-2.294", "frequencyCaption": "whistling two times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_136.wav", "onoffCaption": "sheep goat bleating at 0.226-2.226, 3.707-5.707 and whistling at 1.058-5.652, 6.943-10.0 and woman laughing at 2.749-7.207", "frequencyCaption": "sheep goat bleating two times and whistling two times and woman laughing one times"} +{"filepath": "data/multi_event_test/syn_138.wav", "onoffCaption": "gunshot at 0.785-2.785 and tapping clicking clanking at 5.685-9.125", "frequencyCaption": "gunshot one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_140.wav", "onoffCaption": "door knocking at 0.341-2.468, 
3.382-5.946, 7.206-9.77", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_147.wav", "onoffCaption": "door slamming at 0.305-1.716, 2.95-4.361, 5.691-7.102", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_149.wav", "onoffCaption": "car horn honking at 0.666-3.35, 5.748-8.432 and spraying at 7.494-8.29, 8.904-9.7", "frequencyCaption": "car horn honking two times and spraying two times"} +{"filepath": "data/multi_event_test/syn_153.wav", "onoffCaption": "cat meowing at 3.029-4.355", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_test/syn_154.wav", "onoffCaption": "cat meowing at 2.044-3.37 and door knocking at 2.866-5.43", "frequencyCaption": "cat meowing one times and door knocking one times"} +{"filepath": "data/multi_event_test/syn_163.wav", "onoffCaption": "sheep goat bleating at 0.139-2.139, 3.188-5.188, 6.077-8.077", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_test/syn_164.wav", "onoffCaption": "whistling at 0.15-4.744, 6.868-8.971", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_test/syn_170.wav", "onoffCaption": "dog barking at 0.286-2.286, 3.801-5.801", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_177.wav", "onoffCaption": "thump thud at 0.593-2.893 and cow mooing at 4.617-7.241", "frequencyCaption": "thump thud one times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_179.wav", "onoffCaption": "cow mooing at 2.754-5.378, 6.145-8.769", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_181.wav", "onoffCaption": "cow mooing at 3.381-6.181, 7.936-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_186.wav", "onoffCaption": "gunshot at 0.131-2.131", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_test/syn_188.wav", 
"onoffCaption": "gunshot at 0.785-2.785, 3.847-5.847 and duck quacking at 2.99-4.99", "frequencyCaption": "gunshot two times and duck quacking one times"} +{"filepath": "data/multi_event_test/syn_192.wav", "onoffCaption": "spraying at 1.763-2.865, 5.335-6.437", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_test/syn_195.wav", "onoffCaption": "thump thud at 2.422-5.097, 5.945-8.245", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_test/syn_3.wav", "onoffCaption": "tapping clicking clanking at 2.711-6.151, 7.783-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_4.wav", "onoffCaption": "door slamming at 3.076-4.487, 6.877-8.288", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_13.wav", "onoffCaption": "duck quacking at 0.012-2.012, 3.202-5.202, 7.582-9.582", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_14.wav", "onoffCaption": "sneeze at 1.853-6.006", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_23.wav", "onoffCaption": "sneeze at 0.109-4.262, 6.151-8.608", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_24.wav", "onoffCaption": "woman laughing at 3.051-7.509", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_test/syn_30.wav", "onoffCaption": "burping belching at 3.234-6.191, 7.597-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_37.wav", "onoffCaption": "thump thud at 1.883-4.558, 6.153-8.453 and door knocking at 2.227-4.791, 5.771-8.335 and burping belching at 6.746-8.905", "frequencyCaption": "thump thud two times and door knocking two times and burping belching one times"} +{"filepath": "data/multi_event_test/syn_39.wav", "onoffCaption": "train horn at 2.197-5.197, 5.755-8.755", "frequencyCaption": "train horn two times"} 
+{"filepath": "data/multi_event_test/syn_41.wav", "onoffCaption": "thump thud at 1.465-3.765", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_test/syn_48.wav", "onoffCaption": "cat meowing at 0.07-1.396, 3.738-5.064, 6.912-8.238", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_test/syn_52.wav", "onoffCaption": "gunshot at 0.761-2.761 and duck quacking at 0.994-2.994 and tapping clicking clanking at 5.144-8.584", "frequencyCaption": "gunshot one times and duck quacking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_55.wav", "onoffCaption": "sneeze at 2.529-6.682, 7.206-9.677", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_62.wav", "onoffCaption": "woman laughing at 0.152-2.916, 5.112-7.934", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_65.wav", "onoffCaption": "gunshot at 3.755-5.755, 6.54-8.54", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_test/syn_71.wav", "onoffCaption": "door slamming at 0.023-2.262, 4.712-6.123 and whistling at 1.979-6.573", "frequencyCaption": "door slamming two times and whistling one times"} +{"filepath": "data/multi_event_test/syn_76.wav", "onoffCaption": "dog barking at 0.741-2.741", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_test/syn_78.wav", "onoffCaption": "explosion at 0.11-2.862, 4.292-7.044 and duck quacking at 2.338-4.156, 5.898-7.716", "frequencyCaption": "explosion two times and duck quacking two times"} +{"filepath": "data/multi_event_test/syn_80.wav", "onoffCaption": "door slamming at 0.695-2.106 and sheep goat bleating at 0.985-2.985", "frequencyCaption": "door slamming one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_85.wav", "onoffCaption": "door knocking at 4.074-6.201", "frequencyCaption": "door knocking one times"} +{"filepath": 
"data/multi_event_test/syn_87.wav", "onoffCaption": "explosion at 0.371-3.123, 5.335-8.087", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_89.wav", "onoffCaption": "car horn honking at 2.099-5.06 and cat meowing at 5.989-7.315", "frequencyCaption": "car horn honking one times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_93.wav", "onoffCaption": "dog barking at 0.988-2.988, 5.289-7.289", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_94.wav", "onoffCaption": "gunshot at 1.463-3.463, 4.41-6.41, 7.226-9.226 and thump thud at 1.729-4.404, 6.318-8.993 and sheep goat bleating at 1.895-3.895, 5.909-7.909", "frequencyCaption": "gunshot three times and thump thud two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_103.wav", "onoffCaption": "whistling at 2.759-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_104.wav", "onoffCaption": "duck quacking at 4.149-5.967", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_test/syn_110.wav", "onoffCaption": "train horn at 0.111-5.191 and duck quacking at 0.894-2.894 and cow mooing at 5.062-7.862", "frequencyCaption": "train horn one times and duck quacking one times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_117.wav", "onoffCaption": "sheep goat bleating at 3.487-5.487, 7.705-9.705", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_119.wav", "onoffCaption": "train horn at 2.056-5.056 and door knocking at 2.912-5.039, 5.997-8.124", "frequencyCaption": "train horn one times and door knocking two times"} +{"filepath": "data/multi_event_test/syn_120.wav", "onoffCaption": "burping belching at 2.114-5.071, 5.723-8.68", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_127.wav", "onoffCaption": "whistling at 1.653-10.0", 
"frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_129.wav", "onoffCaption": "door knocking at 0.592-2.719, 3.326-5.453, 6.255-8.382", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_133.wav", "onoffCaption": "duck quacking at 1.444-3.262, 4.595-6.413", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_test/syn_134.wav", "onoffCaption": "car horn honking at 0.439-3.123, 5.193-7.877", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_test/syn_142.wav", "onoffCaption": "sneeze at 0.338-4.491, 5.776-7.91", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_145.wav", "onoffCaption": "door knocking at 0.308-2.872, 4.395-6.959 and whistling at 0.583-9.383", "frequencyCaption": "door knocking two times and whistling one times"} +{"filepath": "data/multi_event_test/syn_151.wav", "onoffCaption": "dog barking at 0.368-2.368, 3.112-5.112, 5.983-7.983", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_156.wav", "onoffCaption": "car horn honking at 0.03-2.714, 3.401-6.085, 6.775-9.459", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_test/syn_158.wav", "onoffCaption": "tapping clicking clanking at 3.057-6.497, 7.876-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_161.wav", "onoffCaption": "spraying at 0.049-1.151, 2.004-2.8", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_test/syn_166.wav", "onoffCaption": "woman laughing at 1.442-5.9", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_test/syn_168.wav", "onoffCaption": "sheep goat bleating at 0.016-2.016", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_172.wav", "onoffCaption": "door knocking at 0.153-2.28, 3.142-5.706, 
6.305-8.869", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_test/syn_175.wav", "onoffCaption": "cow mooing at 0.61-3.41 and spraying at 3.012-4.114", "frequencyCaption": "cow mooing one times and spraying one times"} +{"filepath": "data/multi_event_test/syn_183.wav", "onoffCaption": "explosion at 0.192-5.114, 5.844-8.596", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_184.wav", "onoffCaption": "sheep goat bleating at 0.322-2.322", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_190.wav", "onoffCaption": "whistling at 2.571-7.165", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_197.wav", "onoffCaption": "tapping clicking clanking at 1.043-4.483, 5.786-9.226", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_199.wav", "onoffCaption": "duck quacking at 3.246-5.246 and cat meowing at 7.245-8.635", "frequencyCaption": "duck quacking one times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_200.wav", "onoffCaption": "explosion at 3.045-5.797, 7.133-9.196", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_2.wav", "onoffCaption": "door knocking at 2.42-4.984", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_test/syn_5.wav", "onoffCaption": "burping belching at 3.676-5.835", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_test/syn_12.wav", "onoffCaption": "sheep goat bleating at 1.611-3.611 and sneeze at 5.808-9.161", "frequencyCaption": "sheep goat bleating one times and sneeze one times"} +{"filepath": "data/multi_event_test/syn_15.wav", "onoffCaption": "tapping clicking clanking at 0.807-4.247", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_22.wav", "onoffCaption": "whistling at 3.354-7.948", 
"frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_25.wav", "onoffCaption": "burping belching at 2.316-5.273, 6.42-9.377", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_31.wav", "onoffCaption": "woman laughing at 0.674-5.132, 6.464-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_36.wav", "onoffCaption": "door slamming at 0.106-2.345, 2.885-5.124, 5.997-8.236", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_38.wav", "onoffCaption": "cat meowing at 0.245-1.571, 3.125-4.451, 5.016-6.342", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_test/syn_40.wav", "onoffCaption": "door knocking at 2.051-4.178, 4.942-7.506 and cow mooing at 2.928-5.728", "frequencyCaption": "door knocking two times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_46.wav", "onoffCaption": "door slamming at 0.382-1.793, 2.674-4.913", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_47.wav", "onoffCaption": "spraying at 0.719-1.515, 2.813-3.915, 4.469-5.265 and cow mooing at 1.592-4.392, 4.998-7.798", "frequencyCaption": "spraying three times and cow mooing two times"} +{"filepath": "data/multi_event_test/syn_49.wav", "onoffCaption": "sheep goat bleating at 0.44-2.44, 3.141-5.141 and tapping clicking clanking at 1.283-4.723, 6.144-8.215", "frequencyCaption": "sheep goat bleating two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_51.wav", "onoffCaption": "train horn at 0.258-3.258, 4.737-7.277", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_test/syn_53.wav", "onoffCaption": "dog barking at 0.072-2.072, 3.076-5.076, 6.003-8.003", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_54.wav", "onoffCaption": "train horn at 0.347-3.347, 
4.652-7.652", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_test/syn_63.wav", "onoffCaption": "train horn at 0.507-3.507 and cat meowing at 7.463-8.789 and dog barking at 7.612-9.612", "frequencyCaption": "train horn one times and cat meowing one times and dog barking one times"} +{"filepath": "data/multi_event_test/syn_64.wav", "onoffCaption": "sheep goat bleating at 1.521-3.521", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_70.wav", "onoffCaption": "whistling at 2.267-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_77.wav", "onoffCaption": "cow mooing at 0.75-3.55", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_test/syn_79.wav", "onoffCaption": "dog barking at 1.282-3.282, 4.117-6.117, 6.789-8.789", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_81.wav", "onoffCaption": "gunshot at 0.019-2.019, 2.851-4.851, 5.918-7.918", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_86.wav", "onoffCaption": "whistling at 1.438-6.032 and woman laughing at 2.351-5.115, 6.601-9.365", "frequencyCaption": "whistling one times and woman laughing two times"} +{"filepath": "data/multi_event_test/syn_88.wav", "onoffCaption": "sheep goat bleating at 3.021-5.021, 6.26-8.26", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_92.wav", "onoffCaption": "door slamming at 0.346-1.757, 2.569-3.98, 5.839-7.25 and tapping clicking clanking at 2.508-5.948", "frequencyCaption": "door slamming three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_95.wav", "onoffCaption": "door slamming at 2.522-3.933, 5.673-7.084, 8.486-9.897", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_test/syn_102.wav", "onoffCaption": "door knocking at 2.145-4.272, 4.881-7.008", 
"frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_test/syn_105.wav", "onoffCaption": "train horn at 0.682-3.682, 4.465-6.698, 7.809-10.0", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_test/syn_111.wav", "onoffCaption": "whistling at 0.032-4.626, 6.182-10.0 and door slamming at 0.753-2.164", "frequencyCaption": "whistling two times and door slamming one times"} +{"filepath": "data/multi_event_test/syn_116.wav", "onoffCaption": "burping belching at 3.577-5.736, 6.261-9.218", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_118.wav", "onoffCaption": "sneeze at 3.124-6.477", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_test/syn_121.wav", "onoffCaption": "car horn honking at 0.782-3.743, 4.51-7.194, 7.76-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_test/syn_123.wav", "onoffCaption": "sheep goat bleating at 2.222-4.222, 6.493-8.493", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_126.wav", "onoffCaption": "sneeze at 2.136-6.289 and car horn honking at 2.473-5.434, 7.027-9.711", "frequencyCaption": "sneeze one times and car horn honking two times"} +{"filepath": "data/multi_event_test/syn_128.wav", "onoffCaption": "sheep goat bleating at 0.291-2.291 and door knocking at 0.293-2.42, 3.227-5.791", "frequencyCaption": "sheep goat bleating one times and door knocking two times"} +{"filepath": "data/multi_event_test/syn_132.wav", "onoffCaption": "sheep goat bleating at 0.295-2.295 and spraying at 0.328-1.124, 2.065-3.167, 4.421-5.217 and duck quacking at 0.387-2.387, 2.967-4.785, 5.384-7.384", "frequencyCaption": "sheep goat bleating one times and spraying three times and duck quacking three times"} +{"filepath": "data/multi_event_test/syn_135.wav", "onoffCaption": "tapping clicking clanking at 0.458-3.898, 5.425-8.865", "frequencyCaption": "tapping 
clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_139.wav", "onoffCaption": "thump thud at 2.477-4.777, 6.095-8.77", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_test/syn_143.wav", "onoffCaption": "spraying at 2.679-3.475 and explosion at 5.945-10.0", "frequencyCaption": "spraying one times and explosion one times"} +{"filepath": "data/multi_event_test/syn_144.wav", "onoffCaption": "duck quacking at 1.162-2.98, 3.994-5.994, 8.158-9.976", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_test/syn_150.wav", "onoffCaption": "gunshot at 1.946-3.946, 4.6-6.6, 7.322-9.322", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_157.wav", "onoffCaption": "train horn at 1.991-7.071", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_test/syn_159.wav", "onoffCaption": "door slamming at 3.182-5.421, 7.675-9.086", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_160.wav", "onoffCaption": "spraying at 0.179-0.975 and whistling at 3.947-10.0", "frequencyCaption": "spraying one times and whistling one times"} +{"filepath": "data/multi_event_test/syn_167.wav", "onoffCaption": "burping belching at 0.386-3.343, 4.105-6.264 and gunshot at 4.772-6.772", "frequencyCaption": "burping belching two times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_169.wav", "onoffCaption": "sneeze at 0.56-4.713, 5.69-7.783", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_173.wav", "onoffCaption": "sheep goat bleating at 0.834-2.834, 3.932-5.932, 6.656-8.656", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_test/syn_174.wav", "onoffCaption": "dog barking at 0.021-2.021, 2.529-4.529, 5.505-7.505", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_176.wav", "onoffCaption": "woman laughing at 2.645-5.409, 
7.198-9.435", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_test/syn_182.wav", "onoffCaption": "cow mooing at 0.007-2.807 and gunshot at 1.124-3.124", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_185.wav", "onoffCaption": "spraying at 2.564-3.666 and door knocking at 6.756-9.32", "frequencyCaption": "spraying one times and door knocking one times"} +{"filepath": "data/multi_event_test/syn_189.wav", "onoffCaption": "door slamming at 2.717-4.956, 5.586-6.997", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_test/syn_191.wav", "onoffCaption": "burping belching at 2.833-4.992, 6.271-8.43", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_193.wav", "onoffCaption": "cow mooing at 0.942-3.742, 4.83-7.454", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_test/syn_196.wav", "onoffCaption": "spraying at 3.461-4.563", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_test/syn_198.wav", "onoffCaption": "gunshot at 1.546-3.546, 4.501-6.501, 7.428-9.428", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_7.wav", "onoffCaption": "spraying at 0.113-0.909 and burping belching at 0.623-3.58", "frequencyCaption": "spraying one times and burping belching one times"} +{"filepath": "data/multi_event_test/syn_9.wav", "onoffCaption": "cow mooing at 1.06-3.86", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_test/syn_10.wav", "onoffCaption": "door knocking at 0.3-2.864, 5.022-7.586", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_test/syn_17.wav", "onoffCaption": "dog barking at 3.791-5.791, 7.757-9.757", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_19.wav", "onoffCaption": "gunshot at 0.007-2.007 and spraying at 4.251-5.047", 
"frequencyCaption": "gunshot one times and spraying one times"} +{"filepath": "data/multi_event_test/syn_20.wav", "onoffCaption": "tapping clicking clanking at 0.017-3.457, 5.475-7.882", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_27.wav", "onoffCaption": "dog barking at 2.012-4.012, 4.76-6.76", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_test/syn_29.wav", "onoffCaption": "tapping clicking clanking at 2.18-5.62, 6.49-9.93", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_33.wav", "onoffCaption": "dog barking at 2.805-4.805, 5.866-7.866 and car horn honking at 5.136-8.097", "frequencyCaption": "dog barking two times and car horn honking one times"} +{"filepath": "data/multi_event_test/syn_34.wav", "onoffCaption": "sheep goat bleating at 1.113-3.113", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_42.wav", "onoffCaption": "tapping clicking clanking at 2.443-5.883, 7.179-9.684", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_45.wav", "onoffCaption": "cat meowing at 0.324-1.65 and train horn at 4.186-9.266", "frequencyCaption": "cat meowing one times and train horn one times"} +{"filepath": "data/multi_event_test/syn_56.wav", "onoffCaption": "tapping clicking clanking at 1.696-5.136, 6.886-9.533", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_test/syn_58.wav", "onoffCaption": "door slamming at 2.48-3.891", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_test/syn_61.wav", "onoffCaption": "explosion at 2.489-5.241, 5.792-8.521 and train horn at 2.512-7.592 and woman laughing at 6.424-9.188", "frequencyCaption": "explosion two times and train horn one times and woman laughing one times"} +{"filepath": "data/multi_event_test/syn_66.wav", "onoffCaption": "sheep 
goat bleating at 1.634-3.634", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_test/syn_68.wav", "onoffCaption": "car horn honking at 0.051-3.012, 4.062-6.746, 7.319-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_test/syn_72.wav", "onoffCaption": "spraying at 0.013-0.809, 1.742-2.844 and thump thud at 1.117-3.792 and dog barking at 6.065-8.065", "frequencyCaption": "spraying two times and thump thud one times and dog barking one times"} +{"filepath": "data/multi_event_test/syn_75.wav", "onoffCaption": "explosion at 0.266-5.188, 6.431-9.183", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_test/syn_83.wav", "onoffCaption": "whistling at 2.863-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_test/syn_84.wav", "onoffCaption": "burping belching at 2.009-4.966, 6.768-8.927", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_test/syn_90.wav", "onoffCaption": "gunshot at 0.175-2.175", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_test/syn_97.wav", "onoffCaption": "cat meowing at 3.666-5.056", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_test/syn_99.wav", "onoffCaption": "duck quacking at 0.697-2.515, 3.677-5.677", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_test/syn_100.wav", "onoffCaption": "cat meowing at 0.122-1.512 and sheep goat bleating at 0.564-2.564, 3.078-5.078, 5.762-7.762", "frequencyCaption": "cat meowing one times and sheep goat bleating three times"} +{"filepath": "data/multi_event_test/syn_107.wav", "onoffCaption": "spraying at 0.005-1.107, 3.385-4.487 and dog barking at 1.269-3.269, 4.85-6.85 and tapping clicking clanking at 1.455-4.895, 5.47-8.91", "frequencyCaption": "spraying two times and dog barking two times and tapping clicking clanking two times"} +{"filepath": 
"data/multi_event_test/syn_109.wav", "onoffCaption": "cow mooing at 1.573-4.373 and gunshot at 7.482-9.482", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_113.wav", "onoffCaption": "whistling at 0.12-4.714 and tapping clicking clanking at 0.731-4.171", "frequencyCaption": "whistling one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_test/syn_114.wav", "onoffCaption": "car horn honking at 3.216-5.9 and door knocking at 3.814-6.378", "frequencyCaption": "car horn honking one times and door knocking one times"} +{"filepath": "data/multi_event_test/syn_124.wav", "onoffCaption": "gunshot at 2.794-4.794, 5.712-7.712", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_test/syn_130.wav", "onoffCaption": "dog barking at 0.835-2.835, 3.911-5.911, 6.459-8.459", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_test/syn_137.wav", "onoffCaption": "door knocking at 0.152-2.716 and cow mooing at 1.559-4.183 and gunshot at 5.826-7.826", "frequencyCaption": "door knocking one times and cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_test/syn_141.wav", "onoffCaption": "sneeze at 0.816-4.969, 5.643-9.796", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_146.wav", "onoffCaption": "sneeze at 0.145-4.298, 5.107-8.031 and cat meowing at 1.128-2.454", "frequencyCaption": "sneeze two times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_148.wav", "onoffCaption": "duck quacking at 3.185-5.003, 5.701-7.701 and cow mooing at 3.469-6.093", "frequencyCaption": "duck quacking two times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_152.wav", "onoffCaption": "tapping clicking clanking at 0.851-4.291, 4.863-7.054 and train horn at 5.524-8.524", "frequencyCaption": "tapping clicking clanking two times and train horn one times"} +{"filepath": 
"data/multi_event_test/syn_155.wav", "onoffCaption": "tapping clicking clanking at 0.869-4.309 and gunshot at 1.402-3.402 and cat meowing at 6.9-8.226", "frequencyCaption": "tapping clicking clanking one times and gunshot one times and cat meowing one times"} +{"filepath": "data/multi_event_test/syn_162.wav", "onoffCaption": "gunshot at 0.5-2.5, 3.074-5.074, 5.829-7.829", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_test/syn_165.wav", "onoffCaption": "thump thud at 0.322-2.622, 4.239-6.914 and whistling at 0.361-9.161", "frequencyCaption": "thump thud two times and whistling one times"} +{"filepath": "data/multi_event_test/syn_171.wav", "onoffCaption": "spraying at 1.23-2.332, 3.511-4.613, 5.79-6.892 and thump thud at 1.604-3.904 and sheep goat bleating at 1.985-3.985, 4.796-6.796", "frequencyCaption": "spraying three times and thump thud one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_test/syn_178.wav", "onoffCaption": "door slamming at 0.233-2.472 and woman laughing at 6.658-10.0", "frequencyCaption": "door slamming one times and woman laughing one times"} +{"filepath": "data/multi_event_test/syn_180.wav", "onoffCaption": "spraying at 2.203-3.305 and cow mooing at 4.398-7.198", "frequencyCaption": "spraying one times and cow mooing one times"} +{"filepath": "data/multi_event_test/syn_187.wav", "onoffCaption": "sneeze at 2.13-6.283, 6.866-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_test/syn_194.wav", "onoffCaption": "duck quacking at 2.028-3.846, 5.612-7.43", "frequencyCaption": "duck quacking two times"} diff --git a/picoaudio/data/meta_data/test-onoff-control_single-event.json b/picoaudio/data/meta_data/test-onoff-control_single-event.json new file mode 100644 index 0000000000000000000000000000000000000000..6071ff006d0d2a2618a7aaf573812f9a058ca4ac --- /dev/null +++ b/picoaudio/data/meta_data/test-onoff-control_single-event.json @@ -0,0 +1,400 @@ +{"filepath": 
"data/single_event_multi_identity_test/syn_1.wav", "onoffCaption": "cat meowing at 0.258-1.584", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_6.wav", "onoffCaption": "tapping clicking clanking at 1.246-4.686", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_8.wav", "onoffCaption": "door slamming at 2.564-4.803", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_11.wav", "onoffCaption": "dog barking at 0.084-2.084, 2.908-4.908", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_16.wav", "onoffCaption": "thump thud at 0.776-3.451", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_multi_identity_test/syn_18.wav", "onoffCaption": "sheep goat bleating at 3.833-5.833", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_21.wav", "onoffCaption": "sheep goat bleating at 2.491-4.491", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_26.wav", "onoffCaption": "tapping clicking clanking at 0.89-4.33", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_28.wav", "onoffCaption": "sneeze at 0.109-4.262, 6.151-8.608", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_32.wav", "onoffCaption": "cow mooing at 1.486-4.11", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_35.wav", "onoffCaption": "door slamming at 0.085-2.324, 4.153-5.564", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_43.wav", "onoffCaption": "thump thud at 2.551-4.851, 5.601-8.276", "frequencyCaption": "thump thud two times"} 
+{"filepath": "data/single_event_multi_identity_test/syn_44.wav", "onoffCaption": "burping belching at 0.979-3.138, 4.115-7.072, 7.609-9.768", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_multi_identity_test/syn_50.wav", "onoffCaption": "car horn honking at 1.566-4.25, 6.473-9.434", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_57.wav", "onoffCaption": "train horn at 3.341-8.421", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_59.wav", "onoffCaption": "woman laughing at 2.439-5.203, 6.08-8.827", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_60.wav", "onoffCaption": "cat meowing at 0.074-1.464, 3.742-5.068", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_67.wav", "onoffCaption": "cow mooing at 3.535-6.159", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_69.wav", "onoffCaption": "burping belching at 0.799-2.958", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_multi_identity_test/syn_73.wav", "onoffCaption": "whistling at 2.868-7.462", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_74.wav", "onoffCaption": "cat meowing at 1.655-3.045", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_82.wav", "onoffCaption": "thump thud at 1.925-4.6, 5.398-7.698", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_91.wav", "onoffCaption": "duck quacking at 0.497-2.497", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_96.wav", "onoffCaption": "cat meowing at 0.044-1.37, 3.201-4.591, 5.458-6.848", "frequencyCaption": "cat 
meowing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_98.wav", "onoffCaption": "woman laughing at 2.458-6.916, 7.905-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_101.wav", "onoffCaption": "burping belching at 1.697-4.654, 5.403-7.562", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_106.wav", "onoffCaption": "gunshot at 0.047-2.047", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_108.wav", "onoffCaption": "cat meowing at 1.96-3.35, 4.662-5.988", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_112.wav", "onoffCaption": "train horn at 3.416-8.496", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_115.wav", "onoffCaption": "sheep goat bleating at 3.021-5.021, 6.26-8.26", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_122.wav", "onoffCaption": "tapping clicking clanking at 1.126-4.566, 6.974-9.783", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_125.wav", "onoffCaption": "car horn honking at 3.106-5.79, 6.31-9.271", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_131.wav", "onoffCaption": "cow mooing at 2.423-5.047, 6.252-9.052", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_136.wav", "onoffCaption": "tapping clicking clanking at 0.672-4.112, 5.733-7.916", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_138.wav", "onoffCaption": "burping belching at 0.093-3.05, 3.962-6.121, 7.309-9.468", "frequencyCaption": "burping belching three times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_140.wav", "onoffCaption": "duck quacking at 1.928-3.928, 5.108-6.926", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_147.wav", "onoffCaption": "burping belching at 2.269-4.428, 5.085-8.042", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_149.wav", "onoffCaption": "gunshot at 0.434-2.434", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_153.wav", "onoffCaption": "cow mooing at 3.209-5.833, 6.681-9.481", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_154.wav", "onoffCaption": "train horn at 3.48-6.48, 7.121-9.68", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_multi_identity_test/syn_163.wav", "onoffCaption": "cow mooing at 1.335-3.959, 6.377-9.177", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_164.wav", "onoffCaption": "door slamming at 3.391-4.802, 5.918-8.157", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_170.wav", "onoffCaption": "whistling at 0.053-8.853", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_177.wav", "onoffCaption": "door knocking at 0.585-2.712, 4.192-6.756", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_179.wav", "onoffCaption": "gunshot at 2.477-4.477", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_181.wav", "onoffCaption": "door knocking at 2.753-5.317", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_186.wav", "onoffCaption": "sneeze at 2.336-6.489, 7.757-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": 
"data/single_event_multi_identity_test/syn_188.wav", "onoffCaption": "explosion at 1.933-6.855", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_192.wav", "onoffCaption": "cat meowing at 0.139-1.465, 2.845-4.235", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_195.wav", "onoffCaption": "duck quacking at 3.185-5.003, 5.701-7.701", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_3.wav", "onoffCaption": "burping belching at 0.203-3.16, 3.696-5.855", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_4.wav", "onoffCaption": "cat meowing at 1.562-2.888", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_13.wav", "onoffCaption": "tapping clicking clanking at 0.838-4.278, 4.839-6.935, 7.732-9.827", "frequencyCaption": "tapping clicking clanking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_14.wav", "onoffCaption": "tapping clicking clanking at 0.51-3.95, 5.245-8.17", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_23.wav", "onoffCaption": "cow mooing at 0.467-3.267, 4.388-7.012", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_24.wav", "onoffCaption": "thump thud at 3.239-5.539, 6.108-8.783", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_30.wav", "onoffCaption": "explosion at 2.75-5.502, 7.44-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_37.wav", "onoffCaption": "tapping clicking clanking at 2.357-5.797, 7.176-9.79", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_39.wav", 
"onoffCaption": "burping belching at 1.038-3.197, 4.613-7.57", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_41.wav", "onoffCaption": "car horn honking at 2.524-5.485, 6.594-9.278", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_48.wav", "onoffCaption": "train horn at 2.211-7.291", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_52.wav", "onoffCaption": "dog barking at 2.157-4.157, 5.953-7.953", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_55.wav", "onoffCaption": "spraying at 1.616-2.718, 3.653-4.449, 5.396-6.498", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_62.wav", "onoffCaption": "woman laughing at 0.881-5.339, 6.657-9.421", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_65.wav", "onoffCaption": "tapping clicking clanking at 1.976-5.416, 6.573-9.12", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_71.wav", "onoffCaption": "train horn at 2.442-7.522", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_76.wav", "onoffCaption": "door knocking at 0.618-3.182", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_78.wav", "onoffCaption": "door knocking at 0.065-2.192, 3.439-6.003", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_80.wav", "onoffCaption": "car horn honking at 3.533-6.494", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_85.wav", "onoffCaption": "gunshot at 1.931-3.931, 4.716-6.716, 7.891-9.891", "frequencyCaption": "gunshot three 
times"} +{"filepath": "data/single_event_multi_identity_test/syn_87.wav", "onoffCaption": "thump thud at 1.759-4.059, 6.133-8.808", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_89.wav", "onoffCaption": "door knocking at 0.065-2.192, 3.164-5.728", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_93.wav", "onoffCaption": "whistling at 0.042-8.842", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_94.wav", "onoffCaption": "burping belching at 0.167-2.326, 3.873-6.83", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_103.wav", "onoffCaption": "dog barking at 1.282-3.282, 4.117-6.117, 6.789-8.789", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_104.wav", "onoffCaption": "thump thud at 1.988-4.663, 7.028-9.328", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_110.wav", "onoffCaption": "whistling at 1.555-6.149", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_117.wav", "onoffCaption": "tapping clicking clanking at 0.487-3.927", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_119.wav", "onoffCaption": "duck quacking at 2.537-4.355, 5.889-7.889", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_120.wav", "onoffCaption": "dog barking at 0.013-2.013, 3.064-5.064, 5.694-7.694", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_127.wav", "onoffCaption": "duck quacking at 0.78-2.78, 5.24-7.058", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_129.wav", "onoffCaption": 
"burping belching at 1.965-4.922, 6.696-8.855", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_133.wav", "onoffCaption": "train horn at 3.059-8.139", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_134.wav", "onoffCaption": "spraying at 0.184-0.98, 2.498-3.6, 4.402-5.198", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_142.wav", "onoffCaption": "cow mooing at 2.715-5.339, 6.568-9.368", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_145.wav", "onoffCaption": "cow mooing at 0.071-2.695, 4.586-7.386", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_151.wav", "onoffCaption": "duck quacking at 0.425-2.425, 4.73-6.548", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_156.wav", "onoffCaption": "thump thud at 0.071-2.746, 3.838-6.138, 7.435-9.735", "frequencyCaption": "thump thud three times"} +{"filepath": "data/single_event_multi_identity_test/syn_158.wav", "onoffCaption": "burping belching at 0.027-2.186", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_multi_identity_test/syn_161.wav", "onoffCaption": "car horn honking at 0.937-3.898, 5.036-7.72", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_166.wav", "onoffCaption": "burping belching at 0.323-3.28, 4.07-6.229, 7.049-9.208", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_multi_identity_test/syn_168.wav", "onoffCaption": "door slamming at 0.115-1.526, 2.595-4.834, 5.389-7.628", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_multi_identity_test/syn_172.wav", "onoffCaption": "woman laughing at 3.125-5.889", "frequencyCaption": 
"woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_175.wav", "onoffCaption": "spraying at 0.007-0.803", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_multi_identity_test/syn_183.wav", "onoffCaption": "woman laughing at 2.259-6.717, 7.786-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_184.wav", "onoffCaption": "door slamming at 3.397-4.808, 6.096-8.335", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_190.wav", "onoffCaption": "explosion at 0.228-5.15, 6.074-8.826", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_197.wav", "onoffCaption": "car horn honking at 3.732-6.416, 7.567-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_199.wav", "onoffCaption": "car horn honking at 1.911-4.872", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_200.wav", "onoffCaption": "train horn at 0.413-3.413", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_2.wav", "onoffCaption": "cat meowing at 1.299-2.689", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_5.wav", "onoffCaption": "dog barking at 3.791-5.791, 6.571-8.571", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_12.wav", "onoffCaption": "tapping clicking clanking at 1.245-4.685", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_15.wav", "onoffCaption": "explosion at 3.815-6.567, 7.214-9.546", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_22.wav", "onoffCaption": "sheep goat bleating at 0.26-2.26, 
3.592-5.592, 7.325-9.325", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/single_event_multi_identity_test/syn_25.wav", "onoffCaption": "gunshot at 0.166-2.166, 3.749-5.749", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_31.wav", "onoffCaption": "sneeze at 3.917-8.07", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_multi_identity_test/syn_36.wav", "onoffCaption": "sheep goat bleating at 2.86-4.86, 7.119-9.119", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_38.wav", "onoffCaption": "whistling at 2.996-7.59", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_40.wav", "onoffCaption": "woman laughing at 0.024-4.482, 5.882-8.646", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_46.wav", "onoffCaption": "tapping clicking clanking at 2.067-5.507", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_47.wav", "onoffCaption": "cow mooing at 0.008-2.808, 3.956-6.58, 7.995-10.0", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_49.wav", "onoffCaption": "dog barking at 3.464-5.464", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_51.wav", "onoffCaption": "whistling at 0.26-9.06", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_53.wav", "onoffCaption": "whistling at 0.748-5.342, 6.45-8.456", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_54.wav", "onoffCaption": "cow mooing at 0.48-3.28, 4.237-6.861", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_63.wav", "onoffCaption": "explosion 
at 1.214-6.136", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_64.wav", "onoffCaption": "whistling at 0.012-4.606, 5.649-8.052", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_70.wav", "onoffCaption": "dog barking at 4.239-6.239", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_77.wav", "onoffCaption": "train horn at 2.336-7.416", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_79.wav", "onoffCaption": "train horn at 2.15-7.23", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_81.wav", "onoffCaption": "tapping clicking clanking at 3.241-6.681", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_86.wav", "onoffCaption": "gunshot at 0.406-2.406, 4.136-6.136", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_88.wav", "onoffCaption": "car horn honking at 0.051-3.012, 4.062-6.746, 7.319-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_92.wav", "onoffCaption": "door slamming at 1.032-2.443, 4.422-6.661", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_95.wav", "onoffCaption": "woman laughing at 0.147-4.605, 5.939-8.703", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_102.wav", "onoffCaption": "duck quacking at 0.363-2.363, 2.979-4.797", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_105.wav", "onoffCaption": "door slamming at 0.253-1.664", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_111.wav", 
"onoffCaption": "dog barking at 0.562-2.562, 4.25-6.25", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_116.wav", "onoffCaption": "sheep goat bleating at 2.658-4.658", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_118.wav", "onoffCaption": "sheep goat bleating at 2.634-4.634", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_121.wav", "onoffCaption": "cat meowing at 2.182-3.508", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_123.wav", "onoffCaption": "sheep goat bleating at 2.042-4.042, 5.044-7.044", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_126.wav", "onoffCaption": "burping belching at 0.139-3.096, 4.403-6.562", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_multi_identity_test/syn_128.wav", "onoffCaption": "train horn at 1.814-4.814", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_multi_identity_test/syn_132.wav", "onoffCaption": "duck quacking at 1.582-3.582, 4.673-6.491", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_135.wav", "onoffCaption": "whistling at 1.414-6.008, 7.012-9.463", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_139.wav", "onoffCaption": "spraying at 1.819-2.615, 3.181-4.283", "frequencyCaption": "spraying two times"} +{"filepath": "data/single_event_multi_identity_test/syn_143.wav", "onoffCaption": "door knocking at 0.495-3.059, 4.039-6.166, 7.128-9.692", "frequencyCaption": "door knocking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_144.wav", "onoffCaption": "spraying at 0.584-1.686, 2.49-3.286, 3.892-4.688", "frequencyCaption": "spraying three 
times"} +{"filepath": "data/single_event_multi_identity_test/syn_150.wav", "onoffCaption": "duck quacking at 2.654-4.654", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_157.wav", "onoffCaption": "explosion at 2.478-5.23, 6.261-9.209", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_159.wav", "onoffCaption": "sneeze at 1.342-4.695, 6.662-9.384", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_160.wav", "onoffCaption": "woman laughing at 0.352-3.116", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_167.wav", "onoffCaption": "thump thud at 0.177-2.852, 4.459-6.759", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_169.wav", "onoffCaption": "gunshot at 0.088-2.088", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_173.wav", "onoffCaption": "explosion at 0.195-5.117", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_174.wav", "onoffCaption": "duck quacking at 0.089-2.089, 4.166-5.984", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_176.wav", "onoffCaption": "gunshot at 3.54-5.54, 7.238-9.238", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_182.wav", "onoffCaption": "car horn honking at 0.14-2.824", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_185.wav", "onoffCaption": "dog barking at 3.434-5.434, 6.333-8.333", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_189.wav", "onoffCaption": "burping belching at 0.432-3.389, 4.403-6.562", "frequencyCaption": "burping belching two times"} 
+{"filepath": "data/single_event_multi_identity_test/syn_191.wav", "onoffCaption": "tapping clicking clanking at 2.168-5.608", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_193.wav", "onoffCaption": "dog barking at 3.219-5.219", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_196.wav", "onoffCaption": "duck quacking at 1.8-3.618", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_198.wav", "onoffCaption": "sheep goat bleating at 0.073-2.073", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_multi_identity_test/syn_7.wav", "onoffCaption": "door slamming at 2.809-4.22, 6.263-8.502", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_9.wav", "onoffCaption": "sneeze at 0.07-4.223, 4.927-7.216", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_multi_identity_test/syn_10.wav", "onoffCaption": "door slamming at 2.191-3.602", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_17.wav", "onoffCaption": "gunshot at 0.033-2.033", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_19.wav", "onoffCaption": "thump thud at 2.571-4.871, 6.726-9.401", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_multi_identity_test/syn_20.wav", "onoffCaption": "dog barking at 2.557-4.557, 5.093-7.093, 7.963-9.963", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_27.wav", "onoffCaption": "whistling at 2.141-6.735, 7.84-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_multi_identity_test/syn_29.wav", "onoffCaption": "woman laughing at 3.051-7.509", "frequencyCaption": "woman laughing one times"} 
+{"filepath": "data/single_event_multi_identity_test/syn_33.wav", "onoffCaption": "dog barking at 1.397-3.397, 5.014-7.014", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_34.wav", "onoffCaption": "dog barking at 0.691-2.691, 4.339-6.339, 7.597-9.597", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_42.wav", "onoffCaption": "door slamming at 0.111-1.522, 2.919-5.158", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_45.wav", "onoffCaption": "woman laughing at 0.913-5.371", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_56.wav", "onoffCaption": "cat meowing at 3.25-4.576", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_58.wav", "onoffCaption": "spraying at 2.012-2.808", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_multi_identity_test/syn_61.wav", "onoffCaption": "sheep goat bleating at 0.44-2.44, 3.141-5.141", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_multi_identity_test/syn_66.wav", "onoffCaption": "duck quacking at 0.199-2.199", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_68.wav", "onoffCaption": "door slamming at 0.555-1.966", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_multi_identity_test/syn_72.wav", "onoffCaption": "duck quacking at 3.008-5.008", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_75.wav", "onoffCaption": "door slamming at 2.007-4.246, 5.403-6.814, 7.324-9.563", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_multi_identity_test/syn_83.wav", "onoffCaption": "spraying at 0.42-1.522, 2.179-2.975, 4.216-5.012", 
"frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_multi_identity_test/syn_84.wav", "onoffCaption": "burping belching at 1.998-4.955", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_multi_identity_test/syn_90.wav", "onoffCaption": "whistling at 0.292-9.092", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_97.wav", "onoffCaption": "dog barking at 1.995-3.995", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_99.wav", "onoffCaption": "gunshot at 1.846-3.846, 5.067-7.067", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_100.wav", "onoffCaption": "gunshot at 2.965-4.965, 5.836-7.836", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_multi_identity_test/syn_107.wav", "onoffCaption": "cat meowing at 0.382-1.772, 4.195-5.521, 7.481-8.871", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_109.wav", "onoffCaption": "cat meowing at 1.827-3.217, 5.396-6.722, 8.387-9.777", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_multi_identity_test/syn_113.wav", "onoffCaption": "door slamming at 1.281-3.52, 4.645-6.056", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_multi_identity_test/syn_114.wav", "onoffCaption": "explosion at 2.267-7.189", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_124.wav", "onoffCaption": "woman laughing at 0.666-5.124, 7.521-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_multi_identity_test/syn_130.wav", "onoffCaption": "gunshot at 2.672-4.672", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_multi_identity_test/syn_137.wav", "onoffCaption": "train horn at 0.682-3.682, 4.465-6.698, 
7.809-10.0", "frequencyCaption": "train horn three times"} +{"filepath": "data/single_event_multi_identity_test/syn_141.wav", "onoffCaption": "woman laughing at 0.105-2.869", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_146.wav", "onoffCaption": "sneeze at 1.102-4.455", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_multi_identity_test/syn_148.wav", "onoffCaption": "dog barking at 0.061-2.061, 3.265-5.265, 6.197-8.197", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_multi_identity_test/syn_152.wav", "onoffCaption": "dog barking at 0.127-2.127", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_155.wav", "onoffCaption": "spraying at 3.549-4.651", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_multi_identity_test/syn_162.wav", "onoffCaption": "explosion at 0.391-3.143, 3.673-5.706", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_multi_identity_test/syn_165.wav", "onoffCaption": "whistling at 3.448-8.042", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_multi_identity_test/syn_171.wav", "onoffCaption": "duck quacking at 2.752-4.752", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_multi_identity_test/syn_178.wav", "onoffCaption": "tapping clicking clanking at 1.713-5.153, 6.827-9.222", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_multi_identity_test/syn_180.wav", "onoffCaption": "cow mooing at 0.181-2.981", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_multi_identity_test/syn_187.wav", "onoffCaption": "explosion at 2.424-5.176", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_multi_identity_test/syn_194.wav", "onoffCaption": "gunshot at 2.339-4.339", "frequencyCaption": "gunshot 
one times"} +{"filepath": "data/single_event_single_identity_test/syn_11.wav", "onoffCaption": "door knocking at 3.808-5.935, 6.708-8.835", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_16.wav", "onoffCaption": "burping belching at 2.569-5.526", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_18.wav", "onoffCaption": "burping belching at 2.907-5.066", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_21.wav", "onoffCaption": "burping belching at 0.64-2.799", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_26.wav", "onoffCaption": "spraying at 0.127-0.923", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_28.wav", "onoffCaption": "train horn at 3.589-8.669", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_32.wav", "onoffCaption": "gunshot at 1.173-3.173, 3.96-5.96, 6.617-8.617", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_single_identity_test/syn_35.wav", "onoffCaption": "woman laughing at 0.948-5.406, 7.602-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_43.wav", "onoffCaption": "door slamming at 3.246-4.657, 6.312-7.723", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_44.wav", "onoffCaption": "dog barking at 1.211-3.211, 4.206-6.206, 6.728-8.728", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_50.wav", "onoffCaption": "door knocking at 0.488-3.052, 5.244-7.808", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_57.wav", "onoffCaption": "train horn at 0.177-5.257", 
"frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_59.wav", "onoffCaption": "gunshot at 0.24-2.24, 3.277-5.277, 7.394-9.394", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_single_identity_test/syn_60.wav", "onoffCaption": "cow mooing at 1.847-4.471, 6.336-8.96", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_67.wav", "onoffCaption": "cow mooing at 2.819-5.443, 6.06-8.684", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_69.wav", "onoffCaption": "burping belching at 1.971-4.928, 6.428-9.385", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_73.wav", "onoffCaption": "dog barking at 0.094-2.094, 3.294-5.294, 6.771-8.771", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_74.wav", "onoffCaption": "cow mooing at 2.351-4.975, 5.558-8.182", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_82.wav", "onoffCaption": "woman laughing at 2.876-7.334", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_85.wav", "onoffCaption": "dog barking at 2.785-4.785", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_91.wav", "onoffCaption": "tapping clicking clanking at 1.295-4.735", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_96.wav", "onoffCaption": "door knocking at 0.452-2.579", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_98.wav", "onoffCaption": "door slamming at 2.339-4.578", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_single_identity_test/syn_101.wav", 
"onoffCaption": "spraying at 0.013-1.115, 1.805-2.907, 5.09-6.192", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_106.wav", "onoffCaption": "spraying at 2.518-3.314", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_108.wav", "onoffCaption": "gunshot at 3.946-5.946, 6.959-8.959", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_112.wav", "onoffCaption": "burping belching at 3.346-6.303, 7.74-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_115.wav", "onoffCaption": "explosion at 0.084-5.006", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_122.wav", "onoffCaption": "tapping clicking clanking at 0.407-3.847", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_125.wav", "onoffCaption": "explosion at 0.371-3.123, 5.335-8.087", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_single_identity_test/syn_131.wav", "onoffCaption": "door slamming at 0.346-1.757, 2.569-3.98, 5.839-7.25", "frequencyCaption": "door slamming three times"} +{"filepath": "data/single_event_single_identity_test/syn_136.wav", "onoffCaption": "car horn honking at 0.066-2.75", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_single_identity_test/syn_138.wav", "onoffCaption": "explosion at 2.129-4.881", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_140.wav", "onoffCaption": "train horn at 1.872-6.952, 7.829-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_single_identity_test/syn_147.wav", "onoffCaption": "explosion at 0.38-3.132, 4.352-7.104, 7.977-10.0", "frequencyCaption": "explosion three times"} +{"filepath": 
"data/single_event_single_identity_test/syn_149.wav", "onoffCaption": "spraying at 0.031-1.133, 1.86-2.962, 3.961-5.063", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_153.wav", "onoffCaption": "dog barking at 0.435-2.435, 4.016-6.016", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_154.wav", "onoffCaption": "explosion at 1.704-6.626", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_163.wav", "onoffCaption": "sneeze at 2.736-6.889", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_164.wav", "onoffCaption": "sneeze at 2.624-6.777", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_170.wav", "onoffCaption": "burping belching at 3.451-6.408", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_175.wav", "onoffCaption": "explosion at 1.902-6.824", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_177.wav", "onoffCaption": "door knocking at 3.219-5.346, 7.058-9.185", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_179.wav", "onoffCaption": "explosion at 2.521-5.273", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_181.wav", "onoffCaption": "train horn at 0.212-3.212", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_186.wav", "onoffCaption": "sheep goat bleating at 0.651-2.651, 3.512-5.512", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_188.wav", "onoffCaption": "whistling at 0.87-9.67", "frequencyCaption": "whistling one times"} +{"filepath": 
"data/single_event_single_identity_test/syn_190.wav", "onoffCaption": "woman laughing at 0.484-3.248, 4.163-6.927", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_192.wav", "onoffCaption": "door knocking at 1.863-3.99, 5.187-7.314", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_195.wav", "onoffCaption": "cow mooing at 0.958-3.582, 5.272-7.896", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_13.wav", "onoffCaption": "tapping clicking clanking at 3.109-6.549", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_14.wav", "onoffCaption": "woman laughing at 0.127-2.891", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_23.wav", "onoffCaption": "whistling at 0.074-8.874", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_24.wav", "onoffCaption": "dog barking at 0.91-2.91", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_30.wav", "onoffCaption": "whistling at 0.978-5.572", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_37.wav", "onoffCaption": "whistling at 2.107-6.701", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_39.wav", "onoffCaption": "whistling at 0.165-4.759, 5.362-9.956", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_single_identity_test/syn_41.wav", "onoffCaption": "sheep goat bleating at 0.023-2.023, 3.507-5.507", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_46.wav", "onoffCaption": "car horn honking at 1.978-4.939, 5.578-8.539", "frequencyCaption": "car horn honking 
two times"} +{"filepath": "data/single_event_single_identity_test/syn_48.wav", "onoffCaption": "thump thud at 1.392-4.067, 5.357-8.032", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_single_identity_test/syn_52.wav", "onoffCaption": "dog barking at 0.25-2.25, 3.486-5.486, 6.439-8.439", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_55.wav", "onoffCaption": "gunshot at 2.722-4.722, 6.936-8.936", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_62.wav", "onoffCaption": "burping belching at 0.459-3.416, 4.188-7.145", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_65.wav", "onoffCaption": "sheep goat bleating at 0.55-2.55, 4.457-6.457", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_71.wav", "onoffCaption": "tapping clicking clanking at 3.396-6.836", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_76.wav", "onoffCaption": "sheep goat bleating at 0.056-2.056, 3.47-5.47", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_78.wav", "onoffCaption": "train horn at 0.083-5.163, 6.748-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_single_identity_test/syn_80.wav", "onoffCaption": "whistling at 1.269-5.863, 6.498-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/single_event_single_identity_test/syn_87.wav", "onoffCaption": "car horn honking at 1.885-4.569, 5.797-8.481", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_single_identity_test/syn_89.wav", "onoffCaption": "train horn at 0.507-3.507", "frequencyCaption": "train horn one times"} +{"filepath": 
"data/single_event_single_identity_test/syn_93.wav", "onoffCaption": "dog barking at 3.063-5.063, 6.381-8.381", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_94.wav", "onoffCaption": "duck quacking at 2.332-4.15", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_103.wav", "onoffCaption": "gunshot at 1.066-3.066", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_104.wav", "onoffCaption": "cat meowing at 0.488-1.878, 4.297-5.687, 6.263-7.653", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_110.wav", "onoffCaption": "whistling at 0.407-5.001", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_117.wav", "onoffCaption": "cat meowing at 1.091-2.481, 3.509-4.899", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_119.wav", "onoffCaption": "car horn honking at 0.202-2.886", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/single_event_single_identity_test/syn_120.wav", "onoffCaption": "door knocking at 2.729-5.293", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_127.wav", "onoffCaption": "sheep goat bleating at 2.262-4.262, 5.801-7.801", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_129.wav", "onoffCaption": "sheep goat bleating at 0.602-2.602, 4.548-6.548, 7.151-9.151", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/single_event_single_identity_test/syn_133.wav", "onoffCaption": "gunshot at 1.679-3.679, 5.98-7.98", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_134.wav", "onoffCaption": "sheep goat bleating at 0.091-2.091, 
3.322-5.322", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_142.wav", "onoffCaption": "dog barking at 0.622-2.622, 5.087-7.087", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_145.wav", "onoffCaption": "train horn at 2.269-5.269", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_151.wav", "onoffCaption": "burping belching at 0.193-2.352", "frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_156.wav", "onoffCaption": "cow mooing at 1.573-4.373", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_single_identity_test/syn_158.wav", "onoffCaption": "door knocking at 1.174-3.301", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_161.wav", "onoffCaption": "spraying at 0.159-1.261, 2.033-3.135, 4.44-5.542", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_166.wav", "onoffCaption": "tapping clicking clanking at 1.641-5.081, 6.146-9.586", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_168.wav", "onoffCaption": "explosion at 3.277-8.199", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_172.wav", "onoffCaption": "gunshot at 1.58-3.58", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_183.wav", "onoffCaption": "duck quacking at 0.511-2.511", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_184.wav", "onoffCaption": "spraying at 0.044-0.84", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_197.wav", "onoffCaption": "sheep goat bleating at 
2.317-4.317, 6.052-8.052", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_199.wav", "onoffCaption": "dog barking at 3.728-5.728, 6.93-8.93", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_200.wav", "onoffCaption": "thump thud at 1.717-4.017, 5.949-8.249", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_single_identity_test/syn_12.wav", "onoffCaption": "dog barking at 2.048-4.048", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_15.wav", "onoffCaption": "dog barking at 0.046-2.046, 4.09-6.09", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_22.wav", "onoffCaption": "whistling at 2.136-6.73", "frequencyCaption": "whistling one times"} +{"filepath": "data/single_event_single_identity_test/syn_25.wav", "onoffCaption": "explosion at 1.944-4.696, 6.227-8.979", "frequencyCaption": "explosion two times"} +{"filepath": "data/single_event_single_identity_test/syn_31.wav", "onoffCaption": "gunshot at 0.269-2.269, 3.559-5.559, 6.243-8.243", "frequencyCaption": "gunshot three times"} +{"filepath": "data/single_event_single_identity_test/syn_36.wav", "onoffCaption": "dog barking at 0.991-2.991", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_38.wav", "onoffCaption": "dog barking at 3.368-5.368, 6.043-8.043", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_40.wav", "onoffCaption": "sheep goat bleating at 0.185-2.185", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/single_event_single_identity_test/syn_47.wav", "onoffCaption": "door slamming at 0.106-2.345, 2.885-5.124, 5.997-8.236", "frequencyCaption": "door slamming three times"} +{"filepath": 
"data/single_event_single_identity_test/syn_49.wav", "onoffCaption": "duck quacking at 0.37-2.37", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_51.wav", "onoffCaption": "cat meowing at 0.245-1.571, 3.125-4.451, 5.016-6.342", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_53.wav", "onoffCaption": "cat meowing at 0.277-1.603", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_54.wav", "onoffCaption": "gunshot at 0.17-2.17, 4.644-6.644", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_63.wav", "onoffCaption": "door slamming at 1.788-4.027", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_single_identity_test/syn_64.wav", "onoffCaption": "sheep goat bleating at 1.736-3.736, 4.735-6.735, 7.944-9.944", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/single_event_single_identity_test/syn_70.wav", "onoffCaption": "sneeze at 0.231-4.384, 5.433-9.586", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_single_identity_test/syn_77.wav", "onoffCaption": "dog barking at 3.416-5.416, 5.973-7.973", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_79.wav", "onoffCaption": "tapping clicking clanking at 0.931-4.371", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_81.wav", "onoffCaption": "spraying at 2.201-2.997", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_86.wav", "onoffCaption": "door knocking at 0.221-2.785", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_88.wav", "onoffCaption": "cow mooing at 2.087-4.887, 6.12-8.92", "frequencyCaption": "cow 
mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_92.wav", "onoffCaption": "train horn at 0.429-5.509, 6.408-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/single_event_single_identity_test/syn_95.wav", "onoffCaption": "thump thud at 2.906-5.581", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_single_identity_test/syn_102.wav", "onoffCaption": "thump thud at 2.581-4.881, 6.222-8.522", "frequencyCaption": "thump thud two times"} +{"filepath": "data/single_event_single_identity_test/syn_105.wav", "onoffCaption": "door slamming at 0.833-3.072, 4.449-6.688", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_111.wav", "onoffCaption": "door knocking at 1.124-3.688, 6.152-8.716", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_116.wav", "onoffCaption": "gunshot at 0.875-2.875, 4.735-6.735", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_118.wav", "onoffCaption": "cat meowing at 0.483-1.809", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_121.wav", "onoffCaption": "door knocking at 1.619-4.183", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_126.wav", "onoffCaption": "sheep goat bleating at 3.885-5.885, 7.836-9.836", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_128.wav", "onoffCaption": "tapping clicking clanking at 2.571-6.011", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/single_event_single_identity_test/syn_132.wav", "onoffCaption": "cat meowing at 2.927-4.317, 5.007-6.397, 6.922-8.312", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_135.wav", "onoffCaption": 
"door slamming at 3.195-5.434, 6.893-9.132", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_139.wav", "onoffCaption": "duck quacking at 2.765-4.583, 6.906-8.724", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_single_identity_test/syn_143.wav", "onoffCaption": "cat meowing at 2.231-3.621", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_144.wav", "onoffCaption": "cow mooing at 0.562-3.186, 4.31-6.934", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/single_event_single_identity_test/syn_150.wav", "onoffCaption": "dog barking at 0.436-2.436", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_157.wav", "onoffCaption": "sneeze at 3.222-7.375", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_159.wav", "onoffCaption": "sneeze at 2.417-6.57", "frequencyCaption": "sneeze one times"} +{"filepath": "data/single_event_single_identity_test/syn_160.wav", "onoffCaption": "tapping clicking clanking at 0.262-3.702, 5.703-9.143", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_167.wav", "onoffCaption": "cat meowing at 0.205-1.595, 2.703-4.093", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_169.wav", "onoffCaption": "train horn at 3.293-8.373", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_173.wav", "onoffCaption": "thump thud at 3.392-5.692", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_single_identity_test/syn_174.wav", "onoffCaption": "cat meowing at 2.478-3.804, 4.701-6.027, 7.098-8.424", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_182.wav", 
"onoffCaption": "door knocking at 2.598-4.725, 5.428-7.555", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_185.wav", "onoffCaption": "gunshot at 3.329-5.329, 6.811-8.811", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_189.wav", "onoffCaption": "door knocking at 2.566-4.693", "frequencyCaption": "door knocking one times"} +{"filepath": "data/single_event_single_identity_test/syn_191.wav", "onoffCaption": "cow mooing at 2.094-4.894", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_single_identity_test/syn_196.wav", "onoffCaption": "door knocking at 0.398-2.525, 3.558-5.685, 6.802-8.929", "frequencyCaption": "door knocking three times"} +{"filepath": "data/single_event_single_identity_test/syn_198.wav", "onoffCaption": "explosion at 3.575-6.327", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_10.wav", "onoffCaption": "duck quacking at 0.107-1.925", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_17.wav", "onoffCaption": "burping belching at 0.839-2.998, 4.442-6.601", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_19.wav", "onoffCaption": "cat meowing at 2.357-3.683, 5.023-6.349", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_20.wav", "onoffCaption": "tapping clicking clanking at 2.446-5.886, 7.886-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_27.wav", "onoffCaption": "spraying at 0.301-1.403, 2.423-3.525, 4.539-5.641", "frequencyCaption": "spraying three times"} +{"filepath": "data/single_event_single_identity_test/syn_29.wav", "onoffCaption": "tapping clicking clanking at 0.69-4.13, 5.59-9.03", "frequencyCaption": "tapping 
clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_33.wav", "onoffCaption": "train horn at 2.016-7.096", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_34.wav", "onoffCaption": "burping belching at 3.636-5.795, 7.726-9.885", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_42.wav", "onoffCaption": "dog barking at 2.092-4.092", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_45.wav", "onoffCaption": "cat meowing at 2.902-4.228", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/single_event_single_identity_test/syn_56.wav", "onoffCaption": "train horn at 0.125-3.125", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_58.wav", "onoffCaption": "duck quacking at 0.179-2.179, 4.629-6.629", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_single_identity_test/syn_61.wav", "onoffCaption": "spraying at 2.685-3.787", "frequencyCaption": "spraying one times"} +{"filepath": "data/single_event_single_identity_test/syn_66.wav", "onoffCaption": "cat meowing at 0.1-1.426, 2.691-4.017", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_68.wav", "onoffCaption": "duck quacking at 0.259-2.077", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_72.wav", "onoffCaption": "sneeze at 0.32-3.673, 4.809-8.162", "frequencyCaption": "sneeze two times"} +{"filepath": "data/single_event_single_identity_test/syn_75.wav", "onoffCaption": "door slamming at 3.048-4.459, 6.382-7.793", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_83.wav", "onoffCaption": "dog barking at 1.005-3.005, 5.367-7.367", "frequencyCaption": "dog barking two 
times"} +{"filepath": "data/single_event_single_identity_test/syn_84.wav", "onoffCaption": "woman laughing at 0.34-4.798, 6.685-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_90.wav", "onoffCaption": "dog barking at 0.965-2.965, 3.842-5.842, 7.713-9.713", "frequencyCaption": "dog barking three times"} +{"filepath": "data/single_event_single_identity_test/syn_97.wav", "onoffCaption": "gunshot at 1.924-3.924", "frequencyCaption": "gunshot one times"} +{"filepath": "data/single_event_single_identity_test/syn_99.wav", "onoffCaption": "door knocking at 3.167-5.294, 6.941-9.068", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_100.wav", "onoffCaption": "burping belching at 2.361-4.52, 5.23-7.389", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_107.wav", "onoffCaption": "woman laughing at 2.849-5.613, 6.83-9.594", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_109.wav", "onoffCaption": "cat meowing at 0.321-1.647, 2.314-3.64, 4.695-6.021", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/single_event_single_identity_test/syn_113.wav", "onoffCaption": "duck quacking at 1.194-3.012", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/single_event_single_identity_test/syn_114.wav", "onoffCaption": "duck quacking at 0.737-2.737, 3.972-5.972", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/single_event_single_identity_test/syn_123.wav", "onoffCaption": "woman laughing at 3.064-7.522", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_124.wav", "onoffCaption": "door slamming at 0.317-2.556, 3.904-6.143", "frequencyCaption": "door slamming two times"} +{"filepath": "data/single_event_single_identity_test/syn_130.wav", 
"onoffCaption": "duck quacking at 1.714-3.532, 4.074-5.892, 6.517-8.335", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/single_event_single_identity_test/syn_137.wav", "onoffCaption": "dog barking at 0.126-2.126, 2.714-4.714", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_141.wav", "onoffCaption": "woman laughing at 3.098-7.556", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_146.wav", "onoffCaption": "dog barking at 0.087-2.087, 4.127-6.127", "frequencyCaption": "dog barking two times"} +{"filepath": "data/single_event_single_identity_test/syn_148.wav", "onoffCaption": "thump thud at 2.712-5.387", "frequencyCaption": "thump thud one times"} +{"filepath": "data/single_event_single_identity_test/syn_152.wav", "onoffCaption": "sheep goat bleating at 1.645-3.645, 5.29-7.29", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_155.wav", "onoffCaption": "woman laughing at 0.079-4.537, 5.539-9.997", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/single_event_single_identity_test/syn_162.wav", "onoffCaption": "door knocking at 0.465-2.592, 4.247-6.374", "frequencyCaption": "door knocking two times"} +{"filepath": "data/single_event_single_identity_test/syn_165.wav", "onoffCaption": "door slamming at 0.439-2.678", "frequencyCaption": "door slamming one times"} +{"filepath": "data/single_event_single_identity_test/syn_171.wav", "onoffCaption": "woman laughing at 0.467-3.231", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/single_event_single_identity_test/syn_176.wav", "onoffCaption": "burping belching at 0.432-2.591, 5.061-7.22", "frequencyCaption": "burping belching two times"} +{"filepath": "data/single_event_single_identity_test/syn_178.wav", "onoffCaption": "sheep goat bleating at 4.036-6.036, 6.704-8.704", "frequencyCaption": "sheep 
goat bleating two times"} +{"filepath": "data/single_event_single_identity_test/syn_180.wav", "onoffCaption": "cow mooing at 0.178-2.802", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/single_event_single_identity_test/syn_187.wav", "onoffCaption": "gunshot at 0.523-2.523, 3.427-5.427", "frequencyCaption": "gunshot two times"} +{"filepath": "data/single_event_single_identity_test/syn_193.wav", "onoffCaption": "tapping clicking clanking at 1.074-4.514, 6.811-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/single_event_single_identity_test/syn_194.wav", "onoffCaption": "train horn at 1.729-6.809", "frequencyCaption": "train horn one times"} +{"filepath": "data/single_event_single_identity_test/syn_1.wav", "onoffCaption": "cat meowing at 0.393-1.783, 3.975-5.365", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_2.wav", "onoffCaption": "cat meowing at 2.278-3.668, 5.204-6.594", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/single_event_single_identity_test/syn_3.wav", "onoffCaption": "burping belching at 0.042-2.999, 4.324-7.281, 7.849-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/single_event_single_identity_test/syn_4.wav", "onoffCaption": "car horn honking at 0.38-3.341, 4.605-7.566", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/single_event_single_identity_test/syn_5.wav", "onoffCaption": "dog barking at 0.088-2.088", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_6.wav", "onoffCaption": "explosion at 2.796-7.718", "frequencyCaption": "explosion one times"} +{"filepath": "data/single_event_single_identity_test/syn_7.wav", "onoffCaption": "dog barking at 2.565-4.565", "frequencyCaption": "dog barking one times"} +{"filepath": "data/single_event_single_identity_test/syn_8.wav", "onoffCaption": "burping belching at 0.45-3.407", 
"frequencyCaption": "burping belching one times"} +{"filepath": "data/single_event_single_identity_test/syn_9.wav", "onoffCaption": "burping belching at 0.775-3.732", "frequencyCaption": "burping belching one times"} diff --git a/picoaudio/data/meta_data/train.json b/picoaudio/data/meta_data/train.json new file mode 100644 index 0000000000000000000000000000000000000000..c14872a174e0c402586c8572b0104c8929c5df1e --- /dev/null +++ b/picoaudio/data/meta_data/train.json @@ -0,0 +1,5000 @@ +{"filepath": "data/multi_event_train/syn_21.wav", "onoffCaption": "door slamming at 0.45-1.991, 3.019-5.8, 6.623-8.102", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_74.wav", "onoffCaption": "train horn at 2.817-5.697, 6.941-9.151", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_91.wav", "onoffCaption": "door knocking at 1.155-5.305", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_106.wav", "onoffCaption": "duck quacking at 0.309-2.309, 3.42-5.42, 6.714-8.714 and cow mooing at 2.038-5.007, 6.542-8.58", "frequencyCaption": "duck quacking three times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_153.wav", "onoffCaption": "cow mooing at 1.592-4.602, 6.719-9.729 and explosion at 3.329-6.882", "frequencyCaption": "cow mooing two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_320.wav", "onoffCaption": "train horn at 3.325-5.48, 6.561-9.201", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_375.wav", "onoffCaption": "whistling at 0.48-4.964, 7.059-9.934", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_390.wav", "onoffCaption": "dog barking at 3.038-5.038, 5.585-7.585", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_505.wav", "onoffCaption": "thump thud at 1.59-6.04, 6.614-10.0", "frequencyCaption": "thump 
thud two times"} +{"filepath": "data/multi_event_train/syn_550.wav", "onoffCaption": "explosion at 0.707-5.707 and woman laughing at 3.44-5.677, 6.23-8.467", "frequencyCaption": "explosion one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_638.wav", "onoffCaption": "door knocking at 1.973-5.029, 6.285-9.132", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_688.wav", "onoffCaption": "burping belching at 0.159-4.028, 6.032-8.977 and door knocking at 0.525-2.902, 4.436-6.813", "frequencyCaption": "burping belching two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_723.wav", "onoffCaption": "burping belching at 0.211-2.336, 2.942-5.466, 6.496-8.59 and dog barking at 0.78-2.78", "frequencyCaption": "burping belching three times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_776.wav", "onoffCaption": "woman laughing at 2.782-5.368, 6.831-8.912", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_793.wav", "onoffCaption": "explosion at 2.941-5.813 and spraying at 4.494-5.575, 6.097-7.161", "frequencyCaption": "explosion one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_801.wav", "onoffCaption": "whistling at 2.268-8.349 and door slamming at 2.584-4.78, 5.618-7.814", "frequencyCaption": "whistling one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_854.wav", "onoffCaption": "woman laughing at 0.638-3.925, 5.136-7.364 and dog barking at 1.269-3.269", "frequencyCaption": "woman laughing two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1035.wav", "onoffCaption": "tapping clicking clanking at 0.961-4.401, 6.37-9.81 and sheep goat bleating at 1.484-4.78 and door knocking at 1.608-4.664, 5.492-7.703", "frequencyCaption": "tapping clicking clanking two times and sheep goat bleating one times and door knocking two times"} 
+{"filepath": "data/multi_event_train/syn_1060.wav", "onoffCaption": "spraying at 0.669-1.238, 2.488-3.057, 4.351-4.92 and whistling at 0.918-3.793, 5.909-8.784", "frequencyCaption": "spraying three times and whistling two times"} +{"filepath": "data/multi_event_train/syn_1085.wav", "onoffCaption": "whistling at 2.158-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1209.wav", "onoffCaption": "spraying at 1.431-2.181, 2.756-3.34, 4.475-5.722", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1213.wav", "onoffCaption": "gunshot at 3.324-5.324, 6.563-8.563", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1246.wav", "onoffCaption": "cow mooing at 1.954-6.383, 7.52-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1308.wav", "onoffCaption": "duck quacking at 0.295-2.295, 3.085-5.085, 5.734-7.734 and cat meowing at 3.868-4.879, 5.645-7.2 and cow mooing at 5.281-8.579", "frequencyCaption": "duck quacking three times and cat meowing two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1312.wav", "onoffCaption": "dog barking at 3.089-5.089, 6.258-8.258", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1436.wav", "onoffCaption": "train horn at 2.446-5.806, 7.457-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1463.wav", "onoffCaption": "sneeze at 0.309-2.016 and car horn honking at 1.819-4.637 and cow mooing at 7.987-10.0", "frequencyCaption": "sneeze one times and car horn honking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1486.wav", "onoffCaption": "sheep goat bleating at 0.657-4.297, 4.952-8.592 and door slamming at 1.974-4.403, 5.259-7.688", "frequencyCaption": "sheep goat bleating two times and door slamming two times"} +{"filepath": 
"data/multi_event_train/syn_1578.wav", "onoffCaption": "dog barking at 0.121-2.121, 3.824-5.824, 7.767-9.767", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_1610.wav", "onoffCaption": "tapping clicking clanking at 1.851-5.291, 7.569-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1645.wav", "onoffCaption": "door knocking at 0.645-2.772, 3.875-6.782, 7.405-9.692", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1744.wav", "onoffCaption": "spraying at 0.033-1.519, 2.5-3.986, 4.812-6.298 and burping belching at 0.275-7.443", "frequencyCaption": "spraying three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1829.wav", "onoffCaption": "duck quacking at 0.235-2.235, 4.037-6.037 and burping belching at 1.845-5.024, 6.206-8.841", "frequencyCaption": "duck quacking two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1899.wav", "onoffCaption": "woman laughing at 2.777-6.165, 7.557-10.0 and gunshot at 2.778-4.778", "frequencyCaption": "woman laughing two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1932.wav", "onoffCaption": "sheep goat bleating at 3.113-5.113, 5.767-7.767 and car horn honking at 3.453-5.453, 7.291-9.291 and spraying at 6.748-7.375", "frequencyCaption": "sheep goat bleating two times and car horn honking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1967.wav", "onoffCaption": "train horn at 0.91-4.39 and tapping clicking clanking at 1.016-4.456, 6.672-10.0", "frequencyCaption": "train horn one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1982.wav", "onoffCaption": "tapping clicking clanking at 2.58-6.02, 7.81-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3002.wav", "onoffCaption": 
"gunshot at 0.025-2.025, 3.214-5.214, 6.486-8.486", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3057.wav", "onoffCaption": "dog barking at 0.464-2.464, 4.19-6.19", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3119.wav", "onoffCaption": "sheep goat bleating at 0.524-2.524, 3.378-5.378, 6.204-8.892", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3224.wav", "onoffCaption": "burping belching at 0.304-2.719 and woman laughing at 6.258-10.0", "frequencyCaption": "burping belching one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3271.wav", "onoffCaption": "door knocking at 2.047-4.422", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3294.wav", "onoffCaption": "cat meowing at 1.674-5.019 and spraying at 2.493-4.188", "frequencyCaption": "cat meowing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3395.wav", "onoffCaption": "whistling at 0.204-5.379, 7.724-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3401.wav", "onoffCaption": "sneeze at 0.283-4.783", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3454.wav", "onoffCaption": "cow mooing at 0.34-3.309", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3627.wav", "onoffCaption": "sneeze at 2.845-4.091", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3672.wav", "onoffCaption": "duck quacking at 2.203-4.203, 5.361-7.361", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3697.wav", "onoffCaption": "cow mooing at 1.882-5.18", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3769.wav", "onoffCaption": "dog barking at 2.579-4.579", "frequencyCaption": "dog barking one 
times"} +{"filepath": "data/multi_event_train/syn_3905.wav", "onoffCaption": "door knocking at 0.141-2.362, 2.877-5.098, 5.687-7.908", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_3950.wav", "onoffCaption": "whistling at 2.603-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_149.wav", "onoffCaption": "door knocking at 0.413-2.54, 3.538-5.665, 6.803-8.93 and burping belching at 6.659-9.282", "frequencyCaption": "door knocking three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_221.wav", "onoffCaption": "cow mooing at 2.969-6.267, 7.423-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_250.wav", "onoffCaption": "thump thud at 1.017-4.684, 5.695-9.362", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_274.wav", "onoffCaption": "tapping clicking clanking at 0.404-3.844, 5.186-8.626", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_291.wav", "onoffCaption": "dog barking at 1.244-3.244, 4.164-6.164 and door knocking at 3.598-6.168", "frequencyCaption": "dog barking two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_404.wav", "onoffCaption": "gunshot at 0.415-2.545, 4.002-6.132", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_451.wav", "onoffCaption": "car horn honking at 0.154-3.067, 3.794-6.707, 7.909-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_606.wav", "onoffCaption": "cow mooing at 2.907-7.887", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_622.wav", "onoffCaption": "thump thud at 3.172-7.622", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_677.wav", "onoffCaption": "car horn honking at 0.664-3.129, 4.357-7.014", 
"frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_692.wav", "onoffCaption": "door slamming at 0.005-1.396 and burping belching at 0.843-4.349, 4.994-7.088 and cat meowing at 6.136-7.136, 8.336-9.336", "frequencyCaption": "door slamming one times and burping belching two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_739.wav", "onoffCaption": "car horn honking at 1.773-4.12, 6.106-8.453 and gunshot at 2.478-4.478", "frequencyCaption": "car horn honking two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_789.wav", "onoffCaption": "train horn at 0.264-3.064, 3.64-6.44 and door slamming at 4.141-6.361, 7.801-10.0", "frequencyCaption": "train horn two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_900.wav", "onoffCaption": "door slamming at 0.098-1.098, 1.787-3.787, 4.856-7.735", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_955.wav", "onoffCaption": "thump thud at 2.224-5.891, 7.389-9.889", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_994.wav", "onoffCaption": "sneeze at 0.59-2.297", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_1134.wav", "onoffCaption": "cow mooing at 3.267-6.236", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1145.wav", "onoffCaption": "train horn at 3.352-6.832", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1161.wav", "onoffCaption": "cow mooing at 2.373-5.671, 7.577-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1184.wav", "onoffCaption": "burping belching at 2.861-8.462", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1336.wav", "onoffCaption": "cow mooing at 4.099-7.109, 7.72-10.0", "frequencyCaption": "cow mooing two times"} 
+{"filepath": "data/multi_event_train/syn_1347.wav", "onoffCaption": "woman laughing at 1.672-3.955", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1408.wav", "onoffCaption": "spraying at 0.161-0.736, 2.006-4.59, 6.215-7.296 and burping belching at 1.503-5.503 and sheep goat bleating at 6.745-8.745", "frequencyCaption": "spraying three times and burping belching one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1479.wav", "onoffCaption": "car horn honking at 2.125-5.779, 7.871-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1513.wav", "onoffCaption": "door knocking at 3.332-5.635, 6.643-9.518", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1537.wav", "onoffCaption": "explosion at 1.773-4.034, 5.15-7.411", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1562.wav", "onoffCaption": "whistling at 3.153-8.328", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1587.wav", "onoffCaption": "cat meowing at 1.159-2.743, 4.625-6.753", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1711.wav", "onoffCaption": "gunshot at 0.349-2.349, 3.25-5.25 and whistling at 7.288-10.0", "frequencyCaption": "gunshot two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1760.wav", "onoffCaption": "explosion at 2.27-5.138, 5.861-8.729", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1785.wav", "onoffCaption": "train horn at 1.794-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1833.wav", "onoffCaption": "thump thud at 0.115-2.454, 3.071-5.41", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1866.wav", "onoffCaption": "door knocking at 3.895-6.625", "frequencyCaption": 
"door knocking one times"} +{"filepath": "data/multi_event_train/syn_1883.wav", "onoffCaption": "door knocking at 0.406-3.253 and woman laughing at 0.635-7.369 and explosion at 1.903-6.903", "frequencyCaption": "door knocking one times and woman laughing one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1928.wav", "onoffCaption": "woman laughing at 0.004-2.372, 3.672-6.653", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1998.wav", "onoffCaption": "cat meowing at 0.728-2.283, 3.385-4.94, 5.621-7.176", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_3018.wav", "onoffCaption": "burping belching at 2.502-4.625, 6.121-8.244", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3103.wav", "onoffCaption": "thump thud at 1.211-4.878, 7.154-9.492", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3156.wav", "onoffCaption": "cat meowing at 1.931-3.071, 4.724-5.864 and door knocking at 2.857-7.559", "frequencyCaption": "cat meowing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3325.wav", "onoffCaption": "thump thud at 2.22-4.72 and dog barking at 3.589-5.589, 7.361-9.361", "frequencyCaption": "thump thud one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3354.wav", "onoffCaption": "gunshot at 0.26-2.26, 4.679-6.679", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3370.wav", "onoffCaption": "gunshot at 1.812-3.812", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3500.wav", "onoffCaption": "woman laughing at 0.391-3.091, 4.456-7.156", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3555.wav", "onoffCaption": "cow mooing at 1.331-4.3, 5.203-8.172", "frequencyCaption": "cow mooing two times"} +{"filepath": 
"data/multi_event_train/syn_3594.wav", "onoffCaption": "train horn at 0.651-4.051, 4.742-8.142", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3619.wav", "onoffCaption": "gunshot at 0.367-2.367", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3668.wav", "onoffCaption": "whistling at 2.959-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3702.wav", "onoffCaption": "gunshot at 1.811-3.811 and duck quacking at 7.641-9.641", "frequencyCaption": "gunshot one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3726.wav", "onoffCaption": "woman laughing at 1.491-3.728 and whistling at 2.496-6.98", "frequencyCaption": "woman laughing one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3773.wav", "onoffCaption": "dog barking at 1.003-3.003 and door knocking at 5.52-9.288", "frequencyCaption": "dog barking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3796.wav", "onoffCaption": "cat meowing at 0.365-1.781, 2.545-3.82", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3804.wav", "onoffCaption": "explosion at 3.305-8.305 and spraying at 6.953-7.557", "frequencyCaption": "explosion one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3851.wav", "onoffCaption": "spraying at 0.101-0.952 and whistling at 1.043-6.218, 7.564-10.0", "frequencyCaption": "spraying one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_138.wav", "onoffCaption": "train horn at 3.379-5.819, 6.721-8.914", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_177.wav", "onoffCaption": "car horn honking at 0.74-3.24, 5.316-7.829 and thump thud at 1.929-4.429", "frequencyCaption": "car horn honking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_188.wav", 
"onoffCaption": "woman laughing at 0.562-2.799 and sheep goat bleating at 2.676-4.676, 6.672-8.672", "frequencyCaption": "woman laughing one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_192.wav", "onoffCaption": "thump thud at 3.782-6.829", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_205.wav", "onoffCaption": "cow mooing at 0.72-3.702, 5.102-8.084", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_304.wav", "onoffCaption": "tapping clicking clanking at 3.198-6.638", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_420.wav", "onoffCaption": "door slamming at 0.344-1.597, 3.048-4.026 and thump thud at 7.971-10.0", "frequencyCaption": "door slamming two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_475.wav", "onoffCaption": "gunshot at 0.17-2.17, 3.09-5.09, 6.183-8.183", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_490.wav", "onoffCaption": "explosion at 0.558-5.558, 6.884-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_521.wav", "onoffCaption": "door knocking at 0.623-3.193 and door slamming at 6.292-6.792, 8.204-8.704", "frequencyCaption": "door knocking one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_649.wav", "onoffCaption": "door slamming at 3.421-4.594", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_653.wav", "onoffCaption": "explosion at 0.055-5.055, 7.24-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_748.wav", "onoffCaption": "whistling at 0.244-4.728 and explosion at 0.417-3.008 and door slamming at 1.002-2.393, 3.236-4.627", "frequencyCaption": "whistling one times and explosion one times and door slamming two times"} +{"filepath": 
"data/multi_event_train/syn_752.wav", "onoffCaption": "whistling at 1.609-9.994", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_825.wav", "onoffCaption": "thump thud at 3.622-6.122, 7.47-9.97", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_870.wav", "onoffCaption": "explosion at 0.987-4.856, 5.954-8.826", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_924.wav", "onoffCaption": "sneeze at 1.0-3.085, 4.232-6.317", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_971.wav", "onoffCaption": "car horn honking at 0.235-5.142", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1011.wav", "onoffCaption": "cow mooing at 1.805-4.774, 5.439-7.469", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1110.wav", "onoffCaption": "gunshot at 2.921-4.921, 7.411-9.411", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1262.wav", "onoffCaption": "train horn at 0.21-4.65, 6.639-8.799", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1278.wav", "onoffCaption": "gunshot at 3.554-5.554, 7.567-9.567", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1287.wav", "onoffCaption": "dog barking at 3.277-5.277, 6.629-8.629", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1363.wav", "onoffCaption": "spraying at 0.161-1.065, 3.194-4.369, 6.556-7.064", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1379.wav", "onoffCaption": "cat meowing at 3.426-4.692", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1386.wav", "onoffCaption": "door knocking at 1.508-3.668 and cow mooing at 5.206-8.216", "frequencyCaption": "door knocking one times and cow mooing one 
times"} +{"filepath": "data/multi_event_train/syn_1447.wav", "onoffCaption": "sneeze at 0.818-3.135, 3.983-6.3 and door knocking at 1.957-4.527", "frequencyCaption": "sneeze two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1546.wav", "onoffCaption": "spraying at 1.401-2.252, 3.819-4.67 and whistling at 1.789-6.273, 7.051-10.0", "frequencyCaption": "spraying two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_1634.wav", "onoffCaption": "burping belching at 0.231-2.266 and spraying at 5.82-6.57, 8.991-9.858", "frequencyCaption": "burping belching one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1735.wav", "onoffCaption": "tapping clicking clanking at 2.463-5.903", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1817.wav", "onoffCaption": "sneeze at 1.669-3.665, 5.139-8.104", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1842.wav", "onoffCaption": "door slamming at 2.786-3.786", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1858.wav", "onoffCaption": "burping belching at 1.238-3.361", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1916.wav", "onoffCaption": "thump thud at 0.025-4.475", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1943.wav", "onoffCaption": "car horn honking at 0.017-3.192, 3.948-7.123, 7.76-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_1959.wav", "onoffCaption": "door slamming at 3.143-5.369, 6.461-7.48", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3026.wav", "onoffCaption": "explosion at 0.682-5.682 and gunshot at 5.466-7.466", "frequencyCaption": "explosion one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3069.wav", 
"onoffCaption": "car horn honking at 3.111-6.765 and tapping clicking clanking at 4.73-8.17", "frequencyCaption": "car horn honking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3073.wav", "onoffCaption": "train horn at 1.238-5.438", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3096.wav", "onoffCaption": "burping belching at 0.356-3.535", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3127.wav", "onoffCaption": "gunshot at 2.644-5.15, 5.815-7.908", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3168.wav", "onoffCaption": "cow mooing at 3.298-6.28", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3172.wav", "onoffCaption": "sheep goat bleating at 0.162-2.162 and spraying at 4.646-5.497", "frequencyCaption": "sheep goat bleating one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3197.wav", "onoffCaption": "cow mooing at 0.258-3.268, 4.919-7.929", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3200.wav", "onoffCaption": "thump thud at 3.356-5.695, 6.815-9.154", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3301.wav", "onoffCaption": "car horn honking at 0.679-3.192 and sheep goat bleating at 0.684-2.684, 5.024-7.024 and gunshot at 0.815-2.815, 3.554-5.554", "frequencyCaption": "car horn honking one times and sheep goat bleating two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_3425.wav", "onoffCaption": "sneeze at 3.563-5.88", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3470.wav", "onoffCaption": "duck quacking at 2.5-4.5, 6.729-8.729", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3524.wav", "onoffCaption": "door knocking at 4.11-8.485", 
"frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3571.wav", "onoffCaption": "train horn at 2.494-5.134, 7.013-9.653", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3656.wav", "onoffCaption": "door knocking at 0.171-3.724 and door slamming at 7.536-8.685", "frequencyCaption": "door knocking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3757.wav", "onoffCaption": "woman laughing at 1.808-4.508 and door slamming at 7.615-9.094", "frequencyCaption": "woman laughing one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3820.wav", "onoffCaption": "gunshot at 0.478-2.478, 4.009-6.009", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3875.wav", "onoffCaption": "sheep goat bleating at 1.313-3.313, 5.182-7.182", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3890.wav", "onoffCaption": "gunshot at 3.266-5.266, 5.827-7.827", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3921.wav", "onoffCaption": "sheep goat bleating at 2.646-4.646", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_50.wav", "onoffCaption": "tapping clicking clanking at 1.23-4.67", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_122.wav", "onoffCaption": "dog barking at 0.436-2.436, 4.526-6.526", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_170.wav", "onoffCaption": "sneeze at 0.528-3.736, 4.834-8.042", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_195.wav", "onoffCaption": "thump thud at 2.18-4.642, 5.753-8.215", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_218.wav", "onoffCaption": "door slamming at 2.198-4.198", "frequencyCaption": 
"door slamming one times"} +{"filepath": "data/multi_event_train/syn_303.wav", "onoffCaption": "cow mooing at 3.336-8.316 and dog barking at 7.061-9.061", "frequencyCaption": "cow mooing one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_351.wav", "onoffCaption": "gunshot at 0.026-2.026 and whistling at 0.049-2.924 and duck quacking at 2.674-4.674, 6.611-8.611", "frequencyCaption": "gunshot one times and whistling one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_526.wav", "onoffCaption": "duck quacking at 2.365-4.365, 5.347-7.347", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_574.wav", "onoffCaption": "door knocking at 2.41-6.026, 6.947-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_591.wav", "onoffCaption": "dog barking at 0.373-2.373 and door knocking at 4.578-8.728", "frequencyCaption": "dog barking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_707.wav", "onoffCaption": "sneeze at 0.383-4.912 and duck quacking at 6.614-8.614", "frequencyCaption": "sneeze one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_755.wav", "onoffCaption": "thump thud at 0.247-4.697, 5.344-7.86", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_822.wav", "onoffCaption": "gunshot at 2.509-4.983, 7.46-9.934 and tapping clicking clanking at 3.058-6.498 and spraying at 3.322-4.103, 5.476-6.208, 7.466-8.116", "frequencyCaption": "gunshot two times and tapping clicking clanking one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_895.wav", "onoffCaption": "door slamming at 2.693-4.217, 4.832-6.356 and woman laughing at 3.667-6.752", "frequencyCaption": "door slamming two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_939.wav", "onoffCaption": "tapping clicking clanking at 0.706-4.146 
and sheep goat bleating at 2.877-4.877", "frequencyCaption": "tapping clicking clanking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1016.wav", "onoffCaption": "whistling at 0.256-8.641 and woman laughing at 0.274-2.372 and gunshot at 3.743-5.743", "frequencyCaption": "whistling one times and woman laughing one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1044.wav", "onoffCaption": "door knocking at 1.427-6.427 and thump thud at 2.18-6.63 and duck quacking at 4.935-6.935", "frequencyCaption": "door knocking one times and thump thud one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1237.wav", "onoffCaption": "sheep goat bleating at 1.542-3.542, 4.23-6.23 and thump thud at 3.688-6.188", "frequencyCaption": "sheep goat bleating two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1265.wav", "onoffCaption": "door slamming at 2.016-5.016", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1280.wav", "onoffCaption": "cat meowing at 0.119-1.479, 3.298-4.445, 5.477-6.752", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1412.wav", "onoffCaption": "duck quacking at 3.869-5.869, 7.87-9.87", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1440.wav", "onoffCaption": "whistling at 0.032-2.907, 5.337-8.212", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1509.wav", "onoffCaption": "sheep goat bleating at 2.97-6.89", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1633.wav", "onoffCaption": "door knocking at 0.593-3.13, 4.121-6.748, 7.286-10.0", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1661.wav", "onoffCaption": "door slamming at 0.077-1.096", "frequencyCaption": "door slamming one times"} +{"filepath": 
"data/multi_event_train/syn_1683.wav", "onoffCaption": "door slamming at 0.183-1.574, 2.15-3.001, 4.15-6.068 and explosion at 7.733-10.0", "frequencyCaption": "door slamming three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1684.wav", "onoffCaption": "train horn at 1.734-8.203", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1728.wav", "onoffCaption": "cow mooing at 3.139-6.121, 6.742-9.724", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1798.wav", "onoffCaption": "gunshot at 0.031-2.031, 3.728-5.728, 6.569-8.569", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1944.wav", "onoffCaption": "tapping clicking clanking at 2.026-5.466, 6.277-9.717", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3074.wav", "onoffCaption": "explosion at 0.526-2.59 and burping belching at 2.463-4.494, 5.45-7.481 and tapping clicking clanking at 4.892-8.332", "frequencyCaption": "explosion one times and burping belching two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3091.wav", "onoffCaption": "cow mooing at 0.669-5.649, 7.382-10.0 and door knocking at 1.429-3.781", "frequencyCaption": "cow mooing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3207.wav", "onoffCaption": "woman laughing at 3.018-6.594, 7.428-9.674", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3252.wav", "onoffCaption": "woman laughing at 2.935-6.987, 7.556-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3255.wav", "onoffCaption": "cow mooing at 2.968-5.978 and spraying at 5.505-6.255, 7.07-7.82, 9.091-9.841", "frequencyCaption": "cow mooing one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_3349.wav", "onoffCaption": "duck 
quacking at 4.337-6.337", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3422.wav", "onoffCaption": "door knocking at 0.853-4.603, 6.378-8.921", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3495.wav", "onoffCaption": "whistling at 0.504-2.733, 4.408-6.637", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3539.wav", "onoffCaption": "door slamming at 0.962-2.215, 3.921-5.174, 7.427-8.68 and thump thud at 2.092-6.542 and gunshot at 5.373-7.373", "frequencyCaption": "door slamming three times and thump thud one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3603.wav", "onoffCaption": "cat meowing at 1.309-2.88", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3604.wav", "onoffCaption": "sneeze at 2.882-5.199, 5.801-8.262", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3651.wav", "onoffCaption": "sheep goat bleating at 2.163-4.163, 5.115-7.115", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3718.wav", "onoffCaption": "door slamming at 0.076-1.591, 2.399-3.25, 3.84-5.131 and duck quacking at 2.94-4.94, 5.948-7.948", "frequencyCaption": "door slamming three times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3926.wav", "onoffCaption": "door slamming at 1.11-2.634, 3.498-5.022, 6.331-7.855 and gunshot at 2.786-4.786, 6.387-8.387 and spraying at 7.305-8.305", "frequencyCaption": "door slamming three times and gunshot two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3974.wav", "onoffCaption": "tapping clicking clanking at 1.133-4.573, 5.176-7.376 and burping belching at 3.032-7.032", "frequencyCaption": "tapping clicking clanking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3991.wav", "onoffCaption": "dog barking at 
0.342-2.342 and duck quacking at 6.157-8.157", "frequencyCaption": "dog barking one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_57.wav", "onoffCaption": "gunshot at 1.054-3.054 and door slamming at 5.275-7.471", "frequencyCaption": "gunshot one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_125.wav", "onoffCaption": "duck quacking at 1.445-3.445, 4.236-6.236, 7.055-9.055 and sneeze at 2.323-4.282", "frequencyCaption": "duck quacking three times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_257.wav", "onoffCaption": "car horn honking at 2.061-6.461, 7.737-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_356.wav", "onoffCaption": "gunshot at 0.745-2.745 and cow mooing at 4.159-8.588", "frequencyCaption": "gunshot one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_468.wav", "onoffCaption": "cow mooing at 3.409-6.378", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_472.wav", "onoffCaption": "spraying at 0.583-3.602", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_497.wav", "onoffCaption": "train horn at 0.289-4.409, 6.425-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_569.wav", "onoffCaption": "thump thud at 0.004-2.504, 4.066-6.566", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_573.wav", "onoffCaption": "tapping clicking clanking at 0.237-3.677, 5.378-7.737", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_596.wav", "onoffCaption": "thump thud at 1.237-4.284, 5.393-7.909", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_601.wav", "onoffCaption": "dog barking at 0.134-2.134 and burping belching at 5.233-10.0", "frequencyCaption": "dog barking one times and 
burping belching one times"} +{"filepath": "data/multi_event_train/syn_700.wav", "onoffCaption": "dog barking at 1.016-3.016, 5.055-7.055", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_838.wav", "onoffCaption": "car horn honking at 2.969-7.369", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_877.wav", "onoffCaption": "door knocking at 3.48-6.327, 6.917-8.966", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_888.wav", "onoffCaption": "duck quacking at 0.569-2.569, 3.902-5.902, 6.419-8.419", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_892.wav", "onoffCaption": "door slamming at 2.212-5.173, 6.664-7.642", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_976.wav", "onoffCaption": "train horn at 0.105-4.173, 5.738-9.806", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_989.wav", "onoffCaption": "cat meowing at 0.055-1.204, 3.495-4.507, 5.761-7.755", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_993.wav", "onoffCaption": "door knocking at 2.388-6.921 and sheep goat bleating at 2.785-6.081, 7.459-10.0", "frequencyCaption": "door knocking one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1043.wav", "onoffCaption": "cat meowing at 0.138-1.709, 2.9-5.761 and door knocking at 4.303-6.683, 7.227-9.607", "frequencyCaption": "cat meowing two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1059.wav", "onoffCaption": "car horn honking at 0.031-4.431, 4.972-6.972, 7.512-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_1142.wav", "onoffCaption": "tapping clicking clanking at 1.363-4.803, 5.875-8.084", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": 
"data/multi_event_train/syn_1158.wav", "onoffCaption": "door knocking at 0.327-2.827, 3.933-6.701 and car horn honking at 2.786-7.035", "frequencyCaption": "door knocking two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1230.wav", "onoffCaption": "thump thud at 0.0-3.667, 6.045-9.712", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1331.wav", "onoffCaption": "gunshot at 0.021-2.021 and explosion at 0.033-2.762, 5.092-7.821", "frequencyCaption": "gunshot one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_1415.wav", "onoffCaption": "door slamming at 0.772-1.577, 2.542-5.516 and sneeze at 1.092-2.592", "frequencyCaption": "door slamming two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1514.wav", "onoffCaption": "door knocking at 0.852-5.291, 6.978-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1666.wav", "onoffCaption": "tapping clicking clanking at 0.654-4.094", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1699.wav", "onoffCaption": "burping belching at 0.365-2.396, 4.748-7.307", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1767.wav", "onoffCaption": "door slamming at 2.159-3.276, 4.526-7.487", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1782.wav", "onoffCaption": "sheep goat bleating at 0.888-2.888, 4.833-6.833 and train horn at 5.074-7.748", "frequencyCaption": "sheep goat bleating two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1810.wav", "onoffCaption": "sneeze at 0.452-1.698 and duck quacking at 0.778-2.778, 3.694-5.694, 6.436-8.436", "frequencyCaption": "sneeze one times and duck quacking three times"} +{"filepath": "data/multi_event_train/syn_1911.wav", "onoffCaption": "woman laughing at 2.363-5.463, 
7.505-9.873", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3021.wav", "onoffCaption": "explosion at 0.157-5.157, 6.381-8.972", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3120.wav", "onoffCaption": "dog barking at 0.042-2.042 and woman laughing at 1.67-4.025", "frequencyCaption": "dog barking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3175.wav", "onoffCaption": "burping belching at 3.834-7.093, 7.803-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3248.wav", "onoffCaption": "explosion at 0.563-3.291", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3353.wav", "onoffCaption": "dog barking at 1.301-4.222, 5.025-7.946 and cat meowing at 4.497-7.738", "frequencyCaption": "dog barking two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3438.wav", "onoffCaption": "spraying at 1.37-2.151, 3.483-4.052, 4.616-5.467 and door knocking at 7.804-10.0", "frequencyCaption": "spraying three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3477.wav", "onoffCaption": "dog barking at 1.283-3.283, 5.184-7.184", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3488.wav", "onoffCaption": "burping belching at 0.015-3.559, 4.328-7.328, 7.932-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_3492.wav", "onoffCaption": "door knocking at 0.811-5.321 and dog barking at 2.454-4.454 and sneeze at 4.318-6.001", "frequencyCaption": "door knocking one times and dog barking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3576.wav", "onoffCaption": "thump thud at 0.352-4.802 and cat meowing at 7.774-9.722", "frequencyCaption": "thump thud one times and cat meowing one times"} +{"filepath": 
"data/multi_event_train/syn_3589.wav", "onoffCaption": "duck quacking at 0.293-2.293, 3.357-5.357, 6.355-8.355", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3593.wav", "onoffCaption": "dog barking at 0.036-2.036, 3.013-5.013, 5.719-7.719", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_3705.wav", "onoffCaption": "dog barking at 3.735-5.735", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3868.wav", "onoffCaption": "train horn at 0.332-4.772, 6.048-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3872.wav", "onoffCaption": "door slamming at 0.661-1.342, 3.734-4.415, 6.72-7.401", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3897.wav", "onoffCaption": "thump thud at 2.086-4.857", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3969.wav", "onoffCaption": "thump thud at 2.95-5.45, 6.105-8.605", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3973.wav", "onoffCaption": "car horn honking at 2.448-4.961, 6.029-8.542", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3996.wav", "onoffCaption": "burping belching at 2.047-4.082, 4.884-6.919, 7.802-9.837 and explosion at 2.284-7.284 and sneeze at 2.504-3.961, 4.923-6.38, 7.442-8.899", "frequencyCaption": "burping belching three times and explosion one times and sneeze three times"} +{"filepath": "data/multi_event_train/syn_18.wav", "onoffCaption": "thump thud at 0.402-2.63, 4.475-6.937", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_69.wav", "onoffCaption": "car horn honking at 1.273-5.114 and cat meowing at 1.976-3.517, 4.196-5.223", "frequencyCaption": "car horn honking one times and cat meowing two times"} +{"filepath": 
"data/multi_event_train/syn_202.wav", "onoffCaption": "sheep goat bleating at 3.609-5.609, 7.145-9.145", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_273.wav", "onoffCaption": "tapping clicking clanking at 0.069-3.509, 4.304-6.615", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_296.wav", "onoffCaption": "door slamming at 1.008-2.549, 4.839-6.604, 7.937-9.328", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_319.wav", "onoffCaption": "door slamming at 0.803-1.781", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_368.wav", "onoffCaption": "burping belching at 0.832-3.593, 4.807-7.037, 7.772-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_427.wav", "onoffCaption": "cow mooing at 0.73-3.699", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_456.wav", "onoffCaption": "sneeze at 0.319-1.864, 3.011-4.556 and train horn at 7.979-10.0", "frequencyCaption": "sneeze two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_625.wav", "onoffCaption": "door slamming at 1.746-2.863, 3.942-5.466", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_654.wav", "onoffCaption": "tapping clicking clanking at 0.273-3.713, 4.322-7.189, 7.877-10.0", "frequencyCaption": "tapping clicking clanking three times"} +{"filepath": "data/multi_event_train/syn_849.wav", "onoffCaption": "cow mooing at 2.184-5.482, 7.665-10.0 and woman laughing at 2.751-5.034", "frequencyCaption": "cow mooing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_923.wav", "onoffCaption": "tapping clicking clanking at 3.813-7.253, 7.927-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_952.wav", 
"onoffCaption": "gunshot at 0.715-2.885 and whistling at 0.92-9.305", "frequencyCaption": "gunshot one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1098.wav", "onoffCaption": "cow mooing at 0.964-3.933, 4.576-7.273", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1117.wav", "onoffCaption": "sheep goat bleating at 0.773-2.773, 3.983-5.983, 6.891-9.811", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1166.wav", "onoffCaption": "cow mooing at 2.671-5.653, 7.472-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1183.wav", "onoffCaption": "door slamming at 2.346-4.346, 5.646-7.646", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1315.wav", "onoffCaption": "sneeze at 1.326-2.938 and door slamming at 6.273-7.251, 8.082-9.06", "frequencyCaption": "sneeze one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1364.wav", "onoffCaption": "car horn honking at 1.483-5.883, 7.161-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1381.wav", "onoffCaption": "whistling at 0.727-8.598 and sneeze at 2.712-4.0", "frequencyCaption": "whistling one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1530.wav", "onoffCaption": "tapping clicking clanking at 2.798-6.238 and cow mooing at 2.997-6.007, 7.616-10.0", "frequencyCaption": "tapping clicking clanking one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1541.wav", "onoffCaption": "sheep goat bleating at 0.579-2.579 and cat meowing at 3.673-4.684", "frequencyCaption": "sheep goat bleating one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1580.wav", "onoffCaption": "woman laughing at 2.859-6.435, 7.53-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": 
"data/multi_event_train/syn_1629.wav", "onoffCaption": "dog barking at 0.638-2.638", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1658.wav", "onoffCaption": "spraying at 0.41-1.011, 3.235-3.836, 5.746-6.347 and thump thud at 6.685-9.147", "frequencyCaption": "spraying three times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1732.wav", "onoffCaption": "sheep goat bleating at 0.343-2.343, 3.836-5.836", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1743.wav", "onoffCaption": "explosion at 2.23-5.102, 6.265-9.265", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1834.wav", "onoffCaption": "explosion at 3.121-5.993, 6.705-9.577", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1845.wav", "onoffCaption": "cow mooing at 1.29-4.259, 6.477-8.876", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3104.wav", "onoffCaption": "burping belching at 0.345-4.345 and duck quacking at 1.432-3.432", "frequencyCaption": "burping belching one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3151.wav", "onoffCaption": "cat meowing at 0.059-1.144", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3190.wav", "onoffCaption": "sneeze at 0.959-5.488, 6.743-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3289.wav", "onoffCaption": "cat meowing at 0.933-4.174 and door knocking at 1.443-3.818 and dog barking at 1.738-3.738", "frequencyCaption": "cat meowing one times and door knocking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3306.wav", "onoffCaption": "tapping clicking clanking at 0.901-4.341 and spraying at 2.023-3.718, 4.646-5.513 and gunshot at 7.421-9.421", "frequencyCaption": "tapping clicking clanking one times and spraying 
two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3377.wav", "onoffCaption": "whistling at 1.018-3.027, 4.596-7.243 and sheep goat bleating at 1.491-3.491, 4.68-6.68", "frequencyCaption": "whistling two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3392.wav", "onoffCaption": "whistling at 2.377-5.352", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3449.wav", "onoffCaption": "car horn honking at 2.799-7.311 and spraying at 7.997-8.624", "frequencyCaption": "car horn honking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3507.wav", "onoffCaption": "door knocking at 1.768-6.143, 7.28-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3523.wav", "onoffCaption": "cat meowing at 2.864-3.874, 4.793-6.329, 7.107-8.107", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_3552.wav", "onoffCaption": "burping belching at 0.815-4.375, 5.148-8.069", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3721.wav", "onoffCaption": "dog barking at 0.194-2.194, 4.11-6.11 and sneeze at 2.888-4.847, 6.691-8.65", "frequencyCaption": "dog barking two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3750.wav", "onoffCaption": "duck quacking at 1.107-3.107, 3.755-5.755, 7.819-9.819", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3827.wav", "onoffCaption": "door slamming at 3.427-4.718, 6.79-8.181 and burping belching at 5.419-8.419", "frequencyCaption": "door slamming two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3856.wav", "onoffCaption": "sneeze at 2.238-4.197 and woman laughing at 6.756-9.361", "frequencyCaption": "sneeze one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_26.wav", "onoffCaption": 
"whistling at 2.265-9.298", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_154.wav", "onoffCaption": "cow mooing at 0.083-3.065, 4.203-6.641, 7.573-10.0", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/multi_event_train/syn_226.wav", "onoffCaption": "cow mooing at 3.057-6.355", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_327.wav", "onoffCaption": "duck quacking at 2.283-4.283, 6.514-8.514", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_403.wav", "onoffCaption": "sneeze at 3.343-4.577 and thump thud at 7.198-9.969", "frequencyCaption": "sneeze one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_419.wav", "onoffCaption": "explosion at 0.829-3.429 and spraying at 3.018-3.518, 4.092-4.592, 5.533-6.033", "frequencyCaption": "explosion one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_502.wav", "onoffCaption": "whistling at 0.199-8.584 and sneeze at 3.066-5.312", "frequencyCaption": "whistling one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_518.wav", "onoffCaption": "duck quacking at 0.924-2.924, 4.185-6.185, 7.586-9.586", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_670.wav", "onoffCaption": "tapping clicking clanking at 0.126-3.566, 4.832-8.272", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_695.wav", "onoffCaption": "thump thud at 0.106-4.024 and door slamming at 0.509-3.29", "frequencyCaption": "thump thud one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_771.wav", "onoffCaption": "whistling at 1.179-9.19", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_794.wav", "onoffCaption": "door slamming at 2.185-5.185", "frequencyCaption": "door slamming one times"} +{"filepath": 
"data/multi_event_train/syn_806.wav", "onoffCaption": "sheep goat bleating at 0.148-2.148, 3.428-5.428, 6.242-8.242 and dog barking at 1.14-3.14, 5.434-7.434 and spraying at 1.374-2.225", "frequencyCaption": "sheep goat bleating three times and dog barking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_907.wav", "onoffCaption": "door slamming at 0.681-1.486, 2.744-3.549, 4.715-5.52 and car horn honking at 2.057-6.457", "frequencyCaption": "door slamming three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1028.wav", "onoffCaption": "door knocking at 0.558-3.326, 4.653-7.472", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1032.wav", "onoffCaption": "gunshot at 0.97-3.243, 5.737-8.01 and whistling at 1.394-5.878, 6.976-9.839 and dog barking at 6.431-8.431", "frequencyCaption": "gunshot two times and whistling two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1129.wav", "onoffCaption": "duck quacking at 1.938-3.938, 4.687-6.687", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1133.wav", "onoffCaption": "door knocking at 0.162-2.662 and gunshot at 6.396-8.396", "frequencyCaption": "door knocking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1241.wav", "onoffCaption": "door knocking at 0.891-5.593, 6.815-9.127", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1340.wav", "onoffCaption": "tapping clicking clanking at 0.427-3.867 and train horn at 6.281-9.161", "frequencyCaption": "tapping clicking clanking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1464.wav", "onoffCaption": "sneeze at 1.81-4.127, 5.404-7.323 and dog barking at 2.707-4.707, 6.039-8.039", "frequencyCaption": "sneeze two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_1481.wav", "onoffCaption": 
"gunshot at 2.87-4.87, 6.198-8.198", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1565.wav", "onoffCaption": "explosion at 0.188-2.276, 3.785-5.873 and woman laughing at 1.189-4.577, 5.689-7.781", "frequencyCaption": "explosion two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1617.wav", "onoffCaption": "spraying at 4.123-6.707, 8.0-8.867", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1716.wav", "onoffCaption": "burping belching at 0.962-3.288, 5.002-7.328", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1861.wav", "onoffCaption": "duck quacking at 0.358-2.358, 4.545-6.545 and sneeze at 5.937-7.214", "frequencyCaption": "duck quacking two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1884.wav", "onoffCaption": "train horn at 0.134-2.934 and door knocking at 5.52-7.895", "frequencyCaption": "train horn one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1960.wav", "onoffCaption": "whistling at 2.883-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1985.wav", "onoffCaption": "door knocking at 1.766-3.926, 4.914-7.074", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3050.wav", "onoffCaption": "thump thud at 2.535-6.985", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3223.wav", "onoffCaption": "sneeze at 1.918-3.082", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3239.wav", "onoffCaption": "car horn honking at 1.927-6.834, 7.521-10.0 and door knocking at 3.368-8.368", "frequencyCaption": "car horn honking two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3276.wav", "onoffCaption": "thump thud at 0.898-3.398", "frequencyCaption": "thump thud one times"} +{"filepath": 
"data/multi_event_train/syn_3322.wav", "onoffCaption": "door slamming at 3.403-5.403, 7.253-9.253", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3338.wav", "onoffCaption": "door knocking at 0.462-2.65 and car horn honking at 6.561-10.0", "frequencyCaption": "door knocking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3406.wav", "onoffCaption": "sneeze at 0.145-2.064 and thump thud at 4.654-7.116", "frequencyCaption": "sneeze one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3620.wav", "onoffCaption": "train horn at 3.384-6.584", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3675.wav", "onoffCaption": "dog barking at 3.128-5.566, 7.331-9.769", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3690.wav", "onoffCaption": "sneeze at 0.221-1.515, 2.707-4.001, 4.655-5.949 and woman laughing at 4.131-6.736 and door slamming at 4.556-7.273", "frequencyCaption": "sneeze three times and woman laughing one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3774.wav", "onoffCaption": "woman laughing at 0.469-3.523, 4.471-7.24", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3791.wav", "onoffCaption": "gunshot at 2.794-4.794", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3803.wav", "onoffCaption": "car horn honking at 3.31-5.775", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3819.wav", "onoffCaption": "dog barking at 1.059-3.059, 4.623-7.061", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3902.wav", "onoffCaption": "sneeze at 2.138-3.384, 4.942-6.188, 8.009-9.255", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3918.wav", "onoffCaption": "explosion at 
0.429-3.301", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_73.wav", "onoffCaption": "cat meowing at 1.611-6.611", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_96.wav", "onoffCaption": "door knocking at 0.339-4.107, 5.594-8.094", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_101.wav", "onoffCaption": "spraying at 3.192-5.628, 6.912-9.348", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_140.wav", "onoffCaption": "burping belching at 0.155-2.385 and spraying at 1.861-3.594 and cat meowing at 3.753-5.941", "frequencyCaption": "burping belching one times and spraying one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_269.wav", "onoffCaption": "car horn honking at 0.696-5.208, 7.272-9.272", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_372.wav", "onoffCaption": "spraying at 3.375-3.883, 6.226-6.734", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_397.wav", "onoffCaption": "sneeze at 3.122-4.579 and gunshot at 6.347-8.347", "frequencyCaption": "sneeze one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_516.wav", "onoffCaption": "explosion at 2.108-4.636, 5.554-8.082 and cat meowing at 2.607-3.651, 5.572-6.581", "frequencyCaption": "explosion two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_557.wav", "onoffCaption": "cow mooing at 0.457-3.467", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_724.wav", "onoffCaption": "sneeze at 2.444-5.554, 6.077-7.371", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_812.wav", "onoffCaption": "door slamming at 2.125-3.144, 5.228-6.128", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_853.wav", 
"onoffCaption": "tapping clicking clanking at 0.486-3.926, 5.27-8.038 and spraying at 1.615-2.615, 3.253-4.51, 5.128-7.256", "frequencyCaption": "tapping clicking clanking two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_909.wav", "onoffCaption": "tapping clicking clanking at 0.702-4.142, 5.289-7.949", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_948.wav", "onoffCaption": "explosion at 3.11-6.95", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1067.wav", "onoffCaption": "thump thud at 0.112-3.159 and burping belching at 0.191-2.556, 4.378-6.804", "frequencyCaption": "thump thud one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1082.wav", "onoffCaption": "sneeze at 2.318-4.263, 5.955-7.9", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1199.wav", "onoffCaption": "dog barking at 1.241-6.878", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1214.wav", "onoffCaption": "sheep goat bleating at 2.417-4.417, 5.379-7.613 and cow mooing at 5.246-8.228", "frequencyCaption": "sheep goat bleating two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1431.wav", "onoffCaption": "thump thud at 0.131-3.178, 5.554-8.167", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1470.wav", "onoffCaption": "spraying at 0.07-1.803, 2.542-3.623, 4.2-4.722", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1495.wav", "onoffCaption": "thump thud at 0.204-3.871 and door knocking at 5.99-10.0", "frequencyCaption": "thump thud one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1642.wav", "onoffCaption": "woman laughing at 1.271-8.716", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1759.wav", 
"onoffCaption": "cat meowing at 3.734-5.044, 5.722-7.032, 7.879-9.189", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1935.wav", "onoffCaption": "door slamming at 0.387-1.068, 3.499-5.499, 6.392-8.392", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1974.wav", "onoffCaption": "train horn at 0.923-3.363, 4.373-6.429 and sneeze at 4.66-6.16", "frequencyCaption": "train horn two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1991.wav", "onoffCaption": "sneeze at 4.039-5.142, 5.76-7.288 and whistling at 5.589-7.598", "frequencyCaption": "sneeze two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3005.wav", "onoffCaption": "door slamming at 2.507-3.831, 4.569-7.052, 8.333-9.233", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3044.wav", "onoffCaption": "door knocking at 0.268-4.778, 5.999-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3293.wav", "onoffCaption": "sneeze at 0.238-3.886 and train horn at 0.981-4.221, 6.222-8.722 and whistling at 1.282-4.257, 4.777-7.669", "frequencyCaption": "sneeze one times and train horn two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_3388.wav", "onoffCaption": "cow mooing at 2.335-5.345, 7.12-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3412.wav", "onoffCaption": "spraying at 0.422-2.55, 3.472-5.6", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3453.wav", "onoffCaption": "dog barking at 0.309-2.309, 2.831-4.831, 5.437-7.437 and tapping clicking clanking at 2.146-5.586, 6.531-9.971", "frequencyCaption": "dog barking three times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3509.wav", "onoffCaption": "tapping clicking clanking at 0.602-4.042, 4.582-6.631 and 
burping belching at 6.007-8.237 and car horn honking at 6.487-8.952", "frequencyCaption": "tapping clicking clanking two times and burping belching one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3548.wav", "onoffCaption": "burping belching at 1.491-4.75, 5.696-8.955 and gunshot at 1.925-4.018 and duck quacking at 3.075-5.075", "frequencyCaption": "burping belching two times and gunshot one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3916.wav", "onoffCaption": "burping belching at 0.422-2.453, 4.46-7.46 and explosion at 4.672-7.544", "frequencyCaption": "burping belching two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3957.wav", "onoffCaption": "tapping clicking clanking at 0.344-3.784, 5.183-8.623 and dog barking at 3.175-5.175", "frequencyCaption": "tapping clicking clanking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_32.wav", "onoffCaption": "whistling at 1.978-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_67.wav", "onoffCaption": "sneeze at 1.634-3.947", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_82.wav", "onoffCaption": "sheep goat bleating at 2.09-4.09, 5.408-7.408 and door knocking at 3.653-6.055", "frequencyCaption": "sheep goat bleating two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_115.wav", "onoffCaption": "thump thud at 1.139-3.639, 4.976-7.438, 7.978-10.0", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_228.wav", "onoffCaption": "burping belching at 1.339-3.37, 4.917-7.12 and dog barking at 5.396-7.396, 7.946-9.946", "frequencyCaption": "burping belching two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_267.wav", "onoffCaption": "explosion at 0.424-2.517 and door knocking at 6.923-9.111", "frequencyCaption": "explosion one times 
and door knocking one times"} +{"filepath": "data/multi_event_train/syn_282.wav", "onoffCaption": "train horn at 2.266-4.746", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_298.wav", "onoffCaption": "door slamming at 0.022-0.703, 1.758-3.011", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_333.wav", "onoffCaption": "dog barking at 0.293-2.293 and door slamming at 4.141-4.946, 5.517-7.88, 8.508-9.189", "frequencyCaption": "dog barking one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_366.wav", "onoffCaption": "spraying at 2.509-3.017 and cat meowing at 6.585-8.169", "frequencyCaption": "spraying one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_383.wav", "onoffCaption": "door knocking at 0.352-3.727, 4.364-7.739", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_399.wav", "onoffCaption": "cow mooing at 2.658-5.627, 7.837-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_458.wav", "onoffCaption": "whistling at 1.057-9.068 and explosion at 4.286-7.158", "frequencyCaption": "whistling one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_543.wav", "onoffCaption": "cow mooing at 1.056-4.038, 5.843-8.812", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_631.wav", "onoffCaption": "gunshot at 2.915-4.915, 6.448-8.448", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_730.wav", "onoffCaption": "cat meowing at 3.413-4.562, 6.613-7.762", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_765.wav", "onoffCaption": "car horn honking at 0.945-4.786, 5.97-9.811", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_780.wav", "onoffCaption": "whistling at 0.72-8.09", "frequencyCaption": 
"whistling one times"} +{"filepath": "data/multi_event_train/syn_847.wav", "onoffCaption": "burping belching at 1.931-9.099", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1026.wav", "onoffCaption": "tapping clicking clanking at 2.614-6.054 and gunshot at 2.933-4.933, 6.04-8.04 and door slamming at 3.285-4.402", "frequencyCaption": "tapping clicking clanking one times and gunshot two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1073.wav", "onoffCaption": "train horn at 2.629-5.949, 6.477-9.797", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1096.wav", "onoffCaption": "door slamming at 2.215-4.215 and whistling at 7.117-10.0", "frequencyCaption": "door slamming one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1168.wav", "onoffCaption": "duck quacking at 0.384-2.384", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1200.wav", "onoffCaption": "tapping clicking clanking at 0.097-3.537, 4.07-6.583 and train horn at 3.145-6.679", "frequencyCaption": "tapping clicking clanking two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1255.wav", "onoffCaption": "woman laughing at 0.763-4.151 and cow mooing at 5.988-8.97", "frequencyCaption": "woman laughing one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1425.wav", "onoffCaption": "gunshot at 0.046-2.046, 2.925-4.925, 5.487-7.487", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1603.wav", "onoffCaption": "sneeze at 0.369-1.533", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_1656.wav", "onoffCaption": "dog barking at 0.624-2.624", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1718.wav", "onoffCaption": "woman laughing at 1.553-3.651", "frequencyCaption": "woman laughing one 
times"} +{"filepath": "data/multi_event_train/syn_1921.wav", "onoffCaption": "duck quacking at 0.196-2.196 and tapping clicking clanking at 4.003-7.443", "frequencyCaption": "duck quacking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3011.wav", "onoffCaption": "door knocking at 0.914-3.484 and sheep goat bleating at 2.889-4.889, 6.023-8.023", "frequencyCaption": "door knocking one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3237.wav", "onoffCaption": "door knocking at 2.281-4.53 and cow mooing at 7.956-10.0", "frequencyCaption": "door knocking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3262.wav", "onoffCaption": "cat meowing at 1.237-4.582, 5.9-7.26 and spraying at 2.83-3.562, 4.76-5.492, 7.812-8.544 and burping belching at 4.274-7.476", "frequencyCaption": "cat meowing two times and spraying three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3278.wav", "onoffCaption": "duck quacking at 3.644-5.644, 7.565-9.565", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3287.wav", "onoffCaption": "sheep goat bleating at 0.66-2.66, 3.969-5.969", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3363.wav", "onoffCaption": "dog barking at 2.469-4.469, 4.984-6.984 and sneeze at 5.02-6.52", "frequencyCaption": "dog barking two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3379.wav", "onoffCaption": "cow mooing at 2.657-5.667, 6.343-9.006", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3447.wav", "onoffCaption": "car horn honking at 0.089-3.308 and cow mooing at 0.324-4.753, 6.437-9.419", "frequencyCaption": "car horn honking one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3634.wav", "onoffCaption": "cow mooing at 0.997-5.426, 6.53-10.0 and 
duck quacking at 2.647-4.647, 5.454-7.454", "frequencyCaption": "cow mooing two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3661.wav", "onoffCaption": "dog barking at 1.926-4.326, 6.456-8.456", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3684.wav", "onoffCaption": "duck quacking at 2.617-4.617, 5.935-7.935", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3735.wav", "onoffCaption": "door knocking at 1.259-5.634 and tapping clicking clanking at 3.553-6.993", "frequencyCaption": "door knocking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3858.wav", "onoffCaption": "door knocking at 2.954-5.203, 5.759-8.008", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3943.wav", "onoffCaption": "spraying at 0.374-2.81, 4.757-5.507", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_28.wav", "onoffCaption": "thump thud at 1.425-4.472, 5.233-8.28", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_98.wav", "onoffCaption": "sneeze at 3.369-5.482 and sheep goat bleating at 7.725-9.725", "frequencyCaption": "sneeze one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_232.wav", "onoffCaption": "train horn at 2.722-9.191", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_329.wav", "onoffCaption": "woman laughing at 0.281-2.386, 3.414-5.66 and gunshot at 0.769-2.769", "frequencyCaption": "woman laughing two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_417.wav", "onoffCaption": "tapping clicking clanking at 0.114-3.554 and door slamming at 5.506-7.03, 8.007-9.007", "frequencyCaption": "tapping clicking clanking one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_442.wav", "onoffCaption": 
"car horn honking at 0.262-3.175 and spraying at 2.001-3.763, 4.393-4.915", "frequencyCaption": "car horn honking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_559.wav", "onoffCaption": "door slamming at 0.382-2.84, 4.65-6.129, 7.21-9.436 and spraying at 1.394-2.144, 3.499-4.249, 5.971-6.721", "frequencyCaption": "door slamming three times and spraying three times"} +{"filepath": "data/multi_event_train/syn_664.wav", "onoffCaption": "car horn honking at 2.537-7.049", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_681.wav", "onoffCaption": "duck quacking at 3.168-5.168, 6.271-8.271", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_808.wav", "onoffCaption": "explosion at 3.702-7.542", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_913.wav", "onoffCaption": "explosion at 1.558-4.43, 5.523-8.276", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_946.wav", "onoffCaption": "duck quacking at 0.315-2.315, 4.783-6.783 and dog barking at 4.076-6.076", "frequencyCaption": "duck quacking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1069.wav", "onoffCaption": "train horn at 1.208-4.448, 5.077-7.082", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1127.wav", "onoffCaption": "car horn honking at 0.006-2.792 and spraying at 6.323-6.845, 7.424-7.946", "frequencyCaption": "car horn honking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1172.wav", "onoffCaption": "gunshot at 0.688-2.688, 4.069-6.069", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1197.wav", "onoffCaption": "train horn at 0.206-3.006, 4.395-7.195", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1301.wav", "onoffCaption": "cow mooing at 1.996-6.425", 
"frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1354.wav", "onoffCaption": "cow mooing at 1.681-4.663", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1524.wav", "onoffCaption": "dog barking at 0.558-2.558, 3.395-5.395, 6.074-8.074 and sheep goat bleating at 1.962-3.962", "frequencyCaption": "dog barking three times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1571.wav", "onoffCaption": "thump thud at 0.702-2.93, 4.42-7.262 and explosion at 1.683-4.555 and burping belching at 2.168-5.427", "frequencyCaption": "thump thud two times and explosion one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1594.wav", "onoffCaption": "thump thud at 0.537-4.987, 6.972-9.172", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1619.wav", "onoffCaption": "dog barking at 2.648-5.086, 5.864-8.302 and woman laughing at 6.145-8.227", "frequencyCaption": "dog barking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1702.wav", "onoffCaption": "cat meowing at 2.721-6.066, 6.608-7.693, 8.471-9.618", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1757.wav", "onoffCaption": "gunshot at 1.51-3.51, 5.146-7.386 and thump thud at 2.06-4.56", "frequencyCaption": "gunshot two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1796.wav", "onoffCaption": "tapping clicking clanking at 0.181-3.621 and duck quacking at 2.364-4.364 and train horn at 7.722-10.0", "frequencyCaption": "tapping clicking clanking one times and duck quacking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1820.wav", "onoffCaption": "cat meowing at 0.661-4.006, 4.781-6.047, 6.6-7.685 and woman laughing at 6.484-9.569", "frequencyCaption": "cat meowing three times and woman laughing one times"} +{"filepath": 
"data/multi_event_train/syn_1875.wav", "onoffCaption": "dog barking at 2.881-4.881 and cow mooing at 7.22-10.0", "frequencyCaption": "dog barking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1890.wav", "onoffCaption": "spraying at 0.707-1.648, 3.552-4.493, 6.603-7.544", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3110.wav", "onoffCaption": "cat meowing at 0.442-2.436 and duck quacking at 1.45-3.45, 3.969-5.969, 6.517-8.517", "frequencyCaption": "cat meowing one times and duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3145.wav", "onoffCaption": "train horn at 0.327-6.796 and door knocking at 6.15-8.525", "frequencyCaption": "train horn one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3336.wav", "onoffCaption": "thump thud at 2.128-4.628, 5.384-8.155", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3347.wav", "onoffCaption": "explosion at 3.829-7.669", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3386.wav", "onoffCaption": "door knocking at 2.413-6.029, 6.842-9.217", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3408.wav", "onoffCaption": "explosion at 3.363-8.363", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3513.wav", "onoffCaption": "woman laughing at 1.221-5.273, 6.213-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3546.wav", "onoffCaption": "gunshot at 2.824-4.824", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3711.wav", "onoffCaption": "spraying at 0.063-0.585, 1.438-1.96, 3.397-3.919 and cow mooing at 1.311-4.293, 6.776-9.397", "frequencyCaption": "spraying three times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3760.wav", "onoffCaption": "duck quacking at 
0.042-2.042, 3.248-5.248, 6.411-8.411", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3785.wav", "onoffCaption": "tapping clicking clanking at 0.561-4.001, 6.477-9.917", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3817.wav", "onoffCaption": "spraying at 0.006-0.61", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_3842.wav", "onoffCaption": "sneeze at 0.021-1.521, 2.134-3.634", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3959.wav", "onoffCaption": "burping belching at 1.098-4.098, 5.432-7.873", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_59.wav", "onoffCaption": "train horn at 1.154-5.484 and cat meowing at 8.009-9.021", "frequencyCaption": "train horn one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_164.wav", "onoffCaption": "burping belching at 0.151-4.151, 6.609-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_181.wav", "onoffCaption": "sneeze at 1.324-4.972, 6.303-9.951", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_216.wav", "onoffCaption": "thump thud at 0.386-2.886, 4.342-6.391 and door slamming at 0.64-1.779", "frequencyCaption": "thump thud two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_243.wav", "onoffCaption": "gunshot at 0.792-2.792, 4.535-6.535, 7.287-9.287", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_358.wav", "onoffCaption": "door knocking at 2.31-6.843", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_429.wav", "onoffCaption": "dog barking at 0.504-2.504, 3.032-5.032 and sheep goat bleating at 7.803-9.803", "frequencyCaption": "dog barking two times and sheep goat bleating one times"} +{"filepath": 
"data/multi_event_train/syn_433.wav", "onoffCaption": "duck quacking at 0.816-2.816 and door knocking at 5.455-9.205", "frequencyCaption": "duck quacking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_466.wav", "onoffCaption": "duck quacking at 0.463-2.463, 3.794-5.794, 7.209-9.209", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_483.wav", "onoffCaption": "whistling at 3.824-6.799, 7.476-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_528.wav", "onoffCaption": "tapping clicking clanking at 2.682-6.122, 7.378-9.544", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_598.wav", "onoffCaption": "whistling at 0.363-2.592 and woman laughing at 6.028-8.311", "frequencyCaption": "whistling one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_615.wav", "onoffCaption": "whistling at 0.199-2.428", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_640.wav", "onoffCaption": "thump thud at 0.872-4.79, 5.882-9.8", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_836.wav", "onoffCaption": "whistling at 1.112-6.226, 7.367-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_879.wav", "onoffCaption": "door knocking at 2.646-5.702, 7.465-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_937.wav", "onoffCaption": "dog barking at 4.0-6.0, 7.139-9.139", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_962.wav", "onoffCaption": "cat meowing at 0.143-5.143, 6.242-10.0 and car horn honking at 1.548-3.895", "frequencyCaption": "cat meowing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_987.wav", "onoffCaption": "car horn honking at 0.823-4.477, 6.669-10.0", 
"frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1018.wav", "onoffCaption": "car horn honking at 0.255-4.767, 5.748-8.386", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1103.wav", "onoffCaption": "car horn honking at 3.214-7.536", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1156.wav", "onoffCaption": "whistling at 3.274-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1325.wav", "onoffCaption": "sneeze at 2.663-5.124, 6.447-8.908 and gunshot at 3.303-5.303, 6.37-8.37", "frequencyCaption": "sneeze two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1370.wav", "onoffCaption": "cow mooing at 0.829-5.258, 6.489-9.114", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1395.wav", "onoffCaption": "duck quacking at 2.874-4.874, 7.229-9.229", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1500.wav", "onoffCaption": "thump thud at 1.523-4.294 and burping belching at 3.024-6.283", "frequencyCaption": "thump thud one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1555.wav", "onoffCaption": "whistling at 0.923-3.798, 5.507-7.736 and gunshot at 2.133-4.133", "frequencyCaption": "whistling two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1668.wav", "onoffCaption": "burping belching at 0.939-4.141 and car horn honking at 2.97-5.788, 7.057-9.875", "frequencyCaption": "burping belching one times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1726.wav", "onoffCaption": "explosion at 2.333-7.333", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1773.wav", "onoffCaption": "woman laughing at 2.701-10.0", "frequencyCaption": "woman laughing one times"} +{"filepath": 
"data/multi_event_train/syn_1804.wav", "onoffCaption": "explosion at 0.379-4.379, 6.146-8.899 and cat meowing at 0.504-1.516 and whistling at 3.877-9.052", "frequencyCaption": "explosion two times and cat meowing one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1851.wav", "onoffCaption": "car horn honking at 3.222-6.008, 7.231-9.239", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1950.wav", "onoffCaption": "whistling at 3.046-8.221 and sheep goat bleating at 7.003-9.003", "frequencyCaption": "whistling one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3060.wav", "onoffCaption": "door knocking at 1.078-5.911, 7.79-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3134.wav", "onoffCaption": "thump thud at 2.316-4.655, 6.441-8.89", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3161.wav", "onoffCaption": "thump thud at 1.467-4.514, 5.451-8.498", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3184.wav", "onoffCaption": "thump thud at 0.155-2.617, 4.858-7.467 and dog barking at 6.665-8.665", "frequencyCaption": "thump thud two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3209.wav", "onoffCaption": "car horn honking at 0.261-4.102, 5.871-9.712", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3312.wav", "onoffCaption": "door knocking at 0.796-3.108 and thump thud at 6.247-10.0", "frequencyCaption": "door knocking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3436.wav", "onoffCaption": "door knocking at 2.451-6.219, 6.734-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3479.wav", "onoffCaption": "car horn honking at 0.4-2.865", "frequencyCaption": "car horn honking one times"} 
+{"filepath": "data/multi_event_train/syn_3537.wav", "onoffCaption": "duck quacking at 0.27-2.27 and car horn honking at 6.138-10.0", "frequencyCaption": "duck quacking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3562.wav", "onoffCaption": "sneeze at 2.315-3.592, 5.084-6.372, 7.543-10.0", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3587.wav", "onoffCaption": "car horn honking at 1.174-3.687", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3744.wav", "onoffCaption": "tapping clicking clanking at 1.635-5.075, 5.921-9.361", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3829.wav", "onoffCaption": "duck quacking at 0.57-2.57, 3.69-5.69", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3833.wav", "onoffCaption": "duck quacking at 2.933-4.933, 6.082-8.082", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3866.wav", "onoffCaption": "cow mooing at 0.249-3.259 and sheep goat bleating at 2.129-4.129, 4.999-6.999", "frequencyCaption": "cow mooing one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3883.wav", "onoffCaption": "whistling at 0.366-8.377", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3928.wav", "onoffCaption": "gunshot at 2.58-4.58, 6.96-8.96 and door knocking at 3.521-5.898", "frequencyCaption": "gunshot two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3998.wav", "onoffCaption": "car horn honking at 1.23-4.817, 7.133-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_16.wav", "onoffCaption": "woman laughing at 1.302-3.894, 4.898-7.49", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_43.wav", "onoffCaption": 
"sheep goat bleating at 1.381-3.381, 4.024-6.942, 7.88-9.88", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_131.wav", "onoffCaption": "train horn at 2.615-5.975", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_259.wav", "onoffCaption": "dog barking at 1.087-3.087, 5.583-7.583", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_317.wav", "onoffCaption": "door slamming at 0.245-2.01, 3.859-5.624 and explosion at 1.731-6.731", "frequencyCaption": "door slamming two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_342.wav", "onoffCaption": "woman laughing at 0.341-2.907, 4.104-6.67, 7.981-10.0 and train horn at 3.333-5.973", "frequencyCaption": "woman laughing three times and train horn one times"} +{"filepath": "data/multi_event_train/syn_499.wav", "onoffCaption": "woman laughing at 0.549-3.634, 4.963-7.2", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_532.wav", "onoffCaption": "woman laughing at 2.264-4.856, 5.681-7.7", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_567.wav", "onoffCaption": "whistling at 0.302-8.052", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_582.wav", "onoffCaption": "sheep goat bleating at 0.147-2.147, 4.118-6.118", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_714.wav", "onoffCaption": "door slamming at 1.142-3.142, 5.231-7.231", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_741.wav", "onoffCaption": "explosion at 2.483-5.483, 5.99-8.99", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_863.wav", "onoffCaption": "spraying at 2.737-4.432, 5.492-7.187, 7.813-9.508", "frequencyCaption": "spraying three times"} +{"filepath": 
"data/multi_event_train/syn_886.wav", "onoffCaption": "dog barking at 0.055-2.055, 3.364-5.364, 6.632-8.632 and sneeze at 2.661-4.978", "frequencyCaption": "dog barking three times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_978.wav", "onoffCaption": "duck quacking at 3.758-5.758, 6.667-8.667", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1002.wav", "onoffCaption": "cat meowing at 2.9-5.804, 8.273-9.583", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1057.wav", "onoffCaption": "train horn at 0.616-3.056 and woman laughing at 2.221-5.502", "frequencyCaption": "train horn one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1119.wav", "onoffCaption": "gunshot at 0.328-2.328, 3.264-5.264, 6.146-8.146", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1224.wav", "onoffCaption": "sheep goat bleating at 1.04-3.04", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1271.wav", "onoffCaption": "woman laughing at 3.563-6.658, 7.737-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1293.wav", "onoffCaption": "burping belching at 1.074-4.276, 5.175-7.481", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1294.wav", "onoffCaption": "dog barking at 0.663-2.663, 3.258-5.258 and explosion at 0.759-5.759 and sheep goat bleating at 1.279-3.279, 5.169-7.169", "frequencyCaption": "dog barking two times and explosion one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1388.wav", "onoffCaption": "whistling at 0.114-8.125 and gunshot at 1.695-3.695, 4.819-6.819 and explosion at 4.022-7.022", "frequencyCaption": "whistling one times and gunshot two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1401.wav", "onoffCaption": 
"thump thud at 0.379-2.841, 3.369-5.708, 6.811-9.748", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_1454.wav", "onoffCaption": "door knocking at 2.104-4.941 and explosion at 5.967-10.0", "frequencyCaption": "door knocking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1627.wav", "onoffCaption": "thump thud at 0.558-3.058, 4.154-6.977", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1672.wav", "onoffCaption": "cat meowing at 2.314-7.314 and spraying at 5.588-8.048", "frequencyCaption": "cat meowing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1697.wav", "onoffCaption": "woman laughing at 0.865-3.634, 4.773-7.73", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1769.wav", "onoffCaption": "tapping clicking clanking at 0.115-3.555 and duck quacking at 5.928-7.928 and dog barking at 6.357-8.795", "frequencyCaption": "tapping clicking clanking one times and duck quacking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1905.wav", "onoffCaption": "thump thud at 2.283-4.783, 5.933-8.433 and sneeze at 7.176-9.095", "frequencyCaption": "thump thud two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3035.wav", "onoffCaption": "whistling at 1.36-6.535", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3085.wav", "onoffCaption": "tapping clicking clanking at 2.948-6.388, 7.912-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3213.wav", "onoffCaption": "door slamming at 0.257-1.736 and whistling at 0.27-2.279, 3.595-6.47", "frequencyCaption": "door slamming one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_3214.wav", "onoffCaption": "thump thud at 2.382-4.721, 5.845-8.607", "frequencyCaption": "thump thud two times"} 
+{"filepath": "data/multi_event_train/syn_3246.wav", "onoffCaption": "tapping clicking clanking at 0.331-3.771 and cow mooing at 6.725-10.0", "frequencyCaption": "tapping clicking clanking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3308.wav", "onoffCaption": "woman laughing at 0.073-5.112 and duck quacking at 0.649-2.649", "frequencyCaption": "woman laughing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3463.wav", "onoffCaption": "dog barking at 0.095-2.095, 3.294-5.294, 6.96-8.96", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_3486.wav", "onoffCaption": "sheep goat bleating at 0.432-3.432, 4.38-6.38", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3578.wav", "onoffCaption": "dog barking at 0.095-2.495, 3.497-5.935, 7.9-9.9", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_3610.wav", "onoffCaption": "explosion at 0.757-3.757, 5.449-8.449", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3642.wav", "onoffCaption": "door knocking at 0.246-2.746, 5.236-7.736", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3645.wav", "onoffCaption": "tapping clicking clanking at 1.919-5.359", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3759.wav", "onoffCaption": "door knocking at 2.919-5.649, 6.984-9.048", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3899.wav", "onoffCaption": "woman laughing at 2.366-5.135, 6.523-9.277", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3932.wav", "onoffCaption": "woman laughing at 0.413-2.696, 3.687-6.456, 7.42-9.645", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_3967.wav", 
"onoffCaption": "woman laughing at 0.22-2.312, 3.688-5.78", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3982.wav", "onoffCaption": "whistling at 0.296-5.796, 6.441-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_11.wav", "onoffCaption": "spraying at 0.048-0.675, 2.079-3.163 and explosion at 6.554-10.0", "frequencyCaption": "spraying two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_44.wav", "onoffCaption": "sheep goat bleating at 0.24-2.24, 3.277-5.277, 7.394-9.394", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_163.wav", "onoffCaption": "duck quacking at 0.386-2.386, 3.148-5.148 and sheep goat bleating at 3.949-5.949", "frequencyCaption": "duck quacking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_186.wav", "onoffCaption": "door slamming at 0.044-1.161 and burping belching at 1.036-4.316", "frequencyCaption": "door slamming one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_310.wav", "onoffCaption": "sneeze at 1.983-4.386, 5.995-7.495", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_345.wav", "onoffCaption": "spraying at 1.07-3.462 and explosion at 4.971-9.971", "frequencyCaption": "spraying one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_535.wav", "onoffCaption": "woman laughing at 2.988-5.343, 6.095-8.681", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_608.wav", "onoffCaption": "explosion at 2.457-5.329", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_713.wav", "onoffCaption": "duck quacking at 0.237-2.237, 3.979-5.979, 7.934-9.934", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_746.wav", "onoffCaption": "train horn at 0.123-4.453, 
5.099-9.429", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_831.wav", "onoffCaption": "dog barking at 0.341-2.341, 2.889-4.889, 6.503-8.503", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_881.wav", "onoffCaption": "tapping clicking clanking at 0.683-4.123 and thump thud at 7.869-10.0", "frequencyCaption": "tapping clicking clanking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1005.wav", "onoffCaption": "door knocking at 1.384-7.444 and explosion at 5.109-8.109", "frequencyCaption": "door knocking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1050.wav", "onoffCaption": "explosion at 2.575-4.639, 5.936-8.753", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1223.wav", "onoffCaption": "sheep goat bleating at 1.401-3.401, 5.028-7.028", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1276.wav", "onoffCaption": "gunshot at 2.945-4.945, 6.205-8.706", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1338.wav", "onoffCaption": "tapping clicking clanking at 0.123-3.563, 4.576-8.016 and sheep goat bleating at 6.188-8.188", "frequencyCaption": "tapping clicking clanking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1453.wav", "onoffCaption": "train horn at 2.239-5.119, 5.847-8.727", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1548.wav", "onoffCaption": "thump thud at 0.102-3.149, 3.878-6.469", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1620.wav", "onoffCaption": "gunshot at 0.078-2.552 and thump thud at 0.946-4.864", "frequencyCaption": "gunshot one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1675.wav", "onoffCaption": "door knocking at 2.094-4.469, 
5.66-8.035", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1690.wav", "onoffCaption": "door slamming at 1.034-3.517 and spraying at 7.495-8.099", "frequencyCaption": "door slamming one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1957.wav", "onoffCaption": "sneeze at 1.883-3.59, 4.837-6.544, 8.083-9.79", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3067.wav", "onoffCaption": "thump thud at 0.31-2.81 and burping belching at 6.33-9.091", "frequencyCaption": "thump thud one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3082.wav", "onoffCaption": "cow mooing at 0.949-3.959, 5.766-7.948", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3199.wav", "onoffCaption": "thump thud at 1.786-4.833, 6.026-9.073", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3241.wav", "onoffCaption": "sheep goat bleating at 2.492-4.492, 5.259-7.259, 7.956-9.956", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3431.wav", "onoffCaption": "woman laughing at 0.558-3.258, 3.954-6.039, 6.909-9.007 and whistling at 2.627-7.111", "frequencyCaption": "woman laughing three times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3617.wav", "onoffCaption": "train horn at 0.479-6.948, 7.832-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3935.wav", "onoffCaption": "sneeze at 1.497-3.958, 4.871-6.197", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_136.wav", "onoffCaption": "thump thud at 0.761-3.532 and whistling at 5.796-10.0", "frequencyCaption": "thump thud one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_244.wav", "onoffCaption": "spraying at 0.842-1.709 and door slamming at 2.747-3.247", "frequencyCaption": 
"spraying one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_461.wav", "onoffCaption": "cat meowing at 2.228-3.978 and explosion at 3.017-5.024, 6.181-8.188 and cow mooing at 5.927-8.909", "frequencyCaption": "cat meowing one times and explosion two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_484.wav", "onoffCaption": "car horn honking at 1.94-5.781", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_560.wav", "onoffCaption": "dog barking at 2.223-4.223, 5.485-7.485", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_585.wav", "onoffCaption": "spraying at 3.005-3.609, 4.518-6.646, 7.52-9.215", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_612.wav", "onoffCaption": "dog barking at 0.012-2.012, 4.228-6.228, 6.945-8.945", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_864.wav", "onoffCaption": "explosion at 1.183-4.183 and woman laughing at 7.821-10.0", "frequencyCaption": "explosion one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_965.wav", "onoffCaption": "dog barking at 2.009-4.009, 5.762-7.762 and explosion at 2.108-4.861, 5.845-8.598", "frequencyCaption": "dog barking two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_980.wav", "onoffCaption": "cat meowing at 0.306-1.446, 2.819-3.959", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1151.wav", "onoffCaption": "sneeze at 0.567-3.181 and explosion at 1.075-6.075", "frequencyCaption": "sneeze one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1406.wav", "onoffCaption": "sheep goat bleating at 1.684-3.684, 4.252-6.252, 7.758-9.758", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1507.wav", "onoffCaption": "cat meowing at 
0.787-2.328, 4.605-5.915 and burping belching at 1.025-6.149, 7.732-9.857", "frequencyCaption": "cat meowing two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1791.wav", "onoffCaption": "duck quacking at 0.18-2.18 and cat meowing at 0.29-3.531, 4.935-7.682", "frequencyCaption": "duck quacking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1803.wav", "onoffCaption": "whistling at 2.821-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1819.wav", "onoffCaption": "sneeze at 0.143-2.389, 2.913-4.832, 5.475-7.588", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_1902.wav", "onoffCaption": "dog barking at 2.405-4.405, 6.495-8.495", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1918.wav", "onoffCaption": "burping belching at 3.973-6.973", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3028.wav", "onoffCaption": "sneeze at 0.602-1.879", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3032.wav", "onoffCaption": "woman laughing at 1.827-5.215, 6.908-9.263", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3129.wav", "onoffCaption": "spraying at 1.764-2.828, 4.27-5.334 and door knocking at 6.965-9.502", "frequencyCaption": "spraying two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3133.wav", "onoffCaption": "woman laughing at 1.628-3.836, 5.97-8.178", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3340.wav", "onoffCaption": "door slamming at 0.33-2.526 and cow mooing at 6.04-9.022", "frequencyCaption": "door slamming one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3464.wav", "onoffCaption": "dog barking at 3.184-5.184, 6.269-8.269", "frequencyCaption": "dog barking two 
times"} +{"filepath": "data/multi_event_train/syn_3481.wav", "onoffCaption": "explosion at 1.66-4.528, 6.089-8.957", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3565.wav", "onoffCaption": "cow mooing at 0.862-3.872 and spraying at 7.02-7.624, 9.258-9.862", "frequencyCaption": "cow mooing one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3580.wav", "onoffCaption": "whistling at 0.212-7.611", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3716.wav", "onoffCaption": "thump thud at 3.076-7.526", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3861.wav", "onoffCaption": "whistling at 0.139-7.889", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3884.wav", "onoffCaption": "cat meowing at 3.525-8.525", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3960.wav", "onoffCaption": "cow mooing at 3.13-6.099, 6.661-9.63", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3985.wav", "onoffCaption": "thump thud at 1.04-4.958", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_108.wav", "onoffCaption": "cow mooing at 0.556-4.985 and train horn at 0.71-8.91 and woman laughing at 2.098-4.466, 5.962-8.33", "frequencyCaption": "cow mooing one times and train horn one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_179.wav", "onoffCaption": "gunshot at 2.115-4.115, 5.617-7.617", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_211.wav", "onoffCaption": "burping belching at 0.658-6.602, 7.664-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_434.wav", "onoffCaption": "explosion at 2.431-5.16 and train horn at 7.14-10.0", "frequencyCaption": "explosion one times and train horn one times"} 
+{"filepath": "data/multi_event_train/syn_647.wav", "onoffCaption": "sheep goat bleating at 1.751-3.751", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_709.wav", "onoffCaption": "cow mooing at 0.149-3.447, 4.752-7.721", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_930.wav", "onoffCaption": "duck quacking at 2.399-4.399, 5.505-7.505 and woman laughing at 3.444-5.526, 6.964-9.448 and dog barking at 4.535-6.535", "frequencyCaption": "duck quacking two times and woman laughing two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_941.wav", "onoffCaption": "cow mooing at 0.379-5.359, 7.153-9.591", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1104.wav", "onoffCaption": "door slamming at 0.251-2.016, 2.613-4.378, 5.27-7.035 and gunshot at 2.093-4.093, 5.224-7.243 and sheep goat bleating at 2.203-4.203, 6.075-8.075", "frequencyCaption": "door slamming three times and gunshot two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1190.wav", "onoffCaption": "woman laughing at 0.835-3.035 and spraying at 6.648-7.223", "frequencyCaption": "woman laughing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1239.wav", "onoffCaption": "woman laughing at 0.599-3.185, 4.736-7.341", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1289.wav", "onoffCaption": "car horn honking at 3.261-6.047, 7.282-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1322.wav", "onoffCaption": "duck quacking at 3.022-5.022, 6.303-8.303", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1377.wav", "onoffCaption": "sheep goat bleating at 0.256-2.256 and sneeze at 4.422-6.105, 7.36-9.043", "frequencyCaption": "sheep goat bleating one times and sneeze two times"} 
+{"filepath": "data/multi_event_train/syn_1392.wav", "onoffCaption": "car horn honking at 0.604-3.117, 5.468-7.981", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1449.wav", "onoffCaption": "explosion at 2.052-4.313, 6.672-8.933", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1552.wav", "onoffCaption": "burping belching at 0.716-3.716 and duck quacking at 5.025-7.025, 7.592-9.592", "frequencyCaption": "burping belching one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1721.wav", "onoffCaption": "cow mooing at 1.991-4.973, 5.974-8.943", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1774.wav", "onoffCaption": "door slamming at 1.63-2.863, 4.075-6.075, 6.681-9.11", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1827.wav", "onoffCaption": "woman laughing at 3.327-5.919", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1856.wav", "onoffCaption": "sneeze at 2.355-4.969", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3098.wav", "onoffCaption": "car horn honking at 1.715-6.037", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3117.wav", "onoffCaption": "sneeze at 0.541-2.604, 4.626-6.689 and sheep goat bleating at 2.158-4.158, 5.39-7.39", "frequencyCaption": "sneeze two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3166.wav", "onoffCaption": "spraying at 2.954-3.805", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_3183.wav", "onoffCaption": "sheep goat bleating at 0.991-2.991, 3.74-5.74, 6.273-8.273", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3315.wav", "onoffCaption": "duck quacking at 0.019-2.019, 3.177-5.177, 6.468-8.468", 
"frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3530.wav", "onoffCaption": "woman laughing at 2.851-5.443", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3541.wav", "onoffCaption": "spraying at 0.143-0.793, 1.737-2.588, 3.538-4.27 and train horn at 5.735-10.0", "frequencyCaption": "spraying three times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3658.wav", "onoffCaption": "cat meowing at 0.634-2.529, 3.868-5.763 and explosion at 3.879-6.879, 7.905-10.0", "frequencyCaption": "cat meowing two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_3743.wav", "onoffCaption": "thump thud at 2.308-6.758", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3834.wav", "onoffCaption": "sneeze at 1.737-4.397 and woman laughing at 6.52-9.106", "frequencyCaption": "sneeze one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3845.wav", "onoffCaption": "tapping clicking clanking at 3.386-6.826 and door slamming at 6.172-8.63", "frequencyCaption": "tapping clicking clanking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_260.wav", "onoffCaption": "car horn honking at 0.679-3.497, 4.414-6.414, 7.4-9.747 and thump thud at 0.765-3.104", "frequencyCaption": "car horn honking three times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_285.wav", "onoffCaption": "cat meowing at 2.662-3.674", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_410.wav", "onoffCaption": "sneeze at 2.922-4.918", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_445.wav", "onoffCaption": "burping belching at 2.349-4.443 and sheep goat bleating at 7.468-9.468", "frequencyCaption": "burping belching one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_636.wav", 
"onoffCaption": "thump thud at 0.085-3.752, 4.485-8.152", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_778.wav", "onoffCaption": "cat meowing at 0.073-1.968, 4.277-5.463", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_914.wav", "onoffCaption": "cat meowing at 2.239-3.266, 3.834-4.861, 5.407-6.434", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1120.wav", "onoffCaption": "sheep goat bleating at 3.176-5.176, 6.418-8.418", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1175.wav", "onoffCaption": "spraying at 0.156-2.284", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1248.wav", "onoffCaption": "woman laughing at 0.652-3.447, 4.195-6.99 and burping belching at 2.568-6.591, 7.652-10.0", "frequencyCaption": "woman laughing two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1306.wav", "onoffCaption": "woman laughing at 0.786-3.154, 5.194-7.562 and spraying at 1.837-2.412, 4.389-7.386 and explosion at 5.217-8.211", "frequencyCaption": "woman laughing two times and spraying two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1353.wav", "onoffCaption": "woman laughing at 2.632-4.832 and tapping clicking clanking at 7.798-10.0", "frequencyCaption": "woman laughing one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1438.wav", "onoffCaption": "dog barking at 0.127-5.764 and tapping clicking clanking at 7.741-10.0", "frequencyCaption": "dog barking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1488.wav", "onoffCaption": "woman laughing at 0.084-3.365, 4.111-7.392", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1523.wav", "onoffCaption": "cat meowing at 3.057-4.628, 5.333-6.904, 
8.188-9.759", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1576.wav", "onoffCaption": "woman laughing at 0.377-2.602", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1593.wav", "onoffCaption": "duck quacking at 2.419-4.419, 5.123-7.123, 7.708-9.708", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_1705.wav", "onoffCaption": "gunshot at 0.188-2.188, 3.006-5.006, 6.0-8.0 and cow mooing at 2.515-5.813, 6.954-9.655", "frequencyCaption": "gunshot three times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1750.wav", "onoffCaption": "thump thud at 0.121-4.039, 5.53-8.03", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1872.wav", "onoffCaption": "cat meowing at 0.257-1.267 and train horn at 5.374-7.814", "frequencyCaption": "cat meowing one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1897.wav", "onoffCaption": "cow mooing at 3.107-8.087", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1969.wav", "onoffCaption": "duck quacking at 0.218-2.218", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3059.wav", "onoffCaption": "burping belching at 0.083-2.286", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3142.wav", "onoffCaption": "woman laughing at 0.185-2.885, 4.611-7.311 and burping belching at 2.382-5.822 and duck quacking at 5.807-7.807", "frequencyCaption": "woman laughing two times and burping belching one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3364.wav", "onoffCaption": "door slamming at 1.137-3.333, 4.292-6.488, 7.604-9.8", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3381.wav", "onoffCaption": "duck quacking at 2.558-4.558 and dog barking at 
3.089-5.089, 6.993-8.993", "frequencyCaption": "duck quacking one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3514.wav", "onoffCaption": "sneeze at 1.47-6.47, 7.74-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3629.wav", "onoffCaption": "duck quacking at 2.888-4.888, 7.107-9.107", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3699.wav", "onoffCaption": "woman laughing at 0.287-3.568, 4.142-6.944, 7.835-9.884", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_3732.wav", "onoffCaption": "cow mooing at 0.364-3.662, 4.801-7.605", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3782.wav", "onoffCaption": "spraying at 0.427-2.887, 4.194-5.451", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3810.wav", "onoffCaption": "thump thud at 2.015-5.682, 7.857-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_35.wav", "onoffCaption": "thump thud at 3.337-5.676, 7.377-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_60.wav", "onoffCaption": "cat meowing at 0.035-1.619 and explosion at 0.384-3.256, 3.9-6.772 and spraying at 3.01-5.47", "frequencyCaption": "cat meowing one times and explosion two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_147.wav", "onoffCaption": "gunshot at 0.792-2.792, 3.335-5.335", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_235.wav", "onoffCaption": "cat meowing at 0.778-1.787, 2.553-3.565 and gunshot at 2.067-4.067, 6.151-8.151", "frequencyCaption": "cat meowing two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_334.wav", "onoffCaption": "duck quacking at 3.078-5.078", "frequencyCaption": "duck quacking one times"} +{"filepath": 
"data/multi_event_train/syn_361.wav", "onoffCaption": "car horn honking at 2.942-5.289, 5.9-8.413", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_384.wav", "onoffCaption": "duck quacking at 1.589-3.589, 4.206-6.206", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_511.wav", "onoffCaption": "woman laughing at 0.627-2.91 and thump thud at 6.446-9.217", "frequencyCaption": "woman laughing one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_663.wav", "onoffCaption": "whistling at 0.402-4.886 and dog barking at 2.751-4.751, 6.775-8.775 and door slamming at 8.642-9.142", "frequencyCaption": "whistling one times and dog barking two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_679.wav", "onoffCaption": "sneeze at 0.263-2.326, 3.042-5.105, 6.243-8.306", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_686.wav", "onoffCaption": "cat meowing at 0.012-3.357, 5.746-7.478", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_737.wav", "onoffCaption": "thump thud at 3.081-7.531", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_762.wav", "onoffCaption": "car horn honking at 3.016-7.528", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_787.wav", "onoffCaption": "sneeze at 0.267-2.506, 3.642-5.881 and cat meowing at 1.741-6.101", "frequencyCaption": "sneeze two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_815.wav", "onoffCaption": "dog barking at 0.838-2.838, 4.348-6.348 and woman laughing at 5.774-7.982", "frequencyCaption": "dog barking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1021.wav", "onoffCaption": "explosion at 2.342-5.342, 5.92-8.92 and burping belching at 2.438-5.438", "frequencyCaption": "explosion two 
times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1074.wav", "onoffCaption": "burping belching at 2.126-4.233, 5.913-8.02", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1207.wav", "onoffCaption": "sheep goat bleating at 0.427-2.427, 3.863-5.863, 6.741-8.741", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1252.wav", "onoffCaption": "thump thud at 2.779-5.55, 6.397-8.569", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1349.wav", "onoffCaption": "burping belching at 3.22-5.251, 5.977-8.943", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1477.wav", "onoffCaption": "door knocking at 0.762-5.058", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1492.wav", "onoffCaption": "train horn at 0.546-3.706", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1539.wav", "onoffCaption": "whistling at 0.69-5.174, 5.721-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1589.wav", "onoffCaption": "cow mooing at 1.966-4.935, 6.103-9.072", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1604.wav", "onoffCaption": "sneeze at 0.952-5.481, 6.461-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1651.wav", "onoffCaption": "thump thud at 0.031-4.481, 5.06-7.831 and cow mooing at 1.949-6.929 and gunshot at 2.458-4.458, 5.594-7.594", "frequencyCaption": "thump thud two times and cow mooing one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1868.wav", "onoffCaption": "dog barking at 2.736-4.736, 5.262-7.262", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1973.wav", "onoffCaption": "sheep goat bleating at 0.478-2.478", 
"frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1996.wav", "onoffCaption": "sheep goat bleating at 0.337-2.337, 4.071-6.071", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3043.wav", "onoffCaption": "duck quacking at 1.343-3.343 and door slamming at 6.658-8.576", "frequencyCaption": "duck quacking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3158.wav", "onoffCaption": "spraying at 2.079-3.841, 4.379-6.141, 7.114-8.876", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3230.wav", "onoffCaption": "car horn honking at 0.711-5.111 and burping belching at 7.655-10.0", "frequencyCaption": "car horn honking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3265.wav", "onoffCaption": "gunshot at 0.842-2.861, 3.712-5.731, 6.886-8.905", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3280.wav", "onoffCaption": "thump thud at 0.098-4.548 and train horn at 1.037-4.807, 5.772-8.572", "frequencyCaption": "thump thud one times and train horn two times"} +{"filepath": "data/multi_event_train/syn_3331.wav", "onoffCaption": "explosion at 0.213-3.213, 4.115-6.28 and gunshot at 7.473-9.473", "frequencyCaption": "explosion two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3415.wav", "onoffCaption": "tapping clicking clanking at 1.663-5.103 and thump thud at 1.668-4.168, 5.582-8.082", "frequencyCaption": "tapping clicking clanking one times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_3633.wav", "onoffCaption": "cow mooing at 0.348-4.777", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3666.wav", "onoffCaption": "whistling at 3.006-7.49", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3683.wav", "onoffCaption": "tapping 
clicking clanking at 0.355-3.795, 6.028-8.443", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3728.wav", "onoffCaption": "door knocking at 0.613-2.993 and sheep goat bleating at 1.374-3.374, 4.594-6.594", "frequencyCaption": "door knocking one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3767.wav", "onoffCaption": "cow mooing at 1.682-4.651, 6.711-9.68", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3798.wav", "onoffCaption": "sneeze at 2.406-6.054, 7.169-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3911.wav", "onoffCaption": "explosion at 3.713-6.713, 7.614-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_5000.wav", "onoffCaption": "woman laughing at 2.643-4.868, 7.346-9.571", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_85.wav", "onoffCaption": "thump thud at 0.092-4.542, 5.186-9.636", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_112.wav", "onoffCaption": "door slamming at 2.357-3.474, 4.082-6.0", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_175.wav", "onoffCaption": "whistling at 0.475-8.713", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_190.wav", "onoffCaption": "burping belching at 2.79-5.969", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_248.wav", "onoffCaption": "whistling at 0.668-3.643, 6.008-8.983", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_306.wav", "onoffCaption": "whistling at 2.191-9.535", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_353.wav", "onoffCaption": "explosion at 1.105-6.105, 7.952-9.971", "frequencyCaption": "explosion two 
times"} +{"filepath": "data/multi_event_train/syn_438.wav", "onoffCaption": "sheep goat bleating at 1.945-3.945", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_523.wav", "onoffCaption": "explosion at 2.55-5.141, 5.651-8.242", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_544.wav", "onoffCaption": "gunshot at 1.072-3.242, 3.961-6.002, 7.293-9.293", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_705.wav", "onoffCaption": "train horn at 0.471-5.329", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_750.wav", "onoffCaption": "cow mooing at 2.139-5.149, 5.874-8.836", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_827.wav", "onoffCaption": "tapping clicking clanking at 0.111-3.551, 5.819-8.222 and whistling at 3.405-8.58 and explosion at 5.426-8.154", "frequencyCaption": "tapping clicking clanking two times and whistling one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_840.wav", "onoffCaption": "gunshot at 0.056-2.056 and whistling at 4.934-10.0", "frequencyCaption": "gunshot one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1013.wav", "onoffCaption": "door slamming at 0.266-0.766, 1.741-3.659, 4.964-6.964 and explosion at 1.158-6.158", "frequencyCaption": "door slamming three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1091.wav", "onoffCaption": "explosion at 0.702-3.302", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1108.wav", "onoffCaption": "door knocking at 0.744-3.047, 4.096-6.399", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1235.wav", "onoffCaption": "explosion at 0.214-2.967", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1260.wav", "onoffCaption": "train 
horn at 1.145-3.945 and explosion at 6.009-8.609", "frequencyCaption": "train horn one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1285.wav", "onoffCaption": "thump thud at 0.028-2.799 and gunshot at 0.633-2.633, 4.968-6.968", "frequencyCaption": "thump thud one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1422.wav", "onoffCaption": "whistling at 1.541-6.716", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1445.wav", "onoffCaption": "gunshot at 2.192-4.192, 6.123-8.123", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1636.wav", "onoffCaption": "cow mooing at 0.692-3.674, 4.245-7.07 and explosion at 1.708-4.576, 6.62-9.614 and door slamming at 7.173-8.312", "frequencyCaption": "cow mooing two times and explosion two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1663.wav", "onoffCaption": "thump thud at 1.73-3.958, 5.73-8.425", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1686.wav", "onoffCaption": "spraying at 1.212-3.648, 4.274-6.71, 7.857-10.0", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1778.wav", "onoffCaption": "dog barking at 2.24-4.24 and explosion at 6.124-10.0", "frequencyCaption": "dog barking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1926.wav", "onoffCaption": "whistling at 3.395-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1941.wav", "onoffCaption": "whistling at 0.213-7.868 and gunshot at 3.4-5.4", "frequencyCaption": "whistling one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3016.wav", "onoffCaption": "cow mooing at 2.11-5.408", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3071.wav", "onoffCaption": "car horn honking at 0.372-2.837, 5.051-7.551", 
"frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3094.wav", "onoffCaption": "gunshot at 2.875-4.875", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3202.wav", "onoffCaption": "duck quacking at 0.628-2.628, 3.29-5.29, 6.192-8.192", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3257.wav", "onoffCaption": "car horn honking at 0.974-3.321, 5.028-7.375 and train horn at 3.119-5.759, 6.861-9.328", "frequencyCaption": "car horn honking two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_3319.wav", "onoffCaption": "sneeze at 3.802-5.036, 6.027-8.488", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3427.wav", "onoffCaption": "spraying at 0.506-2.966, 4.117-5.85, 7.285-9.895 and woman laughing at 1.012-4.588", "frequencyCaption": "spraying three times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3440.wav", "onoffCaption": "car horn honking at 1.313-5.713, 7.865-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3497.wav", "onoffCaption": "duck quacking at 1.589-3.589, 4.21-6.21, 7.047-9.047", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3601.wav", "onoffCaption": "spraying at 0.049-1.133 and burping belching at 5.237-8.237", "frequencyCaption": "spraying one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3654.wav", "onoffCaption": "sneeze at 2.713-4.325", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3838.wav", "onoffCaption": "train horn at 2.023-7.738", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3923.wav", "onoffCaption": "gunshot at 0.039-2.209, 3.821-5.821", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3944.wav", 
"onoffCaption": "sheep goat bleating at 0.704-2.704, 4.835-6.835 and cat meowing at 1.039-2.252, 3.829-5.245", "frequencyCaption": "sheep goat bleating two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_48.wav", "onoffCaption": "door knocking at 1.798-3.925, 6.24-8.864", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_52.wav", "onoffCaption": "cat meowing at 0.243-5.243 and whistling at 1.921-4.15, 5.629-8.103", "frequencyCaption": "cat meowing one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_120.wav", "onoffCaption": "duck quacking at 2.114-4.114, 4.766-6.766, 7.77-9.77", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_252.wav", "onoffCaption": "gunshot at 0.236-2.236, 4.669-6.71", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_349.wav", "onoffCaption": "gunshot at 2.895-4.895, 7.074-9.074 and duck quacking at 3.758-5.758", "frequencyCaption": "gunshot two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_422.wav", "onoffCaption": "spraying at 2.662-5.054, 5.777-8.169", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_477.wav", "onoffCaption": "dog barking at 2.616-4.616, 5.85-7.85", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_488.wav", "onoffCaption": "thump thud at 0.294-4.744 and explosion at 2.961-5.833", "frequencyCaption": "thump thud one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_492.wav", "onoffCaption": "burping belching at 3.015-6.515, 7.914-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_539.wav", "onoffCaption": "duck quacking at 0.626-2.626 and gunshot at 0.647-2.647 and sheep goat bleating at 6.015-8.015", "frequencyCaption": "duck quacking one times and gunshot one times and sheep goat 
bleating one times"} +{"filepath": "data/multi_event_train/syn_576.wav", "onoffCaption": "cow mooing at 0.017-2.986", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_589.wav", "onoffCaption": "sneeze at 0.227-2.34", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_593.wav", "onoffCaption": "spraying at 0.736-1.386, 2.43-4.89 and burping belching at 2.475-5.734 and car horn honking at 3.11-7.51", "frequencyCaption": "spraying two times and burping belching one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_604.wav", "onoffCaption": "sneeze at 0.064-2.381, 4.864-7.181 and woman laughing at 1.889-4.658, 5.873-8.11", "frequencyCaption": "sneeze two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_868.wav", "onoffCaption": "duck quacking at 1.107-3.107, 5.382-7.382", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_872.wav", "onoffCaption": "sheep goat bleating at 1.92-4.92", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_897.wav", "onoffCaption": "woman laughing at 2.093-7.132, 7.959-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_969.wav", "onoffCaption": "thump thud at 0.204-3.871, 4.373-6.774 and duck quacking at 0.295-2.295, 4.429-6.429", "frequencyCaption": "thump thud two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_973.wav", "onoffCaption": "woman laughing at 2.152-4.636, 7.017-9.501", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_996.wav", "onoffCaption": "door knocking at 1.037-3.127 and train horn at 4.574-7.054", "frequencyCaption": "door knocking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1009.wav", "onoffCaption": "explosion at 0.047-5.047, 6.829-10.0", "frequencyCaption": 
"explosion two times"} +{"filepath": "data/multi_event_train/syn_1046.wav", "onoffCaption": "thump thud at 2.605-7.055", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1112.wav", "onoffCaption": "dog barking at 0.813-3.251", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1147.wav", "onoffCaption": "door slamming at 0.008-0.986, 2.768-3.746 and burping belching at 6.192-9.192", "frequencyCaption": "door slamming two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1334.wav", "onoffCaption": "burping belching at 2.724-9.892", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1410.wav", "onoffCaption": "spraying at 2.671-3.298, 4.313-4.94, 5.78-6.407", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1511.wav", "onoffCaption": "cow mooing at 2.906-5.875, 6.784-9.753", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1544.wav", "onoffCaption": "tapping clicking clanking at 0.031-3.471, 4.249-7.055", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1679.wav", "onoffCaption": "whistling at 0.412-5.912", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1762.wav", "onoffCaption": "sneeze at 0.149-5.149", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_1787.wav", "onoffCaption": "tapping clicking clanking at 3.695-7.135", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1815.wav", "onoffCaption": "cat meowing at 2.792-3.978, 4.638-6.388", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1840.wav", "onoffCaption": "whistling at 0.127-7.877 and duck quacking at 0.626-2.626, 4.514-6.514", "frequencyCaption": "whistling one times and duck quacking 
two times"} +{"filepath": "data/multi_event_train/syn_1914.wav", "onoffCaption": "duck quacking at 0.36-2.36 and spraying at 5.948-6.68, 7.206-7.938, 9.061-9.793", "frequencyCaption": "duck quacking one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_3024.wav", "onoffCaption": "whistling at 1.51-4.385, 4.941-7.136 and sheep goat bleating at 4.617-6.617", "frequencyCaption": "whistling two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3125.wav", "onoffCaption": "cow mooing at 0.525-3.535 and sneeze at 7.273-10.0", "frequencyCaption": "cow mooing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3170.wav", "onoffCaption": "thump thud at 2.4-6.775 and cow mooing at 3.142-7.571", "frequencyCaption": "thump thud one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3195.wav", "onoffCaption": "duck quacking at 3.632-5.632, 6.157-8.157", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3356.wav", "onoffCaption": "dog barking at 0.004-2.004 and sneeze at 0.554-2.867", "frequencyCaption": "dog barking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3468.wav", "onoffCaption": "tapping clicking clanking at 1.654-5.094, 6.11-9.55", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3472.wav", "onoffCaption": "thump thud at 0.086-3.133, 4.191-7.238 and cow mooing at 2.851-6.149, 7.782-10.0", "frequencyCaption": "thump thud two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3526.wav", "onoffCaption": "burping belching at 2.329-5.329, 5.967-8.967 and spraying at 3.37-5.498", "frequencyCaption": "burping belching two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3569.wav", "onoffCaption": "woman laughing at 3.645-6.717", "frequencyCaption": "woman laughing one times"} +{"filepath": 
"data/multi_event_train/syn_3573.wav", "onoffCaption": "tapping clicking clanking at 0.078-3.518 and woman laughing at 2.07-4.438", "frequencyCaption": "tapping clicking clanking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3596.wav", "onoffCaption": "gunshot at 0.42-2.42, 3.855-5.855 and whistling at 0.821-3.796, 5.538-7.962", "frequencyCaption": "gunshot two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_3700.wav", "onoffCaption": "thump thud at 2.566-6.484", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3822.wav", "onoffCaption": "tapping clicking clanking at 0.026-3.466, 4.038-7.478 and sneeze at 0.281-2.594 and whistling at 0.577-8.588", "frequencyCaption": "tapping clicking clanking two times and sneeze one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3877.wav", "onoffCaption": "sheep goat bleating at 2.689-5.985, 7.249-10.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3888.wav", "onoffCaption": "gunshot at 0.011-2.011, 4.076-6.076 and woman laughing at 2.145-4.353, 5.515-7.798 and burping belching at 3.798-6.321, 7.51-9.581", "frequencyCaption": "gunshot two times and woman laughing two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_3892.wav", "onoffCaption": "whistling at 0.003-5.178, 6.434-10.0 and gunshot at 0.281-2.281, 4.664-6.664", "frequencyCaption": "whistling two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_3939.wav", "onoffCaption": "door knocking at 2.256-5.103, 6.83-8.99", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3976.wav", "onoffCaption": "sheep goat bleating at 4.316-6.316, 7.624-9.624", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3989.wav", "onoffCaption": "duck quacking at 0.753-2.753, 4.201-6.201", 
"frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3993.wav", "onoffCaption": "sheep goat bleating at 0.539-3.619, 5.059-8.139", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_39.wav", "onoffCaption": "explosion at 0.263-2.327, 4.015-6.079", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_89.wav", "onoffCaption": "sheep goat bleating at 1.399-3.399, 4.145-6.145, 6.832-9.296", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_207.wav", "onoffCaption": "train horn at 0.007-4.188 and cat meowing at 0.737-2.714", "frequencyCaption": "train horn one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_276.wav", "onoffCaption": "dog barking at 0.676-2.676 and sheep goat bleating at 1.893-3.893, 4.654-7.441", "frequencyCaption": "dog barking one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_293.wav", "onoffCaption": "spraying at 3.538-4.795", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_338.wav", "onoffCaption": "tapping clicking clanking at 3.456-6.896", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_388.wav", "onoffCaption": "car horn honking at 0.262-4.662, 6.687-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_406.wav", "onoffCaption": "sheep goat bleating at 1.038-3.038, 4.725-6.725, 7.491-9.491", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_453.wav", "onoffCaption": "tapping clicking clanking at 0.529-3.969, 5.003-7.612", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_548.wav", "onoffCaption": "burping belching at 3.305-8.305", "frequencyCaption": "burping belching one times"} +{"filepath": 
"data/multi_event_train/syn_620.wav", "onoffCaption": "spraying at 1.449-1.971, 3.051-3.573, 4.44-4.962", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_651.wav", "onoffCaption": "explosion at 0.475-5.396, 6.197-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_819.wav", "onoffCaption": "cow mooing at 0.503-3.485, 4.402-7.384 and dog barking at 3.328-5.328, 6.489-8.489", "frequencyCaption": "cow mooing two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_902.wav", "onoffCaption": "explosion at 0.945-5.945, 7.669-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_926.wav", "onoffCaption": "woman laughing at 1.915-5.196, 6.014-9.295", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_957.wav", "onoffCaption": "spraying at 3.156-3.783, 4.327-4.896, 5.659-6.743", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1078.wav", "onoffCaption": "dog barking at 2.572-4.572", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1136.wav", "onoffCaption": "sneeze at 1.816-3.761 and woman laughing at 6.146-9.722", "frequencyCaption": "sneeze one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1163.wav", "onoffCaption": "thump thud at 0.616-3.663, 4.265-6.765 and cat meowing at 1.658-6.658, 7.892-8.904", "frequencyCaption": "thump thud two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1186.wav", "onoffCaption": "tapping clicking clanking at 2.584-6.024, 7.048-9.723", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1310.wav", "onoffCaption": "cat meowing at 0.11-1.122, 1.672-3.404, 4.651-7.141", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1345.wav", "onoffCaption": 
"tapping clicking clanking at 0.045-3.485, 4.395-7.835", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1361.wav", "onoffCaption": "whistling at 2.651-8.484", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1384.wav", "onoffCaption": "burping belching at 1.138-3.959 and explosion at 7.473-10.0", "frequencyCaption": "burping belching one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1535.wav", "onoffCaption": "cat meowing at 0.403-1.616, 2.396-3.951", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1560.wav", "onoffCaption": "dog barking at 0.055-2.055, 2.803-4.803, 6.123-8.123", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_1585.wav", "onoffCaption": "car horn honking at 1.4-5.649", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1608.wav", "onoffCaption": "tapping clicking clanking at 0.833-4.273 and sneeze at 1.635-2.738", "frequencyCaption": "tapping clicking clanking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1713.wav", "onoffCaption": "burping belching at 0.208-3.708 and train horn at 5.339-8.219", "frequencyCaption": "burping belching one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1737.wav", "onoffCaption": "woman laughing at 2.723-4.948, 6.995-9.6", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1746.wav", "onoffCaption": "cat meowing at 2.19-4.167, 5.515-7.651", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1831.wav", "onoffCaption": "gunshot at 2.959-4.959, 6.448-8.448", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1864.wav", "onoffCaption": "duck quacking at 2.461-4.461, 5.492-7.492 and woman laughing at 3.466-6.261", 
"frequencyCaption": "duck quacking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1881.wav", "onoffCaption": "sheep goat bleating at 2.941-4.941, 5.649-7.649", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3101.wav", "onoffCaption": "door slamming at 0.408-2.628, 4.108-6.328 and woman laughing at 2.274-4.474 and door knocking at 3.199-6.752, 7.928-10.0", "frequencyCaption": "door slamming two times and woman laughing one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3154.wav", "onoffCaption": "gunshot at 2.635-4.635", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3218.wav", "onoffCaption": "gunshot at 0.374-2.374 and sneeze at 5.184-7.798", "frequencyCaption": "gunshot one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3269.wav", "onoffCaption": "woman laughing at 2.804-5.573", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3303.wav", "onoffCaption": "train horn at 0.168-3.408, 4.333-7.573", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3372.wav", "onoffCaption": "whistling at 2.433-5.408, 7.427-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3397.wav", "onoffCaption": "train horn at 3.047-8.791 and spraying at 3.625-4.229, 6.445-7.445", "frequencyCaption": "train horn one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3419.wav", "onoffCaption": "duck quacking at 0.225-2.225 and dog barking at 1.184-3.184, 4.339-6.339", "frequencyCaption": "duck quacking one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3502.wav", "onoffCaption": "sneeze at 0.887-3.501, 4.361-6.975 and door slamming at 2.409-3.214, 5.124-7.905", "frequencyCaption": "sneeze two times and door slamming two times"} +{"filepath": 
"data/multi_event_train/syn_3557.wav", "onoffCaption": "thump thud at 1.144-5.519, 6.289-8.744 and sheep goat bleating at 2.984-6.904, 7.408-9.604", "frequencyCaption": "thump thud two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3724.wav", "onoffCaption": "duck quacking at 0.417-2.417, 3.645-5.645", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3755.wav", "onoffCaption": "dog barking at 1.109-3.109, 4.896-6.896, 7.76-9.76 and duck quacking at 3.606-5.606", "frequencyCaption": "dog barking three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3794.wav", "onoffCaption": "whistling at 0.449-3.324, 4.92-7.795", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3806.wav", "onoffCaption": "car horn honking at 2.923-5.27, 6.695-9.042", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3853.wav", "onoffCaption": "car horn honking at 2.237-6.749, 7.707-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3948.wav", "onoffCaption": "cow mooing at 2.363-7.343 and gunshot at 6.261-8.501", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_23.wav", "onoffCaption": "sheep goat bleating at 2.024-4.024, 4.681-6.681, 7.699-9.699", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_76.wav", "onoffCaption": "cow mooing at 0.517-5.497", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_151.wav", "onoffCaption": "burping belching at 0.578-4.447 and door slamming at 2.144-3.468", "frequencyCaption": "burping belching one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_223.wav", "onoffCaption": "thump thud at 0.521-3.292 and sneeze at 2.031-3.285", "frequencyCaption": "thump thud one times 
and sneeze one times"} +{"filepath": "data/multi_event_train/syn_239.wav", "onoffCaption": "sheep goat bleating at 0.693-2.693, 4.197-6.197", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_289.wav", "onoffCaption": "cat meowing at 0.229-1.25, 2.111-3.132, 4.459-5.48 and woman laughing at 2.584-5.684, 7.136-9.504", "frequencyCaption": "cat meowing three times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_322.wav", "onoffCaption": "door slamming at 1.672-4.551, 7.01-8.401 and thump thud at 2.44-4.779", "frequencyCaption": "door slamming two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_377.wav", "onoffCaption": "whistling at 2.592-5.567, 6.838-9.813", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_392.wav", "onoffCaption": "door slamming at 0.562-2.69, 3.661-5.789, 7.08-9.208", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_507.wav", "onoffCaption": "duck quacking at 2.858-4.858", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_675.wav", "onoffCaption": "woman laughing at 2.855-5.063, 5.692-8.111", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_690.wav", "onoffCaption": "thump thud at 2.745-4.973, 7.024-9.486", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_721.wav", "onoffCaption": "car horn honking at 0.361-4.761, 7.058-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_774.wav", "onoffCaption": "burping belching at 2.738-4.832 and cow mooing at 7.019-10.0", "frequencyCaption": "burping belching one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_791.wav", "onoffCaption": "train horn at 0.204-2.671, 3.684-6.181 and whistling at 0.286-8.671 and burping belching at 5.965-8.195", 
"frequencyCaption": "train horn two times and whistling one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_803.wav", "onoffCaption": "dog barking at 1.213-3.213, 5.389-7.389 and explosion at 3.633-6.505, 7.382-10.0", "frequencyCaption": "dog barking two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_918.wav", "onoffCaption": "door knocking at 1.957-4.084, 6.555-8.682", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1037.wav", "onoffCaption": "cow mooing at 0.323-3.292, 4.556-7.542", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1211.wav", "onoffCaption": "train horn at 0.755-4.515, 5.812-9.572", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1244.wav", "onoffCaption": "gunshot at 0.224-2.698 and woman laughing at 6.812-9.912", "frequencyCaption": "gunshot one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1461.wav", "onoffCaption": "duck quacking at 0.523-2.523, 4.907-6.907 and woman laughing at 1.227-3.421", "frequencyCaption": "duck quacking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1484.wav", "onoffCaption": "burping belching at 2.002-8.979 and tapping clicking clanking at 3.197-6.637", "frequencyCaption": "burping belching one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1612.wav", "onoffCaption": "whistling at 2.42-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1647.wav", "onoffCaption": "gunshot at 0.059-2.059, 2.889-4.889, 5.928-7.928 and sneeze at 0.303-1.986, 2.839-4.133, 5.618-8.191", "frequencyCaption": "gunshot three times and sneeze three times"} +{"filepath": "data/multi_event_train/syn_1709.wav", "onoffCaption": "dog barking at 3.101-5.101", "frequencyCaption": "dog barking one times"} +{"filepath": 
"data/multi_event_train/syn_1965.wav", "onoffCaption": "spraying at 3.478-4.329 and thump thud at 6.947-10.0", "frequencyCaption": "spraying one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1980.wav", "onoffCaption": "car horn honking at 3.67-6.135, 7.16-9.625", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3055.wav", "onoffCaption": "thump thud at 0.803-3.85", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3226.wav", "onoffCaption": "whistling at 2.624-7.108, 7.973-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3273.wav", "onoffCaption": "cow mooing at 3.464-6.446", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3296.wav", "onoffCaption": "sheep goat bleating at 2.22-4.22, 5.571-7.571", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3327.wav", "onoffCaption": "sheep goat bleating at 0.979-2.979, 5.085-7.085", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3368.wav", "onoffCaption": "car horn honking at 2.877-6.531, 7.446-9.989", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3403.wav", "onoffCaption": "door knocking at 2.848-5.472, 7.49-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3518.wav", "onoffCaption": "cat meowing at 0.029-1.924, 3.136-5.031", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3625.wav", "onoffCaption": "whistling at 1.721-4.696, 5.425-8.4", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3670.wav", "onoffCaption": "whistling at 1.871-9.882", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3695.wav", "onoffCaption": "explosion at 2.44-5.993 and sheep 
goat bleating at 4.285-6.285", "frequencyCaption": "explosion one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3771.wav", "onoffCaption": "explosion at 2.574-5.574, 6.762-9.762", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3907.wav", "onoffCaption": "train horn at 0.688-2.825 and door slamming at 1.492-2.47 and sneeze at 5.398-7.01, 7.662-9.274", "frequencyCaption": "train horn one times and door slamming one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_24.wav", "onoffCaption": "door slamming at 0.185-2.098, 3.22-5.133, 7.597-9.51", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_93.wav", "onoffCaption": "woman laughing at 3.523-6.804", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_104.wav", "onoffCaption": "car horn honking at 1.439-3.786, 5.888-8.792", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_156.wav", "onoffCaption": "cow mooing at 0.403-3.413 and cat meowing at 1.774-6.774", "frequencyCaption": "cow mooing one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_449.wav", "onoffCaption": "cat meowing at 0.961-2.377 and car horn honking at 1.892-5.067", "frequencyCaption": "cat meowing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_500.wav", "onoffCaption": "train horn at 0.129-3.489, 5.32-7.76", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_552.wav", "onoffCaption": "duck quacking at 1.62-3.62, 4.619-6.619 and cow mooing at 1.955-4.965, 5.474-8.484", "frequencyCaption": "duck quacking two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_668.wav", "onoffCaption": "door knocking at 0.117-3.617", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_773.wav", 
"onoffCaption": "woman laughing at 2.284-6.336", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_796.wav", "onoffCaption": "door slamming at 0.302-1.475, 2.628-3.801, 5.758-6.931 and car horn honking at 2.659-7.566", "frequencyCaption": "door slamming three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_804.wav", "onoffCaption": "sneeze at 1.835-4.91, 5.489-8.564", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_856.wav", "onoffCaption": "cow mooing at 0.081-5.061 and explosion at 1.242-3.833, 4.45-6.514, 7.335-10.0", "frequencyCaption": "cow mooing one times and explosion three times"} +{"filepath": "data/multi_event_train/syn_1030.wav", "onoffCaption": "cat meowing at 0.829-1.84, 3.044-4.055, 4.829-5.84 and gunshot at 2.193-4.193, 4.87-6.87, 7.81-9.81", "frequencyCaption": "cat meowing three times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_1062.wav", "onoffCaption": "whistling at 0.568-8.579 and train horn at 1.664-6.566, 7.957-10.0", "frequencyCaption": "whistling one times and train horn two times"} +{"filepath": "data/multi_event_train/syn_1087.wav", "onoffCaption": "train horn at 2.12-5.52, 6.427-9.827 and thump thud at 2.993-6.911", "frequencyCaption": "train horn two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1179.wav", "onoffCaption": "thump thud at 0.225-2.725, 4.517-7.017 and train horn at 2.293-5.653, 6.302-9.662", "frequencyCaption": "thump thud two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_1434.wav", "onoffCaption": "cat meowing at 2.858-7.858", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1466.wav", "onoffCaption": "train horn at 0.155-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1483.wav", "onoffCaption": "sneeze at 1.223-2.906 and sheep goat bleating at 
1.495-3.495 and dog barking at 2.465-4.465", "frequencyCaption": "sneeze one times and sheep goat bleating one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1598.wav", "onoffCaption": "door slamming at 0.59-1.707, 3.721-5.045", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1879.wav", "onoffCaption": "woman laughing at 0.565-8.01 and gunshot at 2.025-4.025", "frequencyCaption": "woman laughing one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1930.wav", "onoffCaption": "train horn at 0.044-4.484 and thump thud at 0.511-2.85, 4.648-6.987 and sneeze at 3.344-7.4", "frequencyCaption": "train horn one times and thump thud two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1962.wav", "onoffCaption": "explosion at 2.685-5.685, 6.915-9.787", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1987.wav", "onoffCaption": "burping belching at 0.826-6.427, 6.931-9.038 and dog barking at 1.463-3.463, 4.507-6.507 and thump thud at 4.019-6.519", "frequencyCaption": "burping belching two times and dog barking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3000.wav", "onoffCaption": "sneeze at 2.175-4.421, 6.906-9.152", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3052.wav", "onoffCaption": "duck quacking at 3.237-5.237", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3149.wav", "onoffCaption": "whistling at 0.327-8.712", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3221.wav", "onoffCaption": "whistling at 0.692-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3404.wav", "onoffCaption": "door knocking at 1.875-6.314", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3456.wav", "onoffCaption": 
"explosion at 0.557-3.557, 4.707-7.707 and gunshot at 0.902-2.902, 3.819-6.059, 7.052-9.052", "frequencyCaption": "explosion two times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_3677.wav", "onoffCaption": "woman laughing at 2.965-5.063", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3849.wav", "onoffCaption": "car horn honking at 0.921-5.828, 6.938-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3900.wav", "onoffCaption": "gunshot at 0.379-2.379, 3.498-5.498, 6.33-8.33", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3952.wav", "onoffCaption": "tapping clicking clanking at 0.053-3.493, 5.295-8.735 and door slamming at 3.514-4.687, 5.589-6.762, 7.398-8.571", "frequencyCaption": "tapping clicking clanking two times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_71.wav", "onoffCaption": "car horn honking at 0.054-2.054, 2.567-5.48, 6.163-8.547 and dog barking at 1.396-3.396", "frequencyCaption": "car horn honking three times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_94.wav", "onoffCaption": "whistling at 0.664-2.673 and dog barking at 5.547-7.547", "frequencyCaption": "whistling one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_103.wav", "onoffCaption": "train horn at 3.383-6.917, 7.644-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_119.wav", "onoffCaption": "explosion at 1.17-6.17", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_224.wav", "onoffCaption": "explosion at 0.69-3.443, 4.949-7.678", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_271.wav", "onoffCaption": "sheep goat bleating at 0.252-2.252, 3.168-5.863, 7.483-9.483", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": 
"data/multi_event_train/syn_294.wav", "onoffCaption": "dog barking at 4.009-6.009, 6.713-8.713", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_325.wav", "onoffCaption": "train horn at 2.454-4.934", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_370.wav", "onoffCaption": "woman laughing at 0.539-3.611", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_395.wav", "onoffCaption": "explosion at 0.154-5.154, 6.077-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_454.wav", "onoffCaption": "cat meowing at 0.659-1.744, 2.58-3.665 and car horn honking at 6.169-10.0", "frequencyCaption": "cat meowing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_555.wav", "onoffCaption": "train horn at 2.939-6.139, 7.128-9.199", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_627.wav", "onoffCaption": "sneeze at 0.592-2.299, 3.698-5.783", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_726.wav", "onoffCaption": "sneeze at 1.533-3.061, 4.94-6.468", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_769.wav", "onoffCaption": "burping belching at 3.011-6.19, 7.094-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_851.wav", "onoffCaption": "duck quacking at 0.562-2.562, 4.942-6.942", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_950.wav", "onoffCaption": "sheep goat bleating at 0.278-2.278, 3.82-6.577", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1065.wav", "onoffCaption": "burping belching at 0.199-2.293, 3.025-5.119, 6.426-8.52", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_1080.wav", "onoffCaption": 
"woman laughing at 2.234-8.968", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1164.wav", "onoffCaption": "door knocking at 2.967-5.319, 6.283-8.635", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1181.wav", "onoffCaption": "cat meowing at 0.605-2.793, 3.637-5.597, 7.0-8.011", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1216.wav", "onoffCaption": "whistling at 0.333-7.988", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1243.wav", "onoffCaption": "cat meowing at 0.456-1.596 and sheep goat bleating at 0.866-2.866 and sneeze at 1.641-3.186, 5.174-7.413, 8.442-9.534", "frequencyCaption": "cat meowing one times and sheep goat bleating one times and sneeze three times"} +{"filepath": "data/multi_event_train/syn_1259.wav", "onoffCaption": "explosion at 0.006-2.094, 3.253-5.341", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1317.wav", "onoffCaption": "door knocking at 0.43-3.539 and duck quacking at 2.429-4.429", "frequencyCaption": "door knocking one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1342.wav", "onoffCaption": "train horn at 2.787-6.147, 7.196-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1358.wav", "onoffCaption": "burping belching at 1.915-3.95, 4.703-6.738", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1429.wav", "onoffCaption": "sneeze at 2.999-4.091, 5.749-7.988 and door slamming at 5.615-8.332", "frequencyCaption": "sneeze two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1433.wav", "onoffCaption": "thump thud at 2.715-6.382, 7.011-9.511", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1528.wav", "onoffCaption": "car horn honking at 3.34-5.84, 
6.745-9.245", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1532.wav", "onoffCaption": "door slamming at 0.251-1.089, 2.484-3.322, 5.322-6.16", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1615.wav", "onoffCaption": "car horn honking at 3.507-7.907", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1640.wav", "onoffCaption": "thump thud at 2.133-4.633", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1714.wav", "onoffCaption": "cow mooing at 2.052-7.032 and cat meowing at 3.252-6.493 and burping belching at 3.449-5.48, 6.955-9.955", "frequencyCaption": "cow mooing one times and cat meowing one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1741.wav", "onoffCaption": "cat meowing at 3.699-4.708, 7.085-8.094, 8.677-9.686", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1836.wav", "onoffCaption": "whistling at 1.123-3.998, 6.213-9.088", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1937.wav", "onoffCaption": "explosion at 0.473-5.473, 7.065-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3007.wav", "onoffCaption": "spraying at 3.438-4.379, 5.961-8.545", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3106.wav", "onoffCaption": "thump thud at 0.143-4.518 and burping belching at 7.45-9.485", "frequencyCaption": "thump thud one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3274.wav", "onoffCaption": "sheep goat bleating at 2.531-4.531, 5.953-8.314", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3291.wav", "onoffCaption": "cat meowing at 2.873-4.768, 5.368-7.308, 8.311-9.866", "frequencyCaption": "cat meowing three times"} 
+{"filepath": "data/multi_event_train/syn_3375.wav", "onoffCaption": "gunshot at 1.097-3.097, 3.963-5.963, 7.832-9.832 and sneeze at 3.728-5.016", "frequencyCaption": "gunshot three times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3390.wav", "onoffCaption": "train horn at 1.323-4.643", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3451.wav", "onoffCaption": "door knocking at 1.209-3.299 and spraying at 3.065-3.916, 5.992-6.896", "frequencyCaption": "door knocking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3550.wav", "onoffCaption": "car horn honking at 1.828-4.341", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3622.wav", "onoffCaption": "door knocking at 3.698-8.137", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3638.wav", "onoffCaption": "dog barking at 3.178-5.178, 7.122-9.122", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3688.wav", "onoffCaption": "explosion at 0.51-2.512, 3.826-6.694, 7.756-10.0 and dog barking at 1.669-3.669, 6.087-8.087 and tapping clicking clanking at 4.081-7.521", "frequencyCaption": "explosion three times and dog barking two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3692.wav", "onoffCaption": "cat meowing at 0.796-2.691, 3.33-5.225", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3723.wav", "onoffCaption": "door knocking at 0.865-4.633, 5.792-9.56", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3739.wav", "onoffCaption": "explosion at 1.129-4.969, 5.71-8.318 and sneeze at 5.171-7.488, 8.553-9.656", "frequencyCaption": "explosion two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3789.wav", "onoffCaption": "train horn at 1.777-5.977, 7.19-10.0 and whistling at 
3.008-8.183", "frequencyCaption": "train horn two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3793.wav", "onoffCaption": "woman laughing at 0.114-2.397, 4.314-6.412", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3854.wav", "onoffCaption": "sheep goat bleating at 0.091-2.091", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3955.wav", "onoffCaption": "burping belching at 0.1-2.303 and sneeze at 0.938-2.226, 2.785-4.073, 5.144-6.432 and spraying at 2.713-3.235", "frequencyCaption": "burping belching one times and sneeze three times and spraying one times"} +{"filepath": "data/multi_event_train/syn_255.wav", "onoffCaption": "duck quacking at 2.246-4.246 and car horn honking at 6.104-8.104", "frequencyCaption": "duck quacking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_401.wav", "onoffCaption": "explosion at 1.188-4.188, 6.166-8.234 and door slamming at 6.603-7.284", "frequencyCaption": "explosion two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_495.wav", "onoffCaption": "tapping clicking clanking at 1.277-4.717 and spraying at 6.204-7.268, 9.239-9.814", "frequencyCaption": "tapping clicking clanking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_603.wav", "onoffCaption": "door slamming at 0.31-1.21", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_672.wav", "onoffCaption": "sheep goat bleating at 2.385-5.705, 7.194-9.194", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_697.wav", "onoffCaption": "car horn honking at 2.257-5.17, 6.998-9.911", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_718.wav", "onoffCaption": "explosion at 0.272-3.272, 3.864-6.864", "frequencyCaption": "explosion two times"} +{"filepath": 
"data/multi_event_train/syn_905.wav", "onoffCaption": "dog barking at 0.109-2.109, 2.928-4.928, 6.585-8.585", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_974.wav", "onoffCaption": "cow mooing at 0.23-3.24, 3.796-6.806", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1131.wav", "onoffCaption": "thump thud at 3.383-7.301", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1228.wav", "onoffCaption": "spraying at 2.431-3.181, 4.888-5.829, 7.392-9.784", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1333.wav", "onoffCaption": "tapping clicking clanking at 0.156-3.596", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1499.wav", "onoffCaption": "door knocking at 0.679-2.9, 4.216-6.437, 7.091-9.312", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1567.wav", "onoffCaption": "spraying at 1.396-2.653", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1582.wav", "onoffCaption": "tapping clicking clanking at 0.881-4.321, 5.587-9.027 and cow mooing at 0.881-5.861", "frequencyCaption": "tapping clicking clanking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1765.wav", "onoffCaption": "gunshot at 3.529-5.529, 7.826-9.826", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1780.wav", "onoffCaption": "whistling at 0.548-3.523", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1863.wav", "onoffCaption": "dog barking at 0.423-2.423 and whistling at 6.542-10.0", "frequencyCaption": "dog barking one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1886.wav", "onoffCaption": "duck quacking at 3.169-5.169 and train horn at 7.932-10.0", "frequencyCaption": "duck quacking one 
times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1909.wav", "onoffCaption": "gunshot at 3.317-5.317, 7.169-9.169", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1978.wav", "onoffCaption": "door knocking at 1.1-5.61 and gunshot at 3.2-5.2, 6.765-9.271", "frequencyCaption": "door knocking one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_3039.wav", "onoffCaption": "thump thud at 0.21-2.71, 3.351-5.851, 6.52-9.02", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_3048.wav", "onoffCaption": "tapping clicking clanking at 2.359-5.799, 7.591-9.639", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3122.wav", "onoffCaption": "door knocking at 0.12-3.24 and duck quacking at 0.249-2.249, 2.872-4.872 and cow mooing at 0.975-3.944", "frequencyCaption": "door knocking one times and duck quacking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3153.wav", "onoffCaption": "spraying at 2.568-3.3, 4.971-5.703", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3320.wav", "onoffCaption": "door slamming at 0.339-3.12, 5.228-6.603 and car horn honking at 1.221-6.128", "frequencyCaption": "door slamming two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3351.wav", "onoffCaption": "whistling at 0.082-2.091, 2.634-4.643, 5.173-7.182", "frequencyCaption": "whistling three times"} +{"filepath": "data/multi_event_train/syn_3505.wav", "onoffCaption": "gunshot at 0.828-2.828, 3.51-5.551, 6.26-8.26", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3574.wav", "onoffCaption": "dog barking at 0.732-2.732, 3.583-5.583, 6.4-8.4", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_3707.wav", "onoffCaption": "thump thud at 2.729-5.776, 7.199-9.97", 
"frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3776.wav", "onoffCaption": "sneeze at 0.514-3.174, 3.942-6.345, 7.027-8.527 and duck quacking at 3.844-5.844, 6.531-8.531", "frequencyCaption": "sneeze three times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3801.wav", "onoffCaption": "woman laughing at 0.433-3.72, 4.347-7.634", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3895.wav", "onoffCaption": "tapping clicking clanking at 0.957-4.397, 6.045-8.62", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_168.wav", "onoffCaption": "gunshot at 0.302-2.302, 3.007-5.007, 5.65-7.65 and sneeze at 1.729-4.804, 5.308-7.304", "frequencyCaption": "gunshot three times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_200.wav", "onoffCaption": "thump thud at 0.027-4.402, 5.833-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_425.wav", "onoffCaption": "whistling at 0.103-7.853", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_470.wav", "onoffCaption": "dog barking at 0.337-2.337 and cat meowing at 1.04-2.051, 2.899-4.443, 4.964-6.15 and gunshot at 2.741-4.741", "frequencyCaption": "dog barking one times and cat meowing three times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_656.wav", "onoffCaption": "explosion at 2.865-7.865", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_820.wav", "onoffCaption": "burping belching at 2.113-7.714", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_921.wav", "onoffCaption": "tapping clicking clanking at 3.542-6.982", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_991.wav", "onoffCaption": "gunshot at 3.601-5.642, 7.694-9.735", 
"frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1115.wav", "onoffCaption": "tapping clicking clanking at 1.078-4.518 and cat meowing at 2.764-4.381, 6.623-8.24", "frequencyCaption": "tapping clicking clanking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1140.wav", "onoffCaption": "car horn honking at 0.279-3.192, 5.549-8.462", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1298.wav", "onoffCaption": "cat meowing at 1.184-2.459, 3.186-4.461", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1366.wav", "onoffCaption": "door slamming at 0.202-1.717, 2.995-4.51, 5.792-7.307 and spraying at 1.267-2.524 and woman laughing at 1.332-3.526, 5.309-7.503", "frequencyCaption": "door slamming three times and spraying one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1383.wav", "onoffCaption": "cow mooing at 2.663-5.961", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1458.wav", "onoffCaption": "door knocking at 2.411-6.85", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1516.wav", "onoffCaption": "sneeze at 2.129-4.442, 6.085-8.893", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1543.wav", "onoffCaption": "car horn honking at 1.994-4.341, 6.759-9.106", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1730.wav", "onoffCaption": "door slamming at 0.66-1.913, 3.512-5.512 and spraying at 7.065-7.666", "frequencyCaption": "door slamming two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1812.wav", "onoffCaption": "sneeze at 0.01-2.134, 4.32-6.444", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1847.wav", "onoffCaption": "train horn at 2.659-6.419", "frequencyCaption": "train horn 
one times"} +{"filepath": "data/multi_event_train/syn_3076.wav", "onoffCaption": "cow mooing at 0.452-5.432, 7.721-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3089.wav", "onoffCaption": "explosion at 2.8-5.4, 7.474-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3177.wav", "onoffCaption": "thump thud at 1.086-4.133", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3192.wav", "onoffCaption": "woman laughing at 3.884-6.239, 6.838-9.193", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3304.wav", "onoffCaption": "train horn at 2.616-6.816, 7.46-9.94", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3420.wav", "onoffCaption": "door knocking at 0.306-3.153, 5.11-7.174 and car horn honking at 4.113-7.608", "frequencyCaption": "door knocking two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3521.wav", "onoffCaption": "tapping clicking clanking at 1.082-4.522, 5.042-7.335", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3591.wav", "onoffCaption": "door knocking at 2.49-5.546 and car horn honking at 7.149-10.0", "frequencyCaption": "door knocking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3649.wav", "onoffCaption": "dog barking at 0.503-3.823, 4.345-7.665", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3752.wav", "onoffCaption": "sheep goat bleating at 1.849-3.849, 6.339-8.339 and duck quacking at 2.367-4.367, 5.96-7.96", "frequencyCaption": "sheep goat bleating two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3825.wav", "onoffCaption": "tapping clicking clanking at 0.088-3.528, 4.234-7.674 and sneeze at 0.459-2.455, 3.231-5.593, 6.22-7.394", "frequencyCaption": 
"tapping clicking clanking two times and sneeze three times"} +{"filepath": "data/multi_event_train/syn_3870.wav", "onoffCaption": "dog barking at 0.159-2.159 and burping belching at 3.641-10.0", "frequencyCaption": "dog barking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_55.wav", "onoffCaption": "car horn honking at 0.516-3.302, 4.306-7.092", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_127.wav", "onoffCaption": "explosion at 0.637-2.701, 3.39-5.454, 6.295-8.359", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_172.wav", "onoffCaption": "woman laughing at 0.259-2.678, 3.762-6.181, 7.293-9.712", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_197.wav", "onoffCaption": "sheep goat bleating at 1.284-3.284, 3.836-5.836, 7.193-9.193", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_301.wav", "onoffCaption": "door knocking at 3.503-6.57", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_354.wav", "onoffCaption": "explosion at 2.583-6.136, 6.737-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_524.wav", "onoffCaption": "duck quacking at 0.427-2.427, 3.177-5.177, 6.113-8.113 and burping belching at 2.667-5.667, 6.53-9.53", "frequencyCaption": "duck quacking three times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_571.wav", "onoffCaption": "cow mooing at 0.128-3.138, 4.344-7.354 and dog barking at 4.593-6.593", "frequencyCaption": "cow mooing two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_594.wav", "onoffCaption": "cat meowing at 2.648-3.66, 4.641-5.653", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_619.wav", "onoffCaption": "whistling at 0.822-7.241", "frequencyCaption": 
"whistling one times"} +{"filepath": "data/multi_event_train/syn_702.wav", "onoffCaption": "duck quacking at 0.54-2.54", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_757.wav", "onoffCaption": "cow mooing at 0.723-3.705, 4.306-6.958 and train horn at 1.18-7.649", "frequencyCaption": "cow mooing two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_875.wav", "onoffCaption": "duck quacking at 1.336-3.336, 5.54-7.54 and car horn honking at 1.58-6.487, 7.594-9.594 and gunshot at 2.079-4.079, 5.251-7.251", "frequencyCaption": "duck quacking two times and car horn honking two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_890.wav", "onoffCaption": "sneeze at 3.586-5.505, 6.712-8.631", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1014.wav", "onoffCaption": "train horn at 0.202-2.357 and spraying at 6.382-6.882", "frequencyCaption": "train horn one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1041.wav", "onoffCaption": "cat meowing at 0.344-1.353 and sheep goat bleating at 4.842-6.842, 7.989-9.989", "frequencyCaption": "cat meowing one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1232.wav", "onoffCaption": "explosion at 0.237-5.237", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1267.wav", "onoffCaption": "car horn honking at 0.041-3.882, 5.425-7.938 and spraying at 2.028-2.895", "frequencyCaption": "car horn honking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1282.wav", "onoffCaption": "gunshot at 1.822-3.822 and cow mooing at 2.568-7.548", "frequencyCaption": "gunshot one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1329.wav", "onoffCaption": "door knocking at 0.595-3.651, 4.593-6.72", "frequencyCaption": "door knocking two times"} +{"filepath": 
"data/multi_event_train/syn_1399.wav", "onoffCaption": "sneeze at 0.117-3.765, 4.445-5.537 and explosion at 0.46-3.46 and car horn honking at 1.044-3.044", "frequencyCaption": "sneeze two times and explosion one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1417.wav", "onoffCaption": "sheep goat bleating at 1.462-3.462, 4.727-6.727 and tapping clicking clanking at 2.217-5.657, 7.32-10.0", "frequencyCaption": "sheep goat bleating two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1442.wav", "onoffCaption": "tapping clicking clanking at 0.277-3.717, 5.707-9.147", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1559.wav", "onoffCaption": "thump thud at 0.397-4.064, 5.612-9.279", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1631.wav", "onoffCaption": "car horn honking at 0.189-2.702", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1664.wav", "onoffCaption": "train horn at 0.002-4.07", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1808.wav", "onoffCaption": "woman laughing at 2.822-5.876, 6.644-9.698", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1913.wav", "onoffCaption": "sneeze at 1.993-3.239, 4.458-6.571, 7.242-9.645", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_1946.wav", "onoffCaption": "burping belching at 2.326-6.662 and door slamming at 4.842-5.82", "frequencyCaption": "burping belching one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3023.wav", "onoffCaption": "door slamming at 0.184-2.312, 3.655-5.783, 6.912-9.04", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3093.wav", "onoffCaption": "woman laughing at 0.977-3.569", "frequencyCaption": "woman 
laughing one times"} +{"filepath": "data/multi_event_train/syn_3138.wav", "onoffCaption": "train horn at 2.088-7.165 and explosion at 2.61-5.61 and door slamming at 3.402-4.655", "frequencyCaption": "train horn one times and explosion one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3188.wav", "onoffCaption": "sneeze at 0.033-1.321, 2.162-4.776, 5.79-7.402", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3205.wav", "onoffCaption": "thump thud at 1.166-4.213, 4.916-7.242 and gunshot at 6.167-8.167", "frequencyCaption": "thump thud two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3250.wav", "onoffCaption": "duck quacking at 0.323-2.323, 4.398-6.398", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3475.wav", "onoffCaption": "woman laughing at 0.214-2.806, 3.565-5.773", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3490.wav", "onoffCaption": "spraying at 0.003-0.784, 2.076-4.204, 5.949-6.457", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3606.wav", "onoffCaption": "cow mooing at 0.694-3.676, 5.839-8.821", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3653.wav", "onoffCaption": "thump thud at 0.056-3.974, 5.313-9.231", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3748.wav", "onoffCaption": "door slamming at 0.261-1.74", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_3924.wav", "onoffCaption": "woman laughing at 0.478-3.532, 5.351-8.05", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3971.wav", "onoffCaption": "sheep goat bleating at 1.131-3.131 and thump thud at 4.915-9.365 and dog barking at 6.582-8.582", "frequencyCaption": "sheep goat bleating one times and thump thud one times and 
dog barking one times"} +{"filepath": "data/multi_event_train/syn_3994.wav", "onoffCaption": "burping belching at 1.116-3.739, 5.597-8.418", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_14.wav", "onoffCaption": "gunshot at 1.876-3.876, 5.278-7.278 and explosion at 2.328-5.057", "frequencyCaption": "gunshot two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_133.wav", "onoffCaption": "cow mooing at 1.851-6.831", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_166.wav", "onoffCaption": "car horn honking at 1.533-3.88, 6.373-8.72 and tapping clicking clanking at 3.047-6.487", "frequencyCaption": "car horn honking two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_183.wav", "onoffCaption": "spraying at 0.373-1.314, 2.792-3.733 and cat meowing at 2.599-3.611", "frequencyCaption": "spraying two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_315.wav", "onoffCaption": "door slamming at 0.256-1.489, 2.931-4.164", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_340.wav", "onoffCaption": "door slamming at 2.894-5.323 and cow mooing at 7.457-10.0", "frequencyCaption": "door slamming one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_530.wav", "onoffCaption": "whistling at 0.753-9.138", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_565.wav", "onoffCaption": "cow mooing at 0.66-3.629 and woman laughing at 7.388-10.0", "frequencyCaption": "cow mooing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_580.wav", "onoffCaption": "door slamming at 3.578-6.578, 7.636-9.832", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_658.wav", "onoffCaption": "explosion at 2.357-7.357", "frequencyCaption": "explosion one times"} 
+{"filepath": "data/multi_event_train/syn_716.wav", "onoffCaption": "dog barking at 0.108-2.108, 2.668-4.668, 5.421-7.421 and gunshot at 1.186-3.186, 5.192-7.192 and door knocking at 4.129-6.193", "frequencyCaption": "dog barking three times and gunshot two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_743.wav", "onoffCaption": "cow mooing at 0.853-3.835, 5.646-8.205", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_834.wav", "onoffCaption": "duck quacking at 2.186-4.186, 5.978-7.978", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_861.wav", "onoffCaption": "thump thud at 2.521-6.439, 7.515-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1000.wav", "onoffCaption": "gunshot at 2.613-4.613, 6.416-8.416 and door knocking at 3.178-5.399, 6.317-8.686", "frequencyCaption": "gunshot two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1055.wav", "onoffCaption": "door slamming at 1.103-2.22, 4.078-5.195, 7.409-8.526", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1226.wav", "onoffCaption": "dog barking at 1.16-3.16", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1273.wav", "onoffCaption": "whistling at 0.636-2.865", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1296.wav", "onoffCaption": "whistling at 0.45-2.679", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1368.wav", "onoffCaption": "door knocking at 2.254-4.656, 6.714-8.841", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1456.wav", "onoffCaption": "tapping clicking clanking at 1.369-4.809", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1518.wav", "onoffCaption": "explosion at 
0.098-5.098 and tapping clicking clanking at 0.832-4.272", "frequencyCaption": "explosion one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1625.wav", "onoffCaption": "cat meowing at 0.696-2.446, 3.021-4.771, 5.412-7.162", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1670.wav", "onoffCaption": "burping belching at 2.796-4.919, 5.517-7.64", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1681.wav", "onoffCaption": "cow mooing at 1.387-4.356, 6.831-9.813", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1695.wav", "onoffCaption": "gunshot at 0.724-2.724, 4.717-6.717 and duck quacking at 1.185-3.185, 4.117-6.117, 7.028-9.028", "frequencyCaption": "gunshot two times and duck quacking three times"} +{"filepath": "data/multi_event_train/syn_1849.wav", "onoffCaption": "thump thud at 0.642-5.092, 5.855-7.909", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1907.wav", "onoffCaption": "explosion at 1.151-6.151", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1952.wav", "onoffCaption": "door knocking at 0.422-3.975 and duck quacking at 1.2-3.2 and car horn honking at 6.424-9.242", "frequencyCaption": "door knocking one times and duck quacking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3037.wav", "onoffCaption": "duck quacking at 0.241-2.241, 3.858-5.858 and thump thud at 4.667-6.895, 7.566-10.0", "frequencyCaption": "duck quacking two times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_3062.wav", "onoffCaption": "woman laughing at 2.849-5.049", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3087.wav", "onoffCaption": "duck quacking at 0.031-2.031, 3.164-5.164 and burping belching at 7.688-10.0", "frequencyCaption": "duck 
quacking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3179.wav", "onoffCaption": "gunshot at 1.377-3.377, 4.054-6.054", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3211.wav", "onoffCaption": "sheep goat bleating at 0.633-4.553, 5.305-7.305 and tapping clicking clanking at 1.74-5.18, 7.248-10.0", "frequencyCaption": "sheep goat bleating two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3244.wav", "onoffCaption": "cow mooing at 2.773-6.071, 7.377-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3434.wav", "onoffCaption": "duck quacking at 3.054-5.054, 7.036-9.036", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3461.wav", "onoffCaption": "door slamming at 2.061-4.842, 6.857-9.638", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3484.wav", "onoffCaption": "duck quacking at 0.197-2.197, 2.792-4.792, 5.774-7.774 and thump thud at 3.437-6.484", "frequencyCaption": "duck quacking three times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3612.wav", "onoffCaption": "whistling at 0.85-5.334, 7.349-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3647.wav", "onoffCaption": "dog barking at 0.531-3.851, 5.617-7.617 and train horn at 0.823-3.303, 4.388-6.934 and tapping clicking clanking at 2.164-5.604", "frequencyCaption": "dog barking two times and train horn two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3709.wav", "onoffCaption": "train horn at 0.105-2.242, 2.805-4.942, 5.66-7.797", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_train/syn_3930.wav", "onoffCaption": "door knocking at 0.868-4.421", "frequencyCaption": "door knocking one times"} +{"filepath": 
"data/multi_event_train/syn_3965.wav", "onoffCaption": "woman laughing at 0.226-3.614, 4.453-7.841 and cow mooing at 3.081-6.05 and door slamming at 3.829-5.829", "frequencyCaption": "woman laughing two times and cow mooing one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3980.wav", "onoffCaption": "door slamming at 0.164-3.138 and sheep goat bleating at 0.911-2.911, 5.018-7.018 and car horn honking at 2.298-7.205", "frequencyCaption": "door slamming one times and sheep goat bleating two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_41.wav", "onoffCaption": "tapping clicking clanking at 2.541-5.981", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_129.wav", "onoffCaption": "dog barking at 1.053-3.053, 4.927-6.927 and cat meowing at 4.681-9.041 and sneeze at 6.661-8.273", "frequencyCaption": "dog barking two times and cat meowing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_199.wav", "onoffCaption": "woman laughing at 0.853-7.587 and door knocking at 2.12-4.21 and door slamming at 6.068-7.185, 7.926-9.043", "frequencyCaption": "woman laughing one times and door knocking one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_214.wav", "onoffCaption": "sneeze at 2.226-3.838, 4.85-6.395", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_241.wav", "onoffCaption": "explosion at 0.978-5.978", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_431.wav", "onoffCaption": "spraying at 2.906-3.847, 4.661-5.602, 6.824-7.765", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_464.wav", "onoffCaption": "door knocking at 0.881-3.728, 4.455-7.302 and door slamming at 1.699-4.578, 5.851-6.656 and explosion at 2.481-6.481", "frequencyCaption": "door knocking two times and door slamming two times and 
explosion one times"} +{"filepath": "data/multi_event_train/syn_481.wav", "onoffCaption": "gunshot at 2.466-4.466, 5.127-7.127 and door knocking at 3.505-8.207 and train horn at 3.535-6.935", "frequencyCaption": "gunshot two times and door knocking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_617.wav", "onoffCaption": "spraying at 2.92-3.57, 5.893-6.543, 8.392-9.042", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_642.wav", "onoffCaption": "gunshot at 2.277-4.277, 5.537-7.537 and cat meowing at 6.26-7.26", "frequencyCaption": "gunshot two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_759.wav", "onoffCaption": "door slamming at 3.234-5.46 and gunshot at 5.625-7.625", "frequencyCaption": "door slamming one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_884.wav", "onoffCaption": "spraying at 0.199-0.699, 1.763-2.263, 2.935-3.435 and cat meowing at 0.762-2.333, 2.93-4.501", "frequencyCaption": "spraying three times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_935.wav", "onoffCaption": "sheep goat bleating at 2.538-4.538, 6.466-8.466", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_960.wav", "onoffCaption": "sneeze at 0.1-3.21, 4.615-6.7", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_985.wav", "onoffCaption": "burping belching at 3.179-6.358, 7.834-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1101.wav", "onoffCaption": "sneeze at 0.201-2.44, 3.115-5.028, 5.81-7.895 and gunshot at 1.528-3.528", "frequencyCaption": "sneeze three times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1154.wav", "onoffCaption": "spraying at 0.02-1.104 and whistling at 0.446-5.946", "frequencyCaption": "spraying one times and whistling one times"} +{"filepath": 
"data/multi_event_train/syn_1170.wav", "onoffCaption": "spraying at 2.656-4.418, 6.913-8.17", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1269.wav", "onoffCaption": "duck quacking at 0.627-2.627, 3.358-5.358", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1327.wav", "onoffCaption": "burping belching at 0.01-2.135, 3.537-5.662, 6.649-8.774 and door knocking at 1.426-3.586", "frequencyCaption": "burping belching three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1372.wav", "onoffCaption": "dog barking at 2.736-4.736, 6.873-8.873", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1397.wav", "onoffCaption": "cat meowing at 0.314-1.898, 2.63-4.214", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1403.wav", "onoffCaption": "explosion at 1.981-4.734, 6.313-9.313", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1419.wav", "onoffCaption": "train horn at 0.012-4.012 and spraying at 6.3-7.381", "frequencyCaption": "train horn one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1502.wav", "onoffCaption": "duck quacking at 0.082-2.082 and car horn honking at 0.559-4.054 and spraying at 5.939-8.523", "frequencyCaption": "duck quacking one times and car horn honking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1526.wav", "onoffCaption": "thump thud at 0.761-2.989, 4.754-6.982", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1557.wav", "onoffCaption": "train horn at 0.528-8.728", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1724.wav", "onoffCaption": "door slamming at 0.416-1.267, 2.1-2.951, 4.414-5.265", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1771.wav", 
"onoffCaption": "thump thud at 0.394-2.622, 3.385-5.613, 6.502-8.73", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_1794.wav", "onoffCaption": "door knocking at 0.846-3.576, 5.727-8.351", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1806.wav", "onoffCaption": "explosion at 1.47-6.47 and door slamming at 1.716-4.497, 5.429-6.908 and burping belching at 3.429-7.429", "frequencyCaption": "explosion one times and door slamming two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1853.wav", "onoffCaption": "cow mooing at 0.434-3.416, 3.954-6.789, 7.523-9.86", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/multi_event_train/syn_1948.wav", "onoffCaption": "train horn at 1.774-6.851 and tapping clicking clanking at 3.442-6.882", "frequencyCaption": "train horn one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3078.wav", "onoffCaption": "sneeze at 0.026-1.483 and thump thud at 3.614-6.114, 7.183-10.0", "frequencyCaption": "sneeze one times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_3136.wav", "onoffCaption": "train horn at 0.866-5.768, 7.973-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3163.wav", "onoffCaption": "car horn honking at 0.131-2.478", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3186.wav", "onoffCaption": "spraying at 1.654-3.416, 4.687-7.123 and cat meowing at 2.384-5.625, 7.118-10.0", "frequencyCaption": "spraying two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3310.wav", "onoffCaption": "train horn at 1.864-5.398 and door knocking at 4.175-6.239, 6.937-9.001", "frequencyCaption": "train horn one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3345.wav", "onoffCaption": "door knocking at 3.904-8.176", 
"frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3535.wav", "onoffCaption": "gunshot at 1.061-3.061, 4.558-6.558 and duck quacking at 3.044-5.044", "frequencyCaption": "gunshot two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3560.wav", "onoffCaption": "car horn honking at 0.198-4.71, 6.293-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3585.wav", "onoffCaption": "cat meowing at 2.877-4.017", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3608.wav", "onoffCaption": "cow mooing at 2.655-5.953 and sneeze at 7.997-10.0", "frequencyCaption": "cow mooing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3713.wav", "onoffCaption": "whistling at 0.531-5.015, 7.0-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3746.wav", "onoffCaption": "car horn honking at 0.427-3.213", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3831.wav", "onoffCaption": "sheep goat bleating at 2.297-4.297, 6.101-8.101", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3864.wav", "onoffCaption": "sneeze at 0.203-1.703", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3881.wav", "onoffCaption": "sneeze at 0.434-2.117, 2.96-4.643, 5.251-6.934", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_158.wav", "onoffCaption": "sheep goat bleating at 0.22-2.22, 4.379-6.379, 7.198-9.198", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_230.wav", "onoffCaption": "thump thud at 0.17-2.509, 4.2-6.539", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_265.wav", "onoffCaption": "burping belching at 0.549-2.58, 3.304-5.335 and cow mooing at 
1.658-4.64, 5.717-8.699", "frequencyCaption": "burping belching two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_280.wav", "onoffCaption": "sheep goat bleating at 3.352-5.352, 6.27-9.27", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_331.wav", "onoffCaption": "woman laughing at 0.703-2.986, 4.385-6.668 and burping belching at 3.892-6.892", "frequencyCaption": "woman laughing two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_415.wav", "onoffCaption": "gunshot at 0.476-2.476 and cat meowing at 0.509-2.503, 3.064-4.648, 5.727-7.962", "frequencyCaption": "gunshot one times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_440.wav", "onoffCaption": "duck quacking at 0.184-2.184, 3.112-5.112", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_633.wav", "onoffCaption": "cat meowing at 0.287-1.434, 2.463-3.61", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_666.wav", "onoffCaption": "cow mooing at 2.642-5.94", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_683.wav", "onoffCaption": "door slamming at 2.21-3.601", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_699.wav", "onoffCaption": "door slamming at 2.043-4.269, 4.82-6.211, 8.048-9.572", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_728.wav", "onoffCaption": "train horn at 2.321-6.389, 7.291-9.903", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_767.wav", "onoffCaption": "duck quacking at 0.926-2.926, 4.515-6.515 and cat meowing at 1.965-3.925", "frequencyCaption": "duck quacking two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_798.wav", "onoffCaption": "whistling at 0.489-6.899", "frequencyCaption": "whistling one 
times"} +{"filepath": "data/multi_event_train/syn_911.wav", "onoffCaption": "cat meowing at 0.48-1.666 and whistling at 0.61-10.0", "frequencyCaption": "cat meowing one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_944.wav", "onoffCaption": "burping belching at 1.37-5.393, 6.183-9.183", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1125.wav", "onoffCaption": "spraying at 2.52-3.371, 4.122-6.582, 7.329-7.956", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1195.wav", "onoffCaption": "door knocking at 0.96-3.2", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1218.wav", "onoffCaption": "tapping clicking clanking at 1.668-5.108", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1257.wav", "onoffCaption": "sneeze at 1.556-2.844, 3.917-6.002 and duck quacking at 2.435-4.435, 5.674-7.674 and spraying at 3.075-3.659, 5.829-6.413", "frequencyCaption": "sneeze two times and duck quacking two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1303.wav", "onoffCaption": "car horn honking at 1.45-3.797 and spraying at 2.235-3.086 and dog barking at 6.931-9.852", "frequencyCaption": "car horn honking one times and spraying one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1356.wav", "onoffCaption": "door knocking at 3.378-6.931", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1468.wav", "onoffCaption": "cat meowing at 0.276-1.378, 2.979-4.245 and gunshot at 7.143-9.143", "frequencyCaption": "cat meowing two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1573.wav", "onoffCaption": "dog barking at 0.216-2.216, 3.355-5.355, 6.148-8.148 and spraying at 0.276-0.845 and tapping clicking clanking at 5.304-8.744", "frequencyCaption": "dog barking three times and 
spraying one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1596.wav", "onoffCaption": "cat meowing at 2.368-3.554, 4.623-7.014", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1601.wav", "onoffCaption": "door knocking at 0.127-2.149 and cow mooing at 3.366-6.376", "frequencyCaption": "door knocking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1700.wav", "onoffCaption": "whistling at 1.146-8.764", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1755.wav", "onoffCaption": "sneeze at 0.305-1.892, 4.262-6.375", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1822.wav", "onoffCaption": "train horn at 1.537-4.417, 6.155-9.035", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1877.wav", "onoffCaption": "gunshot at 2.109-4.109 and whistling at 7.924-10.0", "frequencyCaption": "gunshot one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1892.wav", "onoffCaption": "explosion at 0.321-3.377", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1939.wav", "onoffCaption": "door knocking at 0.279-3.126, 3.663-6.51", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1989.wav", "onoffCaption": "sheep goat bleating at 0.042-2.042 and woman laughing at 4.477-7.069", "frequencyCaption": "sheep goat bleating one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3009.wav", "onoffCaption": "sheep goat bleating at 0.509-2.509", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3112.wav", "onoffCaption": "woman laughing at 3.607-6.212 and duck quacking at 5.845-7.845", "frequencyCaption": "woman laughing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3147.wav", 
"onoffCaption": "whistling at 2.305-6.789, 7.817-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3334.wav", "onoffCaption": "spraying at 0.947-1.697, 3.096-3.846, 4.38-5.13 and whistling at 7.494-10.0", "frequencyCaption": "spraying three times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3361.wav", "onoffCaption": "dog barking at 0.659-2.659, 4.075-6.075 and cow mooing at 2.011-6.991, 7.547-10.0", "frequencyCaption": "dog barking two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3384.wav", "onoffCaption": "gunshot at 0.991-2.991 and spraying at 3.058-4.753", "frequencyCaption": "gunshot one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3511.wav", "onoffCaption": "cat meowing at 0.223-1.232, 1.827-2.836 and whistling at 0.758-8.413", "frequencyCaption": "cat meowing two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3544.wav", "onoffCaption": "tapping clicking clanking at 1.273-4.713, 5.387-8.827 and car horn honking at 1.592-3.939 and sheep goat bleating at 1.733-3.733", "frequencyCaption": "tapping clicking clanking two times and car horn honking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3679.wav", "onoffCaption": "spraying at 0.11-0.737, 1.279-1.787, 3.007-5.467 and door knocking at 7.869-10.0", "frequencyCaption": "spraying three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3686.wav", "onoffCaption": "explosion at 1.044-6.044, 6.567-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3737.wav", "onoffCaption": "sheep goat bleating at 0.425-2.425, 4.108-6.108 and door slamming at 7.848-8.748", "frequencyCaption": "sheep goat bleating two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3762.wav", "onoffCaption": "duck quacking at 2.729-4.729", "frequencyCaption": "duck 
quacking one times"} +{"filepath": "data/multi_event_train/syn_3787.wav", "onoffCaption": "burping belching at 2.836-6.095, 7.143-9.467", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3815.wav", "onoffCaption": "cow mooing at 2.141-5.151 and explosion at 2.372-7.372", "frequencyCaption": "cow mooing one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3840.wav", "onoffCaption": "duck quacking at 1.918-3.918, 5.313-7.313", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_30.wav", "onoffCaption": "duck quacking at 1.581-3.581, 5.379-7.379", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_65.wav", "onoffCaption": "gunshot at 0.859-2.859", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_80.wav", "onoffCaption": "sneeze at 0.482-2.478, 3.427-5.423, 7.453-9.449 and explosion at 0.882-3.754, 4.584-6.904", "frequencyCaption": "sneeze three times and explosion two times"} +{"filepath": "data/multi_event_train/syn_117.wav", "onoffCaption": "explosion at 1.659-4.715, 6.7-9.756", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_142.wav", "onoffCaption": "spraying at 0.225-0.733, 1.499-2.007 and dog barking at 0.699-4.019, 4.823-8.143 and door knocking at 0.984-4.734, 5.73-7.951", "frequencyCaption": "spraying two times and dog barking two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_364.wav", "onoffCaption": "car horn honking at 0.766-3.692, 5.467-8.228", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_381.wav", "onoffCaption": "thump thud at 0.034-3.952, 6.217-8.717 and gunshot at 1.044-3.085", "frequencyCaption": "thump thud two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_514.wav", "onoffCaption": "cow mooing at 0.534-3.516 and door slamming at 
3.555-4.574, 6.074-7.093", "frequencyCaption": "cow mooing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_541.wav", "onoffCaption": "car horn honking at 2.908-6.749", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_629.wav", "onoffCaption": "burping belching at 1.02-6.144, 6.845-9.4", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_732.wav", "onoffCaption": "explosion at 0.038-2.126 and thump thud at 0.211-2.982, 4.408-6.482", "frequencyCaption": "explosion one times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_782.wav", "onoffCaption": "thump thud at 1.036-3.375, 4.184-6.955 and cat meowing at 2.618-5.859, 7.36-10.0", "frequencyCaption": "thump thud two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_810.wav", "onoffCaption": "dog barking at 0.258-2.258, 4.132-6.132, 7.99-9.99", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_845.wav", "onoffCaption": "thump thud at 2.152-6.07", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1024.wav", "onoffCaption": "woman laughing at 0.196-2.277, 4.543-6.624", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1071.wav", "onoffCaption": "door slamming at 0.205-2.205, 3.528-5.528, 6.22-7.473 and duck quacking at 2.613-4.613", "frequencyCaption": "door slamming three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1094.wav", "onoffCaption": "sheep goat bleating at 2.335-4.335, 5.743-7.743", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1202.wav", "onoffCaption": "cat meowing at 0.067-1.076", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1319.wav", "onoffCaption": "train horn at 0.993-3.873, 4.733-7.613", "frequencyCaption": 
"train horn two times"} +{"filepath": "data/multi_event_train/syn_1472.wav", "onoffCaption": "thump thud at 0.435-4.353 and sheep goat bleating at 1.679-3.679, 5.085-7.085", "frequencyCaption": "thump thud one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1497.wav", "onoffCaption": "door slamming at 0.326-2.326, 4.646-6.646", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1569.wav", "onoffCaption": "cow mooing at 0.08-3.09, 4.676-7.686 and door slamming at 7.197-9.197", "frequencyCaption": "cow mooing two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1654.wav", "onoffCaption": "tapping clicking clanking at 3.172-6.612", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1838.wav", "onoffCaption": "sheep goat bleating at 1.288-4.608, 6.25-9.57 and duck quacking at 2.365-4.365", "frequencyCaption": "sheep goat bleating two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1888.wav", "onoffCaption": "tapping clicking clanking at 0.995-4.435 and cow mooing at 2.244-5.226, 6.414-9.383", "frequencyCaption": "tapping clicking clanking one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1923.wav", "onoffCaption": "gunshot at 1.011-3.011, 4.239-6.239, 7.057-9.057", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1976.wav", "onoffCaption": "cat meowing at 2.801-4.795, 5.356-7.544, 8.821-9.961", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1993.wav", "onoffCaption": "woman laughing at 2.007-4.089, 6.48-8.562", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3013.wav", "onoffCaption": "sheep goat bleating at 2.166-4.166 and cow mooing at 4.473-7.442 and gunshot at 6.86-8.86", "frequencyCaption": "sheep goat bleating one times and cow mooing 
one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3046.wav", "onoffCaption": "sheep goat bleating at 0.979-2.979 and door slamming at 5.08-6.604, 7.268-9.697", "frequencyCaption": "sheep goat bleating one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_3108.wav", "onoffCaption": "spraying at 0.384-2.512, 4.662-7.003 and sneeze at 0.575-2.821", "frequencyCaption": "spraying two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3235.wav", "onoffCaption": "cat meowing at 0.814-1.825", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3260.wav", "onoffCaption": "cat meowing at 0.62-2.56, 3.135-5.075, 6.248-8.188", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_3285.wav", "onoffCaption": "gunshot at 0.533-2.533", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3410.wav", "onoffCaption": "duck quacking at 0.698-2.698, 4.798-6.798", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3445.wav", "onoffCaption": "cow mooing at 1.175-4.185, 4.703-7.672 and door knocking at 6.169-8.549", "frequencyCaption": "cow mooing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3636.wav", "onoffCaption": "train horn at 1.218-3.873, 5.846-8.552", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3663.wav", "onoffCaption": "explosion at 2.042-4.049", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3778.wav", "onoffCaption": "train horn at 2.917-6.237, 7.741-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3914.wav", "onoffCaption": "thump thud at 0.368-2.868, 3.643-6.143, 7.238-9.738", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_3941.wav", "onoffCaption": "car horn honking 
at 0.289-5.196, 7.372-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_37.wav", "onoffCaption": "burping belching at 0.202-5.202, 6.756-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_62.wav", "onoffCaption": "door slamming at 0.077-0.758, 1.309-3.309 and car horn honking at 7.208-10.0", "frequencyCaption": "door slamming two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_87.wav", "onoffCaption": "cat meowing at 3.989-5.929, 7.639-9.389", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_110.wav", "onoffCaption": "woman laughing at 1.572-3.654, 4.928-7.01", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_145.wav", "onoffCaption": "cat meowing at 2.595-3.808", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_336.wav", "onoffCaption": "spraying at 2.725-3.725", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_408.wav", "onoffCaption": "spraying at 0.198-0.979, 2.793-3.42, 4.839-6.572", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_513.wav", "onoffCaption": "thump thud at 0.607-4.525, 5.536-9.454 and woman laughing at 3.351-7.403", "frequencyCaption": "thump thud two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_546.wav", "onoffCaption": "burping belching at 0.499-2.597", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_760.wav", "onoffCaption": "car horn honking at 2.233-6.555, 7.446-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_785.wav", "onoffCaption": "sheep goat bleating at 0.214-2.214 and tapping clicking clanking at 4.133-7.573", "frequencyCaption": "sheep goat bleating one times and tapping clicking clanking one 
times"} +{"filepath": "data/multi_event_train/syn_817.wav", "onoffCaption": "burping belching at 0.67-2.768, 5.205-7.33", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_842.wav", "onoffCaption": "explosion at 3.225-6.225", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_959.wav", "onoffCaption": "door knocking at 0.278-2.581, 4.559-6.961, 7.965-10.0", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1023.wav", "onoffCaption": "spraying at 0.154-1.005, 3.277-4.972", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1076.wav", "onoffCaption": "woman laughing at 0.237-7.249, 7.811-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1093.wav", "onoffCaption": "spraying at 0.32-1.052, 3.287-5.415 and burping belching at 6.898-10.0", "frequencyCaption": "spraying two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1138.wav", "onoffCaption": "car horn honking at 0.532-2.532, 3.585-6.576, 7.794-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_1188.wav", "onoffCaption": "cow mooing at 3.604-8.033", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1250.wav", "onoffCaption": "sheep goat bleating at 3.127-5.127, 5.942-7.942", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1420.wav", "onoffCaption": "door slamming at 3.092-4.07, 5.862-6.713 and thump thud at 5.014-7.514", "frequencyCaption": "door slamming two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1427.wav", "onoffCaption": "tapping clicking clanking at 3.687-7.127", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1475.wav", "onoffCaption": "dog barking at 2.524-4.524 and 
duck quacking at 2.891-4.891, 5.41-7.41", "frequencyCaption": "dog barking one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1490.wav", "onoffCaption": "woman laughing at 0.474-3.269", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1606.wav", "onoffCaption": "train horn at 3.052-6.372", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1924.wav", "onoffCaption": "woman laughing at 0.787-3.353, 4.339-6.905 and door knocking at 2.189-5.957", "frequencyCaption": "woman laughing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1971.wav", "onoffCaption": "duck quacking at 0.795-2.795, 4.693-6.693", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1994.wav", "onoffCaption": "burping belching at 0.079-2.11", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3014.wav", "onoffCaption": "duck quacking at 2.193-4.193, 4.922-6.922, 7.799-9.799", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3041.wav", "onoffCaption": "door slamming at 1.161-2.926 and whistling at 7.103-10.0", "frequencyCaption": "door slamming one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3232.wav", "onoffCaption": "cat meowing at 2.11-4.05, 5.92-7.86", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3267.wav", "onoffCaption": "thump thud at 0.009-4.459, 6.04-8.228", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3329.wav", "onoffCaption": "thump thud at 0.967-4.014, 6.031-9.078", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3417.wav", "onoffCaption": "spraying at 0.093-1.034", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_3442.wav", "onoffCaption": "gunshot 
at 0.47-2.47, 4.029-6.029 and cow mooing at 4.86-7.842", "frequencyCaption": "gunshot two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3559.wav", "onoffCaption": "burping belching at 0.082-2.176, 4.473-7.294 and woman laughing at 0.393-2.676", "frequencyCaption": "burping belching two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3631.wav", "onoffCaption": "sneeze at 3.473-4.761, 6.665-9.279", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3664.wav", "onoffCaption": "tapping clicking clanking at 2.176-5.616, 6.881-9.797", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3681.wav", "onoffCaption": "tapping clicking clanking at 0.021-3.461", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3808.wav", "onoffCaption": "car horn honking at 0.792-3.139 and woman laughing at 1.428-3.783, 4.399-7.212", "frequencyCaption": "car horn honking one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3913.wav", "onoffCaption": "train horn at 0.685-4.045", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3946.wav", "onoffCaption": "car horn honking at 0.998-3.911 and thump thud at 2.478-6.396", "frequencyCaption": "car horn honking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_78.wav", "onoffCaption": "door slamming at 2.514-3.494, 4.288-5.268, 6.573-7.553 and thump thud at 3.927-6.698", "frequencyCaption": "door slamming three times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_237.wav", "onoffCaption": "dog barking at 1.736-3.736, 5.817-7.817 and door slamming at 3.506-4.006, 4.74-5.24", "frequencyCaption": "dog barking two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_262.wav", "onoffCaption": "explosion at 0.161-2.879", 
"frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_278.wav", "onoffCaption": "whistling at 0.888-3.863, 5.403-8.378 and spraying at 1.588-3.98", "frequencyCaption": "whistling two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_287.wav", "onoffCaption": "train horn at 2.767-6.887", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_363.wav", "onoffCaption": "car horn honking at 0.079-2.897, 4.356-7.174 and thump thud at 5.249-7.588", "frequencyCaption": "car horn honking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_379.wav", "onoffCaption": "cat meowing at 0.541-3.445 and sheep goat bleating at 3.471-5.471, 7.835-9.835 and burping belching at 5.151-8.651", "frequencyCaption": "cat meowing one times and sheep goat bleating two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_386.wav", "onoffCaption": "tapping clicking clanking at 3.089-6.529", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_447.wav", "onoffCaption": "door knocking at 1.369-7.429 and cat meowing at 1.777-3.725, 5.36-7.308", "frequencyCaption": "door knocking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_634.wav", "onoffCaption": "sheep goat bleating at 3.048-5.048, 6.235-8.355 and woman laughing at 4.232-6.348, 7.658-9.852", "frequencyCaption": "sheep goat bleating two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_661.wav", "onoffCaption": "car horn honking at 0.639-5.039, 6.08-8.118", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_684.wav", "onoffCaption": "whistling at 1.082-8.737", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_735.wav", "onoffCaption": "thump thud at 2.444-4.672, 5.658-7.997 and gunshot at 2.509-4.528", "frequencyCaption": 
"thump thud two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_858.wav", "onoffCaption": "woman laughing at 1.337-3.62 and spraying at 6.255-7.339", "frequencyCaption": "woman laughing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_916.wav", "onoffCaption": "duck quacking at 0.154-2.154, 3.43-5.43", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_943.wav", "onoffCaption": "thump thud at 0.617-4.992", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1089.wav", "onoffCaption": "duck quacking at 0.838-2.838, 4.658-6.658 and sheep goat bleating at 3.276-5.276", "frequencyCaption": "duck quacking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1177.wav", "onoffCaption": "door slamming at 0.485-2.398, 4.742-7.703", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1192.wav", "onoffCaption": "burping belching at 3.097-7.097", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1205.wav", "onoffCaption": "door slamming at 1.534-2.434, 3.424-4.324 and burping belching at 6.945-9.945", "frequencyCaption": "door slamming two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1304.wav", "onoffCaption": "sneeze at 0.136-2.382, 3.132-5.378, 6.294-8.54 and cat meowing at 3.431-5.391, 6.289-8.237", "frequencyCaption": "sneeze three times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1351.wav", "onoffCaption": "door slamming at 0.002-1.002, 2.305-4.305", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1521.wav", "onoffCaption": "sheep goat bleating at 0.099-2.099 and whistling at 0.517-8.267", "frequencyCaption": "sheep goat bleating one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1591.wav", "onoffCaption": "cow 
mooing at 0.046-3.015, 4.496-7.465, 7.986-10.0 and door slamming at 0.486-1.719", "frequencyCaption": "cow mooing three times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1649.wav", "onoffCaption": "sheep goat bleating at 1.382-3.382 and duck quacking at 6.325-8.325", "frequencyCaption": "sheep goat bleating one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1653.wav", "onoffCaption": "duck quacking at 2.236-4.236", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1707.wav", "onoffCaption": "train horn at 0.201-3.001 and car horn honking at 0.44-2.44", "frequencyCaption": "train horn one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1748.wav", "onoffCaption": "car horn honking at 1.705-5.292 and cow mooing at 2.18-7.16", "frequencyCaption": "car horn honking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1752.wav", "onoffCaption": "burping belching at 1.373-5.242", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1825.wav", "onoffCaption": "door knocking at 2.788-5.037, 6.324-8.552", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1870.wav", "onoffCaption": "duck quacking at 2.886-4.886, 7.079-9.079", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3115.wav", "onoffCaption": "train horn at 1.259-4.499, 5.745-8.4", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3140.wav", "onoffCaption": "whistling at 0.617-2.626, 3.818-6.71 and door knocking at 2.508-5.132, 6.443-9.067", "frequencyCaption": "whistling two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3228.wav", "onoffCaption": "duck quacking at 1.877-3.877", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3282.wav", 
"onoffCaption": "cat meowing at 0.19-1.745, 3.887-5.442 and woman laughing at 7.712-9.81", "frequencyCaption": "cat meowing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3298.wav", "onoffCaption": "door slamming at 3.537-6.511", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_3333.wav", "onoffCaption": "door slamming at 0.342-1.515, 2.303-3.452, 4.241-6.437", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3366.wav", "onoffCaption": "car horn honking at 3.033-7.282", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3383.wav", "onoffCaption": "dog barking at 0.353-5.99 and door slamming at 5.231-7.594", "frequencyCaption": "dog barking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3399.wav", "onoffCaption": "woman laughing at 1.997-5.284 and car horn honking at 4.179-8.02", "frequencyCaption": "woman laughing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3458.wav", "onoffCaption": "gunshot at 2.555-4.555, 6.02-8.02", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3516.wav", "onoffCaption": "thump thud at 1.147-3.918 and sneeze at 5.679-9.327", "frequencyCaption": "thump thud one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3543.wav", "onoffCaption": "car horn honking at 1.374-5.028, 5.948-8.323", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3730.wav", "onoffCaption": "duck quacking at 2.802-4.802, 5.763-7.763", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3765.wav", "onoffCaption": "cow mooing at 2.874-5.843 and cat meowing at 6.157-7.184", "frequencyCaption": "cow mooing one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3780.wav", "onoffCaption": 
"sheep goat bleating at 1.923-5.563, 7.028-10.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3847.wav", "onoffCaption": "train horn at 2.831-7.733 and car horn honking at 3.775-8.024", "frequencyCaption": "train horn one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_213.wav", "onoffCaption": "sheep goat bleating at 4.077-6.077 and whistling at 4.832-7.807 and woman laughing at 5.713-8.132", "frequencyCaption": "sheep goat bleating one times and whistling one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_246.wav", "onoffCaption": "gunshot at 1.206-3.206", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_308.wav", "onoffCaption": "thump thud at 2.127-4.898, 7.007-9.507", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_412.wav", "onoffCaption": "cat meowing at 0.117-1.129 and woman laughing at 2.319-4.435", "frequencyCaption": "cat meowing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_463.wav", "onoffCaption": "sneeze at 1.924-3.027, 3.851-4.954 and explosion at 7.605-10.0", "frequencyCaption": "sneeze two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_486.wav", "onoffCaption": "tapping clicking clanking at 2.505-5.945, 7.953-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_509.wav", "onoffCaption": "thump thud at 1.212-4.879, 5.485-8.191", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_578.wav", "onoffCaption": "explosion at 1.569-4.569 and door slamming at 6.423-8.852", "frequencyCaption": "explosion one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_610.wav", "onoffCaption": "tapping clicking clanking at 1.008-4.448, 6.821-10.0", "frequencyCaption": "tapping clicking clanking two times"} 
+{"filepath": "data/multi_event_train/syn_645.wav", "onoffCaption": "gunshot at 2.581-4.581, 5.606-7.606", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_899.wav", "onoffCaption": "door slamming at 1.574-2.965, 4.028-5.419, 6.017-7.408", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_967.wav", "onoffCaption": "explosion at 2.942-7.863", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_982.wav", "onoffCaption": "tapping clicking clanking at 3.045-6.485", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1039.wav", "onoffCaption": "gunshot at 1.37-3.37, 4.286-6.286, 7.048-9.048", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1048.wav", "onoffCaption": "woman laughing at 0.025-3.11", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1122.wav", "onoffCaption": "thump thud at 3.834-8.284", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1153.wav", "onoffCaption": "sneeze at 0.825-2.437, 4.345-5.957 and dog barking at 2.113-4.513", "frequencyCaption": "sneeze two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1320.wav", "onoffCaption": "cow mooing at 0.004-3.302", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1375.wav", "onoffCaption": "explosion at 3.847-6.375, 7.628-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1390.wav", "onoffCaption": "sheep goat bleating at 0.762-2.762, 4.683-6.683", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1505.wav", "onoffCaption": "tapping clicking clanking at 0.221-3.661, 5.651-8.267", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1574.wav", 
"onoffCaption": "cow mooing at 0.572-3.582, 5.412-8.311", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1638.wav", "onoffCaption": "cow mooing at 1.479-4.777, 5.348-8.33 and duck quacking at 4.231-6.231, 7.487-9.487", "frequencyCaption": "cow mooing two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1688.wav", "onoffCaption": "cat meowing at 0.14-1.184 and burping belching at 4.809-6.916", "frequencyCaption": "cat meowing one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1723.wav", "onoffCaption": "cow mooing at 0.814-3.824, 5.162-8.172", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1776.wav", "onoffCaption": "train horn at 2.256-6.324 and tapping clicking clanking at 2.356-5.796, 6.811-9.098", "frequencyCaption": "train horn one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1793.wav", "onoffCaption": "sheep goat bleating at 0.474-3.77, 4.683-7.979", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1801.wav", "onoffCaption": "thump thud at 1.867-4.329, 5.915-8.377", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1895.wav", "onoffCaption": "thump thud at 3.141-6.188", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3131.wav", "onoffCaption": "sheep goat bleating at 0.574-2.574, 5.046-7.046 and burping belching at 0.963-4.523", "frequencyCaption": "sheep goat bleating two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3259.wav", "onoffCaption": "cat meowing at 1.523-4.553", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3317.wav", "onoffCaption": "tapping clicking clanking at 2.77-6.21", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": 
"data/multi_event_train/syn_3342.wav", "onoffCaption": "thump thud at 0.556-3.603, 4.335-7.382", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3499.wav", "onoffCaption": "thump thud at 0.149-3.816", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3567.wav", "onoffCaption": "woman laughing at 1.141-3.246", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3582.wav", "onoffCaption": "train horn at 2.172-4.639, 6.039-8.506 and thump thud at 7.093-9.555", "frequencyCaption": "train horn two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3714.wav", "onoffCaption": "spraying at 1.752-2.356, 3.004-3.945, 4.761-7.197", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3741.wav", "onoffCaption": "thump thud at 2.691-6.609", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3812.wav", "onoffCaption": "woman laughing at 1.805-4.005, 5.087-7.287", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3863.wav", "onoffCaption": "sneeze at 0.002-1.328, 2.29-5.202 and gunshot at 7.546-9.546", "frequencyCaption": "sneeze two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3886.wav", "onoffCaption": "whistling at 0.26-6.542, 7.991-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3909.wav", "onoffCaption": "woman laughing at 1.825-3.923, 4.569-6.667, 7.773-9.871", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_3978.wav", "onoffCaption": "cat meowing at 0.014-1.598, 3.728-5.312", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_13.wav", "onoffCaption": "train horn at 1.267-6.125", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_161.wav", 
"onoffCaption": "duck quacking at 2.251-4.251, 5.882-7.882", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_184.wav", "onoffCaption": "door slamming at 1.129-4.09, 5.351-8.312", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_209.wav", "onoffCaption": "cow mooing at 0.235-3.217, 4.196-7.178 and duck quacking at 0.314-2.314, 3.528-5.528, 6.184-8.184", "frequencyCaption": "cow mooing two times and duck quacking three times"} +{"filepath": "data/multi_event_train/syn_312.wav", "onoffCaption": "door slamming at 2.09-4.548, 5.452-7.91", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_436.wav", "onoffCaption": "sneeze at 0.157-1.84, 2.888-4.801, 5.695-6.859 and explosion at 1.858-6.452", "frequencyCaption": "sneeze three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_537.wav", "onoffCaption": "thump thud at 0.307-4.682, 5.581-9.956", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_744.wav", "onoffCaption": "car horn honking at 0.921-3.834, 6.306-9.219", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_829.wav", "onoffCaption": "thump thud at 1.369-5.036, 5.978-8.234", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_833.wav", "onoffCaption": "thump thud at 0.767-3.106, 4.114-6.453, 7.09-9.429", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_866.wav", "onoffCaption": "sheep goat bleating at 0.53-4.45", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_928.wav", "onoffCaption": "tapping clicking clanking at 1.238-4.678, 6.197-8.498", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_932.wav", "onoffCaption": "woman laughing at 1.002-4.074, 6.289-9.361", "frequencyCaption": 
"woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1007.wav", "onoffCaption": "door slamming at 1.093-3.093, 4.313-6.313", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1106.wav", "onoffCaption": "car horn honking at 3.019-6.194, 7.499-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1274.wav", "onoffCaption": "duck quacking at 1.809-3.809 and whistling at 5.361-10.0", "frequencyCaption": "duck quacking one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1291.wav", "onoffCaption": "train horn at 0.326-3.726, 4.95-7.162", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1451.wav", "onoffCaption": "thump thud at 0.034-3.952, 4.674-7.174 and spraying at 3.911-5.644, 6.476-8.912", "frequencyCaption": "thump thud two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1550.wav", "onoffCaption": "cow mooing at 2.579-5.589, 7.075-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1622.wav", "onoffCaption": "door slamming at 0.324-2.782, 3.662-6.12, 7.194-9.652", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1739.wav", "onoffCaption": "train horn at 0.383-10.0 and sneeze at 3.175-4.421, 5.109-6.355, 7.415-8.661", "frequencyCaption": "train horn one times and sneeze three times"} +{"filepath": "data/multi_event_train/syn_1854.wav", "onoffCaption": "spraying at 0.11-0.685, 1.718-2.622, 3.534-4.266", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1955.wav", "onoffCaption": "spraying at 2.243-2.818 and explosion at 2.78-5.78", "frequencyCaption": "spraying one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3030.wav", "onoffCaption": "door knocking at 0.001-2.848, 4.27-7.117", "frequencyCaption": "door knocking two times"} 
+{"filepath": "data/multi_event_train/syn_3065.wav", "onoffCaption": "car horn honking at 2.783-6.624, 7.492-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3080.wav", "onoffCaption": "sheep goat bleating at 0.185-3.825", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3164.wav", "onoffCaption": "door slamming at 3.034-5.492, 6.137-7.678", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3181.wav", "onoffCaption": "thump thud at 0.103-4.553 and cat meowing at 1.125-3.102", "frequencyCaption": "thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3216.wav", "onoffCaption": "whistling at 3.804-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3429.wav", "onoffCaption": "door knocking at 0.197-3.317, 5.04-7.55", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3433.wav", "onoffCaption": "sheep goat bleating at 1.432-3.432, 5.775-7.775", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3466.wav", "onoffCaption": "tapping clicking clanking at 0.407-3.847, 5.443-8.883", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3528.wav", "onoffCaption": "tapping clicking clanking at 3.215-6.655, 7.623-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3532.wav", "onoffCaption": "gunshot at 1.231-3.231, 4.12-6.12 and explosion at 7.291-10.0", "frequencyCaption": "gunshot two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3640.wav", "onoffCaption": "sneeze at 0.411-2.524 and dog barking at 0.973-2.973 and spraying at 5.929-6.556, 7.556-8.183, 9.228-9.855", "frequencyCaption": "sneeze one times and dog barking one times and spraying three times"} 
+{"filepath": "data/multi_event_train/syn_3836.wav", "onoffCaption": "cat meowing at 0.111-1.666, 2.677-4.232, 6.241-7.796", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_3937.wav", "onoffCaption": "car horn honking at 0.116-3.029, 3.973-6.427, 7.604-9.604", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_46.wav", "onoffCaption": "duck quacking at 1.528-3.528", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_134.wav", "onoffCaption": "sheep goat bleating at 3.009-7.729", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_222.wav", "onoffCaption": "spraying at 0.061-0.569, 1.14-2.081", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_339.wav", "onoffCaption": "car horn honking at 1.248-3.248, 3.785-5.785 and spraying at 1.875-2.939 and gunshot at 3.353-5.353", "frequencyCaption": "car horn honking two times and spraying one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_347.wav", "onoffCaption": "door knocking at 2.775-5.895, 6.592-9.712", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_407.wav", "onoffCaption": "duck quacking at 1.025-3.025, 4.357-6.357, 7.309-9.309", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_479.wav", "onoffCaption": "car horn honking at 2.446-6.033", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_562.wav", "onoffCaption": "door knocking at 2.335-4.462", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_587.wav", "onoffCaption": "duck quacking at 2.07-4.07, 5.856-7.856", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_674.wav", "onoffCaption": "burping belching at 0.475-2.573, 3.598-6.116, 6.89-9.015", 
"frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_691.wav", "onoffCaption": "explosion at 0.294-4.888, 6.62-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_711.wav", "onoffCaption": "woman laughing at 2.479-4.716, 6.449-8.729 and car horn honking at 3.55-6.063", "frequencyCaption": "woman laughing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_883.wav", "onoffCaption": "woman laughing at 1.491-3.975, 4.989-7.473", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_998.wav", "onoffCaption": "tapping clicking clanking at 0.324-3.764 and car horn honking at 6.065-8.851", "frequencyCaption": "tapping clicking clanking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1052.wav", "onoffCaption": "woman laughing at 1.457-8.902", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1149.wav", "onoffCaption": "car horn honking at 0.002-4.909, 5.614-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1221.wav", "onoffCaption": "sneeze at 0.416-4.916 and cat meowing at 6.14-8.328", "frequencyCaption": "sneeze one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1344.wav", "onoffCaption": "cow mooing at 0.37-3.352 and train horn at 6.332-9.866", "frequencyCaption": "cow mooing one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1404.wav", "onoffCaption": "cow mooing at 0.174-3.156, 3.958-6.94, 7.572-10.0", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/multi_event_train/syn_1609.wav", "onoffCaption": "tapping clicking clanking at 3.452-6.892", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1677.wav", "onoffCaption": "tapping clicking clanking at 0.138-3.578 and gunshot at 
1.678-3.678, 4.883-7.389", "frequencyCaption": "tapping clicking clanking one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1692.wav", "onoffCaption": "sneeze at 2.938-4.169 and thump thud at 6.382-9.153", "frequencyCaption": "sneeze one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1712.wav", "onoffCaption": "explosion at 4.023-7.023", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1789.wav", "onoffCaption": "cat meowing at 2.803-4.387, 5.33-6.516 and cow mooing at 4.765-7.775", "frequencyCaption": "cat meowing two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1880.wav", "onoffCaption": "spraying at 1.107-3.543", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1900.wav", "onoffCaption": "cat meowing at 0.685-2.256 and duck quacking at 5.166-7.166", "frequencyCaption": "cat meowing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3243.wav", "onoffCaption": "dog barking at 2.647-4.647", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3326.wav", "onoffCaption": "door knocking at 1.874-6.17 and dog barking at 2.187-4.187", "frequencyCaption": "door knocking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3358.wav", "onoffCaption": "thump thud at 0.013-2.241", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3483.wav", "onoffCaption": "sneeze at 1.519-3.98, 5.122-7.583 and door slamming at 5.232-7.595, 8.514-9.747", "frequencyCaption": "sneeze two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_3598.wav", "onoffCaption": "train horn at 3.652-7.652", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3615.wav", "onoffCaption": "cat meowing at 2.269-3.84, 5.6-7.171, 7.761-9.332", "frequencyCaption": "cat meowing 
three times"} +{"filepath": "data/multi_event_train/syn_3770.wav", "onoffCaption": "spraying at 2.216-3.463, 4.491-7.075", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3795.wav", "onoffCaption": "whistling at 0.521-9.132", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3807.wav", "onoffCaption": "door knocking at 0.858-2.948 and gunshot at 1.961-4.131", "frequencyCaption": "door knocking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3879.wav", "onoffCaption": "duck quacking at 0.504-2.504, 3.187-5.187", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3962.wav", "onoffCaption": "door slamming at 0.085-0.936 and dog barking at 0.218-2.218", "frequencyCaption": "door slamming one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3987.wav", "onoffCaption": "woman laughing at 0.024-2.616, 4.505-6.534 and cow mooing at 1.114-4.124, 5.734-8.744 and explosion at 1.707-4.575", "frequencyCaption": "woman laughing two times and cow mooing two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_38.wav", "onoffCaption": "whistling at 2.329-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_77.wav", "onoffCaption": "whistling at 0.632-8.382", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_88.wav", "onoffCaption": "explosion at 0.507-5.507, 6.588-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_105.wav", "onoffCaption": "sheep goat bleating at 2.711-4.711, 5.831-7.831", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_277.wav", "onoffCaption": "sheep goat bleating at 3.395-5.395, 6.002-8.002 and door knocking at 4.127-8.277", "frequencyCaption": "sheep goat bleating two times and door knocking one times"} +{"filepath": 
"data/multi_event_train/syn_292.wav", "onoffCaption": "woman laughing at 2.321-4.437, 6.796-8.912", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_389.wav", "onoffCaption": "woman laughing at 2.815-5.234", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_448.wav", "onoffCaption": "burping belching at 2.701-5.701, 6.788-9.788", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_452.wav", "onoffCaption": "cow mooing at 1.714-5.012, 7.457-9.998 and train horn at 1.894-4.374", "frequencyCaption": "cow mooing two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_549.wav", "onoffCaption": "train horn at 2.855-7.932 and dog barking at 5.735-7.735", "frequencyCaption": "train horn one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_553.wav", "onoffCaption": "door knocking at 0.301-2.489, 3.653-5.841", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_621.wav", "onoffCaption": "thump thud at 3.315-6.086, 7.481-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_818.wav", "onoffCaption": "door knocking at 0.557-2.684 and spraying at 5.889-7.064, 8.823-9.998", "frequencyCaption": "door knocking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_903.wav", "onoffCaption": "car horn honking at 0.806-4.393", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_956.wav", "onoffCaption": "door slamming at 0.088-1.066, 1.694-2.532, 3.141-5.141", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_1063.wav", "onoffCaption": "woman laughing at 2.638-5.733, 6.69-8.916", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1079.wav", "onoffCaption": "cat meowing at 0.552-2.688, 5.012-6.159, 
7.267-10.0", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1137.wav", "onoffCaption": "whistling at 2.697-7.181, 7.71-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1162.wav", "onoffCaption": "door slamming at 0.678-3.678, 4.346-5.495 and train horn at 7.049-10.0", "frequencyCaption": "door slamming two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1178.wav", "onoffCaption": "thump thud at 3.23-7.148", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1187.wav", "onoffCaption": "dog barking at 1.943-3.943, 4.593-6.593, 7.488-9.497", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_1311.wav", "onoffCaption": "spraying at 0.006-1.768, 3.268-5.03 and door knocking at 0.879-3.342, 4.739-7.202 and door slamming at 0.879-2.403, 2.917-4.441", "frequencyCaption": "spraying two times and door knocking two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1435.wav", "onoffCaption": "burping belching at 1.147-3.473 and cow mooing at 1.605-4.574", "frequencyCaption": "burping belching one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1534.wav", "onoffCaption": "tapping clicking clanking at 2.592-6.032, 7.042-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1561.wav", "onoffCaption": "sheep goat bleating at 0.655-2.655, 4.69-6.69", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1584.wav", "onoffCaption": "woman laughing at 0.521-3.575 and tapping clicking clanking at 2.057-5.497, 7.773-10.0", "frequencyCaption": "woman laughing one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1747.wav", "onoffCaption": "door knocking at 0.364-3.201", "frequencyCaption": "door knocking one times"} 
+{"filepath": "data/multi_event_train/syn_1830.wav", "onoffCaption": "whistling at 1.244-4.119", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1865.wav", "onoffCaption": "door slamming at 3.362-4.841", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_3001.wav", "onoffCaption": "door slamming at 0.226-1.399, 2.054-4.054", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3100.wav", "onoffCaption": "woman laughing at 0.453-2.937 and dog barking at 1.239-3.239 and cat meowing at 5.498-7.438", "frequencyCaption": "woman laughing one times and dog barking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3155.wav", "onoffCaption": "tapping clicking clanking at 1.428-4.868, 5.411-8.851 and dog barking at 2.667-4.667, 5.798-7.798", "frequencyCaption": "tapping clicking clanking two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3268.wav", "onoffCaption": "door slamming at 0.163-2.944, 4.553-5.531", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3373.wav", "onoffCaption": "door knocking at 0.756-5.458, 6.45-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3396.wav", "onoffCaption": "spraying at 0.33-2.79, 3.695-6.155 and whistling at 0.506-8.345", "frequencyCaption": "spraying two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3418.wav", "onoffCaption": "tapping clicking clanking at 2.902-6.342, 7.418-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3503.wav", "onoffCaption": "door knocking at 0.979-3.603, 5.118-7.245", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3556.wav", "onoffCaption": "whistling at 1.284-6.459, 7.504-10.0", "frequencyCaption": "whistling two times"} 
+{"filepath": "data/multi_event_train/syn_3725.wav", "onoffCaption": "sheep goat bleating at 0.533-2.533, 4.782-7.45 and spraying at 3.404-4.308", "frequencyCaption": "sheep goat bleating two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3848.wav", "onoffCaption": "door slamming at 0.005-2.133 and duck quacking at 0.54-2.54, 3.892-5.892", "frequencyCaption": "door slamming one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3852.wav", "onoffCaption": "spraying at 0.81-1.31 and duck quacking at 3.213-5.213", "frequencyCaption": "spraying one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3949.wav", "onoffCaption": "door slamming at 2.945-5.726, 6.358-9.139", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3953.wav", "onoffCaption": "door slamming at 1.582-3.778 and gunshot at 1.671-3.671, 4.86-6.86, 7.927-9.927 and explosion at 1.95-5.077, 5.781-8.781", "frequencyCaption": "door slamming one times and gunshot three times and explosion two times"} +{"filepath": "data/multi_event_train/syn_22.wav", "onoffCaption": "gunshot at 0.208-2.208, 3.513-5.513", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_92.wav", "onoffCaption": "train horn at 1.487-4.287 and car horn honking at 6.554-10.0", "frequencyCaption": "train horn one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_121.wav", "onoffCaption": "dog barking at 0.074-2.074, 4.268-6.268", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_150.wav", "onoffCaption": "cat meowing at 3.961-5.236, 7.028-8.612 and sheep goat bleating at 5.34-7.34", "frequencyCaption": "cat meowing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_238.wav", "onoffCaption": "cat meowing at 1.32-3.26, 5.223-6.489", "frequencyCaption": "cat meowing two times"} +{"filepath": 
"data/multi_event_train/syn_288.wav", "onoffCaption": "whistling at 2.216-6.7", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_323.wav", "onoffCaption": "woman laughing at 2.454-5.842", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_376.wav", "onoffCaption": "tapping clicking clanking at 0.29-3.73, 6.02-9.46", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_393.wav", "onoffCaption": "burping belching at 2.451-4.777, 5.569-7.604 and cow mooing at 2.659-5.957, 6.533-9.831", "frequencyCaption": "burping belching two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_506.wav", "onoffCaption": "train horn at 0.082-2.756", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_577.wav", "onoffCaption": "spraying at 3.475-4.05, 6.365-7.429", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_720.wav", "onoffCaption": "train horn at 0.504-3.904, 6.135-8.272 and thump thud at 3.277-6.048, 6.725-9.496", "frequencyCaption": "train horn two times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_775.wav", "onoffCaption": "cat meowing at 2.974-3.985, 4.638-5.649, 6.693-7.704", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_790.wav", "onoffCaption": "explosion at 0.948-2.95, 3.893-6.893", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_802.wav", "onoffCaption": "woman laughing at 0.011-4.063", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_857.wav", "onoffCaption": "woman laughing at 1.419-4.214", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_896.wav", "onoffCaption": "door slamming at 2.591-5.565 and cat meowing at 8.569-9.879", "frequencyCaption": "door slamming one times and 
cat meowing one times"} +{"filepath": "data/multi_event_train/syn_919.wav", "onoffCaption": "explosion at 0.047-3.047 and train horn at 0.453-3.853, 4.62-6.757 and sheep goat bleating at 5.955-7.955", "frequencyCaption": "explosion one times and train horn two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1036.wav", "onoffCaption": "thump thud at 0.983-5.433, 7.61-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1086.wav", "onoffCaption": "door slamming at 0.124-1.273 and explosion at 0.735-2.823, 4.138-6.226, 6.896-8.984", "frequencyCaption": "door slamming one times and explosion three times"} +{"filepath": "data/multi_event_train/syn_1210.wav", "onoffCaption": "spraying at 0.162-1.246, 1.853-2.937, 3.553-4.637 and burping belching at 0.281-7.449", "frequencyCaption": "spraying three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1245.wav", "onoffCaption": "whistling at 2.827-5.056, 6.187-8.416", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1411.wav", "onoffCaption": "sneeze at 2.848-5.087", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_1460.wav", "onoffCaption": "thump thud at 0.766-3.537, 4.514-6.887", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1485.wav", "onoffCaption": "train horn at 0.635-3.115, 4.967-7.447 and whistling at 2.075-4.95 and dog barking at 2.827-4.827, 5.893-7.893", "frequencyCaption": "train horn two times and whistling one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_1613.wav", "onoffCaption": "thump thud at 1.417-3.917, 6.277-9.048", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1646.wav", "onoffCaption": "burping belching at 0.959-4.239, 6.326-8.357", "frequencyCaption": "burping belching two times"} +{"filepath": 
"data/multi_event_train/syn_1708.wav", "onoffCaption": "gunshot at 3.501-5.501, 6.563-8.563", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1931.wav", "onoffCaption": "tapping clicking clanking at 2.988-6.428 and whistling at 3.277-5.286, 5.868-8.31", "frequencyCaption": "tapping clicking clanking one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_1964.wav", "onoffCaption": "cat meowing at 2.184-3.916 and spraying at 5.222-6.126, 7.621-8.525", "frequencyCaption": "cat meowing one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1981.wav", "onoffCaption": "sneeze at 2.341-4.955 and spraying at 3.88-4.784, 6.693-9.277", "frequencyCaption": "sneeze one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3054.wav", "onoffCaption": "whistling at 3.252-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3227.wav", "onoffCaption": "burping belching at 1.558-5.894", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3272.wav", "onoffCaption": "burping belching at 0.208-2.534, 3.348-5.674", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3297.wav", "onoffCaption": "train horn at 0.472-3.792 and duck quacking at 5.278-7.278, 7.887-9.887", "frequencyCaption": "train horn one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3369.wav", "onoffCaption": "spraying at 0.138-0.989, 2.815-5.251 and cat meowing at 0.541-5.541, 6.668-8.204 and car horn honking at 5.496-7.961", "frequencyCaption": "spraying two times and cat meowing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3402.wav", "onoffCaption": "dog barking at 0.304-2.304, 3.917-5.917 and spraying at 3.578-4.228, 4.758-5.408 and duck quacking at 4.149-6.149", "frequencyCaption": "dog barking two times and spraying two times and 
duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3457.wav", "onoffCaption": "car horn honking at 0.049-4.449 and door slamming at 2.436-4.436, 6.293-7.466", "frequencyCaption": "car horn honking one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_3519.wav", "onoffCaption": "woman laughing at 0.968-4.356, 6.084-8.682", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3624.wav", "onoffCaption": "door knocking at 1.229-4.845 and cat meowing at 1.724-2.999 and spraying at 7.504-8.679", "frequencyCaption": "door knocking one times and cat meowing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3671.wav", "onoffCaption": "door knocking at 3.586-5.898, 6.835-9.147", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3694.wav", "onoffCaption": "dog barking at 1.084-3.084", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3906.wav", "onoffCaption": "gunshot at 1.193-3.193, 5.106-7.106", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3977.wav", "onoffCaption": "dog barking at 0.384-2.822 and train horn at 5.374-7.854", "frequencyCaption": "dog barking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_53.wav", "onoffCaption": "gunshot at 1.059-3.059", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_174.wav", "onoffCaption": "car horn honking at 2.435-5.93, 6.754-9.219", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_191.wav", "onoffCaption": "dog barking at 1.037-3.037, 4.049-6.049, 7.259-9.259", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_206.wav", "onoffCaption": "door slamming at 0.219-1.368, 2.751-3.9 and sheep goat bleating at 2.449-7.329", "frequencyCaption": "door slamming two times and 
sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_249.wav", "onoffCaption": "spraying at 1.489-3.949, 5.232-5.74, 6.275-7.025 and explosion at 3.381-8.302", "frequencyCaption": "spraying three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_307.wav", "onoffCaption": "gunshot at 0.873-2.873, 4.967-6.967 and duck quacking at 4.095-6.095, 7.153-9.153", "frequencyCaption": "gunshot two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_352.wav", "onoffCaption": "door knocking at 0.553-2.774, 3.711-5.932", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_439.wav", "onoffCaption": "car horn honking at 3.541-7.195", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_489.wav", "onoffCaption": "burping belching at 0.236-3.236, 3.906-6.906, 7.783-10.0 and sheep goat bleating at 0.829-4.149", "frequencyCaption": "burping belching three times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_522.wav", "onoffCaption": "sneeze at 1.585-3.824, 4.766-7.005, 7.533-9.772", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_592.wav", "onoffCaption": "car horn honking at 3.597-6.523", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_650.wav", "onoffCaption": "burping belching at 0.679-3.679, 4.496-7.496", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_704.wav", "onoffCaption": "spraying at 0.353-0.853, 2.125-2.625, 4.729-5.229 and train horn at 2.949-7.149", "frequencyCaption": "spraying three times and train horn one times"} +{"filepath": "data/multi_event_train/syn_751.wav", "onoffCaption": "dog barking at 0.236-2.236, 2.934-4.934, 6.462-8.462 and cat meowing at 0.323-1.598", "frequencyCaption": "dog barking three times and cat meowing one times"} +{"filepath": 
"data/multi_event_train/syn_826.wav", "onoffCaption": "cat meowing at 1.916-3.46 and tapping clicking clanking at 5.548-8.988", "frequencyCaption": "cat meowing one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_873.wav", "onoffCaption": "burping belching at 0.036-3.905, 5.109-7.574 and whistling at 0.109-2.118, 2.672-4.681", "frequencyCaption": "burping belching two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_968.wav", "onoffCaption": "burping belching at 2.876-5.876, 6.506-8.876 and door slamming at 8.343-9.343", "frequencyCaption": "burping belching two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1012.wav", "onoffCaption": "cow mooing at 2.858-7.838", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1047.wav", "onoffCaption": "door knocking at 0.765-3.495 and cat meowing at 6.322-8.217", "frequencyCaption": "door knocking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1109.wav", "onoffCaption": "door knocking at 0.19-2.317, 3.942-6.775", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1234.wav", "onoffCaption": "woman laughing at 0.092-2.792, 4.844-7.544", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1261.wav", "onoffCaption": "dog barking at 0.85-2.85, 3.551-5.551 and door knocking at 1.585-4.694, 5.258-8.367 and spraying at 2.752-4.88", "frequencyCaption": "dog barking two times and door knocking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1284.wav", "onoffCaption": "dog barking at 0.114-2.114, 3.412-5.412 and cat meowing at 2.154-4.342 and gunshot at 2.395-4.395, 6.662-8.662", "frequencyCaption": "dog barking two times and cat meowing one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1360.wav", "onoffCaption": "door knocking at 2.493-6.255, 
7.002-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1444.wav", "onoffCaption": "gunshot at 0.506-2.506 and burping belching at 0.555-5.555", "frequencyCaption": "gunshot one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1637.wav", "onoffCaption": "tapping clicking clanking at 4.251-7.691", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1662.wav", "onoffCaption": "tapping clicking clanking at 0.128-3.568, 4.812-7.009, 7.776-9.841 and door slamming at 1.132-3.59, 5.164-7.622", "frequencyCaption": "tapping clicking clanking three times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1687.wav", "onoffCaption": "woman laughing at 0.062-7.507", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1736.wav", "onoffCaption": "train horn at 0.102-3.502, 4.222-6.522", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1779.wav", "onoffCaption": "cat meowing at 0.502-3.532, 5.329-8.359", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1915.wav", "onoffCaption": "train horn at 0.937-3.577, 5.909-8.278", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1940.wav", "onoffCaption": "cat meowing at 3.102-4.251 and sheep goat bleating at 7.5-9.5", "frequencyCaption": "cat meowing one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3025.wav", "onoffCaption": "train horn at 0.202-3.002, 5.381-8.181", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3070.wav", "onoffCaption": "spraying at 2.319-2.946, 5.07-5.697", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3095.wav", "onoffCaption": "door knocking at 1.645-4.182, 6.269-9.116", "frequencyCaption": "door knocking two 
times"} +{"filepath": "data/multi_event_train/syn_3203.wav", "onoffCaption": "train horn at 2.208-5.978 and door slamming at 7.84-9.231", "frequencyCaption": "train horn one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3219.wav", "onoffCaption": "gunshot at 2.327-4.327, 6.446-8.446", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3256.wav", "onoffCaption": "cat meowing at 2.573-4.144, 6.409-7.98 and sneeze at 3.459-4.713", "frequencyCaption": "cat meowing two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3318.wav", "onoffCaption": "car horn honking at 0.37-4.77 and sneeze at 5.499-6.663, 7.188-8.352", "frequencyCaption": "car horn honking one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3426.wav", "onoffCaption": "spraying at 2.304-4.888", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_3473.wav", "onoffCaption": "train horn at 3.563-9.623", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3496.wav", "onoffCaption": "door knocking at 1.616-6.318", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3568.wav", "onoffCaption": "spraying at 2.545-3.545, 4.18-5.18, 6.037-7.037", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3600.wav", "onoffCaption": "sneeze at 1.585-3.498, 4.742-7.001", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3655.wav", "onoffCaption": "gunshot at 1.525-3.655, 4.353-6.353", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3839.wav", "onoffCaption": "sheep goat bleating at 1.587-4.883, 6.121-8.121 and door knocking at 2.008-4.383, 5.019-7.248 and duck quacking at 5.02-7.02", "frequencyCaption": "sheep goat bleating two times and door knocking two times and duck quacking one times"} +{"filepath": 
"data/multi_event_train/syn_3889.wav", "onoffCaption": "sheep goat bleating at 0.727-4.647, 5.179-7.179", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3922.wav", "onoffCaption": "tapping clicking clanking at 2.51-5.95, 6.743-9.167", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3992.wav", "onoffCaption": "car horn honking at 0.694-4.943, 6.084-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_49.wav", "onoffCaption": "cat meowing at 2.928-4.545 and tapping clicking clanking at 6.788-10.0", "frequencyCaption": "cat meowing one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_169.wav", "onoffCaption": "car horn honking at 3.743-8.65", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_253.wav", "onoffCaption": "car horn honking at 0.476-4.063, 6.176-9.023 and door slamming at 3.689-5.885", "frequencyCaption": "car horn honking two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_348.wav", "onoffCaption": "duck quacking at 1.696-3.696 and car horn honking at 6.827-10.0", "frequencyCaption": "duck quacking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_423.wav", "onoffCaption": "sneeze at 0.039-1.984, 3.045-5.358 and duck quacking at 0.331-2.331, 4.696-6.696 and cat meowing at 4.868-6.758, 7.943-9.833", "frequencyCaption": "sneeze two times and duck quacking two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_424.wav", "onoffCaption": "dog barking at 2.088-4.088, 5.769-7.769", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_476.wav", "onoffCaption": "cow mooing at 2.637-5.619, 6.636-9.618", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_493.wav", "onoffCaption": 
"train horn at 2.842-6.082 and thump thud at 4.328-6.79", "frequencyCaption": "train horn one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_538.wav", "onoffCaption": "door slamming at 3.054-4.357 and sneeze at 7.84-9.785", "frequencyCaption": "door slamming one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_588.wav", "onoffCaption": "sneeze at 0.202-3.85, 5.004-6.096", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_605.wav", "onoffCaption": "woman laughing at 3.674-6.26", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_869.wav", "onoffCaption": "gunshot at 3.09-5.09, 7.337-9.337", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_920.wav", "onoffCaption": "tapping clicking clanking at 0.304-3.744, 5.854-9.294", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_927.wav", "onoffCaption": "whistling at 2.837-7.321", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_972.wav", "onoffCaption": "whistling at 0.085-9.75 and cow mooing at 0.316-4.745, 6.432-8.861 and spraying at 0.327-1.059", "frequencyCaption": "whistling one times and cow mooing two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_997.wav", "onoffCaption": "cat meowing at 0.354-1.456, 2.312-3.414 and cow mooing at 1.639-6.068, 7.489-10.0 and duck quacking at 2.356-4.356", "frequencyCaption": "cat meowing two times and cow mooing two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1008.wav", "onoffCaption": "gunshot at 2.084-4.324 and sneeze at 2.735-4.974", "frequencyCaption": "gunshot one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1113.wav", "onoffCaption": "explosion at 3.046-7.046 and thump thud at 4.891-8.558", "frequencyCaption": "explosion one times and thump thud one 
times"} +{"filepath": "data/multi_event_train/syn_1114.wav", "onoffCaption": "car horn honking at 2.766-6.261, 7.379-9.379", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1146.wav", "onoffCaption": "sneeze at 3.579-5.896, 6.434-9.281", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1335.wav", "onoffCaption": "explosion at 1.32-4.873 and sheep goat bleating at 1.995-3.995", "frequencyCaption": "explosion one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1385.wav", "onoffCaption": "cat meowing at 0.38-1.924, 3.984-5.528, 6.641-8.185", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1459.wav", "onoffCaption": "whistling at 0.017-8.361", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1510.wav", "onoffCaption": "tapping clicking clanking at 1.596-5.036", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1542.wav", "onoffCaption": "car horn honking at 2.759-7.159, 7.739-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1545.wav", "onoffCaption": "thump thud at 1.587-4.358 and car horn honking at 7.312-9.777", "frequencyCaption": "thump thud one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1678.wav", "onoffCaption": "explosion at 0.267-3.82, 5.934-8.806", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1763.wav", "onoffCaption": "whistling at 2.661-5.536, 6.975-9.85", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1786.wav", "onoffCaption": "cow mooing at 0.537-3.835 and thump thud at 5.986-8.757", "frequencyCaption": "cow mooing one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1814.wav", "onoffCaption": "woman laughing at 0.504-2.741, 
3.614-5.818, 7.061-9.416", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_1841.wav", "onoffCaption": "burping belching at 0.979-5.002", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1846.wav", "onoffCaption": "door knocking at 3.081-5.171, 6.785-8.875", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3088.wav", "onoffCaption": "woman laughing at 1.437-8.882", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3124.wav", "onoffCaption": "sheep goat bleating at 1.512-3.512, 4.025-6.025", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3171.wav", "onoffCaption": "whistling at 0.029-5.427, 6.409-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3176.wav", "onoffCaption": "woman laughing at 2.821-5.104 and spraying at 5.22-6.301, 7.497-8.578", "frequencyCaption": "woman laughing one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3193.wav", "onoffCaption": "thump thud at 0.666-3.166 and sneeze at 1.27-5.326, 6.655-8.041", "frequencyCaption": "thump thud one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3194.wav", "onoffCaption": "woman laughing at 2.365-5.437, 6.472-8.84 and spraying at 3.387-4.468, 6.773-7.854", "frequencyCaption": "woman laughing two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3302.wav", "onoffCaption": "burping belching at 2.505-4.612, 5.841-7.872", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3357.wav", "onoffCaption": "cow mooing at 0.093-3.075 and explosion at 7.184-10.0", "frequencyCaption": "cow mooing one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3469.wav", "onoffCaption": "sheep goat bleating at 3.297-5.297, 7.545-9.545", 
"frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3520.wav", "onoffCaption": "cow mooing at 0.447-3.745", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3527.wav", "onoffCaption": "cow mooing at 0.083-3.381", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3572.wav", "onoffCaption": "door knocking at 2.958-7.397 and door slamming at 4.054-4.994", "frequencyCaption": "door knocking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3597.wav", "onoffCaption": "door slamming at 0.711-2.839, 4.986-6.377", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3701.wav", "onoffCaption": "explosion at 1.073-3.393, 4.548-6.868, 7.945-10.0", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_3754.wav", "onoffCaption": "car horn honking at 0.048-4.448, 5.584-7.585 and spraying at 2.765-3.849", "frequencyCaption": "car horn honking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3823.wav", "onoffCaption": "door slamming at 2.956-5.917 and dog barking at 4.289-6.289", "frequencyCaption": "door slamming one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3824.wav", "onoffCaption": "explosion at 0.787-3.787, 4.651-7.651 and sheep goat bleating at 3.192-5.192, 5.983-8.134", "frequencyCaption": "explosion two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3876.wav", "onoffCaption": "burping belching at 0.142-2.507, 3.011-5.376", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3893.wav", "onoffCaption": "duck quacking at 0.014-2.014, 3.379-5.379, 7.391-9.391", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3938.wav", "onoffCaption": "spraying at 1.836-4.228, 5.119-6.183, 
6.916-9.044", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3988.wav", "onoffCaption": "door slamming at 2.053-4.416 and burping belching at 7.773-10.0", "frequencyCaption": "door slamming one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_201.wav", "onoffCaption": "duck quacking at 0.022-2.022, 3.195-5.195, 5.934-7.934 and door slamming at 0.033-1.15", "frequencyCaption": "duck quacking three times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_471.wav", "onoffCaption": "sneeze at 0.419-1.583 and explosion at 3.872-5.965, 7.719-10.0", "frequencyCaption": "sneeze one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_494.wav", "onoffCaption": "cat meowing at 1.089-2.644, 5.032-6.587 and thump thud at 4.146-6.485 and sheep goat bleating at 5.667-7.667", "frequencyCaption": "cat meowing two times and thump thud one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_657.wav", "onoffCaption": "cow mooing at 0.315-3.613, 5.99-8.959", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_719.wav", "onoffCaption": "cat meowing at 0.265-1.531, 3.646-5.378", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_975.wav", "onoffCaption": "car horn honking at 0.708-5.615", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_990.wav", "onoffCaption": "spraying at 0.108-0.84, 1.923-3.685, 4.653-5.9", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1141.wav", "onoffCaption": "sneeze at 0.18-3.388, 4.291-5.585", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1229.wav", "onoffCaption": "dog barking at 2.048-5.368, 7.558-10.0", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1299.wav", "onoffCaption": "cat meowing at 
0.318-4.678, 5.599-7.154 and burping belching at 0.472-4.472", "frequencyCaption": "cat meowing two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1332.wav", "onoffCaption": "dog barking at 0.323-2.323, 3.124-5.124, 7.606-9.606", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_1367.wav", "onoffCaption": "whistling at 1.395-3.624 and duck quacking at 2.012-4.012 and woman laughing at 6.398-8.753", "frequencyCaption": "whistling one times and duck quacking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1382.wav", "onoffCaption": "door knocking at 1.252-3.627, 4.328-6.703, 7.558-9.933", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1517.wav", "onoffCaption": "explosion at 0.423-3.423 and spraying at 1.514-2.141, 4.439-5.008", "frequencyCaption": "explosion one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1731.wav", "onoffCaption": "thump thud at 0.204-4.654 and door slamming at 0.552-1.691", "frequencyCaption": "thump thud one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1764.wav", "onoffCaption": "door knocking at 2.123-5.873", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1781.wav", "onoffCaption": "dog barking at 1.246-3.246, 5.194-7.194", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1813.wav", "onoffCaption": "spraying at 0.171-2.631, 3.957-4.861, 6.029-8.421", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1908.wav", "onoffCaption": "tapping clicking clanking at 1.365-4.805", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3038.wav", "onoffCaption": "cow mooing at 1.901-4.87", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3123.wav", 
"onoffCaption": "spraying at 1.022-3.414 and burping belching at 5.862-9.862", "frequencyCaption": "spraying one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3305.wav", "onoffCaption": "cow mooing at 0.655-5.084, 5.786-8.564", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3350.wav", "onoffCaption": "sheep goat bleating at 0.712-2.712, 3.787-6.394 and duck quacking at 6.274-8.274", "frequencyCaption": "sheep goat bleating two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3575.wav", "onoffCaption": "tapping clicking clanking at 0.812-4.252, 5.887-8.473", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3590.wav", "onoffCaption": "whistling at 0.635-6.135, 7.948-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3648.wav", "onoffCaption": "spraying at 3.426-4.076 and sheep goat bleating at 7.566-9.566", "frequencyCaption": "spraying one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3706.wav", "onoffCaption": "whistling at 0.945-8.6", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3753.wav", "onoffCaption": "tapping clicking clanking at 3.307-6.747", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3871.wav", "onoffCaption": "explosion at 3.077-5.83, 6.473-9.226", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3894.wav", "onoffCaption": "sheep goat bleating at 0.201-2.201, 3.431-5.431", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_54.wav", "onoffCaption": "door slamming at 3.055-4.446, 5.885-8.248", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_126.wav", "onoffCaption": "dog barking at 0.327-2.327", 
"frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_254.wav", "onoffCaption": "gunshot at 0.466-2.466, 3.589-5.589, 7.552-9.552", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_355.wav", "onoffCaption": "duck quacking at 2.57-4.57, 5.358-7.358 and sheep goat bleating at 7.642-9.642", "frequencyCaption": "duck quacking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_570.wav", "onoffCaption": "train horn at 2.982-5.782, 6.951-9.751", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_595.wav", "onoffCaption": "sneeze at 3.065-7.594", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_602.wav", "onoffCaption": "duck quacking at 0.964-2.964, 5.143-7.143", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_618.wav", "onoffCaption": "door slamming at 2.714-3.853, 4.445-5.584, 6.529-7.668", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_703.wav", "onoffCaption": "tapping clicking clanking at 1.009-4.449, 6.831-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_874.wav", "onoffCaption": "thump thud at 0.65-5.1, 6.105-8.333", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_891.wav", "onoffCaption": "thump thud at 2.63-5.13 and cat meowing at 3.305-5.037, 5.676-7.408", "frequencyCaption": "thump thud one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1040.wav", "onoffCaption": "sheep goat bleating at 1.747-3.747", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1233.wav", "onoffCaption": "gunshot at 3.194-5.194", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_1328.wav", "onoffCaption": "gunshot at 0.77-2.77, 
4.869-7.375, 7.885-9.885", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1416.wav", "onoffCaption": "door slamming at 0.159-0.84 and whistling at 0.385-4.869", "frequencyCaption": "door slamming one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1665.wav", "onoffCaption": "duck quacking at 1.743-3.743, 4.623-6.623, 7.499-9.499", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_1680.wav", "onoffCaption": "train horn at 0.177-2.577, 3.945-6.558 and explosion at 0.391-3.263, 3.995-6.867, 7.672-10.0 and cat meowing at 1.708-6.068", "frequencyCaption": "train horn two times and explosion three times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1809.wav", "onoffCaption": "tapping clicking clanking at 0.425-3.865, 4.963-7.504", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1912.wav", "onoffCaption": "tapping clicking clanking at 2.261-5.701", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3022.wav", "onoffCaption": "sheep goat bleating at 0.296-2.296 and train horn at 4.943-8.423", "frequencyCaption": "sheep goat bleating one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3139.wav", "onoffCaption": "sheep goat bleating at 2.639-4.639", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3204.wav", "onoffCaption": "duck quacking at 0.444-2.444 and thump thud at 5.921-8.968", "frequencyCaption": "duck quacking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3251.wav", "onoffCaption": "tapping clicking clanking at 0.42-3.86 and spraying at 2.566-3.141", "frequencyCaption": "tapping clicking clanking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3474.wav", "onoffCaption": "tapping clicking clanking at 
0.664-4.104, 6.255-8.849", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3491.wav", "onoffCaption": "cat meowing at 0.031-1.391 and door knocking at 4.522-9.224", "frequencyCaption": "cat meowing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3607.wav", "onoffCaption": "thump thud at 0.385-4.303, 5.233-7.577 and duck quacking at 5.528-7.528", "frequencyCaption": "thump thud two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3652.wav", "onoffCaption": "gunshot at 0.653-2.653", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3749.wav", "onoffCaption": "explosion at 0.571-5.165", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3970.wav", "onoffCaption": "dog barking at 3.47-5.47, 6.969-8.969", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3995.wav", "onoffCaption": "dog barking at 0.185-2.185, 2.955-4.955", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_70.wav", "onoffCaption": "dog barking at 1.993-3.993, 4.548-6.548", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_95.wav", "onoffCaption": "thump thud at 0.47-3.517, 4.762-7.809", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_102.wav", "onoffCaption": "door knocking at 2.705-5.825, 7.509-9.76", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_173.wav", "onoffCaption": "tapping clicking clanking at 0.086-3.526 and whistling at 0.707-9.805 and explosion at 2.13-4.137, 5.322-7.329", "frequencyCaption": "tapping clicking clanking one times and whistling one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_196.wav", "onoffCaption": "explosion at 0.777-2.779", "frequencyCaption": "explosion one times"} 
+{"filepath": "data/multi_event_train/syn_300.wav", "onoffCaption": "thump thud at 1.121-5.571", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_371.wav", "onoffCaption": "duck quacking at 1.22-3.22, 5.626-7.626", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_394.wav", "onoffCaption": "woman laughing at 2.892-5.138", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_525.wav", "onoffCaption": "explosion at 0.454-2.715, 3.764-6.025 and whistling at 1.919-7.094", "frequencyCaption": "explosion two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_554.wav", "onoffCaption": "cat meowing at 1.653-2.928, 4.67-5.945", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_669.wav", "onoffCaption": "train horn at 2.326-6.326, 7.15-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_727.wav", "onoffCaption": "whistling at 0.48-10.0 and door knocking at 1.258-6.091", "frequencyCaption": "whistling one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_756.wav", "onoffCaption": "duck quacking at 2.343-4.343, 4.875-6.875, 7.865-9.865", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_772.wav", "onoffCaption": "explosion at 2.289-5.161, 5.982-8.854", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_797.wav", "onoffCaption": "sheep goat bleating at 1.447-3.447, 5.844-7.844 and dog barking at 2.75-4.75", "frequencyCaption": "sheep goat bleating two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_821.wav", "onoffCaption": "gunshot at 0.209-2.209, 3.627-5.627 and tapping clicking clanking at 0.683-4.123", "frequencyCaption": "gunshot two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_850.wav", 
"onoffCaption": "train horn at 0.525-4.295 and door knocking at 0.696-3.233 and cat meowing at 2.715-5.619, 6.598-8.169", "frequencyCaption": "train horn one times and door knocking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1015.wav", "onoffCaption": "car horn honking at 0.191-3.117 and spraying at 2.06-2.568, 3.885-4.393, 5.536-6.044 and door slamming at 4.087-5.027, 6.379-7.319", "frequencyCaption": "car horn honking one times and spraying three times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1064.wav", "onoffCaption": "door slamming at 1.414-4.293", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1081.wav", "onoffCaption": "door slamming at 1.436-4.153, 4.833-5.852", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1217.wav", "onoffCaption": "explosion at 0.391-3.391 and dog barking at 2.243-4.243", "frequencyCaption": "explosion one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1242.wav", "onoffCaption": "spraying at 0.567-1.067, 2.157-2.938, 4.063-5.32", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1266.wav", "onoffCaption": "woman laughing at 0.647-2.93, 5.093-7.293 and dog barking at 4.855-6.855, 7.951-9.951", "frequencyCaption": "woman laughing two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_1283.wav", "onoffCaption": "whistling at 1.596-9.981", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1359.wav", "onoffCaption": "train horn at 0.245-3.645, 4.367-7.767 and woman laughing at 3.448-6.148", "frequencyCaption": "train horn two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1398.wav", "onoffCaption": "woman laughing at 0.118-4.17, 5.424-7.707", "frequencyCaption": "woman laughing two times"} +{"filepath": 
"data/multi_event_train/syn_1432.wav", "onoffCaption": "door slamming at 3.069-4.61", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1443.wav", "onoffCaption": "cow mooing at 2.99-7.97", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1529.wav", "onoffCaption": "spraying at 0.045-3.064, 4.628-5.136, 5.647-6.498 and whistling at 0.17-7.92", "frequencyCaption": "spraying three times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1558.wav", "onoffCaption": "spraying at 0.1-0.675", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1614.wav", "onoffCaption": "train horn at 2.717-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1630.wav", "onoffCaption": "spraying at 3.453-3.975, 6.279-6.787, 7.637-8.264", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1641.wav", "onoffCaption": "tapping clicking clanking at 0.201-3.641, 4.988-8.428", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1936.wav", "onoffCaption": "woman laughing at 0.152-2.852, 4.099-6.799", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1947.wav", "onoffCaption": "door knocking at 3.182-5.912, 7.132-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3006.wav", "onoffCaption": "door knocking at 3.963-7.516", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3077.wav", "onoffCaption": "whistling at 1.496-5.98", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3092.wav", "onoffCaption": "thump thud at 1.644-6.019, 7.447-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3189.wav", "onoffCaption": "explosion at 0.54-5.54, 6.412-10.0 and door 
knocking at 2.556-5.286, 6.463-8.926", "frequencyCaption": "explosion two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3220.wav", "onoffCaption": "sneeze at 3.349-4.513, 5.349-7.752", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3275.wav", "onoffCaption": "duck quacking at 0.417-2.417, 3.876-5.876", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3290.wav", "onoffCaption": "tapping clicking clanking at 0.227-3.667, 4.679-6.849 and duck quacking at 3.852-5.852, 7.038-9.038 and explosion at 5.023-9.617", "frequencyCaption": "tapping clicking clanking two times and duck quacking two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3421.wav", "onoffCaption": "cat meowing at 0.013-1.584, 2.383-4.331, 5.483-8.387 and door knocking at 0.75-5.283, 7.441-10.0", "frequencyCaption": "cat meowing three times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3450.wav", "onoffCaption": "duck quacking at 2.7-4.7, 5.509-7.509", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3623.wav", "onoffCaption": "car horn honking at 3.793-6.579", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3676.wav", "onoffCaption": "burping belching at 1.812-3.843, 6.336-8.367", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3693.wav", "onoffCaption": "sheep goat bleating at 2.944-4.944", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3738.wav", "onoffCaption": "thump thud at 0.424-4.799", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3788.wav", "onoffCaption": "sheep goat bleating at 0.787-5.507, 6.694-8.694", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3925.wav", "onoffCaption": 
"woman laughing at 0.292-7.737", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3954.wav", "onoffCaption": "sneeze at 0.104-2.023, 2.539-4.484, 6.855-8.101", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_25.wav", "onoffCaption": "gunshot at 1.625-3.625, 5.899-8.172 and whistling at 3.782-8.957", "frequencyCaption": "gunshot two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_157.wav", "onoffCaption": "train horn at 0.233-2.37, 3.224-5.601", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_225.wav", "onoffCaption": "tapping clicking clanking at 2.105-5.545, 7.367-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_324.wav", "onoffCaption": "spraying at 0.009-0.509, 2.341-3.425", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_400.wav", "onoffCaption": "train horn at 2.694-5.574", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_501.wav", "onoffCaption": "cow mooing at 0.234-3.532 and whistling at 1.104-6.737 and explosion at 2.284-7.284, 7.816-10.0", "frequencyCaption": "cow mooing one times and whistling one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_673.wav", "onoffCaption": "woman laughing at 0.788-3.583 and door knocking at 2.854-5.701", "frequencyCaption": "woman laughing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_805.wav", "onoffCaption": "spraying at 1.964-4.548", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_904.wav", "onoffCaption": "car horn honking at 0.948-4.535, 6.325-8.809", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1031.wav", "onoffCaption": "explosion at 0.294-2.468", "frequencyCaption": "explosion one times"} +{"filepath": 
"data/multi_event_train/syn_1130.wav", "onoffCaption": "woman laughing at 1.1-3.455, 4.433-6.788 and cat meowing at 1.581-3.575", "frequencyCaption": "woman laughing two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1467.wav", "onoffCaption": "door knocking at 1.655-5.405, 7.308-9.808", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1482.wav", "onoffCaption": "tapping clicking clanking at 0.246-3.686, 4.457-7.897 and door slamming at 2.901-3.739", "frequencyCaption": "tapping clicking clanking two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1498.wav", "onoffCaption": "whistling at 0.046-8.057 and sneeze at 2.099-3.644, 5.951-8.197", "frequencyCaption": "whistling one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_1566.wav", "onoffCaption": "train horn at 2.16-7.904", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1583.wav", "onoffCaption": "door slamming at 0.07-2.29, 3.986-6.206", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1599.wav", "onoffCaption": "whistling at 0.128-9.793", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1862.wav", "onoffCaption": "gunshot at 0.694-2.694, 3.323-5.323 and burping belching at 2.026-6.026, 6.855-10.0", "frequencyCaption": "gunshot two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1878.wav", "onoffCaption": "whistling at 0.389-8.4", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1887.wav", "onoffCaption": "cow mooing at 0.512-4.941 and thump thud at 7.964-10.0", "frequencyCaption": "cow mooing one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1963.wav", "onoffCaption": "woman laughing at 1.24-3.595 and tapping clicking clanking at 1.503-4.943", "frequencyCaption": "woman laughing one 
times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1979.wav", "onoffCaption": "door slamming at 0.075-1.214, 3.128-5.909", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1986.wav", "onoffCaption": "door slamming at 1.025-3.806, 5.303-7.303", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3049.wav", "onoffCaption": "woman laughing at 2.442-7.481", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3053.wav", "onoffCaption": "woman laughing at 2.766-5.861, 7.24-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3148.wav", "onoffCaption": "tapping clicking clanking at 0.276-3.716", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3152.wav", "onoffCaption": "sheep goat bleating at 0.454-2.454, 3.049-5.049, 6.197-8.197", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3405.wav", "onoffCaption": "whistling at 0.099-8.11", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3504.wav", "onoffCaption": "door knocking at 0.793-5.495 and thump thud at 3.052-5.391", "frequencyCaption": "door knocking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3800.wav", "onoffCaption": "tapping clicking clanking at 0.979-4.419, 5.28-7.969 and explosion at 2.588-5.588 and sneeze at 7.902-9.196", "frequencyCaption": "tapping clicking clanking two times and explosion one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3901.wav", "onoffCaption": "sheep goat bleating at 0.683-2.683, 4.233-6.233", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_118.wav", "onoffCaption": "train horn at 0.018-2.818, 3.936-6.736, 7.383-10.0", "frequencyCaption": "train horn three 
times"} +{"filepath": "data/multi_event_train/syn_270.wav", "onoffCaption": "duck quacking at 0.329-2.329, 3.818-5.818 and sheep goat bleating at 2.212-7.092", "frequencyCaption": "duck quacking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_295.wav", "onoffCaption": "burping belching at 0.305-2.336, 4.218-6.249, 6.891-8.922", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_455.wav", "onoffCaption": "thump thud at 2.92-7.37 and sneeze at 4.19-5.424, 6.465-7.922", "frequencyCaption": "thump thud one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_626.wav", "onoffCaption": "dog barking at 1.493-3.493", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_696.wav", "onoffCaption": "gunshot at 1.907-3.907, 4.559-6.559, 7.45-9.45", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_768.wav", "onoffCaption": "duck quacking at 3.151-5.151, 6.78-8.78", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_951.wav", "onoffCaption": "cow mooing at 2.278-5.26, 6.421-9.403", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1165.wav", "onoffCaption": "spraying at 2.992-3.992, 4.604-5.208", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1180.wav", "onoffCaption": "duck quacking at 0.539-2.539, 3.257-5.257, 6.102-8.102", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_1258.wav", "onoffCaption": "whistling at 0.723-6.223 and woman laughing at 1.377-8.111 and sheep goat bleating at 6.748-8.748", "frequencyCaption": "whistling one times and woman laughing one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1316.wav", "onoffCaption": "car horn honking at 2.275-5.862", "frequencyCaption": "car horn honking one times"} 
+{"filepath": "data/multi_event_train/syn_1343.wav", "onoffCaption": "duck quacking at 1.608-3.608, 4.465-6.465, 7.576-9.576", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_1428.wav", "onoffCaption": "train horn at 0.016-2.153, 3.202-5.339, 5.864-8.001", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_train/syn_1533.wav", "onoffCaption": "cow mooing at 2.782-5.764, 6.836-9.177", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1715.wav", "onoffCaption": "thump thud at 1.218-3.718, 4.628-7.128", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1740.wav", "onoffCaption": "train horn at 1.767-5.127, 6.095-9.455", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1837.wav", "onoffCaption": "sheep goat bleating at 2.059-6.779 and whistling at 2.884-8.059 and woman laughing at 4.959-7.378", "frequencyCaption": "sheep goat bleating one times and whistling one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3107.wav", "onoffCaption": "door slamming at 0.022-0.873 and cat meowing at 1.093-2.403 and train horn at 5.959-10.0", "frequencyCaption": "door slamming one times and cat meowing one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3321.wav", "onoffCaption": "cat meowing at 3.21-4.21, 5.7-6.712", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3374.wav", "onoffCaption": "dog barking at 0.078-2.078, 3.208-5.208, 5.9-7.9", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_3391.wav", "onoffCaption": "explosion at 0.269-3.141 and duck quacking at 0.455-2.455 and door knocking at 5.976-8.328", "frequencyCaption": "explosion one times and duck quacking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3551.wav", "onoffCaption": 
"door knocking at 2.84-7.673", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3639.wav", "onoffCaption": "woman laughing at 0.177-2.377, 3.151-5.351, 6.045-8.245", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_3689.wav", "onoffCaption": "sheep goat bleating at 2.212-4.212, 5.084-7.569", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3722.wav", "onoffCaption": "cat meowing at 0.333-2.327, 3.269-4.805", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3777.wav", "onoffCaption": "duck quacking at 2.801-4.801 and spraying at 3.013-5.141, 6.062-6.663", "frequencyCaption": "duck quacking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3792.wav", "onoffCaption": "explosion at 2.566-4.659, 7.128-9.212", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3855.wav", "onoffCaption": "duck quacking at 1.625-3.625 and door knocking at 7.025-9.525", "frequencyCaption": "duck quacking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_159.wav", "onoffCaption": "sneeze at 0.134-3.209, 4.258-7.138, 7.933-9.852", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_231.wav", "onoffCaption": "duck quacking at 0.318-2.318, 2.949-4.949, 6.008-8.008", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_264.wav", "onoffCaption": "dog barking at 0.128-2.128 and gunshot at 1.2-3.2, 5.28-7.28", "frequencyCaption": "dog barking one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_414.wav", "onoffCaption": "door knocking at 2.845-5.469, 6.563-9.4", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_632.wav", "onoffCaption": "explosion at 2.325-4.389, 4.942-7.006 and tapping clicking clanking at 
3.383-6.823", "frequencyCaption": "explosion two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_667.wav", "onoffCaption": "thump thud at 2.149-6.599, 7.743-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_682.wav", "onoffCaption": "cow mooing at 0.843-5.272, 7.321-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_729.wav", "onoffCaption": "door knocking at 0.601-4.369, 5.79-8.142", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_799.wav", "onoffCaption": "sheep goat bleating at 0.047-2.047 and whistling at 1.195-5.679, 7.057-10.0", "frequencyCaption": "sheep goat bleating one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_910.wav", "onoffCaption": "thump thud at 0.422-3.193, 3.817-6.156, 7.264-9.455", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_1124.wav", "onoffCaption": "woman laughing at 2.461-4.829 and explosion at 2.722-4.81, 5.629-7.693", "frequencyCaption": "woman laughing one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_1302.wav", "onoffCaption": "explosion at 3.167-5.428, 6.839-9.1", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1357.wav", "onoffCaption": "explosion at 0.493-3.246, 4.246-6.607 and door slamming at 1.075-2.554", "frequencyCaption": "explosion two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1469.wav", "onoffCaption": "duck quacking at 0.592-2.592, 3.215-5.215, 5.836-7.836 and train horn at 1.241-4.601, 5.638-8.998 and door slamming at 5.79-6.768", "frequencyCaption": "duck quacking three times and train horn two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1572.wav", "onoffCaption": "sneeze at 0.074-1.987, 3.317-4.42, 5.2-7.661 and door knocking at 1.161-5.671, 
7.944-10.0", "frequencyCaption": "sneeze three times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1597.wav", "onoffCaption": "cat meowing at 0.941-2.496 and dog barking at 1.668-3.668 and explosion at 6.21-10.0", "frequencyCaption": "cat meowing one times and dog barking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1701.wav", "onoffCaption": "spraying at 2.038-2.642", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1754.wav", "onoffCaption": "car horn honking at 0.031-2.817, 4.405-6.905", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1876.wav", "onoffCaption": "woman laughing at 0.697-3.181, 3.691-5.891", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1893.wav", "onoffCaption": "door slamming at 0.078-1.217 and dog barking at 3.861-5.861, 6.925-8.925 and thump thud at 4.033-8.483", "frequencyCaption": "door slamming one times and dog barking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1988.wav", "onoffCaption": "door knocking at 0.078-2.168, 2.716-4.806, 5.45-7.54 and cat meowing at 1.282-2.866, 3.698-5.282, 5.908-7.492", "frequencyCaption": "door knocking three times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_3146.wav", "onoffCaption": "thump thud at 0.966-3.737, 4.659-7.43 and sneeze at 1.51-2.764, 3.704-4.95, 5.513-7.729 and cow mooing at 1.812-6.792", "frequencyCaption": "thump thud two times and sneeze three times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3335.wav", "onoffCaption": "whistling at 3.008-8.508", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3385.wav", "onoffCaption": "sneeze at 3.023-7.523", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3510.wav", "onoffCaption": "dog barking at 2.689-4.689, 
6.283-8.283", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3678.wav", "onoffCaption": "gunshot at 2.519-4.519 and whistling at 2.939-8.114", "frequencyCaption": "gunshot one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3763.wav", "onoffCaption": "explosion at 2.855-5.573, 6.2-8.918", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3786.wav", "onoffCaption": "door slamming at 2.563-3.854, 5.558-6.363", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3814.wav", "onoffCaption": "whistling at 1.399-9.41", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_64.wav", "onoffCaption": "dog barking at 0.582-2.582, 4.995-6.995 and spraying at 0.846-1.596 and door knocking at 4.54-8.29", "frequencyCaption": "dog barking two times and spraying one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_81.wav", "onoffCaption": "thump thud at 1.031-5.481", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_281.wav", "onoffCaption": "cow mooing at 0.963-3.973, 4.899-7.683 and train horn at 3.96-7.96", "frequencyCaption": "cow mooing two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_441.wav", "onoffCaption": "gunshot at 0.774-2.774, 3.707-5.707, 7.02-9.02", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_540.wav", "onoffCaption": "thump thud at 2.697-6.364, 7.409-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_844.wav", "onoffCaption": "tapping clicking clanking at 0.698-4.138, 5.254-8.184", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_945.wav", "onoffCaption": "burping belching at 2.96-7.296", "frequencyCaption": "burping belching one times"} +{"filepath": 
"data/multi_event_train/syn_1070.wav", "onoffCaption": "gunshot at 0.081-2.081, 3.247-5.247, 6.513-8.513", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1095.wav", "onoffCaption": "spraying at 0.659-2.145 and gunshot at 5.491-7.491", "frequencyCaption": "spraying one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1171.wav", "onoffCaption": "dog barking at 0.481-2.481, 3.695-5.695", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1194.wav", "onoffCaption": "sneeze at 1.52-4.134, 5.429-7.668 and door slamming at 2.583-3.886", "frequencyCaption": "sneeze two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1219.wav", "onoffCaption": "sheep goat bleating at 2.316-4.316 and train horn at 7.392-10.0", "frequencyCaption": "sheep goat bleating one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1426.wav", "onoffCaption": "sheep goat bleating at 2.896-4.896 and door slamming at 4.863-5.701, 7.543-9.067", "frequencyCaption": "sheep goat bleating one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1527.wav", "onoffCaption": "spraying at 0.031-1.115, 2.086-3.17 and car horn honking at 2.198-5.417", "frequencyCaption": "spraying two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1823.wav", "onoffCaption": "tapping clicking clanking at 0.895-4.335, 5.021-8.461 and cow mooing at 2.805-5.787", "frequencyCaption": "tapping clicking clanking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1839.wav", "onoffCaption": "cat meowing at 2.895-4.205, 5.243-6.553, 7.151-8.461", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1922.wav", "onoffCaption": "burping belching at 0.283-3.789, 4.889-8.395", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1938.wav", 
"onoffCaption": "cat meowing at 2.138-3.15, 4.397-5.409 and sneeze at 7.522-8.625", "frequencyCaption": "cat meowing two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3008.wav", "onoffCaption": "gunshot at 0.446-2.616 and burping belching at 4.722-6.753, 7.735-10.0", "frequencyCaption": "gunshot one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_3012.wav", "onoffCaption": "train horn at 2.914-7.816", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3109.wav", "onoffCaption": "whistling at 0.123-8.134 and dog barking at 2.714-4.714, 6.881-8.881", "frequencyCaption": "whistling one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3113.wav", "onoffCaption": "thump thud at 0.383-2.611", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3360.wav", "onoffCaption": "dog barking at 0.244-2.244, 4.643-6.643 and sneeze at 1.545-3.228, 4.283-6.202 and woman laughing at 2.999-5.282, 7.507-9.943", "frequencyCaption": "dog barking two times and sneeze two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3444.wav", "onoffCaption": "door knocking at 0.208-3.958, 4.484-6.611, 7.771-10.0", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_3545.wav", "onoffCaption": "door slamming at 2.58-4.58, 5.255-7.255 and car horn honking at 5.744-7.744", "frequencyCaption": "door slamming two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3736.wav", "onoffCaption": "sheep goat bleating at 0.976-2.976 and gunshot at 1.201-3.201, 4.086-6.086, 7.889-9.889", "frequencyCaption": "sheep goat bleating one times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_3841.wav", "onoffCaption": "thump thud at 3.493-7.868 and door knocking at 6.458-8.77", "frequencyCaption": "thump thud one times and door knocking one times"} 
+{"filepath": "data/multi_event_train/syn_3940.wav", "onoffCaption": "burping belching at 3.845-6.075 and thump thud at 6.247-8.586", "frequencyCaption": "burping belching one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_31.wav", "onoffCaption": "explosion at 0.114-5.035, 6.617-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_40.wav", "onoffCaption": "cow mooing at 0.732-5.712, 6.321-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_116.wav", "onoffCaption": "burping belching at 2.633-6.656, 7.887-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_143.wav", "onoffCaption": "burping belching at 1.128-3.651 and spraying at 1.561-2.561, 3.832-4.916, 5.585-7.851", "frequencyCaption": "burping belching one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_330.wav", "onoffCaption": "sneeze at 0.333-3.443, 4.453-7.563", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_365.wav", "onoffCaption": "duck quacking at 0.019-2.019 and sheep goat bleating at 0.592-2.592 and explosion at 5.364-10.0", "frequencyCaption": "duck quacking one times and sheep goat bleating one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_380.wav", "onoffCaption": "car horn honking at 2.009-6.916 and tapping clicking clanking at 2.997-6.437", "frequencyCaption": "car horn honking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_515.wav", "onoffCaption": "cow mooing at 0.715-3.684 and thump thud at 1.196-3.424 and cat meowing at 8.242-9.253", "frequencyCaption": "cow mooing one times and thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_581.wav", "onoffCaption": "dog barking at 0.183-2.183", "frequencyCaption": "dog barking one times"} +{"filepath": 
"data/multi_event_train/syn_628.wav", "onoffCaption": "dog barking at 2.434-4.434", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_698.wav", "onoffCaption": "cat meowing at 1.384-4.414", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_733.wav", "onoffCaption": "duck quacking at 3.485-5.485, 7.687-9.687", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_766.wav", "onoffCaption": "cat meowing at 0.072-1.967, 2.942-4.936 and sneeze at 0.947-2.178, 4.106-6.423", "frequencyCaption": "cat meowing two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_783.wav", "onoffCaption": "explosion at 0.107-3.101, 5.265-8.259", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_811.wav", "onoffCaption": "thump thud at 0.206-2.668, 4.309-6.537", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_860.wav", "onoffCaption": "car horn honking at 1.847-5.434 and burping belching at 2.504-5.504, 6.182-9.182", "frequencyCaption": "car horn honking one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_885.wav", "onoffCaption": "door knocking at 3.613-8.613", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1025.wav", "onoffCaption": "tapping clicking clanking at 0.035-3.475, 5.44-7.618 and whistling at 0.845-2.854 and cow mooing at 2.891-7.871", "frequencyCaption": "tapping clicking clanking two times and whistling one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1054.wav", "onoffCaption": "explosion at 2.388-7.388", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1203.wav", "onoffCaption": "sneeze at 0.363-2.359, 3.331-5.327, 6.245-8.241 and whistling at 1.162-5.646 and burping belching at 4.98-7.21", "frequencyCaption": "sneeze three times and 
whistling one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1256.wav", "onoffCaption": "sheep goat bleating at 1.756-3.756, 6.06-8.06", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1318.wav", "onoffCaption": "dog barking at 2.585-4.585, 5.452-7.452", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1402.wav", "onoffCaption": "car horn honking at 0.149-2.614, 3.555-6.02, 6.806-9.271", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_1473.wav", "onoffCaption": "spraying at 0.374-1.106, 2.858-3.939 and burping belching at 2.746-5.746, 7.61-10.0", "frequencyCaption": "spraying two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1496.wav", "onoffCaption": "burping belching at 1.063-5.063, 6.187-10.0 and train horn at 1.338-3.738", "frequencyCaption": "burping belching two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1519.wav", "onoffCaption": "explosion at 0.451-3.578, 4.185-7.312 and car horn honking at 4.164-7.383", "frequencyCaption": "explosion two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1568.wav", "onoffCaption": "sneeze at 0.072-4.572 and car horn honking at 2.743-5.208", "frequencyCaption": "sneeze one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1600.wav", "onoffCaption": "tapping clicking clanking at 2.86-6.3, 7.003-9.229 and gunshot at 3.295-5.425", "frequencyCaption": "tapping clicking clanking two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1655.wav", "onoffCaption": "woman laughing at 0.265-2.684 and train horn at 0.391-4.459", "frequencyCaption": "woman laughing one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1889.wav", "onoffCaption": "door knocking at 3.623-6.086, 7.931-10.0", 
"frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1906.wav", "onoffCaption": "door slamming at 2.942-3.747, 5.972-6.777", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1977.wav", "onoffCaption": "gunshot at 3.187-5.187 and thump thud at 7.862-10.0", "frequencyCaption": "gunshot one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1992.wav", "onoffCaption": "cat meowing at 0.094-3.335", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3036.wav", "onoffCaption": "cat meowing at 2.02-3.38, 4.785-5.812 and woman laughing at 2.276-5.557, 7.167-10.0 and dog barking at 3.171-5.171, 6.364-8.364", "frequencyCaption": "cat meowing two times and woman laughing two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3047.wav", "onoffCaption": "car horn honking at 0.568-3.481, 4.098-6.975", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3234.wav", "onoffCaption": "cow mooing at 0.597-5.026, 6.061-8.703", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3261.wav", "onoffCaption": "explosion at 3.738-7.578", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3284.wav", "onoffCaption": "thump thud at 0.415-4.865, 6.186-8.495", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3411.wav", "onoffCaption": "door knocking at 0.393-2.642 and whistling at 5.46-9.944", "frequencyCaption": "door knocking one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3460.wav", "onoffCaption": "explosion at 4.039-6.792", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3485.wav", "onoffCaption": "whistling at 1.733-6.217 and cow mooing at 2.774-6.072, 7.142-10.0", "frequencyCaption": "whistling one times and cow mooing two 
times"} +{"filepath": "data/multi_event_train/syn_3637.wav", "onoffCaption": "sheep goat bleating at 0.468-2.468, 3.357-5.579, 6.358-8.358", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3662.wav", "onoffCaption": "spraying at 0.628-2.114, 3.145-5.537, 6.065-8.117", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3687.wav", "onoffCaption": "cat meowing at 3.218-5.108, 5.993-6.993", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3779.wav", "onoffCaption": "cow mooing at 0.863-3.832", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3915.wav", "onoffCaption": "spraying at 0.661-1.918, 2.505-4.633", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3981.wav", "onoffCaption": "dog barking at 1.467-3.467, 5.657-7.657", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_15.wav", "onoffCaption": "duck quacking at 0.518-2.518, 3.085-5.085, 6.404-8.404 and cow mooing at 2.413-6.842, 7.53-10.0 and car horn honking at 3.791-7.632", "frequencyCaption": "duck quacking three times and cow mooing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_132.wav", "onoffCaption": "whistling at 2.824-5.053, 5.615-7.844", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_167.wav", "onoffCaption": "explosion at 2.758-5.511, 7.256-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_182.wav", "onoffCaption": "door slamming at 2.185-3.664, 5.817-7.12 and car horn honking at 2.817-7.724", "frequencyCaption": "door slamming two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_314.wav", "onoffCaption": "tapping clicking clanking at 0.093-3.533, 4.054-7.494", "frequencyCaption": "tapping clicking clanking two times"} 
+{"filepath": "data/multi_event_train/syn_341.wav", "onoffCaption": "car horn honking at 0.634-5.146, 6.739-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_531.wav", "onoffCaption": "explosion at 0.061-5.061, 7.036-9.994", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_564.wav", "onoffCaption": "cow mooing at 3.722-6.704", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_643.wav", "onoffCaption": "spraying at 2.746-3.83, 5.719-6.803", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_659.wav", "onoffCaption": "door slamming at 1.024-1.829, 2.415-3.22, 5.384-6.189", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_717.wav", "onoffCaption": "sheep goat bleating at 1.082-3.082, 3.853-5.853 and thump thud at 7.758-10.0", "frequencyCaption": "sheep goat bleating two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_742.wav", "onoffCaption": "burping belching at 3.38-6.639", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_758.wav", "onoffCaption": "train horn at 1.569-6.646, 7.351-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_835.wav", "onoffCaption": "sheep goat bleating at 1.813-3.813, 5.405-7.405 and burping belching at 2.387-5.931", "frequencyCaption": "sheep goat bleating two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1001.wav", "onoffCaption": "sneeze at 0.148-4.204, 5.432-9.488", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1227.wav", "onoffCaption": "tapping clicking clanking at 0.311-3.751", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1268.wav", "onoffCaption": "spraying at 0.814-1.383 and sheep goat bleating at 
0.996-2.996 and train horn at 6.867-10.0", "frequencyCaption": "spraying one times and sheep goat bleating one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1272.wav", "onoffCaption": "door slamming at 1.698-4.659, 6.438-8.801 and cat meowing at 2.838-3.85", "frequencyCaption": "door slamming two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1297.wav", "onoffCaption": "door slamming at 1.376-3.376 and burping belching at 6.395-8.625", "frequencyCaption": "door slamming one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_1369.wav", "onoffCaption": "woman laughing at 0.167-3.239, 4.685-7.757", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1373.wav", "onoffCaption": "explosion at 0.042-2.91 and sneeze at 1.567-3.68, 5.675-7.829 and dog barking at 6.99-8.99", "frequencyCaption": "explosion one times and sneeze two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1396.wav", "onoffCaption": "sheep goat bleating at 1.953-6.833, 7.799-9.799", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1457.wav", "onoffCaption": "spraying at 2.206-2.728 and sheep goat bleating at 2.967-4.967", "frequencyCaption": "spraying one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1624.wav", "onoffCaption": "train horn at 0.453-2.893", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1671.wav", "onoffCaption": "sneeze at 0.545-1.648", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_1694.wav", "onoffCaption": "sneeze at 0.873-3.983", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_1725.wav", "onoffCaption": "train horn at 0.363-4.133 and whistling at 0.652-6.152, 6.997-10.0", "frequencyCaption": "train horn one times and whistling two times"} 
+{"filepath": "data/multi_event_train/syn_1848.wav", "onoffCaption": "sneeze at 0.755-2.049, 2.763-4.057, 4.699-5.993 and cow mooing at 0.817-3.786", "frequencyCaption": "sneeze three times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1953.wav", "onoffCaption": "whistling at 0.392-9.463 and car horn honking at 2.538-4.538", "frequencyCaption": "whistling one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3063.wav", "onoffCaption": "tapping clicking clanking at 1.047-4.487 and sheep goat bleating at 2.67-4.67", "frequencyCaption": "tapping clicking clanking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3086.wav", "onoffCaption": "train horn at 1.792-4.259, 5.145-7.569", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3178.wav", "onoffCaption": "burping belching at 0.665-4.209, 6.437-9.981", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3210.wav", "onoffCaption": "tapping clicking clanking at 2.508-5.948, 7.415-9.678", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3245.wav", "onoffCaption": "tapping clicking clanking at 1.293-4.733, 5.996-9.436 and train horn at 3.296-5.951, 6.594-9.249", "frequencyCaption": "tapping clicking clanking two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_3311.wav", "onoffCaption": "sneeze at 1.807-3.193, 4.448-5.834, 6.424-7.81", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3435.wav", "onoffCaption": "thump thud at 2.366-4.828, 6.309-8.771", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3613.wav", "onoffCaption": "tapping clicking clanking at 0.319-3.759 and dog barking at 5.607-7.607", "frequencyCaption": "tapping clicking clanking one times and dog barking one times"} +{"filepath": 
"data/multi_event_train/syn_3646.wav", "onoffCaption": "tapping clicking clanking at 1.605-5.045, 6.511-8.868", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3708.wav", "onoffCaption": "dog barking at 0.956-2.956 and sneeze at 1.521-3.466", "frequencyCaption": "dog barking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3747.wav", "onoffCaption": "door slamming at 3.944-5.083, 5.988-7.127", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3931.wav", "onoffCaption": "explosion at 0.693-3.422, 4.387-7.116", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3964.wav", "onoffCaption": "sheep goat bleating at 2.099-4.099, 6.494-8.494", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_128.wav", "onoffCaption": "train horn at 0.374-5.276, 6.495-9.026", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_198.wav", "onoffCaption": "gunshot at 0.784-2.784, 3.801-5.801, 6.312-8.312", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_215.wav", "onoffCaption": "whistling at 0.949-8.699", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_240.wav", "onoffCaption": "door slamming at 1.471-2.371, 3.25-4.15, 5.324-6.224", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_430.wav", "onoffCaption": "spraying at 1.877-2.461, 3.411-3.995, 4.65-5.234", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_465.wav", "onoffCaption": "dog barking at 0.878-6.515, 7.656-10.0 and tapping clicking clanking at 3.261-6.701", "frequencyCaption": "dog barking two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_616.wav", "onoffCaption": "sheep goat bleating at 2.186-4.186, 
4.941-6.941", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_934.wav", "onoffCaption": "spraying at 1.534-2.709, 5.115-5.69, 7.889-10.0", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_984.wav", "onoffCaption": "door knocking at 3.227-7.737", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1100.wav", "onoffCaption": "sneeze at 2.114-5.189, 6.823-9.898", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1155.wav", "onoffCaption": "explosion at 0.63-3.383, 4.501-7.194", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1326.wav", "onoffCaption": "tapping clicking clanking at 2.131-5.571, 7.638-10.0 and explosion at 2.898-7.898", "frequencyCaption": "tapping clicking clanking two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1418.wav", "onoffCaption": "woman laughing at 1.728-3.928, 4.951-7.688", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1503.wav", "onoffCaption": "train horn at 1.911-4.711, 5.396-7.503", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1556.wav", "onoffCaption": "explosion at 1.122-3.875 and dog barking at 7.283-9.283", "frequencyCaption": "explosion one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1770.wav", "onoffCaption": "sneeze at 1.912-4.987, 5.626-7.238", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1795.wav", "onoffCaption": "thump thud at 0.158-3.205, 5.267-7.767", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1852.wav", "onoffCaption": "woman laughing at 0.084-2.2, 2.942-5.058, 5.698-7.814 and duck quacking at 6.149-8.149", "frequencyCaption": "woman laughing three times and duck quacking one times"} +{"filepath": 
"data/multi_event_train/syn_1949.wav", "onoffCaption": "cat meowing at 2.368-5.272, 6.225-9.129 and sheep goat bleating at 7.594-9.594", "frequencyCaption": "cat meowing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3079.wav", "onoffCaption": "burping belching at 2.466-4.696", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3137.wav", "onoffCaption": "sneeze at 2.975-4.069", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3162.wav", "onoffCaption": "duck quacking at 2.2-4.2 and train horn at 2.651-5.811", "frequencyCaption": "duck quacking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3187.wav", "onoffCaption": "cow mooing at 0.219-4.648, 6.771-9.484", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3344.wav", "onoffCaption": "dog barking at 0.909-2.909", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3534.wav", "onoffCaption": "cat meowing at 3.579-6.609", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3584.wav", "onoffCaption": "explosion at 2.028-7.028, 7.591-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3609.wav", "onoffCaption": "cat meowing at 0.249-1.793, 3.33-4.874, 6.573-8.117 and thump thud at 2.851-5.079", "frequencyCaption": "cat meowing three times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3712.wav", "onoffCaption": "tapping clicking clanking at 0.164-3.604 and cat meowing at 1.057-2.332 and explosion at 7.493-10.0", "frequencyCaption": "tapping clicking clanking one times and cat meowing one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3830.wav", "onoffCaption": "sneeze at 2.907-4.435, 6.484-8.887", "frequencyCaption": "sneeze two times"} +{"filepath": 
"data/multi_event_train/syn_3865.wav", "onoffCaption": "door knocking at 0.025-2.337 and dog barking at 0.545-2.545 and woman laughing at 6.061-8.166", "frequencyCaption": "door knocking one times and dog barking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_212.wav", "onoffCaption": "woman laughing at 3.453-5.736, 6.978-9.178", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_309.wav", "onoffCaption": "sheep goat bleating at 0.718-2.718, 4.398-6.398, 7.843-9.843", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_437.wav", "onoffCaption": "car horn honking at 2.062-6.311, 6.944-9.159", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_462.wav", "onoffCaption": "duck quacking at 0.153-2.153, 2.773-4.773, 5.307-7.307", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_480.wav", "onoffCaption": "explosion at 1.567-3.631, 4.557-6.621, 7.919-9.983", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_579.wav", "onoffCaption": "cat meowing at 0.489-1.849, 2.754-3.763, 5.08-6.83 and car horn honking at 4.296-6.809 and spraying at 7.112-7.963", "frequencyCaption": "cat meowing three times and car horn honking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_644.wav", "onoffCaption": "duck quacking at 0.586-2.586", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_828.wav", "onoffCaption": "spraying at 2.197-4.781", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_933.wav", "onoffCaption": "spraying at 0.268-0.843, 1.489-2.064, 3.367-3.942 and duck quacking at 1.62-3.62", "frequencyCaption": "spraying three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_961.wav", "onoffCaption": "cat meowing at 
2.641-3.653, 4.491-5.502", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_983.wav", "onoffCaption": "dog barking at 2.408-4.408, 5.296-7.296", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1107.wav", "onoffCaption": "dog barking at 0.471-2.471, 4.869-6.869, 7.846-9.846", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_1374.wav", "onoffCaption": "car horn honking at 2.04-5.215, 6.66-9.835", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1391.wav", "onoffCaption": "cat meowing at 0.847-2.579", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1551.wav", "onoffCaption": "cat meowing at 2.852-5.04, 7.233-8.508", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1639.wav", "onoffCaption": "whistling at 1.187-10.0 and thump thud at 3.478-7.928", "frequencyCaption": "whistling one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1722.wav", "onoffCaption": "woman laughing at 0.231-7.676 and duck quacking at 3.115-5.115, 6.127-8.127", "frequencyCaption": "woman laughing one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1807.wav", "onoffCaption": "burping belching at 0.067-2.888, 4.14-6.961", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1855.wav", "onoffCaption": "thump thud at 3.516-5.744, 6.49-8.718", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3165.wav", "onoffCaption": "thump thud at 0.594-5.044 and whistling at 7.097-10.0", "frequencyCaption": "thump thud one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_3180.wav", "onoffCaption": "woman laughing at 3.115-5.72", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3316.wav", 
"onoffCaption": "sneeze at 0.781-2.488, 3.315-4.489 and thump thud at 7.487-10.0", "frequencyCaption": "sneeze two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3428.wav", "onoffCaption": "spraying at 3.09-6.109, 7.503-9.198", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3533.wav", "onoffCaption": "spraying at 2.095-3.176, 3.935-5.668 and door knocking at 3.221-5.951, 7.684-10.0", "frequencyCaption": "spraying two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3561.wav", "onoffCaption": "tapping clicking clanking at 2.666-6.106, 7.024-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3583.wav", "onoffCaption": "cow mooing at 1.846-6.275, 7.561-9.783", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3740.wav", "onoffCaption": "duck quacking at 0.909-2.909, 3.443-5.443, 6.475-8.475 and spraying at 3.475-4.079, 4.593-5.197, 6.294-6.898 and sneeze at 4.982-7.443", "frequencyCaption": "duck quacking three times and spraying three times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3837.wav", "onoffCaption": "explosion at 0.018-2.02, 2.817-4.905, 6.975-9.847", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_3862.wav", "onoffCaption": "duck quacking at 0.105-2.105, 2.639-4.639, 5.211-7.211 and sheep goat bleating at 0.824-2.824, 3.832-5.832, 7.049-9.049 and train horn at 1.048-7.108", "frequencyCaption": "duck quacking three times and sheep goat bleating three times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3880.wav", "onoffCaption": "explosion at 2.536-5.592, 6.352-9.352", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3979.wav", "onoffCaption": "thump thud at 1.813-4.584 and sheep goat bleating at 2.77-4.77 and cat meowing at 7.863-9.407", 
"frequencyCaption": "thump thud one times and sheep goat bleating one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_247.wav", "onoffCaption": "car horn honking at 1.058-5.458 and spraying at 8.191-8.775", "frequencyCaption": "car horn honking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_346.wav", "onoffCaption": "tapping clicking clanking at 3.424-6.864 and gunshot at 4.765-6.765 and sneeze at 4.781-7.094", "frequencyCaption": "tapping clicking clanking one times and gunshot one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_487.wav", "onoffCaption": "gunshot at 2.351-4.521 and door slamming at 6.019-7.252", "frequencyCaption": "gunshot one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_611.wav", "onoffCaption": "train horn at 0.168-10.0 and dog barking at 0.823-2.823, 5.236-7.236", "frequencyCaption": "train horn one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_710.wav", "onoffCaption": "burping belching at 2.117-5.661, 6.597-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_898.wav", "onoffCaption": "duck quacking at 3.594-5.594, 7.372-9.372", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_966.wav", "onoffCaption": "train horn at 3.481-6.801", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1049.wav", "onoffCaption": "woman laughing at 2.805-5.03", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1152.wav", "onoffCaption": "dog barking at 3.6-6.521", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1220.wav", "onoffCaption": "gunshot at 0.582-2.582, 3.6-5.6, 7.272-9.778 and cat meowing at 3.225-4.372", "frequencyCaption": "gunshot three times and cat meowing one times"} +{"filepath": 
"data/multi_event_train/syn_1321.wav", "onoffCaption": "cow mooing at 0.732-3.701 and gunshot at 2.699-4.699, 5.95-7.95", "frequencyCaption": "cow mooing one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1504.wav", "onoffCaption": "gunshot at 2.245-4.245, 4.846-6.846", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1676.wav", "onoffCaption": "thump thud at 0.322-4.772, 6.46-9.046 and gunshot at 1.752-3.752", "frequencyCaption": "thump thud two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1689.wav", "onoffCaption": "tapping clicking clanking at 0.371-3.811, 5.342-8.311", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1693.wav", "onoffCaption": "sneeze at 2.165-3.872, 4.533-6.24 and door knocking at 2.828-5.068", "frequencyCaption": "sneeze two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1777.wav", "onoffCaption": "whistling at 3.115-8.29", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1788.wav", "onoffCaption": "door knocking at 0.823-3.932 and train horn at 6.027-9.797", "frequencyCaption": "door knocking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1792.wav", "onoffCaption": "train horn at 1.612-6.514, 7.852-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1800.wav", "onoffCaption": "duck quacking at 0.833-2.833 and dog barking at 4.411-6.411, 7.889-9.889", "frequencyCaption": "duck quacking one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3130.wav", "onoffCaption": "gunshot at 3.509-6.015", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3242.wav", "onoffCaption": "whistling at 0.169-3.144, 4.486-7.391", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3258.wav", 
"onoffCaption": "gunshot at 1.913-3.913 and tapping clicking clanking at 7.042-10.0", "frequencyCaption": "gunshot one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3343.wav", "onoffCaption": "woman laughing at 0.244-2.527", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3359.wav", "onoffCaption": "door slamming at 0.337-2.465", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_3498.wav", "onoffCaption": "door slamming at 0.132-2.59, 4.435-7.409 and tapping clicking clanking at 0.201-3.641, 5.333-8.773", "frequencyCaption": "door slamming two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3566.wav", "onoffCaption": "gunshot at 0.056-2.056, 3.398-5.398 and spraying at 2.521-3.029, 4.355-4.855, 5.358-7.942", "frequencyCaption": "gunshot two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_3614.wav", "onoffCaption": "spraying at 3.09-3.871, 4.998-5.779", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3715.wav", "onoffCaption": "whistling at 2.984-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3887.wav", "onoffCaption": "dog barking at 1.311-3.311, 4.547-7.468", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_47.wav", "onoffCaption": "sneeze at 2.538-3.702, 5.546-6.8 and duck quacking at 3.634-5.634, 7.978-9.978", "frequencyCaption": "sneeze two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_135.wav", "onoffCaption": "sneeze at 0.624-1.716, 2.751-5.365, 5.998-8.974", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_208.wav", "onoffCaption": "car horn honking at 0.093-4.493, 6.927-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_313.wav", 
"onoffCaption": "car horn honking at 0.15-2.15 and door knocking at 4.327-6.576, 7.544-10.0", "frequencyCaption": "car horn honking one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_478.wav", "onoffCaption": "burping belching at 0.132-2.362, 3.987-6.313 and woman laughing at 3.778-7.166", "frequencyCaption": "burping belching two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_563.wav", "onoffCaption": "sneeze at 0.216-1.51, 3.715-4.809", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_586.wav", "onoffCaption": "cat meowing at 0.42-2.17 and door knocking at 2.95-6.712 and door slamming at 6.165-7.145", "frequencyCaption": "cat meowing one times and door knocking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_745.wav", "onoffCaption": "whistling at 1.282-9.032 and sneeze at 3.16-5.156, 7.293-9.289", "frequencyCaption": "whistling one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_867.wav", "onoffCaption": "tapping clicking clanking at 0.487-3.927, 6.34-9.002", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_882.wav", "onoffCaption": "gunshot at 0.137-2.137, 4.515-6.515", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_999.wav", "onoffCaption": "thump thud at 0.977-4.024, 5.736-8.075", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1053.wav", "onoffCaption": "sheep goat bleating at 2.992-4.992, 5.618-7.618", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1148.wav", "onoffCaption": "whistling at 1.666-7.607 and cat meowing at 7.548-9.084", "frequencyCaption": "whistling one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1275.wav", "onoffCaption": "duck quacking at 1.585-3.585, 4.114-6.114", "frequencyCaption": 
"duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1290.wav", "onoffCaption": "whistling at 1.248-9.259 and door knocking at 1.729-4.576, 5.851-8.698", "frequencyCaption": "whistling one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1405.wav", "onoffCaption": "explosion at 0.883-5.883 and spraying at 6.432-6.94, 8.695-9.203", "frequencyCaption": "explosion one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1623.wav", "onoffCaption": "door knocking at 1.398-7.458", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1738.wav", "onoffCaption": "sneeze at 2.589-3.692, 4.688-6.684, 7.629-9.174", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_1901.wav", "onoffCaption": "tapping clicking clanking at 1.129-4.569", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3031.wav", "onoffCaption": "whistling at 3.449-8.949", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3217.wav", "onoffCaption": "door slamming at 1.776-3.167, 5.234-6.134", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3467.wav", "onoffCaption": "cat meowing at 3.211-4.755, 5.561-7.538", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3482.wav", "onoffCaption": "sheep goat bleating at 3.012-5.012, 6.095-8.095", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3599.wav", "onoffCaption": "burping belching at 0.315-3.605, 4.439-6.669 and dog barking at 4.742-6.742 and train horn at 5.884-8.364", "frequencyCaption": "burping belching two times and dog barking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3641.wav", "onoffCaption": "car horn honking at 0.367-3.542, 4.043-7.218 and cow mooing at 0.835-5.815, 7.094-9.789", 
"frequencyCaption": "car horn honking two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3878.wav", "onoffCaption": "cow mooing at 2.415-7.395 and car horn honking at 2.709-7.031", "frequencyCaption": "cow mooing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3963.wav", "onoffCaption": "tapping clicking clanking at 0.761-4.201, 6.468-9.015", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3986.wav", "onoffCaption": "tapping clicking clanking at 1.468-4.908, 6.936-9.688", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_12.wav", "onoffCaption": "car horn honking at 0.54-5.447 and duck quacking at 3.391-5.391, 7.677-9.677", "frequencyCaption": "car horn honking one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_63.wav", "onoffCaption": "woman laughing at 1.87-4.436, 6.27-8.553", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_86.wav", "onoffCaption": "train horn at 3.875-7.875", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_111.wav", "onoffCaption": "car horn honking at 0.756-2.756, 4.181-6.708", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_160.wav", "onoffCaption": "cow mooing at 0.675-5.655", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_185.wav", "onoffCaption": "cow mooing at 1.172-4.182 and door knocking at 2.643-6.915", "frequencyCaption": "cow mooing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_279.wav", "onoffCaption": "cat meowing at 2.652-4.84, 5.951-8.087", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_337.wav", "onoffCaption": "spraying at 3.394-3.963", "frequencyCaption": "spraying one times"} +{"filepath": 
"data/multi_event_train/syn_362.wav", "onoffCaption": "gunshot at 3.922-5.922", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_387.wav", "onoffCaption": "thump thud at 0.64-2.979, 3.98-6.751", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_536.wav", "onoffCaption": "thump thud at 1.602-6.052 and burping belching at 2.209-4.316, 5.552-7.659", "frequencyCaption": "thump thud one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_547.wav", "onoffCaption": "tapping clicking clanking at 1.737-5.177, 6.879-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_734.wav", "onoffCaption": "sneeze at 1.264-3.51, 5.768-8.014", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_832.wav", "onoffCaption": "door knocking at 0.635-2.699", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_843.wav", "onoffCaption": "dog barking at 0.476-2.476, 3.128-5.128 and duck quacking at 7.098-9.098", "frequencyCaption": "dog barking two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_929.wav", "onoffCaption": "burping belching at 1.548-7.492 and cat meowing at 2.021-3.065, 4.919-5.963", "frequencyCaption": "burping belching one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_958.wav", "onoffCaption": "burping belching at 3.236-6.236", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1006.wav", "onoffCaption": "tapping clicking clanking at 0.14-3.58, 5.135-8.575", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1077.wav", "onoffCaption": "tapping clicking clanking at 0.177-3.617, 5.908-7.999 and cat meowing at 0.395-1.581 and whistling at 0.574-8.959", "frequencyCaption": "tapping clicking clanking two times and cat 
meowing one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1092.wav", "onoffCaption": "burping belching at 0.135-4.004, 5.916-8.783", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1189.wav", "onoffCaption": "whistling at 3.358-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1204.wav", "onoffCaption": "sheep goat bleating at 0.234-2.234, 2.756-4.756, 5.332-7.332", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1421.wav", "onoffCaption": "cat meowing at 0.461-1.563 and burping belching at 0.476-5.476, 7.964-10.0", "frequencyCaption": "cat meowing one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_1450.wav", "onoffCaption": "gunshot at 0.897-2.897 and cat meowing at 5.661-7.197", "frequencyCaption": "gunshot one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1652.wav", "onoffCaption": "door slamming at 0.328-1.179 and cat meowing at 4.049-5.198, 6.096-7.245 and spraying at 5.556-6.497, 8.78-9.955", "frequencyCaption": "door slamming one times and cat meowing two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1749.wav", "onoffCaption": "gunshot at 1.998-3.998 and cow mooing at 5.91-8.879", "frequencyCaption": "gunshot one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1925.wav", "onoffCaption": "explosion at 1.732-4.604", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1954.wav", "onoffCaption": "duck quacking at 1.19-3.19, 5.027-7.027", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3015.wav", "onoffCaption": "explosion at 0.061-5.061 and door knocking at 0.093-3.855", "frequencyCaption": "explosion one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3064.wav", "onoffCaption": "door 
knocking at 0.013-2.365, 3.513-5.865", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3081.wav", "onoffCaption": "woman laughing at 1.463-4.548, 5.645-7.891", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3266.wav", "onoffCaption": "burping belching at 0.198-3.4 and gunshot at 7.023-9.524", "frequencyCaption": "burping belching one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3283.wav", "onoffCaption": "cow mooing at 3.078-8.058", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3398.wav", "onoffCaption": "sheep goat bleating at 1.518-3.518 and sneeze at 6.834-8.16", "frequencyCaption": "sheep goat bleating one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3432.wav", "onoffCaption": "sneeze at 2.248-3.482, 5.54-6.774", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3443.wav", "onoffCaption": "whistling at 0.159-5.334", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3529.wav", "onoffCaption": "door knocking at 1.623-4.46", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3558.wav", "onoffCaption": "spraying at 1.0-2.486 and cow mooing at 5.754-8.723", "frequencyCaption": "spraying one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3630.wav", "onoffCaption": "train horn at 0.886-4.286, 6.072-8.872 and tapping clicking clanking at 2.266-5.706", "frequencyCaption": "train horn two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3936.wav", "onoffCaption": "spraying at 0.586-2.281, 4.305-6.0, 6.736-8.431 and sheep goat bleating at 3.311-7.231, 7.902-9.902", "frequencyCaption": "spraying three times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3947.wav", "onoffCaption": "train horn at 
0.112-2.912 and whistling at 1.057-4.032 and cow mooing at 5.466-8.476", "frequencyCaption": "train horn one times and whistling one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_36.wav", "onoffCaption": "sheep goat bleating at 2.518-5.518, 6.541-9.541", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_144.wav", "onoffCaption": "cat meowing at 0.305-5.305, 6.539-10.0", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_409.wav", "onoffCaption": "explosion at 1.848-4.848, 5.553-8.553", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_413.wav", "onoffCaption": "door slamming at 1.337-1.837, 2.978-3.478", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_508.wav", "onoffCaption": "gunshot at 2.17-4.17, 5.36-7.36", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_512.wav", "onoffCaption": "dog barking at 0.622-2.622", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_660.wav", "onoffCaption": "whistling at 2.177-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_685.wav", "onoffCaption": "cow mooing at 0.06-3.029, 4.064-6.951, 7.727-9.748", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/multi_event_train/syn_761.wav", "onoffCaption": "thump thud at 0.843-5.293", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_784.wav", "onoffCaption": "cow mooing at 0.475-3.444, 5.211-7.742 and tapping clicking clanking at 3.309-6.749", "frequencyCaption": "cow mooing two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_816.wav", "onoffCaption": "explosion at 1.724-3.726, 5.689-7.691", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_917.wav", "onoffCaption": 
"explosion at 1.982-6.982 and tapping clicking clanking at 3.567-7.007", "frequencyCaption": "explosion one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1022.wav", "onoffCaption": "whistling at 0.122-8.133 and door slamming at 0.182-1.321, 2.275-2.775, 4.526-6.526", "frequencyCaption": "whistling one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_1038.wav", "onoffCaption": "train horn at 0.264-2.938, 5.056-7.536 and tapping clicking clanking at 0.704-4.144 and door knocking at 2.107-4.944, 5.54-8.377", "frequencyCaption": "train horn two times and tapping clicking clanking one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1123.wav", "onoffCaption": "tapping clicking clanking at 0.144-3.584, 5.286-7.478", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1139.wav", "onoffCaption": "woman laughing at 0.626-2.82, 5.16-7.354", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1251.wav", "onoffCaption": "thump thud at 2.421-6.796", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1350.wav", "onoffCaption": "door knocking at 0.09-2.927, 5.354-7.487 and woman laughing at 1.506-3.7, 4.763-6.851", "frequencyCaption": "door knocking two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1474.wav", "onoffCaption": "gunshot at 2.464-4.464", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_1491.wav", "onoffCaption": "tapping clicking clanking at 0.394-3.834, 5.02-8.46", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1575.wav", "onoffCaption": "tapping clicking clanking at 0.154-3.594, 4.396-6.695 and burping belching at 0.413-2.507, 3.041-5.148 and door slamming at 0.419-3.38, 4.399-7.36", "frequencyCaption": "tapping clicking 
clanking two times and burping belching two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1590.wav", "onoffCaption": "tapping clicking clanking at 2.991-6.431 and sneeze at 4.173-6.132", "frequencyCaption": "tapping clicking clanking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1607.wav", "onoffCaption": "duck quacking at 2.35-4.35, 5.08-7.08", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1706.wav", "onoffCaption": "train horn at 2.838-7.019", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1871.wav", "onoffCaption": "woman laughing at 0.217-2.855", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1894.wav", "onoffCaption": "gunshot at 0.413-2.919, 4.301-6.807", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1970.wav", "onoffCaption": "tapping clicking clanking at 0.321-3.761, 5.461-8.901", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1995.wav", "onoffCaption": "tapping clicking clanking at 0.432-3.872, 4.555-7.09", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3040.wav", "onoffCaption": "duck quacking at 2.579-4.579, 6.775-8.775", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3141.wav", "onoffCaption": "cat meowing at 0.69-1.83, 2.623-3.763, 4.769-5.909 and car horn honking at 7.826-10.0", "frequencyCaption": "cat meowing three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3229.wav", "onoffCaption": "woman laughing at 3.984-10.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3233.wav", "onoffCaption": "thump thud at 2.174-4.674, 6.749-9.249 and door knocking at 2.65-6.266", "frequencyCaption": "thump thud two times 
and door knocking one times"} +{"filepath": "data/multi_event_train/syn_3328.wav", "onoffCaption": "whistling at 0.727-7.976 and thump thud at 1.013-5.463", "frequencyCaption": "whistling one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3332.wav", "onoffCaption": "whistling at 0.582-7.158", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3416.wav", "onoffCaption": "cow mooing at 0.067-3.365, 3.932-6.914 and train horn at 2.227-6.667", "frequencyCaption": "cow mooing two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3517.wav", "onoffCaption": "gunshot at 1.138-3.138", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3665.wav", "onoffCaption": "car horn honking at 0.511-4.833 and whistling at 3.066-5.295, 6.173-8.456", "frequencyCaption": "car horn honking one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_3680.wav", "onoffCaption": "sneeze at 0.858-2.386", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3764.wav", "onoffCaption": "tapping clicking clanking at 1.057-4.497, 6.107-8.928 and whistling at 1.373-3.382, 4.394-7.369", "frequencyCaption": "tapping clicking clanking two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_3781.wav", "onoffCaption": "car horn honking at 2.047-5.634 and door slamming at 8.135-8.94", "frequencyCaption": "car horn honking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3809.wav", "onoffCaption": "thump thud at 3.075-5.846, 6.39-9.161", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3813.wav", "onoffCaption": "train horn at 1.522-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3908.wav", "onoffCaption": "spraying at 0.562-2.295 and gunshot at 5.254-7.384", "frequencyCaption": "spraying one times and gunshot 
one times"} +{"filepath": "data/multi_event_train/syn_3912.wav", "onoffCaption": "gunshot at 2.345-4.345, 5.839-7.839", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_79.wav", "onoffCaption": "door knocking at 1.555-4.675, 6.777-9.897", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_189.wav", "onoffCaption": "dog barking at 2.828-4.828, 6.53-8.53", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_204.wav", "onoffCaption": "car horn honking at 2.9-5.365, 7.793-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_236.wav", "onoffCaption": "car horn honking at 2.477-4.977, 5.601-8.002", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_263.wav", "onoffCaption": "cow mooing at 0.559-3.857, 5.176-8.474", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_286.wav", "onoffCaption": "spraying at 3.75-6.186, 7.898-10.0", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_378.wav", "onoffCaption": "woman laughing at 0.099-2.894, 4.837-7.297 and explosion at 0.157-3.157", "frequencyCaption": "woman laughing two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_421.wav", "onoffCaption": "tapping clicking clanking at 2.375-5.815 and sheep goat bleating at 4.545-6.545", "frequencyCaption": "tapping clicking clanking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_446.wav", "onoffCaption": "cat meowing at 0.124-1.337, 2.502-3.715, 4.842-6.055", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_635.wav", "onoffCaption": "sheep goat bleating at 0.123-2.123, 2.76-4.76, 5.892-7.892", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_652.wav", "onoffCaption": "cow mooing 
at 3.561-6.571, 7.654-10.0 and duck quacking at 4.115-6.115", "frequencyCaption": "cow mooing two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_859.wav", "onoffCaption": "duck quacking at 0.31-2.31, 4.52-6.52 and thump thud at 4.117-8.035", "frequencyCaption": "duck quacking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_925.wav", "onoffCaption": "whistling at 1.132-8.882", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_942.wav", "onoffCaption": "door slamming at 2.154-3.054", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1088.wav", "onoffCaption": "cow mooing at 0.067-3.365 and explosion at 6.581-10.0", "frequencyCaption": "cow mooing one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_1111.wav", "onoffCaption": "whistling at 2.179-9.929", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1176.wav", "onoffCaption": "spraying at 0.051-0.801, 2.592-3.342, 5.836-6.586 and dog barking at 1.423-3.423", "frequencyCaption": "spraying three times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1193.wav", "onoffCaption": "cow mooing at 0.157-3.455, 4.239-7.537 and door slamming at 1.499-2.479, 3.672-4.652", "frequencyCaption": "cow mooing two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_1279.wav", "onoffCaption": "tapping clicking clanking at 0.933-4.373, 4.929-7.481", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1305.wav", "onoffCaption": "woman laughing at 3.907-6.19 and duck quacking at 7.724-9.724", "frequencyCaption": "woman laughing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1362.wav", "onoffCaption": "sheep goat bleating at 2.838-4.838", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": 
"data/multi_event_train/syn_1387.wav", "onoffCaption": "dog barking at 1.304-3.304, 3.955-5.955, 6.664-8.664", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_1520.wav", "onoffCaption": "tapping clicking clanking at 2.832-6.272, 6.792-9.216", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1547.wav", "onoffCaption": "dog barking at 0.286-2.286, 4.345-6.345", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1648.wav", "onoffCaption": "gunshot at 0.398-2.398 and cat meowing at 2.098-3.11, 5.228-6.24", "frequencyCaption": "gunshot one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1734.wav", "onoffCaption": "explosion at 2.33-5.33, 6.621-8.676 and duck quacking at 3.013-5.013, 5.937-7.937", "frequencyCaption": "explosion two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1753.wav", "onoffCaption": "thump thud at 1.028-4.695, 5.376-9.043", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1824.wav", "onoffCaption": "dog barking at 0.275-2.275, 4.236-6.636", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1843.wav", "onoffCaption": "train horn at 2.459-5.779 and cow mooing at 2.492-5.502", "frequencyCaption": "train horn one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1958.wav", "onoffCaption": "sheep goat bleating at 2.891-4.891 and spraying at 8.283-9.347", "frequencyCaption": "sheep goat bleating one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3068.wav", "onoffCaption": "door knocking at 2.829-6.204, 7.014-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3114.wav", "onoffCaption": "cow mooing at 2.794-5.776, 6.601-9.583", "frequencyCaption": "cow mooing two times"} +{"filepath": 
"data/multi_event_train/syn_3173.wav", "onoffCaption": "woman laughing at 3.122-6.222", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3196.wav", "onoffCaption": "duck quacking at 0.461-2.461, 2.976-4.976, 5.619-7.619", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3299.wav", "onoffCaption": "whistling at 0.056-8.441", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3300.wav", "onoffCaption": "spraying at 0.055-1.055 and woman laughing at 0.776-4.057, 4.642-7.923", "frequencyCaption": "spraying one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3367.wav", "onoffCaption": "cow mooing at 0.244-4.673 and thump thud at 2.481-5.528, 7.722-9.95", "frequencyCaption": "cow mooing one times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_3382.wav", "onoffCaption": "cat meowing at 1.522-6.522, 8.431-9.443", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3459.wav", "onoffCaption": "train horn at 2.054-6.912 and door slamming at 4.29-6.486, 7.105-9.233", "frequencyCaption": "train horn one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_3525.wav", "onoffCaption": "dog barking at 2.799-4.799, 6.347-8.347", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3542.wav", "onoffCaption": "car horn honking at 0.751-3.664 and duck quacking at 6.723-8.723", "frequencyCaption": "car horn honking one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3731.wav", "onoffCaption": "sheep goat bleating at 0.142-2.142, 3.083-5.083 and burping belching at 7.866-9.897", "frequencyCaption": "sheep goat bleating two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3756.wav", "onoffCaption": "cow mooing at 1.082-6.062, 7.102-10.0", "frequencyCaption": "cow 
mooing two times"} +{"filepath": "data/multi_event_train/syn_3821.wav", "onoffCaption": "sneeze at 1.225-2.611, 3.422-4.808, 5.872-7.258 and cat meowing at 2.894-4.465, 6.911-7.922", "frequencyCaption": "sneeze three times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3846.wav", "onoffCaption": "car horn honking at 0.208-4.53, 6.828-9.725 and whistling at 1.886-7.888", "frequencyCaption": "car horn honking two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_51.wav", "onoffCaption": "cat meowing at 0.538-5.538, 6.28-8.719 and door knocking at 5.551-7.678", "frequencyCaption": "cat meowing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_139.wav", "onoffCaption": "car horn honking at 0.702-3.628, 4.583-7.509", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_251.wav", "onoffCaption": "whistling at 1.799-6.974, 7.85-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_350.wav", "onoffCaption": "train horn at 0.188-3.668, 5.14-8.62 and gunshot at 2.187-4.187, 5.945-7.945 and door knocking at 2.794-5.914, 6.611-8.799", "frequencyCaption": "train horn two times and gunshot two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_474.wav", "onoffCaption": "dog barking at 0.186-3.107, 4.014-6.014, 6.644-8.644 and duck quacking at 0.882-2.882, 5.239-7.239", "frequencyCaption": "dog barking three times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_491.wav", "onoffCaption": "duck quacking at 3.523-5.523", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_590.wav", "onoffCaption": "cat meowing at 2.657-7.017, 7.526-10.0", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_607.wav", "onoffCaption": "whistling at 1.666-10.0", "frequencyCaption": "whistling one times"} +{"filepath": 
"data/multi_event_train/syn_706.wav", "onoffCaption": "sneeze at 0.5-2.183, 4.167-5.85", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_749.wav", "onoffCaption": "thump thud at 0.647-3.147, 4.243-6.582 and dog barking at 4.148-6.148", "frequencyCaption": "thump thud two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_871.wav", "onoffCaption": "train horn at 1.148-3.822, 5.745-8.419 and sheep goat bleating at 2.434-4.434", "frequencyCaption": "train horn two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_894.wav", "onoffCaption": "cow mooing at 2.365-6.794, 7.82-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_970.wav", "onoffCaption": "door knocking at 2.028-4.796, 6.209-8.977 and whistling at 2.521-5.496", "frequencyCaption": "door knocking two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_995.wav", "onoffCaption": "woman laughing at 3.336-6.131", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1045.wav", "onoffCaption": "woman laughing at 3.529-10.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1144.wav", "onoffCaption": "thump thud at 0.479-4.929 and tapping clicking clanking at 3.187-6.627", "frequencyCaption": "thump thud one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1236.wav", "onoffCaption": "cow mooing at 2.627-5.596, 6.917-9.886", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1337.wav", "onoffCaption": "burping belching at 2.433-4.636, 5.194-7.367", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1409.wav", "onoffCaption": "sneeze at 3.14-5.601, 7.313-9.789 and thump thud at 3.253-6.3", "frequencyCaption": "sneeze two times and thump thud one times"} +{"filepath": 
"data/multi_event_train/syn_1413.wav", "onoffCaption": "sneeze at 0.398-2.105 and spraying at 0.939-3.067", "frequencyCaption": "sneeze one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1508.wav", "onoffCaption": "explosion at 0.103-3.29", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1512.wav", "onoffCaption": "door slamming at 0.683-2.596 and car horn honking at 5.318-9.159", "frequencyCaption": "door slamming one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1660.wav", "onoffCaption": "cow mooing at 0.418-3.428, 3.973-6.583", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1685.wav", "onoffCaption": "sheep goat bleating at 0.076-2.076, 2.698-4.698, 5.861-7.861", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1761.wav", "onoffCaption": "thump thud at 0.738-5.188, 7.319-9.819", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1784.wav", "onoffCaption": "cow mooing at 0.738-5.167, 5.903-8.899", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1816.wav", "onoffCaption": "cat meowing at 0.631-4.764, 6.378-7.527", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1917.wav", "onoffCaption": "burping belching at 0.12-3.664, 4.722-8.266", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3027.wav", "onoffCaption": "explosion at 1.301-6.301, 7.603-9.923 and gunshot at 2.288-4.762, 5.447-7.921", "frequencyCaption": "explosion two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_3126.wav", "onoffCaption": "spraying at 0.002-0.906, 1.602-3.335 and gunshot at 6.789-8.789", "frequencyCaption": "spraying two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3254.wav", "onoffCaption": "burping 
belching at 1.066-4.346 and sheep goat bleating at 2.429-4.429, 5.198-7.198", "frequencyCaption": "burping belching one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3355.wav", "onoffCaption": "cat meowing at 0.682-1.709, 2.888-3.915, 4.78-5.807 and burping belching at 0.736-3.359", "frequencyCaption": "cat meowing three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3471.wav", "onoffCaption": "spraying at 1.916-3.0", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_3494.wav", "onoffCaption": "explosion at 2.713-5.441, 7.005-9.733", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3570.wav", "onoffCaption": "gunshot at 1.296-3.296, 4.812-6.812", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3595.wav", "onoffCaption": "cat meowing at 2.868-7.228, 8.142-9.291", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3602.wav", "onoffCaption": "explosion at 0.527-3.527 and train horn at 1.368-3.808, 5.59-8.03", "frequencyCaption": "explosion one times and train horn two times"} +{"filepath": "data/multi_event_train/syn_3618.wav", "onoffCaption": "sheep goat bleating at 3.257-5.257", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3703.wav", "onoffCaption": "explosion at 0.155-5.155 and sheep goat bleating at 0.686-3.686, 4.666-7.666 and duck quacking at 6.741-8.741", "frequencyCaption": "explosion one times and sheep goat bleating two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3719.wav", "onoffCaption": "sheep goat bleating at 0.275-2.275, 3.223-5.223, 5.77-7.77", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3874.wav", "onoffCaption": "duck quacking at 3.559-5.559, 6.561-8.561", "frequencyCaption": "duck quacking two times"} 
+{"filepath": "data/multi_event_train/syn_3891.wav", "onoffCaption": "whistling at 0.043-2.918 and dog barking at 0.052-2.052", "frequencyCaption": "whistling one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3990.wav", "onoffCaption": "tapping clicking clanking at 0.705-4.145, 5.601-7.818", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_90.wav", "onoffCaption": "woman laughing at 0.803-3.408, 3.992-6.787", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_123.wav", "onoffCaption": "sheep goat bleating at 0.202-2.202, 3.375-5.375 and sneeze at 3.476-5.472, 6.037-8.033 and cow mooing at 5.23-8.24", "frequencyCaption": "sheep goat bleating two times and sneeze two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_176.wav", "onoffCaption": "cow mooing at 3.536-6.834, 7.601-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_193.wav", "onoffCaption": "explosion at 0.339-3.339, 5.515-7.877", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_305.wav", "onoffCaption": "train horn at 3.158-5.625, 6.413-9.115 and cow mooing at 5.053-8.022", "frequencyCaption": "train horn two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_374.wav", "onoffCaption": "dog barking at 0.106-2.106, 3.22-5.22", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_391.wav", "onoffCaption": "explosion at 0.435-3.164, 4.541-6.834 and spraying at 3.273-6.292", "frequencyCaption": "explosion two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_520.wav", "onoffCaption": "thump thud at 1.037-3.376 and gunshot at 5.909-7.909", "frequencyCaption": "thump thud one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_575.wav", "onoffCaption": "door knocking at 2.665-5.433 and spraying 
at 7.735-8.339, 8.877-9.481", "frequencyCaption": "door knocking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_639.wav", "onoffCaption": "dog barking at 0.024-2.024, 2.943-4.943, 5.802-7.802", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_648.wav", "onoffCaption": "car horn honking at 0.672-4.326 and tapping clicking clanking at 6.904-10.0", "frequencyCaption": "car horn honking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_722.wav", "onoffCaption": "whistling at 0.563-8.574 and tapping clicking clanking at 1.14-4.58", "frequencyCaption": "whistling one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_753.wav", "onoffCaption": "explosion at 0.834-3.834, 5.832-8.832", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_824.wav", "onoffCaption": "burping belching at 2.968-5.198, 6.24-9.24", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_855.wav", "onoffCaption": "sneeze at 1.041-3.655, 5.037-7.651", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1010.wav", "onoffCaption": "cow mooing at 2.924-5.893", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1084.wav", "onoffCaption": "explosion at 2.02-7.02", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1212.wav", "onoffCaption": "cow mooing at 0.686-3.696 and door knocking at 7.844-10.0", "frequencyCaption": "cow mooing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1263.wav", "onoffCaption": "whistling at 2.648-5.523, 6.602-9.287", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1286.wav", "onoffCaption": "sneeze at 2.554-4.513 and thump thud at 6.691-10.0", "frequencyCaption": "sneeze one times and thump 
thud one times"} +{"filepath": "data/multi_event_train/syn_1309.wav", "onoffCaption": "cow mooing at 3.532-7.961 and spraying at 4.142-5.389, 6.187-7.128", "frequencyCaption": "cow mooing one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1378.wav", "onoffCaption": "door slamming at 0.234-2.234 and explosion at 5.159-7.877 and cat meowing at 5.508-7.502", "frequencyCaption": "door slamming one times and explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1437.wav", "onoffCaption": "car horn honking at 0.038-2.964, 4.164-6.34 and dog barking at 0.425-2.425", "frequencyCaption": "car horn honking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1446.wav", "onoffCaption": "cow mooing at 3.706-6.688, 7.502-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1635.wav", "onoffCaption": "car horn honking at 3.734-6.081, 6.76-8.924", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1644.wav", "onoffCaption": "woman laughing at 4.06-6.479, 7.285-9.704", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1828.wav", "onoffCaption": "dog barking at 0.029-2.029", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1859.wav", "onoffCaption": "sneeze at 0.081-3.156 and door slamming at 1.281-1.962", "frequencyCaption": "sneeze one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1933.wav", "onoffCaption": "spraying at 0.054-0.835, 1.62-2.189, 3.452-5.185 and sheep goat bleating at 1.264-3.264, 3.814-5.814, 7.277-9.277", "frequencyCaption": "spraying three times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1942.wav", "onoffCaption": "sheep goat bleating at 1.739-3.739, 6.232-8.232", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": 
"data/multi_event_train/syn_3003.wav", "onoffCaption": "whistling at 1.472-9.857 and burping belching at 1.552-4.552", "frequencyCaption": "whistling one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3072.wav", "onoffCaption": "gunshot at 0.892-2.892, 4.482-6.983", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3097.wav", "onoffCaption": "tapping clicking clanking at 0.349-3.789, 4.563-8.003", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3118.wav", "onoffCaption": "door knocking at 1.689-5.457", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3169.wav", "onoffCaption": "train horn at 3.286-8.188", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3201.wav", "onoffCaption": "woman laughing at 0.017-6.751 and sheep goat bleating at 4.882-6.882", "frequencyCaption": "woman laughing one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3270.wav", "onoffCaption": "spraying at 0.023-1.718, 3.241-4.936 and burping belching at 2.094-5.374", "frequencyCaption": "spraying two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3295.wav", "onoffCaption": "gunshot at 0.894-2.894 and tapping clicking clanking at 1.696-5.136, 6.245-8.873 and cat meowing at 3.659-4.744", "frequencyCaption": "gunshot one times and tapping clicking clanking two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3424.wav", "onoffCaption": "door slamming at 2.693-3.832, 5.0-6.139, 6.66-7.799", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3455.wav", "onoffCaption": "whistling at 0.61-9.665", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3626.wav", "onoffCaption": "tapping clicking clanking at 3.218-6.658", "frequencyCaption": 
"tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3657.wav", "onoffCaption": "gunshot at 0.034-2.034, 3.662-5.662, 7.081-9.081", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3920.wav", "onoffCaption": "door slamming at 2.209-5.17", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_3975.wav", "onoffCaption": "whistling at 1.833-7.333 and thump thud at 3.092-7.467 and cat meowing at 4.531-6.115", "frequencyCaption": "whistling one times and thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_20.wav", "onoffCaption": "thump thud at 0.02-4.47", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_75.wav", "onoffCaption": "burping belching at 0.181-2.704, 4.987-7.081 and duck quacking at 1.551-3.551, 5.137-7.137", "frequencyCaption": "burping belching two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_107.wav", "onoffCaption": "cow mooing at 2.043-7.023", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_148.wav", "onoffCaption": "door knocking at 2.621-4.809 and explosion at 6.949-9.821", "frequencyCaption": "door knocking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_152.wav", "onoffCaption": "burping belching at 0.519-3.778 and tapping clicking clanking at 6.986-10.0", "frequencyCaption": "burping belching one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_321.wav", "onoffCaption": "woman laughing at 0.731-3.336, 4.07-6.675, 7.954-10.0", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_405.wav", "onoffCaption": "cow mooing at 0.652-5.632, 6.45-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_504.wav", "onoffCaption": "door knocking at 0.327-5.16, 5.971-8.528 and 
dog barking at 2.262-4.262 and cat meowing at 3.064-7.424", "frequencyCaption": "door knocking two times and dog barking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_551.wav", "onoffCaption": "cow mooing at 0.409-3.378, 3.897-6.651", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_689.wav", "onoffCaption": "gunshot at 1.993-3.993 and tapping clicking clanking at 6.029-9.469", "frequencyCaption": "gunshot one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_777.wav", "onoffCaption": "burping belching at 0.758-2.961", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_792.wav", "onoffCaption": "dog barking at 0.781-2.781", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_800.wav", "onoffCaption": "spraying at 0.055-0.563, 1.558-2.558, 3.071-3.922 and woman laughing at 5.64-7.84", "frequencyCaption": "spraying three times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_901.wav", "onoffCaption": "door slamming at 0.191-1.715, 3.34-4.359 and sneeze at 7.275-10.0", "frequencyCaption": "door slamming two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1034.wav", "onoffCaption": "whistling at 2.152-9.807", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1061.wav", "onoffCaption": "train horn at 0.129-2.266 and sheep goat bleating at 1.564-4.86, 5.722-7.722 and spraying at 6.161-7.408", "frequencyCaption": "train horn one times and sheep goat bleating two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1135.wav", "onoffCaption": "cat meowing at 0.983-2.123, 2.981-5.67 and thump thud at 1.87-6.32", "frequencyCaption": "cat meowing two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1247.wav", "onoffCaption": "burping belching at 3.068-6.612, 
7.278-9.991", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1462.wav", "onoffCaption": "explosion at 0.227-3.095, 4.092-6.774, 7.66-9.67", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_1478.wav", "onoffCaption": "explosion at 0.04-2.912, 4.447-7.319", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1487.wav", "onoffCaption": "thump thud at 2.941-7.391", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1563.wav", "onoffCaption": "duck quacking at 2.444-4.444 and woman laughing at 7.55-10.0", "frequencyCaption": "duck quacking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1579.wav", "onoffCaption": "door knocking at 0.166-3.928, 4.859-8.621 and door slamming at 6.402-9.183", "frequencyCaption": "door knocking two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1586.wav", "onoffCaption": "door knocking at 1.791-4.031, 5.253-7.493 and train horn at 2.702-5.902", "frequencyCaption": "door knocking two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1611.wav", "onoffCaption": "thump thud at 0.696-4.363 and sheep goat bleating at 2.839-5.919, 7.879-9.879", "frequencyCaption": "thump thud one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1867.wav", "onoffCaption": "cow mooing at 2.159-5.141, 5.718-8.591", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1882.wav", "onoffCaption": "gunshot at 0.885-3.125, 5.094-7.334 and sheep goat bleating at 2.826-4.826, 6.678-8.678", "frequencyCaption": "gunshot two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1898.wav", "onoffCaption": "duck quacking at 3.245-5.245, 6.684-8.684", "frequencyCaption": "duck quacking two times"} +{"filepath": 
"data/multi_event_train/syn_1966.wav", "onoffCaption": "explosion at 0.09-2.097, 4.572-7.1", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1983.wav", "onoffCaption": "dog barking at 0.369-2.369, 4.251-6.251", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1999.wav", "onoffCaption": "cat meowing at 2.229-3.24", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3056.wav", "onoffCaption": "train horn at 1.134-3.774", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3157.wav", "onoffCaption": "train horn at 0.176-4.296 and cat meowing at 7.151-8.706", "frequencyCaption": "train horn one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3225.wav", "onoffCaption": "car horn honking at 2.801-5.727, 6.289-8.754 and duck quacking at 4.606-6.606", "frequencyCaption": "car horn honking two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3400.wav", "onoffCaption": "explosion at 1.732-4.726, 7.146-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3501.wav", "onoffCaption": "duck quacking at 0.252-2.252", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3673.wav", "onoffCaption": "spraying at 0.097-1.161, 2.725-4.853 and gunshot at 0.222-2.222, 4.038-6.038, 7.509-9.509", "frequencyCaption": "spraying two times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_3696.wav", "onoffCaption": "train horn at 1.872-5.992", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3768.wav", "onoffCaption": "burping belching at 0.085-4.085, 5.224-9.224 and train horn at 5.33-9.53", "frequencyCaption": "burping belching two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3805.wav", "onoffCaption": "explosion at 0.285-2.813, 
5.27-8.27 and duck quacking at 1.387-3.387", "frequencyCaption": "explosion two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3904.wav", "onoffCaption": "tapping clicking clanking at 2.524-5.964, 7.219-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3951.wav", "onoffCaption": "burping belching at 2.919-6.121, 6.638-9.84", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_220.wav", "onoffCaption": "cow mooing at 1.917-6.346, 7.969-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_227.wav", "onoffCaption": "cat meowing at 3.979-5.006 and cow mooing at 7.653-10.0", "frequencyCaption": "cat meowing one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_275.wav", "onoffCaption": "tapping clicking clanking at 0.18-3.62", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_290.wav", "onoffCaption": "train horn at 0.463-3.703, 5.072-7.818 and whistling at 5.549-7.778", "frequencyCaption": "train horn two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_402.wav", "onoffCaption": "woman laughing at 1.858-4.496, 5.165-7.649 and burping belching at 2.466-5.466", "frequencyCaption": "woman laughing two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_450.wav", "onoffCaption": "tapping clicking clanking at 3.642-7.082", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_519.wav", "onoffCaption": "tapping clicking clanking at 2.995-6.435", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_623.wav", "onoffCaption": "dog barking at 0.836-2.836, 3.847-5.847, 6.983-8.983", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_671.wav", 
"onoffCaption": "explosion at 2.427-4.52, 5.452-8.143", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_676.wav", "onoffCaption": "spraying at 2.443-3.443 and train horn at 2.832-6.192", "frequencyCaption": "spraying one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_693.wav", "onoffCaption": "burping belching at 1.052-6.653, 7.502-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_694.wav", "onoffCaption": "tapping clicking clanking at 0.286-3.726, 4.727-8.167", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_738.wav", "onoffCaption": "burping belching at 2.978-9.955", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_788.wav", "onoffCaption": "woman laughing at 2.872-4.964, 6.418-8.711", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_906.wav", "onoffCaption": "duck quacking at 1.199-3.199, 5.451-7.451 and spraying at 1.857-2.798", "frequencyCaption": "duck quacking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_954.wav", "onoffCaption": "woman laughing at 1.448-3.685, 5.85-8.087", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1029.wav", "onoffCaption": "woman laughing at 0.266-2.852 and spraying at 1.809-2.393, 4.645-5.229", "frequencyCaption": "woman laughing one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_1132.wav", "onoffCaption": "train horn at 0.148-3.508, 4.725-8.085", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1160.wav", "onoffCaption": "sheep goat bleating at 0.318-2.318, 3.094-5.094, 7.441-9.441", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1185.wav", "onoffCaption": "thump thud at 0.834-3.605 and duck quacking 
at 1.364-3.364, 5.81-7.81", "frequencyCaption": "thump thud one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1208.wav", "onoffCaption": "spraying at 1.244-1.752, 4.209-4.959 and sneeze at 7.711-9.037", "frequencyCaption": "spraying two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_1313.wav", "onoffCaption": "cow mooing at 0.194-3.163, 5.187-7.601 and burping belching at 0.273-2.304 and woman laughing at 3.885-6.585", "frequencyCaption": "cow mooing two times and burping belching one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1341.wav", "onoffCaption": "cow mooing at 2.719-5.701, 6.832-9.814", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1346.wav", "onoffCaption": "spraying at 0.168-1.019", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1536.wav", "onoffCaption": "car horn honking at 1.992-5.833, 6.819-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1564.wav", "onoffCaption": "cat meowing at 0.177-2.072, 4.308-6.203 and duck quacking at 0.678-2.678, 4.915-6.915", "frequencyCaption": "cat meowing two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1581.wav", "onoffCaption": "train horn at 2.551-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1710.wav", "onoffCaption": "cat meowing at 0.096-1.198, 2.271-3.373, 4.359-5.461 and explosion at 0.76-2.824, 3.965-6.029 and sheep goat bleating at 4.978-6.978", "frequencyCaption": "cat meowing three times and explosion two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1717.wav", "onoffCaption": "explosion at 0.595-3.595 and woman laughing at 6.279-8.634", "frequencyCaption": "explosion one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1745.wav", "onoffCaption": 
"burping belching at 0.264-3.264, 4.921-7.921", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1832.wav", "onoffCaption": "train horn at 2.432-5.072 and cow mooing at 2.509-5.478, 6.707-9.676", "frequencyCaption": "train horn one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1860.wav", "onoffCaption": "cat meowing at 0.869-2.009, 4.238-5.265, 6.706-8.666", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1885.wav", "onoffCaption": "thump thud at 0.058-2.286", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1929.wav", "onoffCaption": "train horn at 1.408-3.563, 4.706-6.861", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3019.wav", "onoffCaption": "tapping clicking clanking at 1.778-5.218, 6.534-9.974", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3102.wav", "onoffCaption": "dog barking at 2.444-4.444 and tapping clicking clanking at 5.38-8.82 and thump thud at 6.34-8.568", "frequencyCaption": "dog barking one times and tapping clicking clanking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3150.wav", "onoffCaption": "door knocking at 0.459-2.922 and sneeze at 7.356-9.419", "frequencyCaption": "door knocking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_3238.wav", "onoffCaption": "whistling at 0.15-9.815 and sheep goat bleating at 1.787-3.787, 4.729-6.729", "frequencyCaption": "whistling one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3323.wav", "onoffCaption": "spraying at 0.067-0.817", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_3324.wav", "onoffCaption": "sneeze at 2.984-7.04", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3371.wav", 
"onoffCaption": "duck quacking at 2.226-4.226, 5.912-7.912", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3394.wav", "onoffCaption": "explosion at 2.848-6.401, 7.091-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3506.wav", "onoffCaption": "whistling at 0.097-7.847 and door knocking at 0.498-3.266, 4.128-6.508, 7.796-10.0", "frequencyCaption": "whistling one times and door knocking three times"} +{"filepath": "data/multi_event_train/syn_3554.wav", "onoffCaption": "train horn at 0.045-4.113, 4.781-7.661", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3669.wav", "onoffCaption": "burping belching at 0.131-2.892, 4.13-6.891, 7.797-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_3727.wav", "onoffCaption": "sneeze at 1.669-3.665", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3772.wav", "onoffCaption": "door knocking at 2.434-5.501, 6.209-9.276", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3775.wav", "onoffCaption": "car horn honking at 0.698-3.516 and sheep goat bleating at 4.98-6.98", "frequencyCaption": "car horn honking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3790.wav", "onoffCaption": "cow mooing at 3.114-6.083, 7.337-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3797.wav", "onoffCaption": "duck quacking at 0.574-2.574, 3.148-5.148, 5.713-7.713 and dog barking at 2.459-4.459, 6.087-8.087", "frequencyCaption": "duck quacking three times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_3802.wav", "onoffCaption": "whistling at 0.586-8.597 and sheep goat bleating at 1.539-3.539, 5.134-7.134", "frequencyCaption": "whistling one times and sheep goat bleating two times"} +{"filepath": 
"data/multi_event_train/syn_3850.wav", "onoffCaption": "thump thud at 0.06-4.51, 6.619-8.847", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3919.wav", "onoffCaption": "sheep goat bleating at 0.38-2.38", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_68.wav", "onoffCaption": "spraying at 1.145-1.72, 2.499-3.074, 3.755-4.33 and tapping clicking clanking at 1.944-5.384, 6.002-8.278", "frequencyCaption": "spraying three times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_72.wav", "onoffCaption": "spraying at 2.92-3.547, 4.121-5.378, 6.063-8.191", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_97.wav", "onoffCaption": "thump thud at 0.697-4.364, 5.526-7.754", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_100.wav", "onoffCaption": "door knocking at 2.269-5.389 and explosion at 7.884-10.0", "frequencyCaption": "door knocking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_272.wav", "onoffCaption": "burping belching at 0.523-4.029, 6.193-9.699", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_297.wav", "onoffCaption": "tapping clicking clanking at 1.345-4.785, 6.911-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_369.wav", "onoffCaption": "burping belching at 0.269-2.367", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_396.wav", "onoffCaption": "cow mooing at 0.073-3.055, 5.173-8.042", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_457.wav", "onoffCaption": "explosion at 1.716-4.903 and door knocking at 3.096-5.16, 6.335-8.399", "frequencyCaption": "explosion one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_556.wav", 
"onoffCaption": "sneeze at 0.228-3.436, 4.412-5.658 and door slamming at 1.53-4.504, 6.22-9.194", "frequencyCaption": "sneeze two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_624.wav", "onoffCaption": "sneeze at 0.49-2.449 and thump thud at 5.007-9.457", "frequencyCaption": "sneeze one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_848.wav", "onoffCaption": "thump thud at 3.74-6.24, 7.599-9.797", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_852.wav", "onoffCaption": "tapping clicking clanking at 1.814-5.254, 6.511-9.021", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_949.wav", "onoffCaption": "spraying at 2.482-4.215, 6.71-7.794, 8.844-9.445", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_953.wav", "onoffCaption": "car horn honking at 3.959-6.872", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1066.wav", "onoffCaption": "explosion at 2.375-4.695, 5.778-8.098 and woman laughing at 4.233-6.349, 7.227-9.343", "frequencyCaption": "explosion two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1083.wav", "onoffCaption": "woman laughing at 2.835-5.043, 7.399-9.607", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1099.wav", "onoffCaption": "burping belching at 3.336-6.515", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1167.wav", "onoffCaption": "burping belching at 0.344-3.344, 5.134-7.499", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1182.wav", "onoffCaption": "spraying at 1.885-4.277, 5.008-5.508, 7.252-9.014", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_1198.wav", "onoffCaption": "door knocking at 1.186-6.019, 7.123-10.0", 
"frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1215.wav", "onoffCaption": "tapping clicking clanking at 0.347-3.787, 5.034-8.474", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1314.wav", "onoffCaption": "thump thud at 3.539-7.989", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1430.wav", "onoffCaption": "tapping clicking clanking at 2.991-6.431, 7.373-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1531.wav", "onoffCaption": "cat meowing at 0.302-2.296, 3.126-5.12 and car horn honking at 1.986-5.481", "frequencyCaption": "cat meowing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1643.wav", "onoffCaption": "thump thud at 2.695-7.07", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1659.wav", "onoffCaption": "explosion at 3.679-6.551, 7.448-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1742.wav", "onoffCaption": "tapping clicking clanking at 0.157-3.597, 5.964-8.192 and gunshot at 2.244-4.374, 4.978-6.978, 7.529-9.699", "frequencyCaption": "tapping clicking clanking two times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_1758.wav", "onoffCaption": "burping belching at 0.668-3.191, 4.416-7.208 and car horn honking at 1.278-4.191", "frequencyCaption": "burping belching two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1835.wav", "onoffCaption": "woman laughing at 2.231-4.869, 6.638-9.276", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1934.wav", "onoffCaption": "thump thud at 0.624-4.291 and explosion at 7.861-10.0", "frequencyCaption": "thump thud one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3004.wav", "onoffCaption": 
"duck quacking at 0.12-2.12, 3.932-5.932 and door slamming at 8.291-9.191", "frequencyCaption": "duck quacking two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3105.wav", "onoffCaption": "thump thud at 2.398-4.898, 6.624-9.124", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3277.wav", "onoffCaption": "woman laughing at 0.86-3.143, 4.265-6.548, 7.463-9.746", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_3288.wav", "onoffCaption": "sneeze at 0.323-1.415, 2.705-3.797, 5.231-6.323 and train horn at 2.628-5.068", "frequencyCaption": "sneeze three times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3292.wav", "onoffCaption": "burping belching at 1.879-5.879", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3376.wav", "onoffCaption": "train horn at 0.694-4.094, 5.406-8.061", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3389.wav", "onoffCaption": "thump thud at 0.609-5.059", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3393.wav", "onoffCaption": "door slamming at 0.336-1.509", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_3448.wav", "onoffCaption": "gunshot at 2.818-4.818 and door knocking at 4.83-6.99, 7.814-9.974", "frequencyCaption": "gunshot one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3452.wav", "onoffCaption": "spraying at 3.48-4.002, 5.912-6.434", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3549.wav", "onoffCaption": "cow mooing at 1.512-4.522 and sheep goat bleating at 6.085-9.381", "frequencyCaption": "cow mooing one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3553.wav", "onoffCaption": "explosion at 0.046-3.102, 4.365-7.421 and cow mooing at 
1.437-6.417", "frequencyCaption": "explosion two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3621.wav", "onoffCaption": "duck quacking at 0.251-2.251, 3.603-5.603 and door slamming at 3.149-4.914, 5.503-6.503", "frequencyCaption": "duck quacking two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_3720.wav", "onoffCaption": "gunshot at 0.119-2.119, 3.389-5.389, 6.323-8.323", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3857.wav", "onoffCaption": "car horn honking at 1.094-4.02, 4.596-7.522", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3956.wav", "onoffCaption": "car horn honking at 3.505-8.412", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_27.wav", "onoffCaption": "sheep goat bleating at 3.479-5.479, 6.791-8.791 and cow mooing at 3.583-8.012", "frequencyCaption": "sheep goat bleating two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_56.wav", "onoffCaption": "duck quacking at 0.787-2.787 and cat meowing at 1.168-2.18, 3.484-4.496", "frequencyCaption": "duck quacking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_124.wav", "onoffCaption": "explosion at 0.32-2.413 and dog barking at 4.449-6.449, 7.727-9.727 and tapping clicking clanking at 4.578-8.018", "frequencyCaption": "explosion one times and dog barking two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_155.wav", "onoffCaption": "sheep goat bleating at 3.874-6.954", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_268.wav", "onoffCaption": "burping belching at 2.076-7.076, 7.833-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_326.wav", "onoffCaption": "sneeze at 0.217-1.32 and duck quacking at 3.736-5.736", 
"frequencyCaption": "sneeze one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_373.wav", "onoffCaption": "door knocking at 2.926-5.694, 6.774-9.493", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_418.wav", "onoffCaption": "train horn at 0.085-3.619", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_469.wav", "onoffCaption": "train horn at 2.048-5.808, 6.803-9.524", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_503.wav", "onoffCaption": "door slamming at 2.492-4.975 and explosion at 2.856-5.117, 6.1-8.361", "frequencyCaption": "door slamming one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_597.wav", "onoffCaption": "dog barking at 2.154-4.154 and car horn honking at 6.717-9.643", "frequencyCaption": "dog barking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_725.wav", "onoffCaption": "car horn honking at 0.486-3.661, 4.479-6.592", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_770.wav", "onoffCaption": "explosion at 0.494-2.668, 3.329-5.503 and gunshot at 2.729-4.859 and spraying at 9.35-10.0", "frequencyCaption": "explosion two times and gunshot one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_795.wav", "onoffCaption": "woman laughing at 0.279-2.36, 4.524-6.605, 7.54-9.621", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_807.wav", "onoffCaption": "dog barking at 0.412-2.412 and sneeze at 3.819-5.347, 6.581-7.967", "frequencyCaption": "dog barking one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_876.wav", "onoffCaption": "whistling at 2.475-4.704, 5.69-7.699 and train horn at 6.152-8.952", "frequencyCaption": "whistling two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_893.wav", 
"onoffCaption": "explosion at 0.254-3.254 and sneeze at 5.264-7.327", "frequencyCaption": "explosion one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_988.wav", "onoffCaption": "sheep goat bleating at 1.533-3.533 and cat meowing at 6.128-7.488", "frequencyCaption": "sheep goat bleating one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1033.wav", "onoffCaption": "gunshot at 1.931-4.437", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_1042.wav", "onoffCaption": "whistling at 1.292-9.042", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1128.wav", "onoffCaption": "sheep goat bleating at 1.761-3.761, 5.687-7.687", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1159.wav", "onoffCaption": "burping belching at 0.102-2.517 and door slamming at 0.169-3.048", "frequencyCaption": "burping belching one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1240.wav", "onoffCaption": "car horn honking at 3.031-6.618 and spraying at 7.178-8.435", "frequencyCaption": "car horn honking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1414.wav", "onoffCaption": "car horn honking at 2.864-5.79, 7.277-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1465.wav", "onoffCaption": "door knocking at 0.143-3.21, 4.674-7.054", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1480.wav", "onoffCaption": "train horn at 3.285-6.445", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1616.wav", "onoffCaption": "sheep goat bleating at 1.724-3.724, 4.694-6.694, 7.757-9.757", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1910.wav", "onoffCaption": "gunshot at 3.62-5.62, 7.01-9.01", "frequencyCaption": 
"gunshot two times"} +{"filepath": "data/multi_event_train/syn_1961.wav", "onoffCaption": "explosion at 0.83-5.83 and car horn honking at 2.125-6.525, 7.843-10.0", "frequencyCaption": "explosion one times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1984.wav", "onoffCaption": "explosion at 0.271-3.271 and spraying at 0.606-3.042, 4.649-7.085 and gunshot at 4.573-6.592", "frequencyCaption": "explosion one times and spraying two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_3020.wav", "onoffCaption": "thump thud at 0.274-2.613, 4.736-7.075 and door slamming at 1.616-2.421, 4.032-5.335 and cat meowing at 4.18-7.421", "frequencyCaption": "thump thud two times and door slamming two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3051.wav", "onoffCaption": "explosion at 0.583-5.583, 7.425-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3222.wav", "onoffCaption": "whistling at 2.234-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3339.wav", "onoffCaption": "sneeze at 3.7-5.659 and car horn honking at 7.905-10.0", "frequencyCaption": "sneeze one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3407.wav", "onoffCaption": "cat meowing at 0.704-1.716, 3.124-5.064 and spraying at 1.93-4.322 and woman laughing at 7.89-10.0", "frequencyCaption": "cat meowing two times and spraying one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3476.wav", "onoffCaption": "sneeze at 0.2-1.294, 2.325-4.449", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3493.wav", "onoffCaption": "explosion at 2.754-5.015", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3588.wav", "onoffCaption": "door knocking at 3.157-5.727, 7.114-9.684", "frequencyCaption": "door knocking two times"} +{"filepath": 
"data/multi_event_train/syn_3674.wav", "onoffCaption": "burping belching at 0.178-3.738, 4.276-7.836 and duck quacking at 7.421-9.421", "frequencyCaption": "burping belching two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3691.wav", "onoffCaption": "train horn at 2.007-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3818.wav", "onoffCaption": "train horn at 0.086-2.966, 5.129-7.598", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3869.wav", "onoffCaption": "spraying at 0.068-1.152, 3.159-3.734", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3903.wav", "onoffCaption": "gunshot at 0.325-2.325, 3.275-5.275, 6.343-8.343", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_3972.wav", "onoffCaption": "explosion at 2.923-4.93 and cat meowing at 7.772-9.308", "frequencyCaption": "explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3997.wav", "onoffCaption": "spraying at 0.219-1.086, 2.175-3.042 and gunshot at 1.073-3.073, 4.828-6.828", "frequencyCaption": "spraying two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_171.wav", "onoffCaption": "sheep goat bleating at 0.622-2.622, 3.342-5.342 and explosion at 0.656-5.577, 6.689-10.0", "frequencyCaption": "sheep goat bleating two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_194.wav", "onoffCaption": "cow mooing at 4.267-9.247", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_203.wav", "onoffCaption": "spraying at 0.497-2.192, 2.836-3.777, 4.996-5.847", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_219.wav", "onoffCaption": "burping belching at 0.188-3.188 and door knocking at 0.709-4.471", "frequencyCaption": "burping belching one times and door knocking one times"} 
+{"filepath": "data/multi_event_train/syn_302.wav", "onoffCaption": "spraying at 2.0-2.75", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_318.wav", "onoffCaption": "door knocking at 3.232-6.079, 7.926-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_357.wav", "onoffCaption": "car horn honking at 0.444-3.939", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_527.wav", "onoffCaption": "explosion at 0.505-3.373 and tapping clicking clanking at 6.208-9.648", "frequencyCaption": "explosion one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_572.wav", "onoffCaption": "whistling at 1.684-3.693, 5.499-7.606", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_701.wav", "onoffCaption": "door slamming at 0.027-0.967, 2.312-3.252 and sneeze at 6.756-8.256", "frequencyCaption": "door slamming two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_754.wav", "onoffCaption": "dog barking at 2.725-4.725, 5.904-7.904", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_823.wav", "onoffCaption": "whistling at 0.976-6.151, 7.109-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_938.wav", "onoffCaption": "cat meowing at 0.419-3.323, 4.149-6.126, 7.37-9.364", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1017.wav", "onoffCaption": "sneeze at 1.526-3.765 and dog barking at 3.672-5.672, 6.695-8.695", "frequencyCaption": "sneeze one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_1231.wav", "onoffCaption": "door knocking at 0.655-3.03, 4.154-6.529, 7.337-9.712", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1264.wav", "onoffCaption": "sneeze at 2.384-3.912, 4.476-5.802, 
6.605-9.066", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_1281.wav", "onoffCaption": "burping belching at 0.058-5.058, 7.551-9.892 and door slamming at 1.441-3.637", "frequencyCaption": "burping belching two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1365.wav", "onoffCaption": "train horn at 3.067-9.536", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1380.wav", "onoffCaption": "thump thud at 1.007-3.507, 5.962-8.301", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1441.wav", "onoffCaption": "thump thud at 1.625-4.125, 5.821-7.956", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1628.wav", "onoffCaption": "spraying at 1.654-3.387", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1632.wav", "onoffCaption": "explosion at 0.414-3.142", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1667.wav", "onoffCaption": "train horn at 0.181-5.258, 7.479-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1682.wav", "onoffCaption": "train horn at 3.047-5.202, 5.893-8.048", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1729.wav", "onoffCaption": "burping belching at 2.765-4.968, 6.119-8.322", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_1733.wav", "onoffCaption": "spraying at 2.841-4.098, 6.577-8.339", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1799.wav", "onoffCaption": "cat meowing at 0.443-2.027, 4.267-5.851", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1945.wav", "onoffCaption": "dog barking at 2.352-4.352 and gunshot at 7.525-9.525", "frequencyCaption": "dog barking one times and gunshot one times"} 
+{"filepath": "data/multi_event_train/syn_3075.wav", "onoffCaption": "gunshot at 0.101-2.101", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3090.wav", "onoffCaption": "sheep goat bleating at 0.67-2.67, 3.618-5.618 and gunshot at 2.733-4.752, 6.302-8.321", "frequencyCaption": "sheep goat bleating two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_3206.wav", "onoffCaption": "car horn honking at 0.644-3.109, 5.341-7.806", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3253.wav", "onoffCaption": "cat meowing at 0.657-2.017", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3307.wav", "onoffCaption": "burping belching at 2.496-6.04, 6.739-9.063 and woman laughing at 3.176-5.768", "frequencyCaption": "burping belching two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3348.wav", "onoffCaption": "sheep goat bleating at 1.975-3.975 and burping belching at 2.183-4.944, 5.965-8.088", "frequencyCaption": "sheep goat bleating one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_3423.wav", "onoffCaption": "thump thud at 2.28-4.508, 6.92-9.148", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3538.wav", "onoffCaption": "duck quacking at 2.278-4.278, 5.652-7.652", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3605.wav", "onoffCaption": "dog barking at 0.121-2.121", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3650.wav", "onoffCaption": "whistling at 0.406-2.415, 3.048-5.057, 6.24-8.249 and spraying at 4.176-4.826, 5.812-6.876", "frequencyCaption": "whistling three times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3751.wav", "onoffCaption": "gunshot at 0.272-2.272, 4.238-6.331, 7.459-9.459", "frequencyCaption": "gunshot three 
times"} +{"filepath": "data/multi_event_train/syn_3927.wav", "onoffCaption": "door slamming at 0.028-2.028", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_19.wav", "onoffCaption": "gunshot at 2.917-4.917", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_256.wav", "onoffCaption": "sheep goat bleating at 1.495-3.495, 4.567-6.567, 7.123-9.123", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_426.wav", "onoffCaption": "woman laughing at 2.649-4.932, 5.692-7.975", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_432.wav", "onoffCaption": "door knocking at 0.055-3.43 and spraying at 7.599-8.2", "frequencyCaption": "door knocking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_473.wav", "onoffCaption": "gunshot at 0.698-2.698, 3.947-5.947, 6.742-8.742 and explosion at 4.807-7.863", "frequencyCaption": "gunshot three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_496.wav", "onoffCaption": "car horn honking at 2.094-5.313, 5.949-9.168", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_529.wav", "onoffCaption": "door knocking at 3.432-5.592", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_568.wav", "onoffCaption": "dog barking at 0.403-2.403, 4.637-6.637", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_600.wav", "onoffCaption": "train horn at 2.315-8.03", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_641.wav", "onoffCaption": "cow mooing at 0.533-3.515, 4.376-7.358 and duck quacking at 3.39-5.39 and whistling at 5.257-8.232", "frequencyCaption": "cow mooing two times and duck quacking one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_655.wav", "onoffCaption": 
"spraying at 0.001-0.852", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_839.wav", "onoffCaption": "door slamming at 2.325-3.442, 4.75-5.867, 6.927-8.044 and sneeze at 3.071-4.359, 4.959-6.247, 7.169-8.457", "frequencyCaption": "door slamming three times and sneeze three times"} +{"filepath": "data/multi_event_train/syn_889.wav", "onoffCaption": "sheep goat bleating at 3.745-5.745, 7.04-9.04", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_922.wav", "onoffCaption": "door knocking at 2.749-5.596, 6.697-9.593", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_936.wav", "onoffCaption": "gunshot at 2.615-4.656, 5.659-7.7", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_977.wav", "onoffCaption": "car horn honking at 0.024-3.199, 4.429-7.604 and door slamming at 1.132-1.983, 2.772-3.623, 5.279-6.13 and spraying at 3.137-3.918, 5.76-6.541", "frequencyCaption": "car horn honking two times and door slamming three times and spraying two times"} +{"filepath": "data/multi_event_train/syn_992.wav", "onoffCaption": "train horn at 0.88-6.595, 7.622-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1019.wav", "onoffCaption": "door slamming at 0.69-3.664 and duck quacking at 7.922-9.922", "frequencyCaption": "door slamming one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1058.wav", "onoffCaption": "door slamming at 0.388-2.388, 3.626-5.626 and explosion at 2.533-5.286, 6.989-9.497", "frequencyCaption": "door slamming two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_1102.wav", "onoffCaption": "gunshot at 3.478-5.608, 6.53-8.77", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1116.wav", "onoffCaption": "door slamming at 2.112-4.24, 5.163-7.291", "frequencyCaption": "door slamming two 
times"} +{"filepath": "data/multi_event_train/syn_1143.wav", "onoffCaption": "sneeze at 2.226-4.185, 5.315-7.274, 7.89-9.849", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_1330.wav", "onoffCaption": "burping belching at 3.154-6.154", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1515.wav", "onoffCaption": "spraying at 0.068-0.669 and woman laughing at 2.812-4.904, 6.245-8.327", "frequencyCaption": "spraying one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1540.wav", "onoffCaption": "sheep goat bleating at 1.643-3.643, 4.39-6.39, 7.124-9.124", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_1554.wav", "onoffCaption": "cat meowing at 2.76-3.781 and door slamming at 5.94-7.079", "frequencyCaption": "cat meowing one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_1698.wav", "onoffCaption": "spraying at 3.088-5.216", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_1766.wav", "onoffCaption": "dog barking at 2.815-4.815, 5.782-8.22", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1783.wav", "onoffCaption": "door knocking at 2.091-5.644, 6.754-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1811.wav", "onoffCaption": "duck quacking at 0.315-2.315, 3.397-5.397 and dog barking at 0.403-2.403, 3.261-5.261, 6.58-8.58", "frequencyCaption": "duck quacking two times and dog barking three times"} +{"filepath": "data/multi_event_train/syn_1844.wav", "onoffCaption": "train horn at 2.463-6.903", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1850.wav", "onoffCaption": "sheep goat bleating at 0.308-2.308, 3.201-5.201, 7.564-10.0", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": 
"data/multi_event_train/syn_3121.wav", "onoffCaption": "burping belching at 0.675-4.698, 6.615-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3160.wav", "onoffCaption": "burping belching at 0.221-2.344, 4.641-6.672, 7.976-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_3174.wav", "onoffCaption": "cow mooing at 0.128-3.138, 4.765-7.734 and gunshot at 0.227-2.227, 2.757-4.85, 6.127-8.127 and tapping clicking clanking at 5.439-8.879", "frequencyCaption": "cow mooing two times and gunshot three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3185.wav", "onoffCaption": "gunshot at 1.368-3.368, 4.358-6.358", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3191.wav", "onoffCaption": "explosion at 3.122-6.122", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3249.wav", "onoffCaption": "car horn honking at 0.422-2.887, 3.961-6.426", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3352.wav", "onoffCaption": "dog barking at 0.73-4.05, 6.487-9.807", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3439.wav", "onoffCaption": "sneeze at 0.638-5.638, 6.306-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_3489.wav", "onoffCaption": "burping belching at 0.191-2.952 and car horn honking at 6.918-10.0", "frequencyCaption": "burping belching one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3522.wav", "onoffCaption": "tapping clicking clanking at 2.936-6.376 and dog barking at 3.613-5.613 and spraying at 4.705-6.467, 7.598-9.293", "frequencyCaption": "tapping clicking clanking one times and dog barking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3536.wav", "onoffCaption": "gunshot at 
1.616-3.616, 5.17-7.17", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3577.wav", "onoffCaption": "car horn honking at 0.724-5.631, 6.312-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3592.wav", "onoffCaption": "sheep goat bleating at 0.138-3.218, 3.865-5.865 and car horn honking at 2.443-6.765", "frequencyCaption": "sheep goat bleating two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3704.wav", "onoffCaption": "sneeze at 1.924-3.027, 4.925-6.028 and cow mooing at 7.95-10.0", "frequencyCaption": "sneeze two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3826.wav", "onoffCaption": "spraying at 0.306-2.001 and cat meowing at 6.478-10.0", "frequencyCaption": "spraying one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3832.wav", "onoffCaption": "burping belching at 0.032-3.032 and explosion at 5.43-10.0", "frequencyCaption": "burping belching one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_3873.wav", "onoffCaption": "thump thud at 0.405-4.855, 5.664-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3896.wav", "onoffCaption": "gunshot at 0.419-2.419, 3.141-5.141, 6.145-8.145 and burping belching at 1.956-5.979", "frequencyCaption": "gunshot three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3929.wav", "onoffCaption": "burping belching at 2.342-4.377, 5.498-7.533", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3968.wav", "onoffCaption": "thump thud at 1.08-5.53, 7.974-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_58.wav", "onoffCaption": "tapping clicking clanking at 2.24-5.68, 6.942-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": 
"data/multi_event_train/syn_217.wav", "onoffCaption": "duck quacking at 1.061-3.061, 4.757-6.757, 7.312-9.312", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_242.wav", "onoffCaption": "woman laughing at 0.008-2.233, 3.414-5.519, 7.665-10.0", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_258.wav", "onoffCaption": "door knocking at 2.89-6.506, 7.322-9.482", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_343.wav", "onoffCaption": "duck quacking at 3.426-5.426, 6.592-8.592", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_359.wav", "onoffCaption": "cow mooing at 0.104-3.086 and woman laughing at 5.605-10.0", "frequencyCaption": "cow mooing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_467.wav", "onoffCaption": "thump thud at 1.125-3.896, 5.7-8.471", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_482.wav", "onoffCaption": "door knocking at 2.099-4.723, 6.801-9.425", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_566.wav", "onoffCaption": "dog barking at 2.388-4.388, 6.567-8.567", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_599.wav", "onoffCaption": "whistling at 0.26-5.435, 6.002-8.011", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_614.wav", "onoffCaption": "woman laughing at 0.863-2.968", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_715.wav", "onoffCaption": "woman laughing at 2.516-6.568", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_878.wav", "onoffCaption": "spraying at 0.47-0.992 and whistling at 2.956-5.831", "frequencyCaption": "spraying one times and whistling one times"} +{"filepath": 
"data/multi_event_train/syn_963.wav", "onoffCaption": "cow mooing at 0.407-3.705, 4.406-7.704", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_986.wav", "onoffCaption": "duck quacking at 2.219-4.219, 5.442-7.442", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1157.wav", "onoffCaption": "car horn honking at 1.378-4.553", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1225.wav", "onoffCaption": "train horn at 2.208-5.978", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1324.wav", "onoffCaption": "duck quacking at 0.456-2.456, 3.38-5.38, 6.356-8.356 and thump thud at 2.341-4.68, 5.337-7.676", "frequencyCaption": "duck quacking three times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_1371.wav", "onoffCaption": "gunshot at 0.281-2.411 and whistling at 4.833-9.317", "frequencyCaption": "gunshot one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1394.wav", "onoffCaption": "sheep goat bleating at 0.453-2.453, 3.709-5.709", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1501.wav", "onoffCaption": "door slamming at 3.171-5.629", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_1669.wav", "onoffCaption": "door slamming at 0.076-0.881, 1.714-2.519 and car horn honking at 5.815-8.28", "frequencyCaption": "door slamming two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1673.wav", "onoffCaption": "train horn at 2.681-5.881, 7.622-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1696.wav", "onoffCaption": "woman laughing at 2.735-9.469", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1727.wav", "onoffCaption": "woman laughing at 2.101-4.295, 5.621-7.848 and 
whistling at 2.263-5.138", "frequencyCaption": "woman laughing two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_1768.wav", "onoffCaption": "train horn at 0.389-2.789", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1772.wav", "onoffCaption": "cow mooing at 3.636-6.934", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1797.wav", "onoffCaption": "door slamming at 0.328-2.456, 3.815-5.943", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1805.wav", "onoffCaption": "gunshot at 3.296-5.296", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3135.wav", "onoffCaption": "car horn honking at 2.114-6.436, 7.636-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3208.wav", "onoffCaption": "door slamming at 3.143-4.121, 5.763-6.741, 7.247-8.225", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3247.wav", "onoffCaption": "spraying at 1.987-2.487, 4.039-4.539 and door slamming at 7.66-9.425", "frequencyCaption": "spraying two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3313.wav", "onoffCaption": "cow mooing at 0.136-4.565, 6.674-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3346.wav", "onoffCaption": "tapping clicking clanking at 2.996-6.436, 7.718-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3478.wav", "onoffCaption": "door slamming at 2.783-5.757, 6.792-8.988", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_3563.wav", "onoffCaption": "explosion at 0.035-2.763, 3.269-5.997", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_3586.wav", "onoffCaption": "dog barking at 2.001-4.001, 6.436-8.436", 
"frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_3611.wav", "onoffCaption": "burping belching at 0.545-3.545", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3710.wav", "onoffCaption": "sheep goat bleating at 0.418-2.418, 3.248-5.248, 6.327-8.327", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3745.wav", "onoffCaption": "whistling at 2.291-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3867.wav", "onoffCaption": "burping belching at 1.62-4.381, 6.604-9.365", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3882.wav", "onoffCaption": "woman laughing at 0.645-3.699 and sneeze at 6.075-7.167, 7.708-8.8", "frequencyCaption": "woman laughing one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3966.wav", "onoffCaption": "thump thud at 0.699-3.746, 6.205-9.122", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3999.wav", "onoffCaption": "tapping clicking clanking at 2.904-6.344, 7.265-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_17.wav", "onoffCaption": "cow mooing at 0.467-5.447, 6.568-8.92", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_42.wav", "onoffCaption": "sheep goat bleating at 3.727-5.727", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_130.wav", "onoffCaption": "thump thud at 0.503-3.274, 5.351-8.122 and cow mooing at 2.094-6.523", "frequencyCaption": "thump thud two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_165.wav", "onoffCaption": "explosion at 0.033-3.033, 4.03-6.902", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_180.wav", "onoffCaption": "door knocking at 
1.887-4.199, 5.442-7.754", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_316.wav", "onoffCaption": "cow mooing at 0.186-4.615 and gunshot at 6.965-8.965", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_498.wav", "onoffCaption": "woman laughing at 0.389-3.184 and spraying at 0.479-1.736 and sheep goat bleating at 6.814-8.814", "frequencyCaption": "woman laughing one times and spraying one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_583.wav", "onoffCaption": "cat meowing at 2.992-4.609, 5.325-6.942, 8.008-9.625", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_740.wav", "onoffCaption": "woman laughing at 0.233-2.933, 4.001-6.701, 7.57-10.0", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_837.wav", "onoffCaption": "tapping clicking clanking at 1.154-4.594", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_862.wav", "onoffCaption": "cow mooing at 0.176-3.158", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_887.wav", "onoffCaption": "gunshot at 1.975-3.975 and explosion at 6.851-8.853", "frequencyCaption": "gunshot one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_979.wav", "onoffCaption": "cow mooing at 0.418-5.398, 6.857-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1003.wav", "onoffCaption": "door knocking at 2.873-7.575", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1056.wav", "onoffCaption": "spraying at 0.772-3.791, 4.499-5.126, 6.351-7.432 and door knocking at 2.666-6.816", "frequencyCaption": "spraying three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_1118.wav", "onoffCaption": "sneeze at 
1.98-4.219, 5.391-8.105", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1270.wav", "onoffCaption": "explosion at 0.304-3.057, 5.349-7.61", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1295.wav", "onoffCaption": "tapping clicking clanking at 0.66-4.1, 5.72-8.425", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1400.wav", "onoffCaption": "cat meowing at 0.025-1.046, 1.59-4.35", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1455.wav", "onoffCaption": "tapping clicking clanking at 0.635-4.075, 5.126-8.566 and car horn honking at 1.938-6.338", "frequencyCaption": "tapping clicking clanking two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1626.wav", "onoffCaption": "tapping clicking clanking at 2.667-6.107, 7.915-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1904.wav", "onoffCaption": "gunshot at 0.344-2.344, 3.876-5.876", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1951.wav", "onoffCaption": "thump thud at 0.001-2.501, 3.395-5.623", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3034.wav", "onoffCaption": "thump thud at 2.608-5.108, 6.01-8.51", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3061.wav", "onoffCaption": "cow mooing at 1.872-4.841", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3084.wav", "onoffCaption": "whistling at 0.882-3.757, 4.534-6.763 and cat meowing at 1.107-2.651, 3.757-5.023", "frequencyCaption": "whistling two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3212.wav", "onoffCaption": "explosion at 0.067-2.131", "frequencyCaption": "explosion one times"} +{"filepath": 
"data/multi_event_train/syn_3309.wav", "onoffCaption": "train horn at 0.142-4.142 and duck quacking at 1.183-3.183, 4.552-6.552 and door knocking at 2.065-4.377, 5.166-7.478", "frequencyCaption": "train horn one times and duck quacking two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_3437.wav", "onoffCaption": "train horn at 3.389-7.457", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_3462.wav", "onoffCaption": "cat meowing at 1.262-4.503, 6.9-10.0 and spraying at 1.386-3.846 and duck quacking at 2.352-4.352", "frequencyCaption": "cat meowing two times and spraying one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3487.wav", "onoffCaption": "cow mooing at 0.381-3.391, 4.246-7.074 and car horn honking at 2.377-6.889", "frequencyCaption": "cow mooing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3579.wav", "onoffCaption": "door knocking at 0.011-4.544, 5.239-9.772", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3644.wav", "onoffCaption": "dog barking at 1.946-3.946, 4.723-6.723 and sneeze at 4.503-6.906, 7.706-10.0", "frequencyCaption": "dog barking two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3898.wav", "onoffCaption": "explosion at 0.238-3.791", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3933.wav", "onoffCaption": "door slamming at 0.57-3.449, 4.57-6.085, 6.734-7.572", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3983.wav", "onoffCaption": "sneeze at 1.528-3.652, 4.23-5.775, 6.779-9.439", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_66.wav", "onoffCaption": "sheep goat bleating at 3.755-5.755, 6.54-8.54", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_83.wav", "onoffCaption": 
"burping belching at 1.755-4.957, 6.169-8.93", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_114.wav", "onoffCaption": "thump thud at 0.273-3.32, 4.785-7.832 and whistling at 2.289-7.464", "frequencyCaption": "thump thud two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_299.wav", "onoffCaption": "tapping clicking clanking at 2.854-6.294", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_367.wav", "onoffCaption": "burping belching at 2.702-5.992", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_382.wav", "onoffCaption": "tapping clicking clanking at 3.22-6.66, 7.835-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_428.wav", "onoffCaption": "door knocking at 3.238-5.426, 6.557-8.745", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_459.wav", "onoffCaption": "door slamming at 0.017-1.27 and train horn at 0.31-4.31, 5.293-7.405 and duck quacking at 2.271-4.271, 5.79-7.79", "frequencyCaption": "door slamming one times and train horn two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_533.wav", "onoffCaption": "train horn at 2.06-4.527, 5.17-7.637", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_542.wav", "onoffCaption": "door knocking at 2.58-4.932, 5.435-8.165", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_731.wav", "onoffCaption": "cow mooing at 1.42-4.718", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_781.wav", "onoffCaption": "cat meowing at 2.711-3.924, 4.973-6.186, 7.019-8.232", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_846.wav", "onoffCaption": "cow mooing at 2.238-6.667, 7.302-10.0", 
"frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1072.wav", "onoffCaption": "train horn at 1.178-7.238", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1097.wav", "onoffCaption": "whistling at 0.339-4.823, 6.015-8.024", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1169.wav", "onoffCaption": "sneeze at 0.092-1.186, 1.806-2.9, 3.554-4.648 and cat meowing at 6.817-8.765", "frequencyCaption": "sneeze three times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1201.wav", "onoffCaption": "train horn at 2.979-5.116, 5.771-7.908", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1254.wav", "onoffCaption": "sheep goat bleating at 0.196-2.196, 3.225-5.225 and cow mooing at 7.435-10.0", "frequencyCaption": "sheep goat bleating two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1424.wav", "onoffCaption": "gunshot at 0.785-2.785, 3.691-5.691, 6.961-8.961", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_1602.wav", "onoffCaption": "explosion at 2.752-4.926, 6.126-8.3", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1657.wav", "onoffCaption": "burping belching at 1.032-4.211 and spraying at 7.591-9.077", "frequencyCaption": "burping belching one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1719.wav", "onoffCaption": "car horn honking at 2.991-5.917, 7.165-9.757", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1920.wav", "onoffCaption": "cat meowing at 2.121-5.151, 6.69-8.226", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3010.wav", "onoffCaption": "car horn honking at 0.242-3.896, 5.279-8.933", "frequencyCaption": "car horn honking two times"} +{"filepath": 
"data/multi_event_train/syn_3236.wav", "onoffCaption": "burping belching at 1.875-5.875", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3263.wav", "onoffCaption": "dog barking at 3.569-5.569, 6.486-8.486 and thump thud at 4.01-7.928", "frequencyCaption": "dog barking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_3286.wav", "onoffCaption": "gunshot at 3.647-5.887, 7.673-9.913", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3378.wav", "onoffCaption": "door knocking at 0.404-3.172, 5.052-7.899 and tapping clicking clanking at 3.784-7.224 and car horn honking at 6.433-8.433", "frequencyCaption": "door knocking two times and tapping clicking clanking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3446.wav", "onoffCaption": "sheep goat bleating at 0.347-3.987, 5.155-8.795", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_3635.wav", "onoffCaption": "cat meowing at 0.466-2.01 and duck quacking at 5.427-7.427", "frequencyCaption": "cat meowing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3660.wav", "onoffCaption": "tapping clicking clanking at 2.607-6.047, 6.693-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3685.wav", "onoffCaption": "gunshot at 3.213-5.719", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_3828.wav", "onoffCaption": "dog barking at 3.288-5.288 and burping belching at 3.369-5.572, 7.868-10.0", "frequencyCaption": "dog barking one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_3859.wav", "onoffCaption": "spraying at 0.065-1.065, 2.246-3.246, 5.682-6.682", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_3942.wav", "onoffCaption": "door knocking at 0.673-2.922, 
4.302-6.551 and cow mooing at 3.207-6.189", "frequencyCaption": "door knocking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_29.wav", "onoffCaption": "tapping clicking clanking at 2.876-6.316, 7.077-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_33.wav", "onoffCaption": "sheep goat bleating at 0.232-2.232, 3.068-5.068, 5.691-7.691", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_141.wav", "onoffCaption": "dog barking at 2.679-4.679, 5.988-7.988", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_229.wav", "onoffCaption": "cow mooing at 0.504-3.802 and sheep goat bleating at 6.873-10.0", "frequencyCaption": "cow mooing one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_332.wav", "onoffCaption": "explosion at 2.929-5.658, 6.447-9.447", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_416.wav", "onoffCaption": "cat meowing at 1.978-3.873, 4.787-6.682", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_517.wav", "onoffCaption": "woman laughing at 0.043-7.055 and sheep goat bleating at 1.034-3.034, 4.335-6.335", "frequencyCaption": "woman laughing one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_665.wav", "onoffCaption": "thump thud at 0.333-3.104, 4.26-6.811", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_764.wav", "onoffCaption": "explosion at 2.603-5.332", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_809.wav", "onoffCaption": "tapping clicking clanking at 0.597-4.037, 5.572-8.29", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_813.wav", "onoffCaption": "sneeze at 0.338-1.664, 3.859-6.726, 7.402-8.902", 
"frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_908.wav", "onoffCaption": "duck quacking at 2.231-4.231, 5.703-7.703", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_912.wav", "onoffCaption": "whistling at 0.37-5.87", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1027.wav", "onoffCaption": "burping belching at 0.405-2.928 and spraying at 7.113-8.808", "frequencyCaption": "burping belching one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1126.wav", "onoffCaption": "cat meowing at 2.066-3.798, 4.339-6.071, 6.707-8.439", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1471.wav", "onoffCaption": "whistling at 2.484-5.459, 7.427-9.449", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1494.wav", "onoffCaption": "whistling at 0.825-3.7, 4.527-7.523", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1570.wav", "onoffCaption": "cow mooing at 0.068-3.078 and dog barking at 5.42-7.858", "frequencyCaption": "cow mooing one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1595.wav", "onoffCaption": "door slamming at 1.444-4.444, 6.63-9.63", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1874.wav", "onoffCaption": "gunshot at 0.023-2.023", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_1891.wav", "onoffCaption": "burping belching at 0.954-3.157 and tapping clicking clanking at 1.277-4.717, 6.507-9.947", "frequencyCaption": "burping belching one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1975.wav", "onoffCaption": "explosion at 1.831-4.831, 5.888-8.888 and spraying at 2.557-3.307, 4.647-5.514 and tapping clicking clanking at 4.125-7.565", "frequencyCaption": "explosion two 
times and spraying two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_1990.wav", "onoffCaption": "door slamming at 0.013-1.186, 1.716-2.889, 3.808-4.981", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3045.wav", "onoffCaption": "explosion at 2.17-4.49 and train horn at 6.537-10.0", "frequencyCaption": "explosion one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_3144.wav", "onoffCaption": "gunshot at 0.205-2.205, 4.257-6.257", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3409.wav", "onoffCaption": "door knocking at 0.161-2.382, 3.18-5.984", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3413.wav", "onoffCaption": "explosion at 0.773-3.773, 4.786-7.786 and duck quacking at 1.458-3.458, 5.083-7.083", "frequencyCaption": "explosion two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3508.wav", "onoffCaption": "cow mooing at 2.491-5.789, 6.803-8.835", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_3512.wav", "onoffCaption": "explosion at 2.04-7.04 and thump thud at 2.667-7.042, 7.744-10.0", "frequencyCaption": "explosion one times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_3816.wav", "onoffCaption": "train horn at 3.343-6.143, 7.821-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3917.wav", "onoffCaption": "sneeze at 2.112-4.573", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_99.wav", "onoffCaption": "cow mooing at 0.562-3.86, 4.984-8.282", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_233.wav", "onoffCaption": "dog barking at 0.177-2.177 and woman laughing at 0.336-3.036, 4.006-6.644, 7.419-9.665", "frequencyCaption": "dog barking one times and woman laughing 
three times"} +{"filepath": "data/multi_event_train/syn_266.wav", "onoffCaption": "cow mooing at 1.671-4.681, 6.283-9.293 and woman laughing at 2.644-5.236, 5.829-7.91", "frequencyCaption": "cow mooing two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_283.wav", "onoffCaption": "whistling at 0.42-3.295, 3.993-6.222, 7.259-9.502", "frequencyCaption": "whistling three times"} +{"filepath": "data/multi_event_train/syn_328.wav", "onoffCaption": "whistling at 1.879-7.054 and tapping clicking clanking at 3.661-7.101, 7.666-9.803", "frequencyCaption": "whistling one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_398.wav", "onoffCaption": "sneeze at 2.719-4.832, 6.205-8.318", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_443.wav", "onoffCaption": "explosion at 0.593-5.593, 7.99-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_558.wav", "onoffCaption": "sheep goat bleating at 2.197-4.197, 4.74-6.74, 7.47-10.0 and door knocking at 2.662-6.215", "frequencyCaption": "sheep goat bleating three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_630.wav", "onoffCaption": "thump thud at 2.609-5.109, 6.484-8.984", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_662.wav", "onoffCaption": "gunshot at 1.884-3.884, 6.184-8.184", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_680.wav", "onoffCaption": "thump thud at 1.832-5.499, 6.258-9.925", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_779.wav", "onoffCaption": "thump thud at 2.493-4.832, 7.066-9.405", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_947.wav", "onoffCaption": "train horn at 0.341-2.821, 3.65-6.05", "frequencyCaption": "train horn two times"} +{"filepath": 
"data/multi_event_train/syn_1068.wav", "onoffCaption": "car horn honking at 0.181-4.43, 6.693-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1173.wav", "onoffCaption": "explosion at 0.221-2.314, 3.781-5.899", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_1196.wav", "onoffCaption": "sheep goat bleating at 1.568-3.568, 4.446-6.446", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1300.wav", "onoffCaption": "sneeze at 0.212-1.458, 3.742-4.988 and dog barking at 1.234-3.234", "frequencyCaption": "sneeze two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1355.wav", "onoffCaption": "cat meowing at 2.938-4.886, 6.198-8.146", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_1525.wav", "onoffCaption": "train horn at 0.469-3.109, 4.904-7.77", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_1618.wav", "onoffCaption": "cat meowing at 3.514-4.535, 6.027-7.24, 8.265-9.575", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_1703.wav", "onoffCaption": "burping belching at 2.136-7.737 and sheep goat bleating at 4.673-6.673, 7.794-9.794", "frequencyCaption": "burping belching one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1756.wav", "onoffCaption": "car horn honking at 1.746-4.211, 5.004-7.53", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1821.wav", "onoffCaption": "door knocking at 0.277-4.979, 6.493-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3111.wav", "onoffCaption": "thump thud at 1.355-5.805 and door slamming at 1.839-3.839, 4.949-6.862, 8.179-9.328", "frequencyCaption": "thump thud one times and door slamming three times"} +{"filepath": 
"data/multi_event_train/syn_3279.wav", "onoffCaption": "dog barking at 0.184-3.105 and door knocking at 0.409-2.63 and cat meowing at 5.215-6.236", "frequencyCaption": "dog barking one times and door knocking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3337.wav", "onoffCaption": "dog barking at 0.337-2.337", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_3362.wav", "onoffCaption": "train horn at 0.443-2.58, 3.154-5.291, 7.352-9.489", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_train/syn_3387.wav", "onoffCaption": "sheep goat bleating at 1.837-3.837, 4.859-6.859, 7.452-9.452 and car horn honking at 3.359-6.578", "frequencyCaption": "sheep goat bleating three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3547.wav", "onoffCaption": "gunshot at 3.96-6.053, 7.128-9.128", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_3734.wav", "onoffCaption": "train horn at 2.954-5.434, 7.906-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_3761.wav", "onoffCaption": "tapping clicking clanking at 1.41-4.85, 6.821-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3784.wav", "onoffCaption": "sneeze at 1.346-3.47", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_3843.wav", "onoffCaption": "woman laughing at 0.101-3.173 and spraying at 2.896-3.747", "frequencyCaption": "woman laughing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3958.wav", "onoffCaption": "burping belching at 0.069-7.237", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_109.wav", "onoffCaption": "spraying at 3.713-4.297", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_113.wav", "onoffCaption": "thump 
thud at 2.654-6.572", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_234.wav", "onoffCaption": "explosion at 3.255-6.255", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_261.wav", "onoffCaption": "car horn honking at 0.263-2.776, 3.88-6.374, 7.098-9.563 and explosion at 0.45-2.452", "frequencyCaption": "car horn honking three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_284.wav", "onoffCaption": "woman laughing at 2.194-4.894, 6.241-8.941 and explosion at 2.707-7.707 and tapping clicking clanking at 3.378-6.818", "frequencyCaption": "woman laughing two times and explosion one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_411.wav", "onoffCaption": "door slamming at 2.769-4.002, 5.099-6.332", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_444.wav", "onoffCaption": "duck quacking at 0.65-2.65, 3.391-5.391, 6.599-8.599", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_545.wav", "onoffCaption": "whistling at 1.186-4.161, 5.356-8.315", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_637.wav", "onoffCaption": "door slamming at 2.817-4.817, 6.014-7.389", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_687.wav", "onoffCaption": "duck quacking at 0.504-2.504, 4.873-6.873 and sheep goat bleating at 5.253-7.253", "frequencyCaption": "duck quacking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_915.wav", "onoffCaption": "cat meowing at 2.392-5.296, 6.243-9.147", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_940.wav", "onoffCaption": "cat meowing at 0.108-1.418, 3.144-4.715 and explosion at 2.253-4.427, 6.188-8.362", "frequencyCaption": "cat meowing two times and explosion two times"} 
+{"filepath": "data/multi_event_train/syn_1075.wav", "onoffCaption": "door slamming at 3.377-4.668, 7.131-8.27", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_1121.wav", "onoffCaption": "tapping clicking clanking at 0.164-3.604, 4.325-6.915", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1174.wav", "onoffCaption": "burping belching at 2.91-9.59", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1191.wav", "onoffCaption": "dog barking at 0.073-2.073, 2.957-4.957", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1249.wav", "onoffCaption": "thump thud at 0.104-4.479, 5.297-8.068", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_1307.wav", "onoffCaption": "whistling at 3.185-6.16, 7.3-9.569", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_1352.wav", "onoffCaption": "sheep goat bleating at 0.42-2.42, 3.209-5.209, 5.845-7.845 and gunshot at 3.536-5.536", "frequencyCaption": "sheep goat bleating three times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1423.wav", "onoffCaption": "door slamming at 2.046-4.409 and cow mooing at 7.179-10.0", "frequencyCaption": "door slamming one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1439.wav", "onoffCaption": "thump thud at 1.856-6.306", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1489.wav", "onoffCaption": "door knocking at 0.208-3.976, 5.205-8.973", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1522.wav", "onoffCaption": "burping belching at 0.811-3.811, 5.905-8.428 and cat meowing at 0.94-4.285", "frequencyCaption": "burping belching two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1577.wav", "onoffCaption": "dog 
barking at 2.246-4.246", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_1592.wav", "onoffCaption": "tapping clicking clanking at 1.029-4.469 and woman laughing at 6.167-9.448", "frequencyCaption": "tapping clicking clanking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_1704.wav", "onoffCaption": "door slamming at 3.323-4.576, 5.335-6.588 and cow mooing at 3.682-6.651", "frequencyCaption": "door slamming two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1751.wav", "onoffCaption": "spraying at 0.112-1.196, 1.83-2.914 and duck quacking at 1.325-3.325 and train horn at 1.764-4.438", "frequencyCaption": "spraying two times and duck quacking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_1826.wav", "onoffCaption": "explosion at 2.068-5.068 and cow mooing at 7.619-10.0", "frequencyCaption": "explosion one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_1873.wav", "onoffCaption": "spraying at 0.0-0.867, 1.694-2.561", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1896.wav", "onoffCaption": "cat meowing at 0.271-1.826 and duck quacking at 5.576-7.576", "frequencyCaption": "cat meowing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1968.wav", "onoffCaption": "duck quacking at 0.499-2.499, 4.923-6.923", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_3058.wav", "onoffCaption": "woman laughing at 0.114-6.848, 7.984-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3116.wav", "onoffCaption": "cat meowing at 0.504-1.516, 3.439-4.451, 5.303-6.315 and train horn at 0.902-4.102 and duck quacking at 2.916-4.916", "frequencyCaption": "cat meowing three times and train horn one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3143.wav", 
"onoffCaption": "duck quacking at 1.039-3.039, 4.428-6.428 and sneeze at 3.454-5.767, 7.844-10.0", "frequencyCaption": "duck quacking two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_3330.wav", "onoffCaption": "thump thud at 1.915-6.365", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3365.wav", "onoffCaption": "burping belching at 2.845-6.351, 7.13-9.237 and cow mooing at 6.748-9.717", "frequencyCaption": "burping belching two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3380.wav", "onoffCaption": "tapping clicking clanking at 0.218-3.658, 4.449-7.889 and burping belching at 3.457-5.564, 7.966-10.0", "frequencyCaption": "tapping clicking clanking two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_3515.wav", "onoffCaption": "car horn honking at 0.091-2.091, 3.107-5.559", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_3540.wav", "onoffCaption": "burping belching at 2.061-5.561", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_3628.wav", "onoffCaption": "door slamming at 0.036-1.551, 2.263-3.402, 3.906-6.623", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3698.wav", "onoffCaption": "car horn honking at 0.33-3.243, 4.313-6.313 and door slamming at 8.776-9.276", "frequencyCaption": "car horn honking two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3733.wav", "onoffCaption": "whistling at 1.749-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3766.wav", "onoffCaption": "door slamming at 3.189-4.208, 5.391-7.391, 8.414-9.929", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_3783.wav", "onoffCaption": "woman laughing at 0.348-2.767, 4.374-6.793", "frequencyCaption": "woman laughing two times"} 
+{"filepath": "data/multi_event_train/syn_3811.wav", "onoffCaption": "cow mooing at 0.547-3.529 and dog barking at 6.774-8.774", "frequencyCaption": "cow mooing one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3844.wav", "onoffCaption": "duck quacking at 0.575-2.575, 3.754-5.754 and spraying at 2.908-4.083, 5.036-6.211, 7.264-8.439", "frequencyCaption": "duck quacking two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_3945.wav", "onoffCaption": "burping belching at 2.053-8.14 and door knocking at 3.198-6.254", "frequencyCaption": "burping belching one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_34.wav", "onoffCaption": "train horn at 1.481-5.549, 7.631-9.823", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_61.wav", "onoffCaption": "duck quacking at 0.2-2.2", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_84.wav", "onoffCaption": "car horn honking at 0.489-4.143, 5.186-8.038", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_137.wav", "onoffCaption": "door slamming at 0.068-2.849", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_146.wav", "onoffCaption": "tapping clicking clanking at 1.273-4.713, 6.496-9.936 and spraying at 1.889-3.146, 3.685-4.536, 5.216-5.8", "frequencyCaption": "tapping clicking clanking two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_335.wav", "onoffCaption": "whistling at 0.294-8.679", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_360.wav", "onoffCaption": "gunshot at 2.465-4.971, 6.528-9.034", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_385.wav", "onoffCaption": "cow mooing at 3.411-6.421", "frequencyCaption": "cow mooing one times"} +{"filepath": 
"data/multi_event_train/syn_510.wav", "onoffCaption": "whistling at 0.347-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_561.wav", "onoffCaption": "door slamming at 0.024-0.964, 2.21-3.975 and thump thud at 6.63-9.13", "frequencyCaption": "door slamming two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_678.wav", "onoffCaption": "burping belching at 0.375-3.554, 4.177-6.336", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_736.wav", "onoffCaption": "cat meowing at 1.867-2.877, 3.724-5.295", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_763.wav", "onoffCaption": "sheep goat bleating at 2.894-4.894 and cat meowing at 3.637-4.649", "frequencyCaption": "sheep goat bleating one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_786.wav", "onoffCaption": "door slamming at 3.21-4.734", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_814.wav", "onoffCaption": "spraying at 3.628-4.378, 5.721-6.471", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_841.wav", "onoffCaption": "train horn at 3.28-7.04", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1020.wav", "onoffCaption": "sneeze at 2.574-6.63, 7.459-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1090.wav", "onoffCaption": "door knocking at 3.538-5.602, 7.279-9.343", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1206.wav", "onoffCaption": "sheep goat bleating at 1.226-3.226", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1253.wav", "onoffCaption": "dog barking at 2.277-4.277, 6.028-8.028", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1348.wav", 
"onoffCaption": "car horn honking at 3.545-7.945", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_1476.wav", "onoffCaption": "cow mooing at 1.587-4.569, 5.899-8.881", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1493.wav", "onoffCaption": "tapping clicking clanking at 0.435-3.875, 5.267-7.646 and spraying at 1.453-2.08", "frequencyCaption": "tapping clicking clanking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1538.wav", "onoffCaption": "door knocking at 0.508-2.91, 5.169-7.571", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1588.wav", "onoffCaption": "cow mooing at 0.188-3.17, 5.076-8.058", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_1605.wav", "onoffCaption": "spraying at 3.885-5.58, 7.472-8.556", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_1650.wav", "onoffCaption": "door knocking at 0.332-3.388, 4.519-7.366", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1869.wav", "onoffCaption": "door knocking at 0.021-2.148, 4.194-6.321, 7.719-9.846", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_1927.wav", "onoffCaption": "duck quacking at 0.096-2.096, 3.435-5.435", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1972.wav", "onoffCaption": "train horn at 0.413-4.853", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1997.wav", "onoffCaption": "spraying at 1.763-3.02 and sheep goat bleating at 4.59-6.59", "frequencyCaption": "spraying one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3017.wav", "onoffCaption": "duck quacking at 0.399-2.399 and explosion at 0.665-5.665 and cat meowing at 7.386-8.386", "frequencyCaption": "duck quacking 
one times and explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3042.wav", "onoffCaption": "train horn at 0.39-8.59 and spraying at 2.788-3.692, 4.267-4.836, 5.639-6.147", "frequencyCaption": "train horn one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_3159.wav", "onoffCaption": "tapping clicking clanking at 0.107-3.547", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3231.wav", "onoffCaption": "burping belching at 0.186-5.787, 7.524-9.555 and door slamming at 2.231-3.996", "frequencyCaption": "burping belching two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3264.wav", "onoffCaption": "whistling at 2.172-5.047, 6.058-8.933", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_3281.wav", "onoffCaption": "burping belching at 0.199-3.639, 5.384-7.667 and cat meowing at 4.673-6.568, 7.795-9.69", "frequencyCaption": "burping belching two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3414.wav", "onoffCaption": "thump thud at 3.291-5.791, 7.021-9.521", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3441.wav", "onoffCaption": "spraying at 0.092-2.552, 4.511-5.161", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3632.wav", "onoffCaption": "thump thud at 1.952-4.452, 6.848-9.348", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_3667.wav", "onoffCaption": "tapping clicking clanking at 3.039-6.479, 7.37-9.609", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3682.wav", "onoffCaption": "burping belching at 1.994-4.755, 5.399-8.16", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_3729.wav", "onoffCaption": "thump thud at 2.368-4.83", "frequencyCaption": 
"thump thud one times"} +{"filepath": "data/multi_event_train/syn_3799.wav", "onoffCaption": "woman laughing at 1.673-4.773, 5.556-8.656 and spraying at 2.654-3.738, 5.882-8.025", "frequencyCaption": "woman laughing two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_3910.wav", "onoffCaption": "woman laughing at 0.775-3.194, 5.597-8.563", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_3961.wav", "onoffCaption": "sneeze at 0.001-1.255, 1.77-3.024 and explosion at 6.263-10.0", "frequencyCaption": "sneeze two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_10.wav", "onoffCaption": "sneeze at 0.063-1.976, 2.801-4.714 and explosion at 7.673-10.0", "frequencyCaption": "sneeze two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_45.wav", "onoffCaption": "cow mooing at 0.355-3.337, 4.617-7.599", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_162.wav", "onoffCaption": "woman laughing at 1.442-4.496", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_187.wav", "onoffCaption": "burping belching at 0.292-7.269 and spraying at 6.756-7.34", "frequencyCaption": "burping belching one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_311.wav", "onoffCaption": "dog barking at 2.879-4.879, 5.486-7.486", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_344.wav", "onoffCaption": "gunshot at 3.671-5.671, 7.994-9.994", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_534.wav", "onoffCaption": "cow mooing at 0.076-5.056 and door knocking at 0.179-2.559 and woman laughing at 5.2-7.281", "frequencyCaption": "cow mooing one times and door knocking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_584.wav", "onoffCaption": "gunshot at 0.179-2.179 and burping belching at 
0.417-2.647 and door slamming at 5.169-7.169", "frequencyCaption": "gunshot one times and burping belching one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_609.wav", "onoffCaption": "train horn at 0.029-2.684, 3.212-5.405 and door slamming at 2.16-3.138", "frequencyCaption": "train horn two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_712.wav", "onoffCaption": "burping belching at 0.041-2.076, 2.801-4.836, 5.602-7.637", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_747.wav", "onoffCaption": "duck quacking at 0.484-2.484, 3.334-5.334, 5.954-7.954", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_830.wav", "onoffCaption": "woman laughing at 0.458-2.574 and spraying at 0.798-1.32 and burping belching at 5.364-8.364", "frequencyCaption": "woman laughing one times and spraying one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_865.wav", "onoffCaption": "duck quacking at 2.366-4.366, 5.105-7.105 and cow mooing at 3.278-6.26", "frequencyCaption": "duck quacking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_880.wav", "onoffCaption": "car horn honking at 1.336-4.249 and spraying at 2.806-4.063", "frequencyCaption": "car horn honking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_1004.wav", "onoffCaption": "whistling at 3.77-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1051.wav", "onoffCaption": "car horn honking at 2.969-5.316, 6.834-8.89", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_1222.wav", "onoffCaption": "burping belching at 3.161-7.497", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_1277.wav", "onoffCaption": "explosion at 0.578-5.578", "frequencyCaption": "explosion one times"} 
+{"filepath": "data/multi_event_train/syn_1292.wav", "onoffCaption": "dog barking at 0.479-2.479, 4.616-6.616", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1339.wav", "onoffCaption": "train horn at 1.157-7.217", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_1389.wav", "onoffCaption": "cat meowing at 0.527-1.674, 2.783-3.93, 4.549-5.696 and gunshot at 2.167-4.167, 6.628-8.628", "frequencyCaption": "cat meowing three times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1407.wav", "onoffCaption": "gunshot at 2.758-4.758, 6.25-8.25", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_1452.wav", "onoffCaption": "thump thud at 0.107-4.557, 6.901-10.0 and gunshot at 0.577-2.596, 4.566-6.585", "frequencyCaption": "thump thud two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_1549.wav", "onoffCaption": "burping belching at 0.348-3.348 and door knocking at 2.068-4.445, 5.651-8.028", "frequencyCaption": "burping belching one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_1621.wav", "onoffCaption": "door knocking at 1.772-6.211", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_1674.wav", "onoffCaption": "thump thud at 3.223-5.685", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_1691.wav", "onoffCaption": "woman laughing at 0.558-2.752 and duck quacking at 1.9-3.9, 5.909-7.909", "frequencyCaption": "woman laughing one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1818.wav", "onoffCaption": "woman laughing at 1.357-4.452, 5.885-8.98", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1903.wav", "onoffCaption": "spraying at 2.334-2.842, 3.905-4.413, 5.561-6.069", "frequencyCaption": "spraying three times"} +{"filepath": 
"data/multi_event_train/syn_1956.wav", "onoffCaption": "sheep goat bleating at 2.9-4.9 and dog barking at 2.959-4.959", "frequencyCaption": "sheep goat bleating one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3033.wav", "onoffCaption": "thump thud at 1.152-5.527", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_3066.wav", "onoffCaption": "car horn honking at 0.514-3.027 and sheep goat bleating at 0.548-2.548", "frequencyCaption": "car horn honking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3083.wav", "onoffCaption": "whistling at 3.592-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3128.wav", "onoffCaption": "burping belching at 0.775-4.335 and tapping clicking clanking at 2.973-6.413", "frequencyCaption": "burping belching one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_3198.wav", "onoffCaption": "cow mooing at 0.412-5.392, 6.283-10.0 and dog barking at 0.442-2.442, 3.318-5.318, 6.398-8.398", "frequencyCaption": "cow mooing two times and dog barking three times"} +{"filepath": "data/multi_event_train/syn_3215.wav", "onoffCaption": "dog barking at 0.091-2.091, 3.213-5.213, 6.426-8.426", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_3240.wav", "onoffCaption": "tapping clicking clanking at 1.525-4.965, 5.786-9.226", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3430.wav", "onoffCaption": "whistling at 1.553-9.491", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_3465.wav", "onoffCaption": "duck quacking at 1.309-3.309", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_3480.wav", "onoffCaption": "spraying at 0.255-2.383, 4.595-6.723", "frequencyCaption": "spraying two times"} +{"filepath": 
"data/multi_event_train/syn_3616.wav", "onoffCaption": "train horn at 0.002-2.482, 3.51-5.99 and door slamming at 0.294-1.669", "frequencyCaption": "train horn two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_3643.wav", "onoffCaption": "cow mooing at 0.5-3.469", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_3758.wav", "onoffCaption": "door knocking at 0.079-2.847, 4.619-6.84 and dog barking at 5.333-7.333", "frequencyCaption": "door knocking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_3934.wav", "onoffCaption": "door knocking at 0.916-4.684", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_3984.wav", "onoffCaption": "train horn at 1.384-4.744 and thump thud at 2.294-4.794, 5.817-7.852 and car horn honking at 3.846-6.759", "frequencyCaption": "train horn one times and thump thud two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_178.wav", "onoffCaption": "cow mooing at 3.381-7.81", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_210.wav", "onoffCaption": "whistling at 0.071-2.08, 2.921-4.93 and explosion at 0.913-3.913, 5.782-8.782", "frequencyCaption": "whistling two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_245.wav", "onoffCaption": "explosion at 1.121-4.177, 6.294-9.35", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_435.wav", "onoffCaption": "woman laughing at 0.561-3.199, 4.189-6.827 and gunshot at 3.425-5.425", "frequencyCaption": "woman laughing two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_460.wav", "onoffCaption": "duck quacking at 3.484-5.484, 6.175-8.175", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_485.wav", "onoffCaption": "tapping clicking clanking at 0.224-3.664 and door knocking at 
6.512-9.28", "frequencyCaption": "tapping clicking clanking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_613.wav", "onoffCaption": "whistling at 0.43-8.085 and dog barking at 2.187-4.625, 5.225-7.225 and cow mooing at 3.707-6.689", "frequencyCaption": "whistling one times and dog barking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_646.wav", "onoffCaption": "train horn at 3.064-7.922", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_708.wav", "onoffCaption": "gunshot at 2.261-4.767, 7.175-9.175", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_931.wav", "onoffCaption": "thump thud at 0.167-2.667", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_964.wav", "onoffCaption": "sheep goat bleating at 1.528-3.528, 4.975-6.975", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_981.wav", "onoffCaption": "car horn honking at 0.323-4.645 and gunshot at 7.394-9.394", "frequencyCaption": "car horn honking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_1105.wav", "onoffCaption": "sheep goat bleating at 2.371-4.371, 6.611-8.611", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_1150.wav", "onoffCaption": "duck quacking at 2.106-4.106, 5.268-7.268", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_1238.wav", "onoffCaption": "sheep goat bleating at 3.412-6.492", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_1288.wav", "onoffCaption": "cat meowing at 0.746-2.363", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_1323.wav", "onoffCaption": "dog barking at 0.979-2.979, 4.467-6.467, 7.269-9.269", "frequencyCaption": "dog barking three times"} +{"filepath": 
"data/multi_event_train/syn_1376.wav", "onoffCaption": "explosion at 2.257-7.257", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_1393.wav", "onoffCaption": "tapping clicking clanking at 0.14-3.58, 5.201-7.253 and duck quacking at 2.426-4.426", "frequencyCaption": "tapping clicking clanking two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_1448.wav", "onoffCaption": "woman laughing at 0.367-2.851, 5.089-7.789", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_1506.wav", "onoffCaption": "dog barking at 0.726-2.726, 4.078-6.078", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_1553.wav", "onoffCaption": "tapping clicking clanking at 0.156-3.596, 4.737-7.641", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_1720.wav", "onoffCaption": "gunshot at 1.155-3.155, 5.398-7.528 and thump thud at 3.363-7.738", "frequencyCaption": "gunshot two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1775.wav", "onoffCaption": "door knocking at 2.969-5.816, 7.75-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_1790.wav", "onoffCaption": "door knocking at 0.011-2.391, 4.036-6.546, 7.09-9.7 and train horn at 1.029-5.469 and thump thud at 1.711-4.211", "frequencyCaption": "door knocking three times and train horn one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_1802.wav", "onoffCaption": "sneeze at 0.384-2.697, 4.475-6.788", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_1857.wav", "onoffCaption": "whistling at 1.733-4.608", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_1919.wav", "onoffCaption": "sheep goat bleating at 0.776-2.776, 3.517-5.517, 6.164-8.164 and car horn honking at 4.011-8.411", "frequencyCaption": 
"sheep goat bleating three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_3029.wav", "onoffCaption": "cow mooing at 3.488-6.47, 7.941-10.0 and burping belching at 3.869-6.869", "frequencyCaption": "cow mooing two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_3099.wav", "onoffCaption": "gunshot at 1.876-3.876, 4.38-6.38 and woman laughing at 2.584-4.778", "frequencyCaption": "gunshot two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3132.wav", "onoffCaption": "door knocking at 0.692-5.525, 6.427-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3167.wav", "onoffCaption": "woman laughing at 3.333-10.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_3182.wav", "onoffCaption": "tapping clicking clanking at 1.809-5.249, 6.999-9.867", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_3314.wav", "onoffCaption": "sheep goat bleating at 1.93-3.93", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_3341.wav", "onoffCaption": "spraying at 3.082-5.666, 7.209-9.793", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_3531.wav", "onoffCaption": "sneeze at 0.185-3.26, 3.84-6.316, 7.242-10.0", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3564.wav", "onoffCaption": "sheep goat bleating at 0.809-2.809, 4.118-6.65, 7.416-9.416", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_3581.wav", "onoffCaption": "explosion at 0.101-2.189", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_3659.wav", "onoffCaption": "sneeze at 0.638-2.751, 3.548-5.661, 6.812-8.925", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_3717.wav", 
"onoffCaption": "door knocking at 1.199-3.359, 4.572-7.531", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_3742.wav", "onoffCaption": "cat meowing at 0.275-1.286", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_3835.wav", "onoffCaption": "cat meowing at 2.698-3.707, 4.225-6.202", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_3860.wav", "onoffCaption": "duck quacking at 0.859-2.859, 3.637-5.637, 6.544-8.544", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_3885.wav", "onoffCaption": "cow mooing at 3.53-8.51", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2120.wav", "onoffCaption": "spraying at 0.057-2.449, 3.255-3.763", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2175.wav", "onoffCaption": "spraying at 2.071-3.071 and sheep goat bleating at 3.431-7.351", "frequencyCaption": "spraying one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2190.wav", "onoffCaption": "train horn at 1.771-5.305", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2248.wav", "onoffCaption": "explosion at 1.654-4.372, 5.029-7.747 and car horn honking at 2.113-4.626, 5.21-7.675 and burping belching at 5.214-7.54", "frequencyCaption": "explosion two times and car horn honking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2306.wav", "onoffCaption": "cow mooing at 0.725-3.694, 6.08-8.942", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2353.wav", "onoffCaption": "car horn honking at 1.862-3.862, 4.452-6.452, 7.24-9.24", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_2438.wav", "onoffCaption": "sheep goat bleating at 1.107-4.107 and duck quacking at 
3.052-5.052", "frequencyCaption": "sheep goat bleating one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2477.wav", "onoffCaption": "whistling at 2.127-5.002, 5.545-8.42", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2488.wav", "onoffCaption": "sheep goat bleating at 0.735-2.735, 3.386-5.386 and door slamming at 1.8-3.033, 3.723-4.956 and tapping clicking clanking at 2.198-5.638, 6.329-9.769", "frequencyCaption": "sheep goat bleating two times and door slamming two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2523.wav", "onoffCaption": "woman laughing at 2.767-4.967, 5.806-8.006", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2576.wav", "onoffCaption": "explosion at 0.277-2.868 and cat meowing at 6.026-10.0", "frequencyCaption": "explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2593.wav", "onoffCaption": "dog barking at 1.825-3.825, 4.517-6.517", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2705.wav", "onoffCaption": "woman laughing at 0.971-4.071 and explosion at 7.935-10.0", "frequencyCaption": "woman laughing one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2750.wav", "onoffCaption": "gunshot at 2.845-4.845, 5.516-7.516", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2827.wav", "onoffCaption": "burping belching at 0.845-3.171", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2872.wav", "onoffCaption": "door slamming at 3.272-6.233", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_2897.wav", "onoffCaption": "car horn honking at 1.909-5.75, 7.713-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2969.wav", "onoffCaption": "duck 
quacking at 0.892-2.892", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2996.wav", "onoffCaption": "dog barking at 2.902-4.902, 7.003-9.003", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4078.wav", "onoffCaption": "door slamming at 0.238-0.738", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4136.wav", "onoffCaption": "burping belching at 3.19-7.213 and duck quacking at 3.224-5.224, 5.953-7.953", "frequencyCaption": "burping belching one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4163.wav", "onoffCaption": "train horn at 1.452-4.332 and cow mooing at 6.448-9.746", "frequencyCaption": "train horn one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4186.wav", "onoffCaption": "car horn honking at 1.257-3.604 and dog barking at 6.705-10.0", "frequencyCaption": "car horn honking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4310.wav", "onoffCaption": "train horn at 2.161-6.342, 6.935-9.277", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4345.wav", "onoffCaption": "gunshot at 0.772-2.772", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4535.wav", "onoffCaption": "sheep goat bleating at 2.485-6.125", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4560.wav", "onoffCaption": "burping belching at 0.436-3.197 and gunshot at 1.84-4.346", "frequencyCaption": "burping belching one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4585.wav", "onoffCaption": "thump thud at 0.62-4.538, 6.81-9.31", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4608.wav", "onoffCaption": "whistling at 3.318-7.802", "frequencyCaption": "whistling one times"} +{"filepath": 
"data/multi_event_train/syn_4713.wav", "onoffCaption": "train horn at 0.267-3.427, 5.079-8.239", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4746.wav", "onoffCaption": "gunshot at 1.968-3.968, 4.801-6.801", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4831.wav", "onoffCaption": "tapping clicking clanking at 0.749-4.189", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4864.wav", "onoffCaption": "sheep goat bleating at 0.308-2.308, 4.509-6.509", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4881.wav", "onoffCaption": "sneeze at 0.109-2.512, 3.965-6.397 and whistling at 2.282-7.782", "frequencyCaption": "sneeze two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_6.wav", "onoffCaption": "thump thud at 0.14-3.807, 5.41-9.077 and spraying at 0.454-0.954, 1.799-2.65, 4.17-4.745", "frequencyCaption": "thump thud two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_2021.wav", "onoffCaption": "door knocking at 0.429-5.429, 6.778-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2074.wav", "onoffCaption": "whistling at 0.727-5.211, 6.879-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2091.wav", "onoffCaption": "spraying at 1.831-4.85, 6.142-7.837, 8.47-9.12", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2207.wav", "onoffCaption": "thump thud at 2.65-5.15", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2252.wav", "onoffCaption": "explosion at 0.012-3.565, 5.39-7.843", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2349.wav", "onoffCaption": "thump thud at 0.309-2.809, 3.923-6.423, 7.405-9.905", "frequencyCaption": "thump thud three times"} 
+{"filepath": "data/multi_event_train/syn_2422.wav", "onoffCaption": "car horn honking at 2.361-6.761, 7.274-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2453.wav", "onoffCaption": "thump thud at 3.225-5.687, 6.839-9.605", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2492.wav", "onoffCaption": "explosion at 0.455-5.455 and burping belching at 2.437-6.306, 7.467-9.59", "frequencyCaption": "explosion one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_2539.wav", "onoffCaption": "sneeze at 0.488-2.612, 3.718-6.686, 7.684-9.212", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2548.wav", "onoffCaption": "door knocking at 2.153-4.616, 5.711-8.174", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2589.wav", "onoffCaption": "gunshot at 3.892-5.892", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2604.wav", "onoffCaption": "thump thud at 0.045-3.092, 3.677-5.905 and train horn at 1.656-4.856, 5.734-8.934", "frequencyCaption": "thump thud two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2651.wav", "onoffCaption": "explosion at 2.423-5.151, 6.028-8.756", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2868.wav", "onoffCaption": "thump thud at 0.855-5.23, 6.386-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2926.wav", "onoffCaption": "dog barking at 0.219-3.14, 4.211-7.132", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2973.wav", "onoffCaption": "tapping clicking clanking at 2.703-6.143, 7.187-9.19", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4037.wav", "onoffCaption": "train horn at 1.92-10.0", "frequencyCaption": "train horn one 
times"} +{"filepath": "data/multi_event_train/syn_4062.wav", "onoffCaption": "cat meowing at 0.012-1.596, 2.868-3.877, 5.167-7.299 and dog barking at 6.274-8.274", "frequencyCaption": "cat meowing three times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4087.wav", "onoffCaption": "explosion at 2.553-6.106, 7.68-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4179.wav", "onoffCaption": "duck quacking at 2.786-4.786, 5.324-7.324", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4211.wav", "onoffCaption": "thump thud at 0.315-2.654, 4.121-6.645 and spraying at 5.969-6.75", "frequencyCaption": "thump thud two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4244.wav", "onoffCaption": "car horn honking at 0.049-3.636 and dog barking at 0.885-2.885 and spraying at 1.371-2.435", "frequencyCaption": "car horn honking one times and dog barking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4434.wav", "onoffCaption": "sneeze at 3.28-5.519, 7.824-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4461.wav", "onoffCaption": "cat meowing at 3.162-4.311, 5.935-6.946", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4484.wav", "onoffCaption": "door knocking at 3.211-5.674, 6.972-9.435", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4612.wav", "onoffCaption": "door knocking at 0.089-4.528, 5.195-9.634", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4647.wav", "onoffCaption": "sneeze at 0.569-2.156, 3.144-4.731, 5.782-7.369 and gunshot at 5.878-7.878", "frequencyCaption": "sneeze three times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4709.wav", "onoffCaption": "whistling at 0.206-6.583, 7.637-10.0 and door knocking at 2.165-4.735", 
"frequencyCaption": "whistling two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4930.wav", "onoffCaption": "explosion at 2.446-7.446", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4965.wav", "onoffCaption": "explosion at 3.189-6.245, 7.568-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4980.wav", "onoffCaption": "cat meowing at 2.685-3.712 and door slamming at 2.893-4.032, 6.014-7.153", "frequencyCaption": "cat meowing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2005.wav", "onoffCaption": "thump thud at 0.343-4.01, 4.582-7.082 and car horn honking at 1.183-5.583, 7.486-10.0", "frequencyCaption": "thump thud two times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2050.wav", "onoffCaption": "sneeze at 2.987-4.09 and spraying at 4.169-5.073, 5.726-6.327, 7.545-8.629", "frequencyCaption": "sneeze one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_2223.wav", "onoffCaption": "duck quacking at 0.332-2.332, 2.942-4.942, 6.627-8.627", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2239.wav", "onoffCaption": "sheep goat bleating at 0.339-2.339, 3.156-5.156", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2276.wav", "onoffCaption": "thump thud at 0.322-4.697, 6.983-9.599", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2293.wav", "onoffCaption": "car horn honking at 0.996-3.461, 4.536-7.001, 7.588-10.0 and tapping clicking clanking at 2.689-6.129", "frequencyCaption": "car horn honking three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2322.wav", "onoffCaption": "thump thud at 0.418-3.465, 4.569-7.161", "frequencyCaption": "thump thud two times"} +{"filepath": 
"data/multi_event_train/syn_2338.wav", "onoffCaption": "spraying at 0.027-1.513, 3.975-5.232, 7.548-8.723 and burping belching at 2.07-4.593", "frequencyCaption": "spraying three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2388.wav", "onoffCaption": "woman laughing at 0.794-3.002, 4.227-6.435", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2406.wav", "onoffCaption": "car horn honking at 0.603-2.603 and train horn at 3.813-6.973", "frequencyCaption": "car horn honking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2620.wav", "onoffCaption": "cat meowing at 2.446-4.582, 5.348-7.484", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2675.wav", "onoffCaption": "dog barking at 2.962-4.962, 7.387-9.387", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2690.wav", "onoffCaption": "duck quacking at 0.393-2.393, 3.405-5.405, 5.915-7.915", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2774.wav", "onoffCaption": "sheep goat bleating at 2.619-4.619, 5.242-7.242", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2819.wav", "onoffCaption": "sheep goat bleating at 0.032-2.032, 3.521-5.521 and door knocking at 0.864-3.984, 6.406-9.526", "frequencyCaption": "sheep goat bleating two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2902.wav", "onoffCaption": "sneeze at 0.001-2.462, 4.092-6.553", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2957.wav", "onoffCaption": "door slamming at 0.986-3.865, 4.823-7.702", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4013.wav", "onoffCaption": "tapping clicking clanking at 0.088-3.528, 5.482-8.922", "frequencyCaption": "tapping clicking clanking two times"} 
+{"filepath": "data/multi_event_train/syn_4046.wav", "onoffCaption": "cow mooing at 0.641-5.07 and door slamming at 1.475-3.24, 4.431-6.196, 6.906-8.671", "frequencyCaption": "cow mooing one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_4108.wav", "onoffCaption": "spraying at 1.708-2.649, 4.881-5.508", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4112.wav", "onoffCaption": "sneeze at 3.141-4.418", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4235.wav", "onoffCaption": "car horn honking at 0.77-5.092, 6.546-9.404", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4260.wav", "onoffCaption": "woman laughing at 0.463-3.517, 5.483-8.537", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4285.wav", "onoffCaption": "cow mooing at 0.753-3.722, 4.97-7.106", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4410.wav", "onoffCaption": "woman laughing at 0.635-7.369, 7.95-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4445.wav", "onoffCaption": "whistling at 2.422-8.914", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4544.wav", "onoffCaption": "spraying at 1.576-2.326, 2.849-3.599, 4.646-5.396 and gunshot at 1.949-3.949, 5.622-7.622", "frequencyCaption": "spraying three times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4636.wav", "onoffCaption": "train horn at 0.178-2.852", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4663.wav", "onoffCaption": "sneeze at 2.824-5.484, 6.738-7.832 and duck quacking at 2.914-4.914", "frequencyCaption": "sneeze two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4686.wav", "onoffCaption": "car horn honking at 1.868-5.363, 6.193-9.688", 
"frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4778.wav", "onoffCaption": "dog barking at 1.856-3.856, 4.606-6.606, 7.924-9.924 and tapping clicking clanking at 2.775-6.215", "frequencyCaption": "dog barking three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4840.wav", "onoffCaption": "whistling at 0.068-4.552", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4914.wav", "onoffCaption": "dog barking at 2.546-4.546, 5.133-7.484", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4941.wav", "onoffCaption": "duck quacking at 0.364-2.364, 4.648-6.648", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2104.wav", "onoffCaption": "car horn honking at 2.211-4.558, 6.798-9.145", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2151.wav", "onoffCaption": "gunshot at 2.256-4.256, 4.891-6.891", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2156.wav", "onoffCaption": "dog barking at 0.052-2.052, 2.948-4.948, 5.728-7.728 and door slamming at 1.308-4.282, 5.326-8.3", "frequencyCaption": "dog barking three times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2289.wav", "onoffCaption": "spraying at 1.58-2.312, 3.778-4.51, 5.034-5.766 and gunshot at 2.716-4.716, 5.541-7.541", "frequencyCaption": "spraying three times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2377.wav", "onoffCaption": "cat meowing at 3.482-7.842", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2392.wav", "onoffCaption": "sneeze at 3.873-5.869, 6.876-8.872", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2449.wav", "onoffCaption": "train horn at 0.152-2.632 and spraying at 0.716-1.583 and woman laughing at 
5.347-7.702", "frequencyCaption": "train horn one times and spraying one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2500.wav", "onoffCaption": "door slamming at 1.297-2.135 and spraying at 4.361-4.869, 7.135-7.704", "frequencyCaption": "door slamming one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2507.wav", "onoffCaption": "door knocking at 0.199-2.387", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2552.wav", "onoffCaption": "sheep goat bleating at 0.008-2.008 and woman laughing at 0.341-3.441, 4.758-7.858", "frequencyCaption": "sheep goat bleating one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2668.wav", "onoffCaption": "tapping clicking clanking at 2.077-5.517, 6.443-8.613 and cow mooing at 5.63-8.612", "frequencyCaption": "tapping clicking clanking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2721.wav", "onoffCaption": "gunshot at 3.487-5.487", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2773.wav", "onoffCaption": "door knocking at 0.433-2.896 and door slamming at 3.612-4.987, 7.366-8.741", "frequencyCaption": "door knocking one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2791.wav", "onoffCaption": "door knocking at 0.652-4.402", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2803.wav", "onoffCaption": "thump thud at 0.011-2.35 and spraying at 4.888-6.145", "frequencyCaption": "thump thud one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2804.wav", "onoffCaption": "cow mooing at 0.049-4.478, 6.606-9.378 and thump thud at 2.151-4.922, 6.328-8.828 and burping belching at 3.01-5.108", "frequencyCaption": "cow mooing two times and thump thud two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2856.wav", 
"onoffCaption": "duck quacking at 1.872-3.872, 6.151-8.151", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2918.wav", "onoffCaption": "gunshot at 3.13-5.13, 6.539-8.539", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4009.wav", "onoffCaption": "whistling at 1.047-6.547, 7.7-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4147.wav", "onoffCaption": "whistling at 1.949-7.124", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4298.wav", "onoffCaption": "train horn at 0.227-2.667, 4.148-6.296", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4334.wav", "onoffCaption": "explosion at 0.551-3.551 and cat meowing at 0.943-3.079", "frequencyCaption": "explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4361.wav", "onoffCaption": "cow mooing at 2.731-5.7, 6.649-9.618", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4366.wav", "onoffCaption": "duck quacking at 0.872-2.872, 4.328-6.328 and explosion at 1.918-4.79, 5.543-8.271", "frequencyCaption": "duck quacking two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_4383.wav", "onoffCaption": "car horn honking at 1.956-5.175, 5.822-8.12 and burping belching at 4.017-7.017", "frequencyCaption": "car horn honking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4384.wav", "onoffCaption": "door knocking at 0.945-4.001 and train horn at 6.737-8.892", "frequencyCaption": "door knocking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4511.wav", "onoffCaption": "car horn honking at 3.701-8.213", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4679.wav", "onoffCaption": "thump thud at 0.164-4.614, 5.439-9.889", "frequencyCaption": 
"thump thud two times"} +{"filepath": "data/multi_event_train/syn_4730.wav", "onoffCaption": "train horn at 1.755-4.429, 5.275-7.949", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4737.wav", "onoffCaption": "gunshot at 1.048-3.048, 4.748-6.748 and spraying at 3.137-4.201 and sheep goat bleating at 3.936-5.936", "frequencyCaption": "gunshot two times and spraying one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4762.wav", "onoffCaption": "door slamming at 2.032-4.49, 6.927-8.927", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4787.wav", "onoffCaption": "woman laughing at 0.311-2.511, 3.934-6.526 and sneeze at 2.213-3.8, 6.089-7.676", "frequencyCaption": "woman laughing two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_4815.wav", "onoffCaption": "sneeze at 0.265-1.499 and whistling at 5.673-7.902", "frequencyCaption": "sneeze one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4847.wav", "onoffCaption": "whistling at 0.013-7.763 and tapping clicking clanking at 0.111-3.551 and sneeze at 1.548-3.135, 4.059-5.646", "frequencyCaption": "whistling one times and tapping clicking clanking one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_2002.wav", "onoffCaption": "woman laughing at 3.333-10.0", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2018.wav", "onoffCaption": "spraying at 0.405-1.032, 1.643-2.27, 3.029-3.656 and explosion at 0.527-5.527, 6.694-10.0", "frequencyCaption": "spraying three times and explosion two times"} +{"filepath": "data/multi_event_train/syn_2103.wav", "onoffCaption": "sneeze at 1.671-3.984, 5.21-7.523 and burping belching at 1.76-3.99, 5.309-7.346 and door slamming at 4.12-6.837", "frequencyCaption": "sneeze two times and burping belching two times and door slamming one times"} +{"filepath": 
"data/multi_event_train/syn_2119.wav", "onoffCaption": "dog barking at 2.203-4.203, 6.323-8.323", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2271.wav", "onoffCaption": "tapping clicking clanking at 0.592-4.032, 5.924-9.364", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2294.wav", "onoffCaption": "gunshot at 0.764-2.764 and spraying at 5.053-6.134, 8.451-9.532", "frequencyCaption": "gunshot one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2325.wav", "onoffCaption": "dog barking at 0.642-2.642, 3.414-5.414, 6.681-8.681 and cow mooing at 4.632-7.601", "frequencyCaption": "dog barking three times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2370.wav", "onoffCaption": "thump thud at 0.04-2.502", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2395.wav", "onoffCaption": "spraying at 0.189-0.697, 1.468-1.976, 2.924-3.432 and burping belching at 1.098-3.328", "frequencyCaption": "spraying three times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2454.wav", "onoffCaption": "cat meowing at 0.477-2.425, 3.605-5.553, 7.46-9.408", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2555.wav", "onoffCaption": "cat meowing at 0.068-1.484, 3.808-5.768", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2627.wav", "onoffCaption": "burping belching at 0.031-4.031, 5.198-9.198", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2697.wav", "onoffCaption": "whistling at 2.205-4.214, 4.925-6.934 and sneeze at 4.976-6.15, 8.57-9.744", "frequencyCaption": "whistling two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_2726.wav", "onoffCaption": "sneeze at 1.043-3.446 and thump thud at 6.486-8.825", "frequencyCaption": "sneeze one times 
and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2796.wav", "onoffCaption": "spraying at 0.031-1.206, 2.852-4.099, 5.641-8.225 and tapping clicking clanking at 6.006-9.446", "frequencyCaption": "spraying three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2851.wav", "onoffCaption": "burping belching at 0.624-4.13, 5.824-9.33", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2950.wav", "onoffCaption": "explosion at 3.244-5.973 and sheep goat bleating at 3.421-5.421", "frequencyCaption": "explosion one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4014.wav", "onoffCaption": "door knocking at 2.683-5.063, 7.022-9.402", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4041.wav", "onoffCaption": "explosion at 2.544-7.544", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4115.wav", "onoffCaption": "cow mooing at 3.295-8.275", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4140.wav", "onoffCaption": "door slamming at 1.027-3.906 and whistling at 6.755-9.73", "frequencyCaption": "door slamming one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4228.wav", "onoffCaption": "woman laughing at 0.009-2.107, 4.329-6.427", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4232.wav", "onoffCaption": "cat meowing at 0.174-1.449, 2.265-3.54, 4.479-5.754", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4329.wav", "onoffCaption": "burping belching at 2.373-5.873", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4333.wav", "onoffCaption": "door knocking at 0.096-4.629 and cow mooing at 2.461-5.471, 6.445-9.405", "frequencyCaption": "door knocking one times and cow mooing two 
times"} +{"filepath": "data/multi_event_train/syn_4417.wav", "onoffCaption": "explosion at 0.436-3.027, 4.242-6.244, 7.375-10.0", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_4458.wav", "onoffCaption": "door slamming at 2.041-3.806", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4516.wav", "onoffCaption": "thump thud at 3.061-6.108, 6.938-9.985", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4543.wav", "onoffCaption": "dog barking at 3.213-5.213 and sheep goat bleating at 7.736-9.736", "frequencyCaption": "dog barking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4664.wav", "onoffCaption": "car horn honking at 0.073-2.586 and cat meowing at 0.304-1.304, 2.382-3.382, 4.485-5.485", "frequencyCaption": "car horn honking one times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4681.wav", "onoffCaption": "explosion at 1.201-6.201, 7.318-10.0 and train horn at 3.436-6.636", "frequencyCaption": "explosion two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4765.wav", "onoffCaption": "sheep goat bleating at 0.926-2.926, 3.878-6.477, 7.176-9.176", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4780.wav", "onoffCaption": "cat meowing at 0.028-1.303, 2.305-3.876, 5.456-6.483", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4808.wav", "onoffCaption": "duck quacking at 2.642-4.642, 6.256-8.256", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4812.wav", "onoffCaption": "burping belching at 3.173-7.509", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4909.wav", "onoffCaption": "train horn at 0.691-4.891, 5.967-8.26", "frequencyCaption": "train horn two times"} +{"filepath": 
"data/multi_event_train/syn_4913.wav", "onoffCaption": "sneeze at 0.276-1.51 and whistling at 5.819-10.0", "frequencyCaption": "sneeze one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2026.wav", "onoffCaption": "sheep goat bleating at 0.154-2.154, 3.022-5.022, 6.172-8.172", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2057.wav", "onoffCaption": "burping belching at 2.683-6.227, 6.907-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2200.wav", "onoffCaption": "whistling at 2.022-9.772", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2224.wav", "onoffCaption": "burping belching at 2.351-4.382", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2255.wav", "onoffCaption": "tapping clicking clanking at 2.125-5.565 and sneeze at 2.99-5.466, 6.608-7.772", "frequencyCaption": "tapping clicking clanking one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_2401.wav", "onoffCaption": "cow mooing at 0.674-5.103, 6.783-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2470.wav", "onoffCaption": "door slamming at 0.396-1.771, 2.767-4.142", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2495.wav", "onoffCaption": "door slamming at 0.565-2.928, 3.911-4.762, 6.499-7.439", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2603.wav", "onoffCaption": "sheep goat bleating at 1.829-3.829 and cow mooing at 6.879-9.848", "frequencyCaption": "sheep goat bleating one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2672.wav", "onoffCaption": "cat meowing at 1.868-3.485 and door slamming at 2.29-4.516, 6.876-9.102", "frequencyCaption": "cat meowing one times and door slamming two times"} +{"filepath": 
"data/multi_event_train/syn_2718.wav", "onoffCaption": "sneeze at 0.124-1.581, 3.955-5.483", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2769.wav", "onoffCaption": "car horn honking at 0.692-4.941", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2905.wav", "onoffCaption": "burping belching at 0.043-4.442, 6.234-10.0 and sheep goat bleating at 0.792-2.792, 3.816-5.816, 6.776-8.776", "frequencyCaption": "burping belching two times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2974.wav", "onoffCaption": "sheep goat bleating at 3.18-6.5", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2991.wav", "onoffCaption": "thump thud at 0.605-4.272 and tapping clicking clanking at 0.78-4.22 and sheep goat bleating at 1.8-3.8, 4.865-6.865, 7.674-9.674", "frequencyCaption": "thump thud one times and tapping clicking clanking one times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4030.wav", "onoffCaption": "sheep goat bleating at 2.192-4.192, 4.895-6.895 and whistling at 3.502-9.002", "frequencyCaption": "sheep goat bleating two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4065.wav", "onoffCaption": "thump thud at 0.945-3.992, 5.083-8.13", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4080.wav", "onoffCaption": "whistling at 1.188-3.197, 5.237-7.246 and car horn honking at 3.639-7.961", "frequencyCaption": "whistling two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4216.wav", "onoffCaption": "woman laughing at 0.33-3.611, 4.272-6.404", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4267.wav", "onoffCaption": "tapping clicking clanking at 2.832-6.272 and car horn honking at 5.367-7.832", "frequencyCaption": "tapping clicking clanking one 
times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4282.wav", "onoffCaption": "dog barking at 1.217-3.217 and train horn at 6.035-9.515", "frequencyCaption": "dog barking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4399.wav", "onoffCaption": "door knocking at 1.174-3.637 and sheep goat bleating at 6.536-10.0", "frequencyCaption": "door knocking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4433.wav", "onoffCaption": "woman laughing at 0.612-2.98, 5.346-7.714", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4442.wav", "onoffCaption": "door knocking at 2.41-4.631, 5.512-7.733", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4466.wav", "onoffCaption": "thump thud at 0.065-2.565, 3.598-5.778, 6.412-8.912 and gunshot at 2.169-4.169, 4.865-6.865, 7.796-9.796", "frequencyCaption": "thump thud three times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_4483.wav", "onoffCaption": "cow mooing at 0.202-3.171, 4.097-7.066, 7.97-10.0", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/multi_event_train/syn_4528.wav", "onoffCaption": "burping belching at 0.152-3.152, 5.315-8.315 and woman laughing at 3.008-5.613 and door slamming at 3.552-4.785, 6.323-7.556", "frequencyCaption": "burping belching two times and woman laughing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4559.wav", "onoffCaption": "tapping clicking clanking at 0.302-3.742, 4.486-6.714 and burping belching at 1.799-6.799", "frequencyCaption": "tapping clicking clanking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4598.wav", "onoffCaption": "duck quacking at 1.01-3.01, 3.848-5.848, 6.532-8.532", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4631.wav", "onoffCaption": 
"burping belching at 0.8-3.621, 5.735-8.514", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4640.wav", "onoffCaption": "cat meowing at 2.515-3.825, 4.843-6.593", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4937.wav", "onoffCaption": "spraying at 0.644-1.891, 3.986-4.986", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4946.wav", "onoffCaption": "car horn honking at 0.421-3.347, 3.927-6.853, 7.568-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_4987.wav", "onoffCaption": "gunshot at 0.526-2.526 and dog barking at 5.505-7.505", "frequencyCaption": "gunshot one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_1.wav", "onoffCaption": "burping belching at 1.324-8.492", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2069.wav", "onoffCaption": "sheep goat bleating at 0.568-2.568, 3.305-5.305 and door slamming at 7.71-8.561", "frequencyCaption": "sheep goat bleating two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2073.wav", "onoffCaption": "dog barking at 0.81-2.81, 4.698-6.698 and sneeze at 1.441-3.687, 5.397-7.873", "frequencyCaption": "dog barking two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_2096.wav", "onoffCaption": "sneeze at 0.54-1.643 and car horn honking at 0.905-5.305, 6.304-10.0", "frequencyCaption": "sneeze one times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2168.wav", "onoffCaption": "woman laughing at 2.695-5.976, 6.949-9.534", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2172.wav", "onoffCaption": "woman laughing at 2.653-6.229", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2197.wav", "onoffCaption": "woman laughing at 
0.631-3.05, 5.474-7.715", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2301.wav", "onoffCaption": "door slamming at 0.391-2.309, 3.454-5.372 and sneeze at 2.098-5.208", "frequencyCaption": "door slamming two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2425.wav", "onoffCaption": "car horn honking at 0.069-3.564, 5.485-8.276", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2524.wav", "onoffCaption": "cow mooing at 3.119-7.548", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2656.wav", "onoffCaption": "tapping clicking clanking at 0.232-3.672, 4.981-8.421", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2757.wav", "onoffCaption": "car horn honking at 1.995-6.395, 7.171-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2820.wav", "onoffCaption": "burping belching at 0.103-2.138, 3.472-5.507, 6.832-8.867", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_2875.wav", "onoffCaption": "door knocking at 1.909-6.442", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2921.wav", "onoffCaption": "sheep goat bleating at 1.122-3.122, 4.877-6.877", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4131.wav", "onoffCaption": "train horn at 0.546-3.22 and sheep goat bleating at 1.516-3.516, 4.159-6.159, 7.47-9.47", "frequencyCaption": "train horn one times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4243.wav", "onoffCaption": "woman laughing at 1.329-3.411, 5.369-7.451", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4259.wav", "onoffCaption": "tapping clicking clanking at 0.446-3.886, 5.188-7.422 and spraying at 
5.404-6.404", "frequencyCaption": "tapping clicking clanking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4342.wav", "onoffCaption": "woman laughing at 1.285-4.357 and sneeze at 1.929-4.405, 5.129-7.605", "frequencyCaption": "woman laughing one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_4358.wav", "onoffCaption": "dog barking at 1.161-3.161, 5.169-7.169", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4499.wav", "onoffCaption": "car horn honking at 0.823-5.73 and door slamming at 2.357-3.748, 5.822-7.213 and duck quacking at 3.094-5.094, 7.242-9.242", "frequencyCaption": "car horn honking one times and door slamming two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4567.wav", "onoffCaption": "dog barking at 2.991-4.991, 7.131-9.131", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4582.wav", "onoffCaption": "explosion at 0.407-3.125 and gunshot at 3.084-5.084, 6.253-8.253 and burping belching at 4.468-7.647", "frequencyCaption": "explosion one times and gunshot two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4615.wav", "onoffCaption": "tapping clicking clanking at 0.349-3.789, 5.332-7.939 and cow mooing at 2.213-5.511, 7.329-10.0", "frequencyCaption": "tapping clicking clanking two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4714.wav", "onoffCaption": "thump thud at 1.441-4.488, 5.626-8.673", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4863.wav", "onoffCaption": "sheep goat bleating at 2.651-5.731, 7.327-9.327", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4879.wav", "onoffCaption": "sneeze at 1.717-3.005, 4.541-5.829", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4886.wav", "onoffCaption": "woman 
laughing at 2.456-5.048, 5.701-8.293", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4962.wav", "onoffCaption": "spraying at 0.625-2.32, 2.931-3.798", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4978.wav", "onoffCaption": "door knocking at 1.907-4.095", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2098.wav", "onoffCaption": "duck quacking at 1.532-3.532", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2127.wav", "onoffCaption": "explosion at 3.111-5.983, 6.942-9.582", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2183.wav", "onoffCaption": "thump thud at 2.161-5.208, 6.064-8.472", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2315.wav", "onoffCaption": "explosion at 1.424-6.424 and cat meowing at 2.368-4.316 and car horn honking at 2.424-4.424", "frequencyCaption": "explosion one times and cat meowing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2354.wav", "onoffCaption": "door knocking at 0.652-4.152, 5.012-7.135, 7.746-9.81", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_2571.wav", "onoffCaption": "spraying at 2.456-4.584, 5.711-7.473", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2594.wav", "onoffCaption": "car horn honking at 2.033-4.533, 5.2-7.7", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2619.wav", "onoffCaption": "duck quacking at 1.522-3.522 and cat meowing at 1.603-3.791", "frequencyCaption": "duck quacking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2658.wav", "onoffCaption": "explosion at 0.811-5.677, 7.813-10.0 and door knocking at 2.02-5.77", "frequencyCaption": "explosion two times and door knocking 
one times"} +{"filepath": "data/multi_event_train/syn_2702.wav", "onoffCaption": "whistling at 0.515-3.39, 3.92-6.795 and explosion at 2.138-5.01, 6.901-9.773", "frequencyCaption": "whistling two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_2743.wav", "onoffCaption": "tapping clicking clanking at 2.399-5.839, 6.987-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2834.wav", "onoffCaption": "cow mooing at 2.952-5.962, 7.639-10.0 and woman laughing at 3.706-6.406", "frequencyCaption": "cow mooing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2890.wav", "onoffCaption": "duck quacking at 2.845-4.845", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4125.wav", "onoffCaption": "thump thud at 1.182-3.682 and door knocking at 6.723-9.57", "frequencyCaption": "thump thud one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4164.wav", "onoffCaption": "whistling at 3.425-5.654", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4181.wav", "onoffCaption": "car horn honking at 0.313-3.488, 5.773-8.948", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4317.wav", "onoffCaption": "sneeze at 0.088-2.201, 3.06-5.377, 6.262-7.496 and door slamming at 0.488-2.708, 3.344-4.244 and train horn at 0.872-6.587, 7.647-10.0", "frequencyCaption": "sneeze three times and door slamming two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_4356.wav", "onoffCaption": "train horn at 2.962-6.722", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4429.wav", "onoffCaption": "duck quacking at 0.833-2.833, 3.531-5.531, 6.133-8.133 and woman laughing at 5.524-8.624", "frequencyCaption": "duck quacking three times and woman laughing one times"} +{"filepath": 
"data/multi_event_train/syn_4468.wav", "onoffCaption": "door slamming at 0.051-2.509", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4532.wav", "onoffCaption": "door slamming at 2.31-4.793, 5.41-6.701", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4573.wav", "onoffCaption": "burping belching at 1.245-5.114 and door slamming at 2.463-4.463, 5.179-7.179", "frequencyCaption": "burping belching one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4596.wav", "onoffCaption": "dog barking at 0.023-2.023, 2.648-4.648, 6.804-8.804", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_4700.wav", "onoffCaption": "door slamming at 1.79-4.01 and door knocking at 7.65-10.0", "frequencyCaption": "door slamming one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4741.wav", "onoffCaption": "sneeze at 3.285-7.785", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4836.wav", "onoffCaption": "car horn honking at 0.067-2.993, 3.783-6.709, 7.913-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_4877.wav", "onoffCaption": "explosion at 0.204-2.524, 3.236-5.41, 6.621-8.623", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_4892.wav", "onoffCaption": "thump thud at 0.249-3.916, 5.15-8.817 and cat meowing at 0.688-1.697, 2.233-3.242, 3.778-4.787", "frequencyCaption": "thump thud two times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4989.wav", "onoffCaption": "sheep goat bleating at 0.79-2.79 and woman laughing at 1.659-4.251, 4.803-7.395 and gunshot at 3.514-5.514, 6.423-8.696", "frequencyCaption": "sheep goat bleating one times and woman laughing two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2028.wav", "onoffCaption": "tapping 
clicking clanking at 3.282-6.722", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2032.wav", "onoffCaption": "thump thud at 1.878-4.217", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2129.wav", "onoffCaption": "cat meowing at 2.578-3.663, 4.726-6.036, 7.089-8.089", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2133.wav", "onoffCaption": "train horn at 3.33-6.57, 7.334-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2166.wav", "onoffCaption": "cow mooing at 2.414-6.843, 7.633-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2241.wav", "onoffCaption": "whistling at 3.139-8.314", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2340.wav", "onoffCaption": "car horn honking at 0.508-4.349 and explosion at 7.279-9.281", "frequencyCaption": "car horn honking one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2464.wav", "onoffCaption": "tapping clicking clanking at 0.896-4.336 and duck quacking at 1.153-3.153, 4.089-6.089, 6.905-8.905", "frequencyCaption": "tapping clicking clanking one times and duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2481.wav", "onoffCaption": "spraying at 1.831-2.331 and door slamming at 4.61-6.61, 7.521-9.521", "frequencyCaption": "spraying one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2530.wav", "onoffCaption": "train horn at 3.401-6.641", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2565.wav", "onoffCaption": "sneeze at 0.005-1.505", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2580.wav", "onoffCaption": "gunshot at 3.815-5.815, 7.3-9.3", "frequencyCaption": "gunshot two times"} +{"filepath": 
"data/multi_event_train/syn_2617.wav", "onoffCaption": "duck quacking at 1.56-3.56, 4.821-6.821", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2716.wav", "onoffCaption": "sheep goat bleating at 0.826-2.826 and thump thud at 5.91-8.41", "frequencyCaption": "sheep goat bleating one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2861.wav", "onoffCaption": "sneeze at 2.056-7.056 and tapping clicking clanking at 4.55-7.99 and thump thud at 5.027-7.798", "frequencyCaption": "sneeze one times and tapping clicking clanking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2884.wav", "onoffCaption": "door knocking at 0.648-2.951, 4.123-6.605, 7.909-9.931", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_2960.wav", "onoffCaption": "woman laughing at 0.175-2.257 and door slamming at 0.823-2.823 and sneeze at 6.102-7.559", "frequencyCaption": "woman laughing one times and door slamming one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2985.wav", "onoffCaption": "thump thud at 0.261-2.6, 3.314-5.653 and sneeze at 4.087-6.172", "frequencyCaption": "thump thud two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4071.wav", "onoffCaption": "cat meowing at 2.995-4.097, 4.741-5.843", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4094.wav", "onoffCaption": "whistling at 3.031-9.342", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4170.wav", "onoffCaption": "thump thud at 1.319-5.237 and cat meowing at 4.051-5.587", "frequencyCaption": "thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4195.wav", "onoffCaption": "whistling at 2.822-5.051, 5.722-8.376 and explosion at 6.118-8.836", "frequencyCaption": "whistling two times and explosion one times"} +{"filepath": 
"data/multi_event_train/syn_4202.wav", "onoffCaption": "whistling at 2.434-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4218.wav", "onoffCaption": "cow mooing at 0.267-3.565 and woman laughing at 1.052-3.657", "frequencyCaption": "cow mooing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4303.wav", "onoffCaption": "tapping clicking clanking at 0.66-4.1, 6.49-9.93 and cat meowing at 1.059-2.603, 4.596-6.14 and sheep goat bleating at 3.085-5.085", "frequencyCaption": "tapping clicking clanking two times and cat meowing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4319.wav", "onoffCaption": "door slamming at 3.854-5.003, 5.659-7.885", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4427.wav", "onoffCaption": "sheep goat bleating at 3.094-5.094, 6.139-8.139", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4526.wav", "onoffCaption": "spraying at 0.254-1.335, 3.355-5.05", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4654.wav", "onoffCaption": "cat meowing at 3.661-6.565, 7.709-8.895", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4755.wav", "onoffCaption": "cow mooing at 4.05-7.019 and spraying at 6.688-7.769, 9.086-9.713", "frequencyCaption": "cow mooing one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4822.wav", "onoffCaption": "train horn at 3.056-5.523 and car horn honking at 3.381-5.846 and burping belching at 4.458-7.458", "frequencyCaption": "train horn one times and car horn honking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4838.wav", "onoffCaption": "door knocking at 0.085-2.306 and spraying at 5.037-5.621 and duck quacking at 5.649-7.649", "frequencyCaption": "door knocking one times and spraying one 
times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4923.wav", "onoffCaption": "gunshot at 0.444-2.444, 3.118-5.248 and explosion at 2.474-7.474", "frequencyCaption": "gunshot two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4939.wav", "onoffCaption": "gunshot at 2.558-5.032, 5.941-7.941", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2016.wav", "onoffCaption": "whistling at 0.036-5.211 and burping belching at 2.935-5.058, 6.513-9.513", "frequencyCaption": "whistling one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_2067.wav", "onoffCaption": "sneeze at 0.627-2.69", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2082.wav", "onoffCaption": "spraying at 2.94-5.959", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2158.wav", "onoffCaption": "whistling at 0.357-8.012 and gunshot at 3.468-5.468, 7.306-9.306", "frequencyCaption": "whistling one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2199.wav", "onoffCaption": "thump thud at 3.283-6.95", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2214.wav", "onoffCaption": "cat meowing at 0.151-1.417, 3.482-4.748", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2265.wav", "onoffCaption": "whistling at 1.369-5.853 and tapping clicking clanking at 2.623-6.063, 7.495-10.0", "frequencyCaption": "whistling one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2280.wav", "onoffCaption": "burping belching at 0.359-5.359, 6.238-8.564 and tapping clicking clanking at 3.033-6.473", "frequencyCaption": "burping belching two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2415.wav", "onoffCaption": "car horn honking at 0.386-3.881, 5.676-9.171", 
"frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2431.wav", "onoffCaption": "explosion at 0.062-5.062, 7.284-9.469", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2440.wav", "onoffCaption": "door slamming at 0.123-0.623, 2.142-2.642 and sheep goat bleating at 1.348-3.348, 3.893-5.893, 6.837-8.837", "frequencyCaption": "door slamming two times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2633.wav", "onoffCaption": "whistling at 0.759-3.634, 4.275-6.577", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2642.wav", "onoffCaption": "tapping clicking clanking at 1.34-4.78, 7.05-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2728.wav", "onoffCaption": "cat meowing at 0.493-3.523 and woman laughing at 0.638-2.73", "frequencyCaption": "cat meowing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2759.wav", "onoffCaption": "gunshot at 1.592-3.611", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2935.wav", "onoffCaption": "door knocking at 0.728-4.478 and duck quacking at 2.452-4.452, 5.784-7.784", "frequencyCaption": "door knocking one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2944.wav", "onoffCaption": "door slamming at 3.33-5.33", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4024.wav", "onoffCaption": "car horn honking at 2.651-5.164, 5.914-8.427 and thump thud at 3.286-6.057, 7.195-9.966", "frequencyCaption": "car horn honking two times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_4055.wav", "onoffCaption": "tapping clicking clanking at 2.017-5.457, 7.274-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4226.wav", "onoffCaption": 
"tapping clicking clanking at 0.981-4.421, 5.748-8.421", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4257.wav", "onoffCaption": "cow mooing at 2.78-5.762", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4296.wav", "onoffCaption": "cat meowing at 0.666-2.626, 3.341-5.301", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4403.wav", "onoffCaption": "door slamming at 2.323-3.626, 4.97-6.273, 7.601-8.904", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4472.wav", "onoffCaption": "door knocking at 2.045-4.422, 5.41-7.787", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4497.wav", "onoffCaption": "whistling at 2.277-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4518.wav", "onoffCaption": "spraying at 3.256-4.503, 5.811-7.058, 8.236-9.483", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4569.wav", "onoffCaption": "train horn at 2.431-5.965, 7.28-9.954", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4601.wav", "onoffCaption": "cow mooing at 0.4-5.38, 7.824-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4670.wav", "onoffCaption": "whistling at 3.544-9.044", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4695.wav", "onoffCaption": "burping belching at 0.668-4.668, 6.056-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4888.wav", "onoffCaption": "burping belching at 3.083-6.083", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4907.wav", "onoffCaption": "whistling at 1.293-9.304", "frequencyCaption": "whistling one times"} +{"filepath": 
"data/multi_event_train/syn_4976.wav", "onoffCaption": "tapping clicking clanking at 0.552-3.992, 5.863-9.303", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4993.wav", "onoffCaption": "train horn at 0.195-2.869 and cat meowing at 6.247-7.291, 8.178-9.488", "frequencyCaption": "train horn one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2043.wav", "onoffCaption": "cat meowing at 3.812-5.752", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2059.wav", "onoffCaption": "burping belching at 0.09-2.197 and door knocking at 0.721-3.788, 5.023-8.09", "frequencyCaption": "burping belching one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2142.wav", "onoffCaption": "door slamming at 0.363-1.616, 2.579-4.805, 6.932-8.223", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2230.wav", "onoffCaption": "train horn at 0.572-6.632", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2331.wav", "onoffCaption": "thump thud at 0.794-3.022, 3.604-5.832, 6.585-8.813", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_2364.wav", "onoffCaption": "explosion at 2.299-5.293, 6.682-9.682", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2381.wav", "onoffCaption": "woman laughing at 0.245-2.327, 2.828-5.334, 7.526-9.642", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_2514.wav", "onoffCaption": "gunshot at 3.315-5.315, 7.012-9.012", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2629.wav", "onoffCaption": "spraying at 0.982-2.715, 4.089-5.822 and cow mooing at 1.108-6.088, 7.379-10.0", "frequencyCaption": "spraying two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2666.wav", 
"onoffCaption": "thump thud at 0.125-2.896, 4.014-6.785, 7.495-10.0 and woman laughing at 2.778-4.978", "frequencyCaption": "thump thud three times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2683.wav", "onoffCaption": "train horn at 2.278-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2699.wav", "onoffCaption": "sheep goat bleating at 3.383-5.383, 7.401-9.401", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2732.wav", "onoffCaption": "cow mooing at 3.624-8.604", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2767.wav", "onoffCaption": "dog barking at 2.432-4.432, 5.383-7.383", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2782.wav", "onoffCaption": "door knocking at 0.89-4.01, 5.434-7.971 and burping belching at 4.746-7.072", "frequencyCaption": "door knocking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2798.wav", "onoffCaption": "thump thud at 0.059-2.559, 3.231-5.68, 6.442-8.781 and spraying at 2.588-3.096", "frequencyCaption": "thump thud three times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2810.wav", "onoffCaption": "door slamming at 2.837-3.642", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_2911.wav", "onoffCaption": "gunshot at 1.534-3.704, 6.11-8.11", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4000.wav", "onoffCaption": "burping belching at 2.612-6.118, 6.622-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4101.wav", "onoffCaption": "whistling at 0.049-7.142 and door slamming at 1.786-3.786, 5.681-7.681", "frequencyCaption": "whistling one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4269.wav", "onoffCaption": "car horn honking 
at 0.1-2.565", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4273.wav", "onoffCaption": "woman laughing at 3.317-5.672", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4368.wav", "onoffCaption": "sneeze at 3.702-5.615, 6.946-8.859", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4372.wav", "onoffCaption": "cat meowing at 1.592-2.592, 3.323-4.333, 4.964-7.647", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4397.wav", "onoffCaption": "dog barking at 0.044-2.444 and cow mooing at 6.758-10.0", "frequencyCaption": "dog barking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4419.wav", "onoffCaption": "car horn honking at 3.16-5.946, 6.874-9.66", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4456.wav", "onoffCaption": "cat meowing at 3.282-4.592, 6.073-8.963 and woman laughing at 3.328-5.933", "frequencyCaption": "cat meowing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4557.wav", "onoffCaption": "car horn honking at 0.072-3.913, 6.076-9.917 and duck quacking at 0.139-2.139, 3.74-5.74", "frequencyCaption": "car horn honking two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4625.wav", "onoffCaption": "woman laughing at 0.112-3.5, 4.993-7.276 and cow mooing at 2.294-7.274", "frequencyCaption": "woman laughing two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4724.wav", "onoffCaption": "whistling at 3.107-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4806.wav", "onoffCaption": "explosion at 1.533-6.399", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4849.wav", "onoffCaption": "burping belching at 2.476-5.237, 6.847-9.668", "frequencyCaption": "burping 
belching two times"} +{"filepath": "data/multi_event_train/syn_4853.wav", "onoffCaption": "car horn honking at 2.537-4.884, 6.616-8.963", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4948.wav", "onoffCaption": "woman laughing at 0.01-3.11 and door slamming at 0.11-2.593, 4.13-7.091", "frequencyCaption": "woman laughing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4952.wav", "onoffCaption": "tapping clicking clanking at 2.225-5.665", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2110.wav", "onoffCaption": "whistling at 0.11-8.121 and gunshot at 0.66-2.66, 4.32-6.32", "frequencyCaption": "whistling one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2117.wav", "onoffCaption": "duck quacking at 2.303-4.303, 4.821-6.821, 7.375-9.375", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2145.wav", "onoffCaption": "car horn honking at 2.249-5.067, 6.86-9.325", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2278.wav", "onoffCaption": "car horn honking at 0.005-3.18 and cat meowing at 0.584-2.532 and sneeze at 1.507-2.784", "frequencyCaption": "car horn honking one times and cat meowing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2336.wav", "onoffCaption": "burping belching at 2.727-6.75, 7.974-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2363.wav", "onoffCaption": "explosion at 0.209-2.937, 4.197-6.925", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2408.wav", "onoffCaption": "duck quacking at 0.919-2.919, 4.192-6.192, 6.918-8.918", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2513.wav", "onoffCaption": "tapping clicking clanking at 1.575-5.015, 5.631-7.968", 
"frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2541.wav", "onoffCaption": "door knocking at 2.431-7.431 and spraying at 3.38-3.984, 6.273-8.401", "frequencyCaption": "door knocking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2546.wav", "onoffCaption": "car horn honking at 0.134-3.629, 4.572-6.776", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2735.wav", "onoffCaption": "train horn at 2.719-7.049", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2760.wav", "onoffCaption": "woman laughing at 0.243-2.829 and whistling at 6.242-10.0", "frequencyCaption": "woman laughing one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2785.wav", "onoffCaption": "sheep goat bleating at 0.364-3.364 and gunshot at 2.49-4.49, 5.326-7.326 and thump thud at 4.036-6.375, 7.809-10.0", "frequencyCaption": "sheep goat bleating one times and gunshot two times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_2817.wav", "onoffCaption": "door slamming at 0.034-2.162, 3.44-6.414", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2842.wav", "onoffCaption": "woman laughing at 3.832-6.398, 7.087-9.653 and thump thud at 3.997-7.664", "frequencyCaption": "woman laughing two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2845.wav", "onoffCaption": "burping belching at 0.066-2.189 and dog barking at 4.94-6.94, 7.927-9.927", "frequencyCaption": "burping belching one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2959.wav", "onoffCaption": "thump thud at 2.464-6.131", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4106.wav", "onoffCaption": "burping belching at 2.396-5.396, 6.29-9.29", "frequencyCaption": "burping belching two times"} +{"filepath": 
"data/multi_event_train/syn_4154.wav", "onoffCaption": "burping belching at 0.1-3.38 and duck quacking at 4.7-6.7", "frequencyCaption": "burping belching one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4320.wav", "onoffCaption": "woman laughing at 1.992-4.347 and sheep goat bleating at 2.326-4.326, 6.491-8.491", "frequencyCaption": "woman laughing one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4327.wav", "onoffCaption": "tapping clicking clanking at 0.526-3.966, 4.575-8.015", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4375.wav", "onoffCaption": "car horn honking at 1.515-5.837, 7.812-9.812", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4390.wav", "onoffCaption": "door knocking at 0.626-2.978, 3.775-6.087, 6.804-9.392", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_4502.wav", "onoffCaption": "duck quacking at 2.496-4.496 and car horn honking at 2.541-5.041, 5.542-8.042", "frequencyCaption": "duck quacking one times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4550.wav", "onoffCaption": "gunshot at 0.379-2.379, 3.425-5.425 and cat meowing at 6.709-9.613", "frequencyCaption": "gunshot two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4638.wav", "onoffCaption": "tapping clicking clanking at 2.149-5.589, 6.582-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4688.wav", "onoffCaption": "train horn at 1.924-4.564", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4723.wav", "onoffCaption": "train horn at 0.301-8.501 and spraying at 1.872-2.813, 3.956-5.718, 6.281-7.062", "frequencyCaption": "train horn one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_4771.wav", 
"onoffCaption": "thump thud at 2.044-6.494, 7.6-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4776.wav", "onoffCaption": "burping belching at 1.744-5.288, 7.129-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4793.wav", "onoffCaption": "duck quacking at 0.789-2.789, 3.91-5.91", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4794.wav", "onoffCaption": "thump thud at 3.096-7.471", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4801.wav", "onoffCaption": "sheep goat bleating at 2.842-4.842, 5.542-7.542", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4854.wav", "onoffCaption": "sneeze at 0.166-1.711, 3.091-4.185", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2011.wav", "onoffCaption": "cow mooing at 3.455-7.884", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2044.wav", "onoffCaption": "cow mooing at 2.752-6.05", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2237.wav", "onoffCaption": "spraying at 2.505-3.68, 5.61-8.07 and burping belching at 4.712-7.335", "frequencyCaption": "spraying two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2246.wav", "onoffCaption": "train horn at 2.386-4.866, 5.514-7.994", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2262.wav", "onoffCaption": "door slamming at 0.296-1.82, 2.403-3.927, 4.953-6.477", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2287.wav", "onoffCaption": "sneeze at 2.468-4.08, 4.643-6.255, 7.115-8.727", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2379.wav", "onoffCaption": "thump thud at 1.14-3.602, 4.287-6.749", 
"frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2386.wav", "onoffCaption": "sneeze at 0.998-3.401, 4.07-5.347", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2412.wav", "onoffCaption": "whistling at 2.899-8.074", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2447.wav", "onoffCaption": "explosion at 0.429-5.429, 6.245-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2509.wav", "onoffCaption": "car horn honking at 0.405-3.9, 5.336-7.484", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2610.wav", "onoffCaption": "spraying at 0.285-2.721, 3.874-6.31, 7.014-9.45", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2634.wav", "onoffCaption": "cow mooing at 1.887-6.316 and dog barking at 2.493-4.493, 5.504-7.504", "frequencyCaption": "cow mooing one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2661.wav", "onoffCaption": "spraying at 0.128-1.823, 2.851-4.337, 5.4-6.05 and duck quacking at 1.565-3.565, 5.072-7.072 and dog barking at 1.874-3.874", "frequencyCaption": "spraying three times and duck quacking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2684.wav", "onoffCaption": "sheep goat bleating at 0.422-2.422", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2858.wav", "onoffCaption": "spraying at 1.549-2.281, 3.504-4.236, 4.962-5.694", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2916.wav", "onoffCaption": "train horn at 1.358-4.598, 5.88-9.12", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2943.wav", "onoffCaption": "door slamming at 2.913-4.913, 6.059-8.059", "frequencyCaption": "door slamming two times"} +{"filepath": 
"data/multi_event_train/syn_4007.wav", "onoffCaption": "train horn at 0.741-4.221, 6.423-9.063 and cat meowing at 2.233-3.593, 5.797-7.692", "frequencyCaption": "train horn two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4048.wav", "onoffCaption": "woman laughing at 3.569-6.364 and sneeze at 4.859-6.855", "frequencyCaption": "woman laughing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4052.wav", "onoffCaption": "duck quacking at 2.19-4.19, 6.433-8.433", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4076.wav", "onoffCaption": "thump thud at 0.403-3.174 and sheep goat bleating at 2.8-4.8, 6.068-8.068", "frequencyCaption": "thump thud one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4093.wav", "onoffCaption": "door knocking at 3.703-5.924, 7.166-9.387", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4149.wav", "onoffCaption": "whistling at 3.415-9.124", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4153.wav", "onoffCaption": "door knocking at 0.839-3.569, 4.834-7.564 and sneeze at 1.952-4.037", "frequencyCaption": "door knocking two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4188.wav", "onoffCaption": "duck quacking at 2.14-4.14", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4221.wav", "onoffCaption": "whistling at 0.086-8.471", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4274.wav", "onoffCaption": "cat meowing at 0.042-1.144, 2.589-3.598 and woman laughing at 0.866-3.452, 4.245-6.831", "frequencyCaption": "cat meowing two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4291.wav", "onoffCaption": "thump thud at 1.138-5.588 and whistling at 1.843-6.327 and woman laughing at 5.378-7.797", "frequencyCaption": 
"thump thud one times and whistling one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4404.wav", "onoffCaption": "burping belching at 2.51-5.51, 7.101-9.541 and gunshot at 2.836-4.836", "frequencyCaption": "burping belching two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4420.wav", "onoffCaption": "duck quacking at 2.843-4.843, 5.486-7.486 and thump thud at 5.54-8.311", "frequencyCaption": "duck quacking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4451.wav", "onoffCaption": "train horn at 0.239-3.039, 5.026-7.426", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4505.wav", "onoffCaption": "door knocking at 1.084-3.654", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4622.wav", "onoffCaption": "whistling at 2.926-5.155, 7.141-9.447", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4677.wav", "onoffCaption": "cow mooing at 3.047-7.476 and woman laughing at 3.474-6.269", "frequencyCaption": "cow mooing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4692.wav", "onoffCaption": "duck quacking at 0.013-2.013", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4739.wav", "onoffCaption": "door slamming at 1.895-3.813", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4789.wav", "onoffCaption": "sneeze at 1.002-5.058", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4900.wav", "onoffCaption": "cow mooing at 0.223-3.233, 4.638-7.648 and door knocking at 4.601-6.761", "frequencyCaption": "cow mooing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4924.wav", "onoffCaption": "burping belching at 0.009-5.953", "frequencyCaption": "burping belching one times"} +{"filepath": 
"data/multi_event_train/syn_4955.wav", "onoffCaption": "door knocking at 0.394-5.096, 6.053-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_8.wav", "onoffCaption": "sheep goat bleating at 0.236-2.236, 3.143-5.143, 6.147-8.147", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2035.wav", "onoffCaption": "thump thud at 3.809-6.148, 7.831-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2060.wav", "onoffCaption": "sheep goat bleating at 0.166-5.046, 6.154-8.154 and explosion at 0.486-2.66, 3.478-6.478", "frequencyCaption": "sheep goat bleating two times and explosion two times"} +{"filepath": "data/multi_event_train/syn_2085.wav", "onoffCaption": "burping belching at 3.03-5.395, 5.899-8.216", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2161.wav", "onoffCaption": "door knocking at 2.239-6.511", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2184.wav", "onoffCaption": "woman laughing at 2.236-4.473, 6.682-8.919 and sneeze at 2.47-6.97 and sheep goat bleating at 6.116-8.116", "frequencyCaption": "woman laughing two times and sneeze one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2213.wav", "onoffCaption": "duck quacking at 2.521-4.521, 6.283-8.283", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2308.wav", "onoffCaption": "cow mooing at 0.549-3.531, 5.902-8.609", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2436.wav", "onoffCaption": "thump thud at 3.198-6.865", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2463.wav", "onoffCaption": "whistling at 1.38-9.391", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2486.wav", "onoffCaption": "gunshot at 
1.07-3.343", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2537.wav", "onoffCaption": "train horn at 2.728-7.586", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2578.wav", "onoffCaption": "gunshot at 2.509-4.509, 6.605-8.605", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2645.wav", "onoffCaption": "sheep goat bleating at 0.42-2.42, 4.869-6.869 and cat meowing at 2.605-3.915, 4.495-6.631", "frequencyCaption": "sheep goat bleating two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2829.wav", "onoffCaption": "whistling at 0.202-5.377", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2833.wav", "onoffCaption": "sneeze at 2.403-6.051, 7.994-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2899.wav", "onoffCaption": "door knocking at 0.036-2.66, 4.527-7.364 and dog barking at 0.341-2.341, 2.973-4.973", "frequencyCaption": "door knocking two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2928.wav", "onoffCaption": "burping belching at 0.293-6.973 and woman laughing at 2.69-4.806, 5.463-7.568", "frequencyCaption": "burping belching one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2932.wav", "onoffCaption": "spraying at 0.047-0.651, 1.26-2.164, 3.361-4.608 and dog barking at 0.972-2.972, 3.608-5.608", "frequencyCaption": "spraying three times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2967.wav", "onoffCaption": "cow mooing at 2.971-5.94, 7.066-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2982.wav", "onoffCaption": "door knocking at 2.719-5.099", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4023.wav", "onoffCaption": "door knocking at 1.077-4.577, 5.819-7.908", 
"frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4039.wav", "onoffCaption": "thump thud at 2.056-4.556, 6.038-8.538", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4122.wav", "onoffCaption": "cat meowing at 3.331-4.342, 4.92-5.931, 6.61-7.621", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4138.wav", "onoffCaption": "door knocking at 2.83-5.182, 6.312-8.664", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4205.wav", "onoffCaption": "spraying at 0.092-2.484, 4.849-7.241", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4250.wav", "onoffCaption": "burping belching at 0.753-3.932", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4351.wav", "onoffCaption": "dog barking at 0.601-2.601, 5.002-7.002", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4475.wav", "onoffCaption": "burping belching at 1.004-7.684 and whistling at 1.432-5.916, 6.702-9.65 and gunshot at 2.55-4.55, 5.582-7.582", "frequencyCaption": "burping belching one times and whistling two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4490.wav", "onoffCaption": "door knocking at 2.648-4.775 and tapping clicking clanking at 7.088-10.0", "frequencyCaption": "door knocking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4574.wav", "onoffCaption": "sneeze at 4.1-6.346", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4606.wav", "onoffCaption": "thump thud at 1.232-4.003, 6.177-8.891", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4653.wav", "onoffCaption": "tapping clicking clanking at 2.647-6.087", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": 
"data/multi_event_train/syn_4707.wav", "onoffCaption": "explosion at 0.051-5.051", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4748.wav", "onoffCaption": "thump thud at 1.279-3.779, 5.309-7.809", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4895.wav", "onoffCaption": "gunshot at 3.74-6.214, 7.578-9.578", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4971.wav", "onoffCaption": "cat meowing at 0.262-2.256", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4994.wav", "onoffCaption": "cat meowing at 3.746-4.746, 5.536-6.536 and sheep goat bleating at 4.329-6.329", "frequencyCaption": "cat meowing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2048.wav", "onoffCaption": "burping belching at 0.76-4.304, 5.102-8.646", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2134.wav", "onoffCaption": "spraying at 0.015-1.19, 2.932-5.368", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2153.wav", "onoffCaption": "spraying at 0.046-1.046 and explosion at 0.666-5.666, 6.451-10.0", "frequencyCaption": "spraying one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_2209.wav", "onoffCaption": "gunshot at 2.013-4.013", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2312.wav", "onoffCaption": "explosion at 2.43-7.351", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2347.wav", "onoffCaption": "explosion at 0.017-3.017, 3.938-6.938 and dog barking at 1.463-3.463", "frequencyCaption": "explosion two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2479.wav", "onoffCaption": "burping belching at 1.689-7.776", "frequencyCaption": "burping belching one times"} +{"filepath": 
"data/multi_event_train/syn_2505.wav", "onoffCaption": "thump thud at 1.734-6.109, 6.786-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2562.wav", "onoffCaption": "car horn honking at 0.787-5.694, 7.691-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2587.wav", "onoffCaption": "sheep goat bleating at 3.286-5.286", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2711.wav", "onoffCaption": "train horn at 2.669-4.824, 6.196-8.66", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2744.wav", "onoffCaption": "car horn honking at 0.047-2.973 and cat meowing at 6.678-8.219", "frequencyCaption": "car horn honking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2801.wav", "onoffCaption": "burping belching at 1.71-5.0", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2866.wav", "onoffCaption": "cat meowing at 0.382-2.57, 3.17-4.181, 4.892-6.078 and whistling at 0.515-8.9 and duck quacking at 5.413-7.413", "frequencyCaption": "cat meowing three times and whistling one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2883.wav", "onoffCaption": "sheep goat bleating at 3.204-5.204", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2998.wav", "onoffCaption": "spraying at 0.088-1.345, 3.612-4.869", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4089.wav", "onoffCaption": "sneeze at 0.337-1.591 and train horn at 3.809-9.524", "frequencyCaption": "sneeze one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4177.wav", "onoffCaption": "dog barking at 0.177-2.177 and sneeze at 0.871-3.485, 5.324-7.465 and woman laughing at 3.376-6.014, 7.273-10.0", "frequencyCaption": "dog barking one times and 
sneeze two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4192.wav", "onoffCaption": "woman laughing at 0.638-3.433, 5.851-8.646", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4278.wav", "onoffCaption": "gunshot at 1.428-3.521, 4.261-6.354, 7.169-9.262 and door slamming at 5.994-7.369", "frequencyCaption": "gunshot three times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4304.wav", "onoffCaption": "train horn at 2.458-6.639, 7.492-9.932", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4363.wav", "onoffCaption": "thump thud at 0.301-2.64 and duck quacking at 0.65-2.65, 4.552-6.552", "frequencyCaption": "thump thud one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4386.wav", "onoffCaption": "sneeze at 0.984-4.059, 5.924-8.999", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4521.wav", "onoffCaption": "sneeze at 0.704-2.663, 3.814-5.773", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4546.wav", "onoffCaption": "dog barking at 0.715-2.715, 5.2-7.2", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4591.wav", "onoffCaption": "sheep goat bleating at 2.507-4.507, 5.973-7.973", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4649.wav", "onoffCaption": "spraying at 0.22-1.087, 2.362-3.229 and cat meowing at 6.586-8.526", "frequencyCaption": "spraying two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4735.wav", "onoffCaption": "train horn at 0.231-5.308", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4752.wav", "onoffCaption": "cow mooing at 1.023-3.992, 4.756-7.738 and cat meowing at 2.326-3.87, 4.908-6.452", "frequencyCaption": "cow mooing two times and cat meowing two times"} 
+{"filepath": "data/multi_event_train/syn_4825.wav", "onoffCaption": "burping belching at 0.564-5.564, 7.134-9.46", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4870.wav", "onoffCaption": "thump thud at 1.655-6.03, 6.852-9.508", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2106.wav", "onoffCaption": "car horn honking at 1.789-4.302, 5.357-7.87", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2274.wav", "onoffCaption": "explosion at 0.13-5.051, 6.09-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2291.wav", "onoffCaption": "cat meowing at 1.616-2.628, 3.395-6.299, 7.041-8.143", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2320.wav", "onoffCaption": "gunshot at 0.53-2.803, 3.85-5.85, 7.066-9.066", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_2375.wav", "onoffCaption": "door slamming at 1.481-2.872, 3.714-5.714, 6.695-7.844 and explosion at 5.251-8.438", "frequencyCaption": "door slamming three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2390.wav", "onoffCaption": "duck quacking at 0.077-2.077, 4.153-6.153", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2451.wav", "onoffCaption": "burping belching at 0.021-3.311 and cat meowing at 5.102-7.042", "frequencyCaption": "burping belching one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2550.wav", "onoffCaption": "train horn at 2.524-8.268", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2622.wav", "onoffCaption": "tapping clicking clanking at 0.613-4.053 and train horn at 0.753-3.393, 4.437-7.077", "frequencyCaption": "tapping clicking clanking one times and train horn two times"} +{"filepath": 
"data/multi_event_train/syn_2638.wav", "onoffCaption": "gunshot at 0.324-2.324, 3.995-6.501 and whistling at 6.819-8.828", "frequencyCaption": "gunshot two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2688.wav", "onoffCaption": "dog barking at 0.336-2.336, 3.264-5.264, 5.967-7.967 and tapping clicking clanking at 2.286-5.726", "frequencyCaption": "dog barking three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2723.wav", "onoffCaption": "spraying at 0.799-1.974, 2.594-3.178, 4.428-6.19 and woman laughing at 2.694-4.977, 6.495-9.48", "frequencyCaption": "spraying three times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2739.wav", "onoffCaption": "spraying at 1.824-2.451, 3.203-4.898, 7.269-8.516", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2776.wav", "onoffCaption": "sheep goat bleating at 2.702-4.702, 6.792-8.792", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2793.wav", "onoffCaption": "spraying at 2.441-3.068, 5.222-5.849 and explosion at 7.549-10.0", "frequencyCaption": "spraying two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2854.wav", "onoffCaption": "cow mooing at 2.28-7.26", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2955.wav", "onoffCaption": "sneeze at 3.434-4.82, 5.419-6.805", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4044.wav", "onoffCaption": "duck quacking at 1.493-3.493, 4.339-6.339, 7.456-9.456", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4110.wav", "onoffCaption": "tapping clicking clanking at 1.135-4.575, 5.943-8.406", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4145.wav", "onoffCaption": "sheep goat bleating at 3.411-5.411, 6.238-8.238", 
"frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4237.wav", "onoffCaption": "spraying at 0.24-1.726", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_4336.wav", "onoffCaption": "burping belching at 2.645-5.824, 7.891-9.922", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4408.wav", "onoffCaption": "explosion at 1.757-4.757, 5.479-8.479 and car horn honking at 6.199-8.199", "frequencyCaption": "explosion two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4412.wav", "onoffCaption": "cat meowing at 0.27-1.63", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4509.wav", "onoffCaption": "woman laughing at 0.787-4.363, 6.706-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4513.wav", "onoffCaption": "gunshot at 0.752-2.752, 3.852-5.852, 6.881-8.881", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4661.wav", "onoffCaption": "thump thud at 1.443-3.782, 5.47-7.809", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4684.wav", "onoffCaption": "door slamming at 2.704-4.079, 6.287-7.811", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4760.wav", "onoffCaption": "tapping clicking clanking at 0.533-3.973, 5.147-8.587", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4785.wav", "onoffCaption": "whistling at 0.652-8.663", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4817.wav", "onoffCaption": "train horn at 0.488-4.488 and sheep goat bleating at 0.833-2.833, 3.373-5.373, 5.952-7.952", "frequencyCaption": "train horn one times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4842.wav", "onoffCaption": 
"tapping clicking clanking at 2.652-6.092 and whistling at 2.885-8.385 and sheep goat bleating at 6.014-8.014", "frequencyCaption": "tapping clicking clanking one times and whistling one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4916.wav", "onoffCaption": "duck quacking at 2.204-4.204, 6.278-8.278", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4959.wav", "onoffCaption": "cat meowing at 0.048-1.06, 1.852-2.864, 4.697-5.709 and train horn at 0.069-3.549, 4.338-7.276", "frequencyCaption": "cat meowing three times and train horn two times"} +{"filepath": "data/multi_event_train/syn_4.wav", "onoffCaption": "spraying at 0.375-2.503 and car horn honking at 4.549-8.136", "frequencyCaption": "spraying one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2007.wav", "onoffCaption": "train horn at 3.118-6.278, 7.28-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2023.wav", "onoffCaption": "car horn honking at 4.143-7.638", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2052.wav", "onoffCaption": "spraying at 2.342-2.992 and cat meowing at 3.473-4.483, 5.231-6.258, 7.243-8.553", "frequencyCaption": "spraying one times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2076.wav", "onoffCaption": "spraying at 0.727-1.508 and thump thud at 5.495-9.413", "frequencyCaption": "spraying one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2089.wav", "onoffCaption": "duck quacking at 0.81-2.81 and spraying at 3.644-4.144, 5.805-6.656", "frequencyCaption": "duck quacking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2093.wav", "onoffCaption": "sneeze at 0.898-2.175, 3.979-5.305 and spraying at 3.016-3.524, 4.306-4.814", "frequencyCaption": "sneeze two times and spraying two times"} +{"filepath": 
"data/multi_event_train/syn_2138.wav", "onoffCaption": "burping belching at 1.702-6.038, 7.211-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2149.wav", "onoffCaption": "door knocking at 1.85-6.683, 7.825-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2188.wav", "onoffCaption": "train horn at 3.407-9.876", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2205.wav", "onoffCaption": "thump thud at 3.709-6.171", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2221.wav", "onoffCaption": "woman laughing at 2.818-5.89, 7.129-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2250.wav", "onoffCaption": "explosion at 2.937-7.937", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2404.wav", "onoffCaption": "tapping clicking clanking at 2.396-5.836, 7.121-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2420.wav", "onoffCaption": "cat meowing at 0.79-2.15 and woman laughing at 5.494-7.61", "frequencyCaption": "cat meowing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2475.wav", "onoffCaption": "door knocking at 3.062-5.374, 6.46-8.772 and woman laughing at 3.618-6.037, 7.169-9.518", "frequencyCaption": "door knocking two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2490.wav", "onoffCaption": "cow mooing at 2.202-5.171, 6.377-8.411", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2606.wav", "onoffCaption": "door knocking at 2.842-5.61", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2653.wav", "onoffCaption": "cat meowing at 3.516-4.601", "frequencyCaption": "cat meowing one times"} +{"filepath": 
"data/multi_event_train/syn_2677.wav", "onoffCaption": "whistling at 0.283-4.767, 5.551-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2692.wav", "onoffCaption": "explosion at 0.361-3.417, 5.348-8.404", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2748.wav", "onoffCaption": "thump thud at 0.302-2.802, 4.125-6.625 and dog barking at 0.683-2.683", "frequencyCaption": "thump thud two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2789.wav", "onoffCaption": "explosion at 1.018-4.571", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2825.wav", "onoffCaption": "door knocking at 0.964-4.732", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2900.wav", "onoffCaption": "train horn at 0.446-3.12, 4.402-6.539 and thump thud at 3.737-7.404", "frequencyCaption": "train horn two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2924.wav", "onoffCaption": "burping belching at 0.202-6.882", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2971.wav", "onoffCaption": "burping belching at 1.991-6.39, 7.29-9.52", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2994.wav", "onoffCaption": "tapping clicking clanking at 2.3-5.74, 6.598-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4011.wav", "onoffCaption": "sneeze at 2.458-3.561, 4.16-6.297", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4035.wav", "onoffCaption": "car horn honking at 3.885-6.703, 7.76-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4060.wav", "onoffCaption": "gunshot at 2.22-4.22, 4.777-6.777, 7.486-9.486", "frequencyCaption": "gunshot three times"} +{"filepath": 
"data/multi_event_train/syn_4085.wav", "onoffCaption": "dog barking at 0.613-2.613, 3.151-5.151, 6.447-8.447", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_4213.wav", "onoffCaption": "explosion at 0.298-5.298, 5.925-8.075", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4246.wav", "onoffCaption": "duck quacking at 1.745-3.745, 4.943-6.943", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4262.wav", "onoffCaption": "car horn honking at 0.025-2.025 and spraying at 3.726-4.353, 6.476-7.103, 8.766-9.393", "frequencyCaption": "car horn honking one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_4287.wav", "onoffCaption": "door knocking at 2.855-5.207", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4308.wav", "onoffCaption": "explosion at 0.564-5.564, 7.629-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4379.wav", "onoffCaption": "dog barking at 0.247-2.247", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4436.wav", "onoffCaption": "gunshot at 3.193-5.433, 7.188-9.188", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4447.wav", "onoffCaption": "thump thud at 3.806-7.724", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4463.wav", "onoffCaption": "tapping clicking clanking at 0.406-3.846, 6.106-8.202", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4486.wav", "onoffCaption": "car horn honking at 3.658-6.584, 7.528-9.535", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4578.wav", "onoffCaption": "car horn honking at 1.804-5.023", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4610.wav", 
"onoffCaption": "sneeze at 0.396-3.604, 4.394-7.602", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4634.wav", "onoffCaption": "door slamming at 0.545-1.778 and duck quacking at 4.483-6.483", "frequencyCaption": "door slamming one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4645.wav", "onoffCaption": "door slamming at 0.033-1.324, 2.029-3.32, 4.632-5.923", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4829.wav", "onoffCaption": "car horn honking at 0.514-3.427, 4.395-7.321, 7.878-10.0", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_4858.wav", "onoffCaption": "train horn at 0.62-4.688, 5.25-9.318", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4899.wav", "onoffCaption": "explosion at 2.455-7.455", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4932.wav", "onoffCaption": "burping belching at 0.383-3.927, 5.815-9.359", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4943.wav", "onoffCaption": "explosion at 0.369-3.363, 4.428-7.086 and cow mooing at 1.762-5.06", "frequencyCaption": "explosion two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4967.wav", "onoffCaption": "door knocking at 1.44-5.59, 6.272-8.575 and cat meowing at 2.072-3.285, 3.8-5.013", "frequencyCaption": "door knocking two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4982.wav", "onoffCaption": "door knocking at 0.035-2.535, 3.47-5.97, 6.806-9.306 and door slamming at 0.149-0.83, 1.334-2.185", "frequencyCaption": "door knocking three times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2039.wav", "onoffCaption": "explosion at 3.189-7.029", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2122.wav", 
"onoffCaption": "spraying at 2.788-3.639, 4.382-5.233", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2177.wav", "onoffCaption": "cat meowing at 0.579-2.715 and door knocking at 1.119-4.672, 5.375-8.928", "frequencyCaption": "cat meowing one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2192.wav", "onoffCaption": "whistling at 2.771-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2304.wav", "onoffCaption": "cat meowing at 0.014-1.55, 2.249-3.785, 5.054-6.59 and door knocking at 0.12-2.341, 4.211-6.432", "frequencyCaption": "cat meowing three times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2351.wav", "onoffCaption": "train horn at 0.696-5.136 and door knocking at 6.603-9.978", "frequencyCaption": "train horn one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2521.wav", "onoffCaption": "door knocking at 0.54-6.6, 7.245-9.422", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2574.wav", "onoffCaption": "train horn at 0.214-5.072, 7.224-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2591.wav", "onoffCaption": "car horn honking at 0.359-3.145", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2649.wav", "onoffCaption": "car horn honking at 0.111-4.511 and thump thud at 2.579-5.626 and door slamming at 2.596-3.596, 4.32-5.3, 6.247-7.622", "frequencyCaption": "car horn honking one times and thump thud one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_2707.wav", "onoffCaption": "whistling at 1.983-6.467 and gunshot at 3.437-5.437, 5.946-7.946", "frequencyCaption": "whistling one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2752.wav", "onoffCaption": "burping belching at 0.679-6.28", "frequencyCaption": "burping 
belching one times"} +{"filepath": "data/multi_event_train/syn_2870.wav", "onoffCaption": "door knocking at 2.203-5.259, 7.501-9.947", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2895.wav", "onoffCaption": "cow mooing at 0.668-3.65, 4.571-7.553 and dog barking at 2.804-4.804, 6.928-9.366", "frequencyCaption": "cow mooing two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_4134.wav", "onoffCaption": "tapping clicking clanking at 2.236-5.676", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4184.wav", "onoffCaption": "gunshot at 2.648-4.648 and woman laughing at 7.575-10.0", "frequencyCaption": "gunshot one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4209.wav", "onoffCaption": "door slamming at 2.279-3.532 and thump thud at 6.078-8.306 and door knocking at 7.164-9.352", "frequencyCaption": "door slamming one times and thump thud one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4312.wav", "onoffCaption": "tapping clicking clanking at 0.624-4.064, 4.836-8.276", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4347.wav", "onoffCaption": "cat meowing at 1.85-4.754, 6.696-7.708", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4479.wav", "onoffCaption": "duck quacking at 3.224-5.224 and whistling at 7.322-10.0", "frequencyCaption": "duck quacking one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4562.wav", "onoffCaption": "sheep goat bleating at 0.675-2.675, 5.049-7.049 and spraying at 2.84-4.968", "frequencyCaption": "sheep goat bleating two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4587.wav", "onoffCaption": "door slamming at 1.539-4.256, 5.225-7.942 and gunshot at 5.959-7.959", "frequencyCaption": "door slamming two times and 
gunshot one times"} +{"filepath": "data/multi_event_train/syn_4711.wav", "onoffCaption": "cow mooing at 2.135-5.145, 6.111-8.674 and explosion at 3.265-8.265", "frequencyCaption": "cow mooing two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4744.wav", "onoffCaption": "explosion at 2.169-4.43, 5.09-7.351", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4833.wav", "onoffCaption": "duck quacking at 1.645-3.645, 5.743-7.743", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4866.wav", "onoffCaption": "sneeze at 0.481-5.01, 6.331-8.038", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4883.wav", "onoffCaption": "door slamming at 2.32-4.233, 5.384-7.384", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4928.wav", "onoffCaption": "thump thud at 0.16-2.622, 3.878-6.34", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4998.wav", "onoffCaption": "train horn at 0.32-2.787, 4.37-7.25 and sheep goat bleating at 3.461-6.781, 7.415-9.415", "frequencyCaption": "train horn two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2125.wav", "onoffCaption": "burping belching at 3.079-6.079, 7.222-9.897", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2170.wav", "onoffCaption": "spraying at 0.023-2.607, 3.651-4.255, 4.818-6.065 and duck quacking at 0.269-2.269", "frequencyCaption": "spraying three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2195.wav", "onoffCaption": "burping belching at 0.087-3.346, 4.593-6.616 and explosion at 4.732-7.732", "frequencyCaption": "burping belching two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2218.wav", "onoffCaption": "tapping clicking clanking at 2.113-5.553, 6.579-9.274", "frequencyCaption": 
"tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2303.wav", "onoffCaption": "burping belching at 2.154-4.975, 5.67-8.491", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2356.wav", "onoffCaption": "cow mooing at 2.672-5.641, 7.662-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2526.wav", "onoffCaption": "cow mooing at 0.244-3.226, 4.079-6.625, 7.594-10.0 and car horn honking at 3.697-6.872", "frequencyCaption": "cow mooing three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2700.wav", "onoffCaption": "dog barking at 1.524-3.524, 4.817-6.817", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2755.wav", "onoffCaption": "car horn honking at 1.663-5.317, 6.053-8.202", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2822.wav", "onoffCaption": "whistling at 2.19-5.065, 6.03-8.905", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2892.wav", "onoffCaption": "car horn honking at 0.074-3.728 and door slamming at 6.804-8.328", "frequencyCaption": "car horn honking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2939.wav", "onoffCaption": "cow mooing at 1.111-4.093, 4.704-6.768, 7.68-10.0", "frequencyCaption": "cow mooing three times"} +{"filepath": "data/multi_event_train/syn_2989.wav", "onoffCaption": "cat meowing at 2.02-3.575, 4.742-6.286 and door knocking at 2.836-6.452", "frequencyCaption": "cat meowing two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4028.wav", "onoffCaption": "whistling at 2.589-7.073", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4098.wav", "onoffCaption": "sheep goat bleating at 1.437-3.437, 3.974-6.969 and door knocking at 1.638-6.638, 7.29-10.0", "frequencyCaption": 
"sheep goat bleating two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_4133.wav", "onoffCaption": "explosion at 0.035-2.209, 4.651-7.242 and door slamming at 0.721-1.86, 2.689-4.454, 5.508-7.728", "frequencyCaption": "explosion two times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_4161.wav", "onoffCaption": "sneeze at 2.452-4.928, 5.8-8.276", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4166.wav", "onoffCaption": "cat meowing at 1.548-3.438, 4.507-6.397, 7.079-8.969", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4183.wav", "onoffCaption": "burping belching at 1.619-5.125, 6.442-9.948", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4340.wav", "onoffCaption": "cow mooing at 0.835-3.817, 5.853-8.84", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4530.wav", "onoffCaption": "whistling at 1.37-10.0 and gunshot at 3.876-5.876", "frequencyCaption": "whistling one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4537.wav", "onoffCaption": "burping belching at 1.095-4.639, 5.725-8.07 and spraying at 1.935-2.802, 3.928-4.555", "frequencyCaption": "burping belching two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4565.wav", "onoffCaption": "burping belching at 1.951-4.712, 5.866-8.138", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4580.wav", "onoffCaption": "thump thud at 0.07-2.841, 5.122-7.893 and sheep goat bleating at 1.296-3.296", "frequencyCaption": "thump thud two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4716.wav", "onoffCaption": "burping belching at 3.69-7.25", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4834.wav", "onoffCaption": "door slamming at 
2.952-5.952, 7.214-10.0", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4861.wav", "onoffCaption": "duck quacking at 0.25-2.25 and thump thud at 0.885-3.224 and door knocking at 6.572-10.0", "frequencyCaption": "duck quacking one times and thump thud one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4884.wav", "onoffCaption": "cat meowing at 0.347-1.357 and spraying at 0.846-2.021", "frequencyCaption": "cat meowing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_3.wav", "onoffCaption": "duck quacking at 0.203-2.203, 2.739-4.739 and train horn at 1.131-5.461, 6.713-9.193", "frequencyCaption": "duck quacking two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2000.wav", "onoffCaption": "burping belching at 0.839-3.839, 5.914-8.914", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2024.wav", "onoffCaption": "thump thud at 0.692-5.142", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2071.wav", "onoffCaption": "woman laughing at 2.728-5.147, 6.23-8.649", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2094.wav", "onoffCaption": "burping belching at 0.22-2.843, 3.448-5.813, 7.816-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_2202.wav", "onoffCaption": "tapping clicking clanking at 0.947-4.387, 5.953-9.393 and woman laughing at 1.93-4.167", "frequencyCaption": "tapping clicking clanking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2257.wav", "onoffCaption": "door knocking at 0.06-3.676 and sneeze at 0.255-3.365 and door slamming at 2.1-3.119", "frequencyCaption": "door knocking one times and sneeze one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2296.wav", "onoffCaption": "gunshot at 0.013-2.013, 
3.206-5.206, 6.18-8.18", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_2319.wav", "onoffCaption": "thump thud at 0.838-3.3 and woman laughing at 2.775-5.57, 6.239-9.034", "frequencyCaption": "thump thud one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2427.wav", "onoffCaption": "sneeze at 2.616-3.79, 4.47-6.177 and cat meowing at 7.473-9.017", "frequencyCaption": "sneeze two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2456.wav", "onoffCaption": "door slamming at 0.534-3.017, 4.185-6.668, 7.849-10.0", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2468.wav", "onoffCaption": "dog barking at 0.272-2.272, 4.17-6.17, 7.135-9.135", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2472.wav", "onoffCaption": "woman laughing at 2.006-8.74", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2497.wav", "onoffCaption": "sneeze at 1.87-4.109, 4.688-6.927", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2569.wav", "onoffCaption": "car horn honking at 0.611-5.518, 7.265-10.0 and sheep goat bleating at 0.81-2.81, 4.698-6.698", "frequencyCaption": "car horn honking two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2573.wav", "onoffCaption": "dog barking at 0.815-2.815, 3.608-5.608 and explosion at 6.902-9.493", "frequencyCaption": "dog barking two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2596.wav", "onoffCaption": "duck quacking at 0.302-2.302 and door slamming at 6.473-9.254", "frequencyCaption": "duck quacking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2601.wav", "onoffCaption": "burping belching at 2.939-5.037 and spraying at 4.187-4.837, 5.94-7.949, 9.176-9.78", "frequencyCaption": "burping belching one times 
and spraying three times"} +{"filepath": "data/multi_event_train/syn_2654.wav", "onoffCaption": "cat meowing at 0.362-1.778, 2.393-3.809, 4.914-6.33 and car horn honking at 3.445-6.231", "frequencyCaption": "cat meowing three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2838.wav", "onoffCaption": "door slamming at 1.159-3.522 and dog barking at 2.177-4.177, 4.702-6.702, 7.836-9.836", "frequencyCaption": "door slamming one times and dog barking three times"} +{"filepath": "data/multi_event_train/syn_2849.wav", "onoffCaption": "door knocking at 2.197-4.965, 5.543-8.39", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2877.wav", "onoffCaption": "woman laughing at 1.891-4.457, 5.083-7.451 and dog barking at 4.627-7.065, 7.622-10.0", "frequencyCaption": "woman laughing two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2888.wav", "onoffCaption": "gunshot at 3.402-5.402, 6.683-8.683", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2923.wav", "onoffCaption": "spraying at 2.881-3.785", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2952.wav", "onoffCaption": "duck quacking at 1.431-3.431, 5.499-7.499", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2976.wav", "onoffCaption": "dog barking at 0.106-2.106, 3.066-5.066, 7.405-9.405", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2993.wav", "onoffCaption": "spraying at 0.875-2.637, 3.389-4.389, 5.063-6.004 and door slamming at 2.355-4.268", "frequencyCaption": "spraying three times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4032.wav", "onoffCaption": "car horn honking at 0.015-2.515, 3.464-5.964", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4067.wav", "onoffCaption": "cow mooing at 1.537-5.966 
and burping belching at 3.187-5.81", "frequencyCaption": "cow mooing one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4082.wav", "onoffCaption": "door knocking at 0.068-4.443 and duck quacking at 0.264-2.264 and dog barking at 2.234-4.234, 4.989-6.989", "frequencyCaption": "door knocking one times and duck quacking one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_4129.wav", "onoffCaption": "tapping clicking clanking at 1.318-4.758 and sheep goat bleating at 3.041-5.041", "frequencyCaption": "tapping clicking clanking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4199.wav", "onoffCaption": "dog barking at 0.158-2.158, 3.19-5.19, 7.467-9.467", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_4214.wav", "onoffCaption": "train horn at 1.922-5.456", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4230.wav", "onoffCaption": "train horn at 0.323-3.563, 5.508-8.471", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4241.wav", "onoffCaption": "whistling at 0.723-3.698 and tapping clicking clanking at 5.638-9.078", "frequencyCaption": "whistling one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4315.wav", "onoffCaption": "burping belching at 1.063-7.743 and cat meowing at 6.9-7.921, 8.476-9.52", "frequencyCaption": "burping belching one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4415.wav", "onoffCaption": "car horn honking at 0.981-5.381, 6.324-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4431.wav", "onoffCaption": "woman laughing at 0.175-6.909, 7.845-9.926", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4464.wav", "onoffCaption": "door slamming at 2.516-3.494, 4.587-5.527, 
6.595-7.97", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4481.wav", "onoffCaption": "door knocking at 2.762-5.492, 6.815-8.975", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4617.wav", "onoffCaption": "gunshot at 1.616-3.616, 4.192-6.192, 7.032-9.032", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4642.wav", "onoffCaption": "explosion at 3.239-8.105", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4658.wav", "onoffCaption": "train horn at 1.537-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4666.wav", "onoffCaption": "cow mooing at 1.708-5.006, 6.168-9.466", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4683.wav", "onoffCaption": "tapping clicking clanking at 3.129-6.569", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4743.wav", "onoffCaption": "gunshot at 2.914-4.914, 7.35-9.35", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4759.wav", "onoffCaption": "sneeze at 3.36-5.473, 6.677-8.79", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4798.wav", "onoffCaption": "tapping clicking clanking at 0.768-4.208, 5.52-8.96", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4935.wav", "onoffCaption": "cow mooing at 1.516-4.814, 7.173-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4960.wav", "onoffCaption": "spraying at 1.552-2.799, 4.582-7.042 and train horn at 2.416-5.896", "frequencyCaption": "spraying two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4985.wav", "onoffCaption": "train horn at 0.388-4.388 and sneeze at 1.181-2.458, 4.879-6.113", "frequencyCaption": "train horn 
one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_2055.wav", "onoffCaption": "door slamming at 0.237-1.215, 1.804-3.345, 4.557-7.436 and duck quacking at 0.321-2.321", "frequencyCaption": "door slamming three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2101.wav", "onoffCaption": "gunshot at 1.697-3.827", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2154.wav", "onoffCaption": "dog barking at 0.723-2.723, 3.316-5.316, 6.483-8.483", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2226.wav", "onoffCaption": "door slamming at 0.091-2.091, 2.603-3.454, 4.045-4.883", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2269.wav", "onoffCaption": "door slamming at 1.059-2.972 and gunshot at 2.674-4.674, 5.625-7.625", "frequencyCaption": "door slamming one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2273.wav", "onoffCaption": "woman laughing at 1.117-3.354, 3.88-6.117", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2327.wav", "onoffCaption": "door slamming at 1.389-4.363, 6.253-9.227", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2368.wav", "onoffCaption": "car horn honking at 2.647-7.159", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2372.wav", "onoffCaption": "whistling at 0.111-2.986", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2397.wav", "onoffCaption": "tapping clicking clanking at 0.796-4.236", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2403.wav", "onoffCaption": "explosion at 2.342-5.469 and gunshot at 7.933-10.0", "frequencyCaption": "explosion one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2419.wav", 
"onoffCaption": "gunshot at 0.477-2.477, 3.149-5.149 and door knocking at 1.493-4.613", "frequencyCaption": "gunshot two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2502.wav", "onoffCaption": "gunshot at 0.827-2.827, 4.117-6.117", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2518.wav", "onoffCaption": "cow mooing at 0.012-2.981", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2557.wav", "onoffCaption": "sneeze at 3.357-5.818, 7.048-8.576", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2625.wav", "onoffCaption": "thump thud at 1.569-6.019, 6.528-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2670.wav", "onoffCaption": "cow mooing at 1.159-4.128, 5.227-8.196 and explosion at 2.413-7.413", "frequencyCaption": "cow mooing two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2695.wav", "onoffCaption": "train horn at 0.6-4.781 and spraying at 0.721-1.453", "frequencyCaption": "train horn one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2724.wav", "onoffCaption": "woman laughing at 0.35-7.362 and tapping clicking clanking at 3.906-7.346", "frequencyCaption": "woman laughing one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2771.wav", "onoffCaption": "door slamming at 0.63-3.604, 4.655-7.629 and cat meowing at 2.295-4.235, 5.62-6.629 and duck quacking at 6.304-8.304", "frequencyCaption": "door slamming two times and cat meowing two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2794.wav", "onoffCaption": "gunshot at 1.926-4.199, 5.42-7.42", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2806.wav", "onoffCaption": "burping belching at 0.386-2.589, 3.628-5.831, 6.503-8.706 and door knocking at 2.198-7.031", 
"frequencyCaption": "burping belching three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2853.wav", "onoffCaption": "thump thud at 1.981-4.443, 6.001-8.463", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2907.wav", "onoffCaption": "cow mooing at 0.749-3.759 and sheep goat bleating at 2.754-7.474 and dog barking at 3.083-5.083, 6.184-8.184", "frequencyCaption": "cow mooing one times and sheep goat bleating one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2948.wav", "onoffCaption": "cat meowing at 0.559-2.291 and car horn honking at 6.429-10.0", "frequencyCaption": "cat meowing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4016.wav", "onoffCaption": "sneeze at 2.058-3.289, 5.747-6.978 and door knocking at 4.891-7.194", "frequencyCaption": "sneeze two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4043.wav", "onoffCaption": "door knocking at 0.075-4.225 and train horn at 6.32-10.0", "frequencyCaption": "door knocking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4059.wav", "onoffCaption": "gunshot at 0.745-2.745, 4.879-6.879", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4117.wav", "onoffCaption": "cat meowing at 0.091-1.112, 2.04-4.57, 5.573-7.533", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4142.wav", "onoffCaption": "dog barking at 0.414-2.414, 3.557-5.557, 6.334-8.334 and train horn at 1.087-3.242", "frequencyCaption": "dog barking three times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4158.wav", "onoffCaption": "burping belching at 0.49-4.49, 5.449-9.449", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4265.wav", "onoffCaption": "dog barking at 0.175-5.812, 6.78-10.0", "frequencyCaption": "dog barking two 
times"} +{"filepath": "data/multi_event_train/syn_4280.wav", "onoffCaption": "dog barking at 2.669-4.669 and sheep goat bleating at 7.512-9.512", "frequencyCaption": "dog barking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4331.wav", "onoffCaption": "cow mooing at 1.208-6.188 and duck quacking at 4.284-6.284", "frequencyCaption": "cow mooing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4364.wav", "onoffCaption": "burping belching at 0.19-3.19, 5.308-8.308 and dog barking at 0.267-2.267 and gunshot at 1.186-3.186, 5.249-7.249", "frequencyCaption": "burping belching two times and dog barking one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4381.wav", "onoffCaption": "spraying at 0.23-1.925 and car horn honking at 4.134-8.646", "frequencyCaption": "spraying one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4440.wav", "onoffCaption": "cow mooing at 0.007-2.976, 5.371-7.46", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4514.wav", "onoffCaption": "tapping clicking clanking at 1.267-4.707, 5.587-9.027", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4541.wav", "onoffCaption": "dog barking at 2.832-4.832, 5.772-7.772", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4629.wav", "onoffCaption": "duck quacking at 0.526-2.526, 3.179-5.179", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4633.wav", "onoffCaption": "duck quacking at 0.812-2.812, 3.631-5.631, 6.45-8.45 and cat meowing at 2.522-4.077 and burping belching at 2.965-6.965", "frequencyCaption": "duck quacking three times and cat meowing one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4699.wav", "onoffCaption": "door knocking at 3.099-5.121, 5.729-7.751", 
"frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4728.wav", "onoffCaption": "explosion at 0.713-5.634, 6.498-9.498", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4732.wav", "onoffCaption": "tapping clicking clanking at 2.353-5.793, 6.59-9.076", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4767.wav", "onoffCaption": "dog barking at 2.492-4.492, 6.811-8.811", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4782.wav", "onoffCaption": "whistling at 3.517-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4810.wav", "onoffCaption": "cat meowing at 2.171-3.171", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4845.wav", "onoffCaption": "door knocking at 2.751-5.321, 6.377-8.947", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4911.wav", "onoffCaption": "thump thud at 1.568-3.907, 4.45-6.789, 7.746-10.0", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_4944.wav", "onoffCaption": "woman laughing at 0.34-2.54, 4.896-7.096", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2115.wav", "onoffCaption": "sheep goat bleating at 0.902-2.902, 4.596-6.596 and cow mooing at 1.699-4.709, 5.792-8.247", "frequencyCaption": "sheep goat bleating two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2140.wav", "onoffCaption": "burping belching at 0.728-3.728, 4.523-7.523 and car horn honking at 2.581-7.488", "frequencyCaption": "burping belching two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2228.wav", "onoffCaption": "whistling at 1.17-6.67", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2298.wav", "onoffCaption": 
"car horn honking at 0.039-2.857, 3.365-6.183, 6.974-9.792", "frequencyCaption": "car horn honking three times"} +{"filepath": "data/multi_event_train/syn_2333.wav", "onoffCaption": "gunshot at 3.701-5.701, 6.615-8.615", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2366.wav", "onoffCaption": "woman laughing at 2.103-4.328, 4.915-7.115", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2383.wav", "onoffCaption": "door slamming at 3.631-6.06, 6.913-7.764", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2458.wav", "onoffCaption": "duck quacking at 1.91-3.91, 5.691-7.691", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2516.wav", "onoffCaption": "tapping clicking clanking at 2.089-5.529 and train horn at 2.833-7.014", "frequencyCaption": "tapping clicking clanking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2543.wav", "onoffCaption": "cow mooing at 3.059-6.028, 7.252-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2730.wav", "onoffCaption": "dog barking at 0.421-2.421, 4.285-6.285", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2765.wav", "onoffCaption": "whistling at 0.918-3.793, 4.747-7.622", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2780.wav", "onoffCaption": "sneeze at 3.002-4.166, 4.977-7.343", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2812.wav", "onoffCaption": "train horn at 1.579-5.579 and sheep goat bleating at 3.344-5.344", "frequencyCaption": "train horn one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2847.wav", "onoffCaption": "dog barking at 0.439-2.439 and tapping clicking clanking at 5.168-8.608", "frequencyCaption": "dog barking one times and tapping 
clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2909.wav", "onoffCaption": "duck quacking at 0.518-2.518, 4.844-6.844", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4018.wav", "onoffCaption": "spraying at 0.372-0.976, 2.675-3.302, 5.276-5.851 and car horn honking at 3.509-7.831", "frequencyCaption": "spraying three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4103.wav", "onoffCaption": "cat meowing at 2.286-5.316, 6.353-8.313 and woman laughing at 5.253-7.453 and sneeze at 5.34-6.666", "frequencyCaption": "cat meowing two times and woman laughing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4156.wav", "onoffCaption": "explosion at 0.661-3.661", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4325.wav", "onoffCaption": "gunshot at 0.211-2.211", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4370.wav", "onoffCaption": "gunshot at 1.844-3.844, 5.097-7.097, 7.762-9.762", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4395.wav", "onoffCaption": "dog barking at 0.207-2.607, 4.311-6.311", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4500.wav", "onoffCaption": "explosion at 0.364-5.23, 7.676-10.0 and burping belching at 1.243-5.579", "frequencyCaption": "explosion two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4555.wav", "onoffCaption": "door knocking at 0.82-5.353, 6.393-8.795", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4668.wav", "onoffCaption": "duck quacking at 1.603-3.603, 5.311-7.311", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4726.wav", "onoffCaption": "thump thud at 2.21-5.877", "frequencyCaption": "thump thud one times"} +{"filepath": 
"data/multi_event_train/syn_4773.wav", "onoffCaption": "sheep goat bleating at 1.094-3.094, 4.244-6.244, 7.536-9.536", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4796.wav", "onoffCaption": "sneeze at 0.242-1.568, 2.708-4.034, 4.92-6.246", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4804.wav", "onoffCaption": "duck quacking at 0.366-2.366, 3.404-5.404 and woman laughing at 7.732-10.0", "frequencyCaption": "duck quacking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4851.wav", "onoffCaption": "door slamming at 0.634-1.937, 3.148-4.451, 6.589-7.892", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4905.wav", "onoffCaption": "burping belching at 1.445-7.389 and cow mooing at 4.409-7.707", "frequencyCaption": "burping belching one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2014.wav", "onoffCaption": "duck quacking at 0.1-2.1, 2.681-4.681, 5.3-7.3 and woman laughing at 0.878-3.103, 5.052-7.548", "frequencyCaption": "duck quacking three times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2041.wav", "onoffCaption": "gunshot at 0.756-2.756, 4.93-6.93", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2232.wav", "onoffCaption": "cow mooing at 0.046-3.015 and sneeze at 0.117-1.704", "frequencyCaption": "cow mooing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2267.wav", "onoffCaption": "thump thud at 0.246-4.621 and sheep goat bleating at 1.704-3.704, 5.086-8.086 and dog barking at 4.822-6.822", "frequencyCaption": "thump thud one times and sheep goat bleating two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2282.wav", "onoffCaption": "spraying at 0.196-0.797", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2329.wav", 
"onoffCaption": "sneeze at 0.061-2.378 and gunshot at 0.877-2.877 and dog barking at 1.829-3.829", "frequencyCaption": "sneeze one times and gunshot one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2399.wav", "onoffCaption": "cow mooing at 0.806-5.235", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2417.wav", "onoffCaption": "gunshot at 2.087-4.087, 4.956-6.956, 7.941-9.941", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_2442.wav", "onoffCaption": "door knocking at 3.887-6.289, 7.303-9.705", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2559.wav", "onoffCaption": "car horn honking at 1.034-4.529 and dog barking at 6.57-8.57", "frequencyCaption": "car horn honking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2631.wav", "onoffCaption": "burping belching at 0.394-3.596 and door slamming at 1.021-3.021 and thump thud at 5.105-9.555", "frequencyCaption": "burping belching one times and door slamming one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2664.wav", "onoffCaption": "car horn honking at 0.045-2.831 and thump thud at 6.644-9.415", "frequencyCaption": "car horn honking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2681.wav", "onoffCaption": "woman laughing at 0.0-3.085 and door knocking at 5.304-8.804", "frequencyCaption": "woman laughing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2808.wav", "onoffCaption": "cat meowing at 0.026-1.026, 1.84-2.84 and woman laughing at 4.173-6.456", "frequencyCaption": "cat meowing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2913.wav", "onoffCaption": "woman laughing at 3.473-6.268", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2946.wav", "onoffCaption": "burping 
belching at 2.011-7.612", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4002.wav", "onoffCaption": "gunshot at 0.13-2.13 and woman laughing at 1.245-3.445, 5.933-8.133", "frequencyCaption": "gunshot one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4057.wav", "onoffCaption": "door knocking at 1.551-3.854, 6.338-8.641", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4119.wav", "onoffCaption": "gunshot at 4.016-6.016", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4224.wav", "onoffCaption": "duck quacking at 0.733-2.733, 3.793-5.793, 6.393-8.393 and sheep goat bleating at 2.124-5.204 and cat meowing at 6.595-8.543", "frequencyCaption": "duck quacking three times and sheep goat bleating one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4271.wav", "onoffCaption": "cat meowing at 2.651-3.736, 4.47-6.63, 7.481-8.502", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4294.wav", "onoffCaption": "whistling at 1.904-4.779, 6.225-8.454", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4401.wav", "onoffCaption": "cow mooing at 0.424-3.406, 5.35-7.852 and car horn honking at 0.85-5.099, 5.722-8.254", "frequencyCaption": "cow mooing two times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4454.wav", "onoffCaption": "door knocking at 2.973-5.436 and spraying at 7.871-8.621", "frequencyCaption": "door knocking one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4627.wav", "onoffCaption": "cat meowing at 0.803-1.814, 2.735-4.045 and woman laughing at 3.119-6.406", "frequencyCaption": "cat meowing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4672.wav", "onoffCaption": "car horn honking at 0.057-3.552, 4.582-7.095", 
"frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4697.wav", "onoffCaption": "gunshot at 0.129-2.129, 2.887-4.887, 5.731-8.232 and door knocking at 5.47-8.2", "frequencyCaption": "gunshot three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4769.wav", "onoffCaption": "woman laughing at 2.871-5.29 and duck quacking at 3.644-5.644, 7.393-9.393", "frequencyCaption": "woman laughing one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4921.wav", "onoffCaption": "dog barking at 0.349-2.787, 4.269-6.269", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4950.wav", "onoffCaption": "cat meowing at 0.651-2.268, 3.577-4.588, 5.723-7.267 and duck quacking at 3.073-5.073", "frequencyCaption": "cat meowing three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2030.wav", "onoffCaption": "thump thud at 3.336-7.786", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2065.wav", "onoffCaption": "sneeze at 0.087-1.375, 2.994-5.307, 6.757-8.702", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2080.wav", "onoffCaption": "dog barking at 2.86-4.86, 6.259-8.259", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2164.wav", "onoffCaption": "duck quacking at 1.029-3.029, 4.586-6.586, 7.745-9.745", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2216.wav", "onoffCaption": "car horn honking at 1.183-4.77 and cat meowing at 3.31-4.927, 6.66-8.277", "frequencyCaption": "car horn honking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2243.wav", "onoffCaption": "sneeze at 0.224-1.327 and duck quacking at 0.322-2.322", "frequencyCaption": "sneeze one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2358.wav", 
"onoffCaption": "burping belching at 2.599-5.801, 7.699-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2433.wav", "onoffCaption": "gunshot at 0.442-2.442", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2466.wav", "onoffCaption": "whistling at 1.816-6.3", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2483.wav", "onoffCaption": "dog barking at 1.83-3.83, 5.526-7.526", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2528.wav", "onoffCaption": "explosion at 1.045-3.798, 4.36-6.68", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2598.wav", "onoffCaption": "burping belching at 2.37-6.37, 7.357-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2615.wav", "onoffCaption": "duck quacking at 3.183-5.183", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2640.wav", "onoffCaption": "dog barking at 0.901-2.901, 5.102-7.102", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2879.wav", "onoffCaption": "spraying at 0.77-3.354, 5.572-6.353 and woman laughing at 3.378-6.473, 7.664-10.0 and sheep goat bleating at 4.777-6.777", "frequencyCaption": "spraying two times and woman laughing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2937.wav", "onoffCaption": "door knocking at 0.913-3.98, 5.526-7.766", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2962.wav", "onoffCaption": "burping belching at 0.503-3.503, 5.097-8.097 and door knocking at 7.1-9.502", "frequencyCaption": "burping belching two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2987.wav", "onoffCaption": "dog barking at 0.633-2.633, 4.02-6.02, 7.631-9.742", "frequencyCaption": "dog 
barking three times"} +{"filepath": "data/multi_event_train/syn_4026.wav", "onoffCaption": "door slamming at 0.769-1.269 and dog barking at 4.994-6.994", "frequencyCaption": "door slamming one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4073.wav", "onoffCaption": "duck quacking at 0.686-2.686, 4.676-6.676", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4096.wav", "onoffCaption": "tapping clicking clanking at 0.677-4.117", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4168.wav", "onoffCaption": "door knocking at 2.99-5.342 and tapping clicking clanking at 7.271-10.0", "frequencyCaption": "door knocking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4200.wav", "onoffCaption": "whistling at 2.405-5.38 and car horn honking at 3.25-7.65", "frequencyCaption": "whistling one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4255.wav", "onoffCaption": "door slamming at 2.703-3.82, 4.869-6.16, 6.968-8.509", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4425.wav", "onoffCaption": "whistling at 1.576-7.683", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4470.wav", "onoffCaption": "door slamming at 0.129-0.81, 1.333-3.559, 4.597-5.097 and door knocking at 7.313-10.0", "frequencyCaption": "door slamming three times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4495.wav", "onoffCaption": "sneeze at 3.101-5.186, 7.527-9.234", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4603.wav", "onoffCaption": "cow mooing at 1.547-4.557, 5.816-8.826 and cat meowing at 2.421-4.415", "frequencyCaption": "cow mooing two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4656.wav", "onoffCaption": "burping belching at 
1.681-4.681, 5.243-8.243", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4718.wav", "onoffCaption": "train horn at 2.361-4.801 and sheep goat bleating at 7.701-9.701", "frequencyCaption": "train horn one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4974.wav", "onoffCaption": "cow mooing at 0.276-3.574 and burping belching at 0.774-3.774 and thump thud at 1.698-4.16", "frequencyCaption": "cow mooing one times and burping belching one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4991.wav", "onoffCaption": "explosion at 3.281-8.281", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2131.wav", "onoffCaption": "spraying at 2.591-3.113 and duck quacking at 4.191-6.191 and cow mooing at 4.489-7.499", "frequencyCaption": "spraying one times and duck quacking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2181.wav", "onoffCaption": "door knocking at 2.812-7.345, 7.981-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2259.wav", "onoffCaption": "dog barking at 0.445-2.445, 3.999-5.999 and gunshot at 4.324-6.324", "frequencyCaption": "dog barking two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2317.wav", "onoffCaption": "tapping clicking clanking at 0.145-3.585", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2342.wav", "onoffCaption": "burping belching at 1.788-6.124, 7.024-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2429.wav", "onoffCaption": "duck quacking at 2.986-4.986 and door slamming at 6.513-9.23", "frequencyCaption": "duck quacking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2499.wav", "onoffCaption": "sheep goat bleating at 0.024-3.32 and dog barking at 1.92-3.92", 
"frequencyCaption": "sheep goat bleating one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2532.wav", "onoffCaption": "explosion at 2.04-4.631, 7.094-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2567.wav", "onoffCaption": "door slamming at 0.156-2.074", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_2582.wav", "onoffCaption": "car horn honking at 3.286-7.535", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2714.wav", "onoffCaption": "sneeze at 2.176-4.121, 6.135-8.08", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2741.wav", "onoffCaption": "sneeze at 0.258-1.422, 3.657-4.821 and woman laughing at 7.507-10.0", "frequencyCaption": "sneeze two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2836.wav", "onoffCaption": "thump thud at 2.554-5.054", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2863.wav", "onoffCaption": "duck quacking at 0.365-2.365 and car horn honking at 4.688-8.183 and cat meowing at 5.873-7.148", "frequencyCaption": "duck quacking one times and car horn honking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2886.wav", "onoffCaption": "burping belching at 0.982-4.542", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2978.wav", "onoffCaption": "cat meowing at 0.357-1.901, 4.148-5.692, 6.877-8.421", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4069.wav", "onoffCaption": "train horn at 0.267-3.747, 4.969-7.643", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4127.wav", "onoffCaption": "explosion at 1.4-3.407, 5.53-7.544", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4172.wav", 
"onoffCaption": "thump thud at 3.642-6.104", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4197.wav", "onoffCaption": "car horn honking at 0.548-2.895, 4.036-6.962 and duck quacking at 2.823-4.823, 5.992-7.992", "frequencyCaption": "car horn honking two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4301.wav", "onoffCaption": "door knocking at 0.963-3.315, 4.703-7.055", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4354.wav", "onoffCaption": "car horn honking at 2.552-5.465", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4488.wav", "onoffCaption": "gunshot at 0.894-3.064, 4.684-6.684 and spraying at 1.03-1.762, 3.015-3.747", "frequencyCaption": "gunshot two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4524.wav", "onoffCaption": "explosion at 1.076-3.083, 5.017-7.024", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4571.wav", "onoffCaption": "thump thud at 0.313-2.541 and door slamming at 4.244-5.082, 5.615-6.453, 7.056-7.894", "frequencyCaption": "thump thud one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_4594.wav", "onoffCaption": "woman laughing at 3.108-5.713, 6.503-8.987 and door slamming at 5.806-7.197", "frequencyCaption": "woman laughing two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4619.wav", "onoffCaption": "door knocking at 0.244-2.814, 4.673-7.012 and gunshot at 0.491-2.491 and sneeze at 3.661-4.895", "frequencyCaption": "door knocking two times and gunshot one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4702.wav", "onoffCaption": "thump thud at 2.28-6.73, 7.816-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4757.wav", "onoffCaption": "dog barking at 0.149-2.149 and tapping clicking 
clanking at 0.464-3.904 and burping belching at 5.381-8.925", "frequencyCaption": "dog barking one times and tapping clicking clanking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4820.wav", "onoffCaption": "cow mooing at 2.125-5.135 and gunshot at 3.983-5.983, 7.818-9.818", "frequencyCaption": "cow mooing one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4872.wav", "onoffCaption": "thump thud at 2.268-6.718", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4875.wav", "onoffCaption": "door slamming at 0.609-2.609, 4.435-6.435 and sheep goat bleating at 2.272-4.272, 5.291-7.291", "frequencyCaption": "door slamming two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4890.wav", "onoffCaption": "sheep goat bleating at 3.192-5.192, 5.962-7.962 and spraying at 3.225-5.353, 6.646-8.774", "frequencyCaption": "sheep goat bleating two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4969.wav", "onoffCaption": "sneeze at 0.02-1.979 and door slamming at 1.078-2.453", "frequencyCaption": "sneeze one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2037.wav", "onoffCaption": "cow mooing at 1.731-4.713, 6.368-9.35 and spraying at 2.99-4.247, 5.36-6.617", "frequencyCaption": "cow mooing two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2078.wav", "onoffCaption": "cat meowing at 0.134-3.038, 4.259-7.163 and explosion at 0.649-2.742, 3.601-5.694 and whistling at 1.104-5.588, 6.512-9.029", "frequencyCaption": "cat meowing two times and explosion two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_2136.wav", "onoffCaption": "spraying at 0.061-0.665, 2.012-2.793 and explosion at 6.775-8.839", "frequencyCaption": "spraying two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2163.wav", "onoffCaption": "train horn at 
1.284-4.084, 5.069-7.95 and gunshot at 2.44-4.44", "frequencyCaption": "train horn two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2186.wav", "onoffCaption": "train horn at 2.174-7.918 and door knocking at 2.306-4.494, 6.83-9.018", "frequencyCaption": "train horn one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2310.wav", "onoffCaption": "cow mooing at 0.644-5.073, 7.545-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2345.wav", "onoffCaption": "cow mooing at 0.329-5.309, 6.179-9.161", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2461.wav", "onoffCaption": "explosion at 1.207-3.3, 3.876-5.969 and car horn honking at 2.517-5.43, 7.292-10.0", "frequencyCaption": "explosion two times and car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2535.wav", "onoffCaption": "cow mooing at 0.06-3.07, 5.163-8.173 and thump thud at 0.481-4.148, 4.943-7.171", "frequencyCaption": "cow mooing two times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_2560.wav", "onoffCaption": "thump thud at 3.064-6.111", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2585.wav", "onoffCaption": "cow mooing at 0.75-5.73, 6.405-8.768", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2608.wav", "onoffCaption": "train horn at 1.016-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2713.wav", "onoffCaption": "explosion at 1.31-4.304, 5.298-8.298", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2746.wav", "onoffCaption": "explosion at 0.892-3.764, 4.383-6.907, 7.915-10.0", "frequencyCaption": "explosion three times"} +{"filepath": "data/multi_event_train/syn_2831.wav", "onoffCaption": "gunshot at 2.394-4.394, 6.612-9.086", "frequencyCaption": "gunshot two times"} 
+{"filepath": "data/multi_event_train/syn_2864.wav", "onoffCaption": "train horn at 0.519-5.377, 6.619-9.02", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2881.wav", "onoffCaption": "dog barking at 0.363-2.363, 4.329-6.329, 7.313-9.313", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2980.wav", "onoffCaption": "door slamming at 2.98-4.495, 5.851-7.366", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4120.wav", "onoffCaption": "sneeze at 0.206-2.452, 4.198-5.372, 6.923-9.537", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4175.wav", "onoffCaption": "duck quacking at 2.753-4.753, 6.321-8.321", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4190.wav", "onoffCaption": "sheep goat bleating at 0.805-2.805", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4248.wav", "onoffCaption": "duck quacking at 0.163-2.163 and tapping clicking clanking at 5.673-9.113", "frequencyCaption": "duck quacking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4306.wav", "onoffCaption": "tapping clicking clanking at 3.279-6.719 and sheep goat bleating at 7.072-9.072", "frequencyCaption": "tapping clicking clanking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4353.wav", "onoffCaption": "duck quacking at 0.65-2.65, 4.02-6.02", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4438.wav", "onoffCaption": "thump thud at 0.905-4.572 and cat meowing at 6.995-8.566", "frequencyCaption": "thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4523.wav", "onoffCaption": "duck quacking at 2.504-4.504 and train horn at 6.766-10.0", "frequencyCaption": "duck quacking one times and train horn one 
times"} +{"filepath": "data/multi_event_train/syn_4576.wav", "onoffCaption": "gunshot at 0.73-2.73, 4.574-6.574 and train horn at 3.274-6.594", "frequencyCaption": "gunshot two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4593.wav", "onoffCaption": "car horn honking at 0.119-5.026 and sneeze at 0.407-1.638 and door slamming at 0.512-2.64", "frequencyCaption": "car horn honking one times and sneeze one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4705.wav", "onoffCaption": "sheep goat bleating at 0.586-4.226, 6.002-9.642", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4750.wav", "onoffCaption": "tapping clicking clanking at 1.972-5.412, 6.469-9.909 and woman laughing at 2.03-4.128", "frequencyCaption": "tapping clicking clanking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4827.wav", "onoffCaption": "dog barking at 0.577-2.577 and train horn at 0.636-4.756, 5.88-10.0", "frequencyCaption": "dog barking one times and train horn two times"} +{"filepath": "data/multi_event_train/syn_4897.wav", "onoffCaption": "cow mooing at 2.385-5.367 and spraying at 8.29-8.917", "frequencyCaption": "cow mooing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2013.wav", "onoffCaption": "dog barking at 1.338-3.338, 4.879-6.879", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2062.wav", "onoffCaption": "car horn honking at 1.119-3.632, 5.271-7.784 and dog barking at 4.978-6.978, 7.545-9.545", "frequencyCaption": "car horn honking two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2087.wav", "onoffCaption": "door knocking at 0.82-5.259 and burping belching at 1.654-7.255 and door slamming at 6.373-9.347", "frequencyCaption": "door knocking one times and burping belching one times and door slamming one times"} +{"filepath": 
"data/multi_event_train/syn_2108.wav", "onoffCaption": "cow mooing at 3.083-6.093", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2179.wav", "onoffCaption": "gunshot at 0.366-2.366 and door slamming at 4.91-5.761, 7.513-8.364", "frequencyCaption": "gunshot one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2211.wav", "onoffCaption": "sneeze at 0.014-4.514, 5.427-7.903", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2244.wav", "onoffCaption": "train horn at 1.183-4.583, 5.705-9.105", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2434.wav", "onoffCaption": "dog barking at 0.142-2.142, 3.472-5.472, 6.656-8.656", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2484.wav", "onoffCaption": "cat meowing at 0.11-3.351, 4.446-7.687", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2612.wav", "onoffCaption": "cat meowing at 4.007-5.562, 6.692-7.967", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2647.wav", "onoffCaption": "spraying at 0.301-0.876, 3.183-3.758", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2709.wav", "onoffCaption": "train horn at 1.594-5.594", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2930.wav", "onoffCaption": "tapping clicking clanking at 0.463-3.903, 4.444-6.621, 7.812-10.0", "frequencyCaption": "tapping clicking clanking three times"} +{"filepath": "data/multi_event_train/syn_2965.wav", "onoffCaption": "door knocking at 0.177-3.552", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4021.wav", "onoffCaption": "dog barking at 2.558-4.558 and sheep goat bleating at 6.288-8.288", "frequencyCaption": "dog barking one times and sheep goat bleating one times"} +{"filepath": 
"data/multi_event_train/syn_4074.wav", "onoffCaption": "gunshot at 1.364-3.364, 4.698-6.698, 7.739-9.739", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4091.wav", "onoffCaption": "train horn at 3.3-6.18, 6.958-9.838", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4207.wav", "onoffCaption": "spraying at 0.091-0.995", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_4252.wav", "onoffCaption": "duck quacking at 0.773-2.773, 4.659-6.659 and cat meowing at 6.245-7.257, 8.139-9.683", "frequencyCaption": "duck quacking two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4349.wav", "onoffCaption": "whistling at 0.804-2.813, 4.259-7.09 and sheep goat bleating at 1.958-6.678", "frequencyCaption": "whistling two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4422.wav", "onoffCaption": "door knocking at 3.405-5.785", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4477.wav", "onoffCaption": "cat meowing at 0.083-1.349, 2.041-3.625 and gunshot at 2.761-4.761", "frequencyCaption": "cat meowing two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4492.wav", "onoffCaption": "gunshot at 0.382-2.382, 3.831-5.831", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4539.wav", "onoffCaption": "duck quacking at 3.101-5.101 and thump thud at 7.995-10.0", "frequencyCaption": "duck quacking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4589.wav", "onoffCaption": "burping belching at 0.511-3.511, 4.303-7.246, 7.967-10.0", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_4604.wav", "onoffCaption": "thump thud at 3.427-5.927 and tapping clicking clanking at 3.66-7.1", "frequencyCaption": "thump thud one times and tapping clicking clanking one 
times"} +{"filepath": "data/multi_event_train/syn_4651.wav", "onoffCaption": "gunshot at 1.084-3.084, 3.952-5.952, 7.198-9.198 and whistling at 1.399-9.784", "frequencyCaption": "gunshot three times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4690.wav", "onoffCaption": "tapping clicking clanking at 1.909-5.349 and dog barking at 7.675-9.675", "frequencyCaption": "tapping clicking clanking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4868.wav", "onoffCaption": "tapping clicking clanking at 0.529-3.969", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4926.wav", "onoffCaption": "gunshot at 1.876-3.876, 4.842-6.842, 7.718-9.718", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4973.wav", "onoffCaption": "woman laughing at 2.777-5.546", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4996.wav", "onoffCaption": "gunshot at 2.571-4.701, 5.405-7.535", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2046.wav", "onoffCaption": "thump thud at 0.536-3.307 and gunshot at 3.348-5.348", "frequencyCaption": "thump thud one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2235.wav", "onoffCaption": "explosion at 2.514-5.514, 7.383-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2260.wav", "onoffCaption": "train horn at 2.617-7.694", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2285.wav", "onoffCaption": "tapping clicking clanking at 0.136-3.576 and dog barking at 0.345-2.745", "frequencyCaption": "tapping clicking clanking one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2334.wav", "onoffCaption": "thump thud at 0.05-3.097, 4.756-7.218 and dog barking at 0.212-2.212", "frequencyCaption": "thump thud two times and dog 
barking one times"} +{"filepath": "data/multi_event_train/syn_2410.wav", "onoffCaption": "gunshot at 1.449-3.449 and cat meowing at 2.183-4.143 and sneeze at 3.218-4.392", "frequencyCaption": "gunshot one times and cat meowing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2445.wav", "onoffCaption": "cat meowing at 0.275-1.811, 3.469-5.005, 6.461-7.997 and dog barking at 2.825-4.825 and sneeze at 5.34-8.0", "frequencyCaption": "cat meowing three times and dog barking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2636.wav", "onoffCaption": "train horn at 2.96-6.36, 7.243-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2663.wav", "onoffCaption": "whistling at 0.052-2.281, 4.45-7.014 and train horn at 4.814-8.214", "frequencyCaption": "whistling two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2679.wav", "onoffCaption": "spraying at 2.8-3.3, 5.687-7.42", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2686.wav", "onoffCaption": "train horn at 2.685-7.762", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2762.wav", "onoffCaption": "door slamming at 0.1-2.32, 4.242-6.462 and dog barking at 3.512-5.512, 6.099-8.099", "frequencyCaption": "door slamming two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2778.wav", "onoffCaption": "car horn honking at 0.462-3.681 and sheep goat bleating at 5.35-7.35", "frequencyCaption": "car horn honking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2787.wav", "onoffCaption": "tapping clicking clanking at 1.579-5.019 and burping belching at 2.926-5.926, 7.082-10.0", "frequencyCaption": "tapping clicking clanking one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_2914.wav", "onoffCaption": "cow mooing at 2.723-7.152", 
"frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2941.wav", "onoffCaption": "duck quacking at 0.171-2.171 and spraying at 3.39-3.994 and sneeze at 5.409-6.795", "frequencyCaption": "duck quacking one times and spraying one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4005.wav", "onoffCaption": "sneeze at 1.605-4.066, 6.518-8.437", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4050.wav", "onoffCaption": "cat meowing at 0.481-1.525, 3.829-4.873 and tapping clicking clanking at 1.071-4.511", "frequencyCaption": "cat meowing two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4104.wav", "onoffCaption": "explosion at 0.572-2.665, 5.145-7.238 and door slamming at 3.172-4.289, 5.792-6.909", "frequencyCaption": "explosion two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4223.wav", "onoffCaption": "duck quacking at 0.184-2.184, 3.018-5.018, 6.317-8.317 and sheep goat bleating at 6.077-8.077", "frequencyCaption": "duck quacking three times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4276.wav", "onoffCaption": "gunshot at 3.244-5.718, 6.806-9.28", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4293.wav", "onoffCaption": "sneeze at 2.122-3.579, 5.94-7.397 and cow mooing at 3.03-6.012", "frequencyCaption": "sneeze two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4338.wav", "onoffCaption": "burping belching at 1.041-3.406 and sneeze at 2.307-4.392", "frequencyCaption": "burping belching one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4388.wav", "onoffCaption": "sneeze at 1.411-2.657 and explosion at 2.597-5.591", "frequencyCaption": "sneeze one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4406.wav", "onoffCaption": "door knocking at 0.49-3.61, 4.378-7.498 
and train horn at 5.486-7.966", "frequencyCaption": "door knocking two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4453.wav", "onoffCaption": "explosion at 0.123-3.25, 5.341-8.468", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4548.wav", "onoffCaption": "whistling at 2.538-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4620.wav", "onoffCaption": "woman laughing at 1.411-4.465, 4.987-8.041 and gunshot at 1.892-3.892", "frequencyCaption": "woman laughing two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4675.wav", "onoffCaption": "whistling at 0.21-8.595 and sneeze at 0.27-2.229", "frequencyCaption": "whistling one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4819.wav", "onoffCaption": "cat meowing at 1.092-4.333", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4856.wav", "onoffCaption": "thump thud at 0.115-2.577, 4.284-6.746, 7.956-10.0", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_4902.wav", "onoffCaption": "spraying at 0.111-0.611, 1.41-1.979, 2.877-4.052 and gunshot at 6.942-8.942", "frequencyCaption": "spraying three times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4957.wav", "onoffCaption": "whistling at 1.592-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2009.wav", "onoffCaption": "thump thud at 0.776-5.151", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2077.wav", "onoffCaption": "whistling at 1.112-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2112.wav", "onoffCaption": "explosion at 2.23-5.23 and door slamming at 8.138-9.157", "frequencyCaption": "explosion one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2147.wav", 
"onoffCaption": "woman laughing at 1.159-4.547", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2189.wav", "onoffCaption": "train horn at 0.649-3.323, 4.516-7.037", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2361.wav", "onoffCaption": "train horn at 0.466-2.621, 3.841-5.996, 7.71-9.865", "frequencyCaption": "train horn three times"} +{"filepath": "data/multi_event_train/syn_2384.wav", "onoffCaption": "door slamming at 0.36-2.488, 4.172-6.3", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2421.wav", "onoffCaption": "spraying at 0.679-1.926, 3.703-4.95 and door knocking at 3.2-5.93", "frequencyCaption": "spraying two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2511.wav", "onoffCaption": "sheep goat bleating at 0.209-2.209", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2544.wav", "onoffCaption": "whistling at 2.084-4.313 and door slamming at 4.281-5.534", "frequencyCaption": "whistling one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2737.wav", "onoffCaption": "sneeze at 0.645-2.558, 3.721-5.634, 6.689-8.602", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2815.wav", "onoffCaption": "cat meowing at 2.48-4.23", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2840.wav", "onoffCaption": "burping belching at 0.204-3.484, 4.239-7.156 and whistling at 0.478-8.863", "frequencyCaption": "burping belching two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2925.wav", "onoffCaption": "tapping clicking clanking at 0.44-3.88", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4151.wav", "onoffCaption": "gunshot at 0.002-2.002, 3.068-5.068, 6.124-8.124", "frequencyCaption": "gunshot three 
times"} +{"filepath": "data/multi_event_train/syn_4239.wav", "onoffCaption": "cow mooing at 1.84-5.138, 6.741-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4247.wav", "onoffCaption": "gunshot at 3.188-5.318, 7.34-9.34", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4289.wav", "onoffCaption": "spraying at 2.817-3.992, 5.823-6.823", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4322.wav", "onoffCaption": "cow mooing at 0.833-3.802, 5.116-8.098", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4377.wav", "onoffCaption": "whistling at 0.24-5.74 and spraying at 0.76-2.007, 2.668-3.915 and explosion at 1.028-6.028", "frequencyCaption": "whistling one times and spraying two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4392.wav", "onoffCaption": "spraying at 0.221-0.79, 1.312-1.881, 4.348-4.917", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4449.wav", "onoffCaption": "sheep goat bleating at 3.247-5.247", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4507.wav", "onoffCaption": "cat meowing at 0.37-2.364 and woman laughing at 2.598-5.184 and spraying at 4.817-7.836", "frequencyCaption": "cat meowing one times and woman laughing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4552.wav", "onoffCaption": "whistling at 1.266-6.441, 7.836-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4721.wav", "onoffCaption": "car horn honking at 1.973-6.295", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4774.wav", "onoffCaption": "woman laughing at 0.137-2.331, 4.071-6.657 and cat meowing at 1.489-2.799", "frequencyCaption": "woman laughing two times and cat meowing one times"} +{"filepath": 
"data/multi_event_train/syn_4791.wav", "onoffCaption": "thump thud at 1.876-4.338, 5.469-7.931 and door knocking at 5.537-8.0", "frequencyCaption": "thump thud two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4803.wav", "onoffCaption": "cat meowing at 1.28-2.546 and door slamming at 3.938-4.789, 5.958-8.184", "frequencyCaption": "cat meowing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4918.wav", "onoffCaption": "car horn honking at 0.143-3.056, 5.143-8.069", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_5.wav", "onoffCaption": "train horn at 3.048-7.116 and cat meowing at 6.564-7.75", "frequencyCaption": "train horn one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2022.wav", "onoffCaption": "sneeze at 0.421-2.738", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2092.wav", "onoffCaption": "train horn at 0.127-4.457, 5.738-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2139.wav", "onoffCaption": "tapping clicking clanking at 3.088-6.528, 7.884-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2204.wav", "onoffCaption": "cat meowing at 3.308-5.058, 7.31-9.06", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2251.wav", "onoffCaption": "thump thud at 0.879-5.329", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2350.wav", "onoffCaption": "train horn at 0.102-4.302, 5.661-8.516", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2474.wav", "onoffCaption": "gunshot at 0.141-2.141, 4.521-6.521, 7.761-9.891", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_2491.wav", "onoffCaption": "spraying at 0.317-0.921 and sheep goat bleating at 2.923-4.923 
and burping belching at 3.97-6.97", "frequencyCaption": "spraying one times and sheep goat bleating one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2607.wav", "onoffCaption": "door knocking at 0.375-3.105 and duck quacking at 2.675-4.675", "frequencyCaption": "door knocking one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2652.wav", "onoffCaption": "thump thud at 3.045-7.495", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2706.wav", "onoffCaption": "duck quacking at 1.028-3.028, 4.755-6.755, 7.606-9.606", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2749.wav", "onoffCaption": "spraying at 1.223-2.304", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2970.wav", "onoffCaption": "sneeze at 0.91-3.524", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2995.wav", "onoffCaption": "train horn at 3.448-9.917", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4034.wav", "onoffCaption": "dog barking at 1.213-4.533, 6.858-10.0", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4061.wav", "onoffCaption": "door slamming at 3.947-4.798 and dog barking at 7.487-9.487", "frequencyCaption": "door slamming one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4084.wav", "onoffCaption": "sheep goat bleating at 0.916-2.916, 4.112-6.918, 7.548-9.548", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4160.wav", "onoffCaption": "gunshot at 1.982-4.483, 5.194-7.194", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4185.wav", "onoffCaption": "cow mooing at 1.666-4.964, 6.339-9.308", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4212.wav", 
"onoffCaption": "gunshot at 2.485-4.485, 5.365-7.365", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4309.wav", "onoffCaption": "car horn honking at 0.258-4.507, 5.729-9.978", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4437.wav", "onoffCaption": "cow mooing at 0.883-3.852, 4.639-7.608", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4462.wav", "onoffCaption": "spraying at 1.642-4.661 and train horn at 2.0-5.32", "frequencyCaption": "spraying one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4487.wav", "onoffCaption": "woman laughing at 0.162-3.257, 4.068-6.552 and dog barking at 0.198-2.198", "frequencyCaption": "woman laughing two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4536.wav", "onoffCaption": "woman laughing at 2.541-5.636, 7.344-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4579.wav", "onoffCaption": "burping belching at 0.802-4.242, 5.248-8.688 and gunshot at 2.58-4.58, 6.553-8.553", "frequencyCaption": "burping belching two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4611.wav", "onoffCaption": "dog barking at 2.046-4.046, 5.252-7.252", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4644.wav", "onoffCaption": "gunshot at 1.209-3.209, 5.051-7.324 and train horn at 2.986-7.106, 7.822-10.0", "frequencyCaption": "gunshot two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_4828.wav", "onoffCaption": "car horn honking at 2.865-7.114 and thump thud at 4.049-6.277", "frequencyCaption": "car horn honking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4832.wav", "onoffCaption": "whistling at 1.547-9.202", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4898.wav", 
"onoffCaption": "spraying at 0.281-1.538, 2.407-3.664, 4.717-5.974", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4929.wav", "onoffCaption": "gunshot at 1.601-3.601", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4933.wav", "onoffCaption": "sheep goat bleating at 2.998-4.998, 6.444-8.444", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4966.wav", "onoffCaption": "cow mooing at 3.137-6.106", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4983.wav", "onoffCaption": "whistling at 1.786-4.015, 5.199-7.52", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2038.wav", "onoffCaption": "duck quacking at 0.312-2.312, 3.475-5.475, 6.684-8.684 and thump thud at 1.912-4.412", "frequencyCaption": "duck quacking three times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2088.wav", "onoffCaption": "cat meowing at 0.012-1.902 and burping belching at 5.241-8.531", "frequencyCaption": "cat meowing one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2123.wav", "onoffCaption": "tapping clicking clanking at 0.644-4.084, 5.681-8.568", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2176.wav", "onoffCaption": "cow mooing at 2.931-7.911", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2193.wav", "onoffCaption": "whistling at 0.783-9.168", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2305.wav", "onoffCaption": "sneeze at 0.788-2.747, 3.395-4.683 and gunshot at 1.228-3.228", "frequencyCaption": "sneeze two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2374.wav", "onoffCaption": "sneeze at 2.903-4.067, 4.883-6.047 and train horn at 3.325-5.965, 7.216-9.683", "frequencyCaption": "sneeze 
two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2520.wav", "onoffCaption": "sheep goat bleating at 0.244-4.164", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2575.wav", "onoffCaption": "door knocking at 0.167-2.519, 3.115-5.467, 7.305-9.657", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_2590.wav", "onoffCaption": "door slamming at 0.593-1.533, 3.478-4.418", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2639.wav", "onoffCaption": "train horn at 3.428-10.0", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2648.wav", "onoffCaption": "gunshot at 0.06-2.06", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2722.wav", "onoffCaption": "duck quacking at 1.492-3.492, 5.29-7.29", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2753.wav", "onoffCaption": "thump thud at 0.078-2.306, 4.188-6.416 and train horn at 0.566-3.206 and dog barking at 2.021-5.341, 6.271-9.591", "frequencyCaption": "thump thud two times and train horn one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2824.wav", "onoffCaption": "woman laughing at 0.649-3.004, 5.141-7.589 and door knocking at 3.792-6.859, 7.738-10.0", "frequencyCaption": "woman laughing two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2871.wav", "onoffCaption": "cow mooing at 1.316-4.614, 5.217-8.144", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2894.wav", "onoffCaption": "gunshot at 1.267-3.267, 4.861-6.861", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4135.wav", "onoffCaption": "duck quacking at 0.516-2.516", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4144.wav", 
"onoffCaption": "sneeze at 0.05-2.163, 4.515-6.628, 7.634-9.747", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4208.wav", "onoffCaption": "dog barking at 3.387-5.387", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4313.wav", "onoffCaption": "burping belching at 1.703-3.826, 4.405-7.405 and dog barking at 5.109-7.109, 7.775-9.775", "frequencyCaption": "burping belching two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_4346.wav", "onoffCaption": "tapping clicking clanking at 0.824-4.264 and cow mooing at 6.055-10.0", "frequencyCaption": "tapping clicking clanking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4478.wav", "onoffCaption": "duck quacking at 0.198-2.198, 2.933-4.933 and cow mooing at 1.378-4.347", "frequencyCaption": "duck quacking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4563.wav", "onoffCaption": "explosion at 0.394-3.394", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4586.wav", "onoffCaption": "door knocking at 3.024-6.133 and cat meowing at 3.165-4.525", "frequencyCaption": "door knocking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4710.wav", "onoffCaption": "burping belching at 1.228-4.407 and sneeze at 2.434-3.728", "frequencyCaption": "burping belching one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4745.wav", "onoffCaption": "burping belching at 0.033-3.212, 4.738-7.917 and sheep goat bleating at 3.435-5.435, 6.487-8.487", "frequencyCaption": "burping belching two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4816.wav", "onoffCaption": "woman laughing at 3.804-6.029 and dog barking at 5.915-7.915", "frequencyCaption": "woman laughing one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4867.wav", 
"onoffCaption": "duck quacking at 0.123-2.123, 4.217-6.217 and gunshot at 4.27-6.27", "frequencyCaption": "duck quacking two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4882.wav", "onoffCaption": "spraying at 0.017-0.868, 2.396-3.247", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4999.wav", "onoffCaption": "car horn honking at 0.007-3.226, 5.675-8.894 and cat meowing at 0.879-1.981", "frequencyCaption": "car horn honking two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2049.wav", "onoffCaption": "sheep goat bleating at 2.376-4.376, 5.004-7.004", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2053.wav", "onoffCaption": "cow mooing at 3.22-6.23", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2107.wav", "onoffCaption": "door knocking at 0.202-2.579 and thump thud at 7.565-10.0", "frequencyCaption": "door knocking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2148.wav", "onoffCaption": "spraying at 1.992-3.754 and door slamming at 2.991-4.382, 5.138-6.529, 7.82-9.211", "frequencyCaption": "spraying one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_2152.wav", "onoffCaption": "tapping clicking clanking at 3.493-6.933 and cow mooing at 3.754-7.052", "frequencyCaption": "tapping clicking clanking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2321.wav", "onoffCaption": "whistling at 2.723-7.207", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2391.wav", "onoffCaption": "cat meowing at 2.812-4.772, 6.893-8.853", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2405.wav", "onoffCaption": "whistling at 0.352-3.227", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2504.wav", 
"onoffCaption": "sneeze at 3.377-5.336", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2551.wav", "onoffCaption": "door slamming at 2.538-3.677, 4.833-5.972", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2689.wav", "onoffCaption": "woman laughing at 2.249-8.983", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2777.wav", "onoffCaption": "burping belching at 0.147-3.691", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2792.wav", "onoffCaption": "burping belching at 1.622-5.491", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2800.wav", "onoffCaption": "door slamming at 0.311-1.544, 3.521-4.754", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2855.wav", "onoffCaption": "car horn honking at 0.434-5.341", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2901.wav", "onoffCaption": "whistling at 0.354-5.854", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4111.wav", "onoffCaption": "woman laughing at 2.429-4.534, 5.334-7.689 and dog barking at 3.371-5.371, 6.548-8.548", "frequencyCaption": "woman laughing two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_4279.wav", "onoffCaption": "car horn honking at 2.345-7.252", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4286.wav", "onoffCaption": "cow mooing at 0.317-5.297", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4337.wav", "onoffCaption": "spraying at 2.863-3.73 and cat meowing at 6.571-9.601", "frequencyCaption": "spraying one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4362.wav", "onoffCaption": "dog barking at 2.986-6.306", "frequencyCaption": 
"dog barking one times"} +{"filepath": "data/multi_event_train/syn_4387.wav", "onoffCaption": "cow mooing at 1.656-4.666, 6.806-9.081", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4409.wav", "onoffCaption": "cat meowing at 0.497-2.491, 4.841-6.835 and car horn honking at 1.246-5.568", "frequencyCaption": "cat meowing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4512.wav", "onoffCaption": "car horn honking at 1.26-5.101, 5.957-8.883", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4547.wav", "onoffCaption": "train horn at 1.784-4.944", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4734.wav", "onoffCaption": "gunshot at 3.456-5.475, 7.633-9.633", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4761.wav", "onoffCaption": "car horn honking at 1.518-5.172, 6.256-9.91", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4784.wav", "onoffCaption": "dog barking at 2.425-4.425, 5.507-7.507 and spraying at 5.914-7.171, 8.215-9.39", "frequencyCaption": "dog barking two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4843.wav", "onoffCaption": "door slamming at 1.734-3.862, 5.016-7.144 and woman laughing at 4.551-6.632, 7.972-10.0", "frequencyCaption": "door slamming two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4958.wav", "onoffCaption": "cat meowing at 0.111-1.213, 2.139-3.241 and tapping clicking clanking at 1.733-5.173 and gunshot at 4.216-6.216", "frequencyCaption": "cat meowing two times and tapping clicking clanking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2006.wav", "onoffCaption": "explosion at 0.8-3.794, 5.316-8.31", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2220.wav", "onoffCaption": "cow 
mooing at 0.297-3.307, 4.166-7.148", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2227.wav", "onoffCaption": "train horn at 0.365-2.765, 3.678-6.078, 6.632-9.032 and cat meowing at 3.73-5.301", "frequencyCaption": "train horn three times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2275.wav", "onoffCaption": "dog barking at 0.194-2.194, 4.569-6.569 and tapping clicking clanking at 0.203-3.643", "frequencyCaption": "dog barking two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2290.wav", "onoffCaption": "door knocking at 0.345-4.617, 6.113-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2450.wav", "onoffCaption": "explosion at 3.699-8.293 and sheep goat bleating at 4.38-6.38, 7.404-9.404", "frequencyCaption": "explosion one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2623.wav", "onoffCaption": "sheep goat bleating at 1.467-6.187", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2671.wav", "onoffCaption": "sneeze at 0.735-2.012, 4.199-5.476", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2676.wav", "onoffCaption": "gunshot at 2.71-4.71", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2693.wav", "onoffCaption": "sheep goat bleating at 0.134-2.134, 2.98-4.98, 6.05-8.05", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2694.wav", "onoffCaption": "cat meowing at 2.385-4.333, 5.505-7.453", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2738.wav", "onoffCaption": "whistling at 0.685-5.169, 7.608-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2788.wav", "onoffCaption": "burping belching at 0.0-4.023, 5.919-8.245 and whistling at 
0.417-2.426", "frequencyCaption": "burping belching two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2954.wav", "onoffCaption": "sneeze at 0.438-2.397, 3.235-5.194 and spraying at 3.26-3.835, 5.502-6.077", "frequencyCaption": "sneeze two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4010.wav", "onoffCaption": "door slamming at 1.845-2.962, 5.286-6.403, 7.731-8.848", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4017.wav", "onoffCaption": "explosion at 2.384-5.112 and woman laughing at 7.997-10.0", "frequencyCaption": "explosion one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4045.wav", "onoffCaption": "spraying at 1.773-2.273, 3.359-3.859, 5.793-6.293", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4236.wav", "onoffCaption": "door slamming at 3.015-5.241, 6.472-8.698 and spraying at 3.798-4.373, 5.108-7.544, 8.162-8.894", "frequencyCaption": "door slamming two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_4263.wav", "onoffCaption": "train horn at 3.019-6.789 and sneeze at 3.37-5.483", "frequencyCaption": "train horn one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4378.wav", "onoffCaption": "tapping clicking clanking at 0.679-4.119, 5.177-7.992 and cow mooing at 3.921-6.89", "frequencyCaption": "tapping clicking clanking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4413.wav", "onoffCaption": "whistling at 0.02-7.77 and car horn honking at 3.683-6.858", "frequencyCaption": "whistling one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4441.wav", "onoffCaption": "car horn honking at 0.078-4.478, 5.779-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4446.wav", "onoffCaption": "thump thud at 0.432-2.932, 4.167-6.395", 
"frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4508.wav", "onoffCaption": "door knocking at 0.277-4.027, 5.925-8.174 and dog barking at 1.887-3.887", "frequencyCaption": "door knocking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4635.wav", "onoffCaption": "sneeze at 0.402-2.715, 5.087-7.4", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4660.wav", "onoffCaption": "thump thud at 0.05-2.278, 3.47-5.565", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4685.wav", "onoffCaption": "thump thud at 1.015-4.682, 6.109-8.296", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4859.wav", "onoffCaption": "door knocking at 0.375-2.75, 4.289-6.664 and thump thud at 4.902-7.364", "frequencyCaption": "door knocking two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4917.wav", "onoffCaption": "door knocking at 0.45-3.559 and cow mooing at 1.189-4.171, 5.069-7.646", "frequencyCaption": "door knocking one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4942.wav", "onoffCaption": "duck quacking at 2.841-4.841, 6.439-8.439", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4945.wav", "onoffCaption": "sneeze at 3.167-4.421, 5.355-6.609", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2001.wav", "onoffCaption": "cat meowing at 3.737-6.767, 7.269-10.0", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2054.wav", "onoffCaption": "explosion at 3.827-8.421", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2100.wav", "onoffCaption": "spraying at 2.764-3.368, 4.537-5.141", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2124.wav", "onoffCaption": "gunshot at 3.266-5.436, 
7.546-9.546", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2155.wav", "onoffCaption": "cow mooing at 1.989-4.999, 7.306-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2268.wav", "onoffCaption": "whistling at 2.877-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2272.wav", "onoffCaption": "door slamming at 2.858-3.997, 4.816-5.621, 6.487-8.405", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2297.wav", "onoffCaption": "car horn honking at 3.459-5.924, 7.102-9.567", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2326.wav", "onoffCaption": "train horn at 1.47-5.67 and gunshot at 1.569-3.569, 4.393-6.393, 7.694-9.694 and whistling at 2.957-5.832", "frequencyCaption": "train horn one times and gunshot three times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2369.wav", "onoffCaption": "woman laughing at 1.147-3.739, 4.978-7.57", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2373.wav", "onoffCaption": "cow mooing at 0.035-4.464 and cat meowing at 7.286-8.426", "frequencyCaption": "cow mooing one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2396.wav", "onoffCaption": "door slamming at 0.228-2.448, 3.938-6.158", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2402.wav", "onoffCaption": "door knocking at 2.793-5.849", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2418.wav", "onoffCaption": "dog barking at 0.291-2.291 and explosion at 0.294-5.294, 6.146-8.21", "frequencyCaption": "dog barking one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_2457.wav", "onoffCaption": "dog barking at 3.02-5.02, 6.881-8.881", "frequencyCaption": "dog barking two times"} 
+{"filepath": "data/multi_event_train/syn_2503.wav", "onoffCaption": "sheep goat bleating at 0.386-3.706", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2519.wav", "onoffCaption": "spraying at 3.627-4.228, 5.239-5.971, 6.818-8.976", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2556.wav", "onoffCaption": "train horn at 0.173-5.917", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2572.wav", "onoffCaption": "spraying at 0.005-0.589, 1.147-1.669, 2.456-3.703 and sheep goat bleating at 5.9-8.98", "frequencyCaption": "spraying three times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2624.wav", "onoffCaption": "gunshot at 1.164-3.638", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2725.wav", "onoffCaption": "cat meowing at 0.226-1.439, 2.668-3.881 and duck quacking at 7.397-9.397", "frequencyCaption": "cat meowing two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2770.wav", "onoffCaption": "train horn at 0.143-5.858, 6.746-9.611", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2795.wav", "onoffCaption": "woman laughing at 3.182-5.298, 5.997-8.113", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2807.wav", "onoffCaption": "woman laughing at 2.828-9.562 and train horn at 3.108-7.966", "frequencyCaption": "woman laughing one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2848.wav", "onoffCaption": "whistling at 2.81-9.727", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2852.wav", "onoffCaption": "sneeze at 2.011-3.718 and duck quacking at 2.159-4.159", "frequencyCaption": "sneeze one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2906.wav", "onoffCaption": "cow mooing at 
2.08-5.049", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2949.wav", "onoffCaption": "woman laughing at 3.906-6.261", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2953.wav", "onoffCaption": "explosion at 0.864-3.864, 5.072-8.009", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4042.wav", "onoffCaption": "dog barking at 2.567-5.488, 7.079-10.0", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4058.wav", "onoffCaption": "sneeze at 0.092-2.495, 3.276-4.863 and duck quacking at 0.416-2.416, 4.8-6.8 and gunshot at 3.904-5.904, 6.856-8.856", "frequencyCaption": "sneeze two times and duck quacking two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4116.wav", "onoffCaption": "sheep goat bleating at 0.718-2.718, 4.892-6.892 and door knocking at 1.376-5.909, 6.721-8.865 and dog barking at 5.228-7.228", "frequencyCaption": "sheep goat bleating two times and door knocking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4143.wav", "onoffCaption": "whistling at 0.576-8.961 and dog barking at 0.748-6.385 and woman laughing at 2.533-5.138", "frequencyCaption": "whistling one times and dog barking one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4159.wav", "onoffCaption": "thump thud at 0.331-4.249 and duck quacking at 2.631-4.631, 5.476-7.476", "frequencyCaption": "thump thud one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4231.wav", "onoffCaption": "car horn honking at 1.169-3.634 and door slamming at 7.65-9.563", "frequencyCaption": "car horn honking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4264.wav", "onoffCaption": "burping belching at 1.205-4.711, 5.317-8.823 and woman laughing at 2.257-5.311", "frequencyCaption": "burping belching two times and woman 
laughing one times"} +{"filepath": "data/multi_event_train/syn_4281.wav", "onoffCaption": "woman laughing at 0.493-2.718", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4330.wav", "onoffCaption": "gunshot at 1.206-3.206, 4.669-6.669 and sheep goat bleating at 2.87-4.87, 6.162-8.162", "frequencyCaption": "gunshot two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4365.wav", "onoffCaption": "tapping clicking clanking at 0.181-3.621, 4.89-7.477 and door slamming at 0.518-2.059, 2.809-4.35", "frequencyCaption": "tapping clicking clanking two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4380.wav", "onoffCaption": "cow mooing at 1.129-4.427 and tapping clicking clanking at 2.306-5.746, 7.032-9.339", "frequencyCaption": "cow mooing one times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4414.wav", "onoffCaption": "door slamming at 1.825-2.663, 4.903-5.741", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4515.wav", "onoffCaption": "sneeze at 0.751-2.664, 3.184-5.097", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4540.wav", "onoffCaption": "train horn at 0.83-6.545, 7.829-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4628.wav", "onoffCaption": "duck quacking at 0.235-2.235, 3.981-5.981, 7.014-9.014 and door slamming at 0.25-1.15", "frequencyCaption": "duck quacking three times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4632.wav", "onoffCaption": "spraying at 0.339-1.071, 2.214-2.798, 3.436-4.5 and sneeze at 3.126-5.211, 6.084-8.169", "frequencyCaption": "spraying three times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_4667.wav", "onoffCaption": "door knocking at 2.552-4.642, 6.518-8.608", "frequencyCaption": "door knocking two times"} +{"filepath": 
"data/multi_event_train/syn_4682.wav", "onoffCaption": "door slamming at 0.297-1.776, 2.503-3.354, 4.599-5.577", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4698.wav", "onoffCaption": "cow mooing at 0.428-5.408 and whistling at 0.539-8.924", "frequencyCaption": "cow mooing one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4729.wav", "onoffCaption": "car horn honking at 1.494-4.42, 5.18-7.375", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4733.wav", "onoffCaption": "woman laughing at 1.817-4.011 and door knocking at 5.746-8.593", "frequencyCaption": "woman laughing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4766.wav", "onoffCaption": "sheep goat bleating at 0.695-2.695, 3.642-5.642, 6.254-8.254", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4783.wav", "onoffCaption": "cow mooing at 2.223-5.233, 7.477-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4799.wav", "onoffCaption": "sneeze at 0.383-4.912, 6.313-7.59", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4811.wav", "onoffCaption": "door knocking at 3.503-5.815, 7.402-9.714", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4844.wav", "onoffCaption": "car horn honking at 0.575-3.04 and whistling at 1.136-6.311, 7.842-10.0 and door knocking at 5.702-8.47", "frequencyCaption": "car horn honking one times and whistling two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4910.wav", "onoffCaption": "dog barking at 3.332-5.332, 6.55-8.55", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2171.wav", "onoffCaption": "gunshot at 3.247-5.247", "frequencyCaption": "gunshot one times"} +{"filepath": 
"data/multi_event_train/syn_2194.wav", "onoffCaption": "door knocking at 2.709-6.262, 7.986-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2219.wav", "onoffCaption": "cow mooing at 0.867-5.296, 6.169-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2302.wav", "onoffCaption": "burping belching at 2.824-5.824, 6.756-8.973", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2318.wav", "onoffCaption": "whistling at 0.004-2.233, 3.194-6.069, 7.167-9.818", "frequencyCaption": "whistling three times"} +{"filepath": "data/multi_event_train/syn_2357.wav", "onoffCaption": "sneeze at 3.056-4.148, 6.17-7.262, 8.722-9.814", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2469.wav", "onoffCaption": "burping belching at 0.092-3.092, 3.702-5.932, 7.024-9.674", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_2527.wav", "onoffCaption": "spraying at 1.357-3.941, 4.523-7.107", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2597.wav", "onoffCaption": "cat meowing at 3.19-5.13, 6.832-8.772", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2701.wav", "onoffCaption": "explosion at 1.956-4.685, 5.357-8.086", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2754.wav", "onoffCaption": "sneeze at 0.324-1.824, 2.589-3.843 and tapping clicking clanking at 6.289-9.729", "frequencyCaption": "sneeze two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2823.wav", "onoffCaption": "sheep goat bleating at 0.14-3.14", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2876.wav", "onoffCaption": "duck quacking at 2.428-4.428, 6.174-8.174", "frequencyCaption": "duck quacking two times"} 
+{"filepath": "data/multi_event_train/syn_2893.wav", "onoffCaption": "door slamming at 1.258-3.741", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_2938.wav", "onoffCaption": "duck quacking at 0.341-2.341, 3.83-5.83", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2988.wav", "onoffCaption": "cow mooing at 1.038-4.02", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4029.wav", "onoffCaption": "tapping clicking clanking at 3.41-6.85", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4099.wav", "onoffCaption": "thump thud at 0.883-3.222, 5.031-7.37 and sneeze at 1.897-4.557, 5.208-7.167", "frequencyCaption": "thump thud two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_4132.wav", "onoffCaption": "whistling at 0.88-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4167.wav", "onoffCaption": "burping belching at 0.189-7.357 and explosion at 2.581-7.581 and cat meowing at 7.678-8.689", "frequencyCaption": "burping belching one times and explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4182.wav", "onoffCaption": "gunshot at 1.168-3.168, 5.131-7.131", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4314.wav", "onoffCaption": "burping belching at 2.924-5.924, 6.913-8.948", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4341.wav", "onoffCaption": "burping belching at 1.82-4.82", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4531.wav", "onoffCaption": "whistling at 2.216-9.966", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4564.wav", "onoffCaption": "burping belching at 1.236-4.236, 5.53-8.53 and sneeze at 5.994-7.288", 
"frequencyCaption": "burping belching two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4581.wav", "onoffCaption": "door knocking at 0.688-4.241 and door slamming at 7.512-8.412", "frequencyCaption": "door knocking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4659.wav", "onoffCaption": "whistling at 0.504-8.08 and explosion at 2.227-6.227, 6.929-9.249 and spraying at 6.965-7.473, 7.993-8.501, 9.439-9.947", "frequencyCaption": "whistling one times and explosion two times and spraying three times"} +{"filepath": "data/multi_event_train/syn_4717.wav", "onoffCaption": "sheep goat bleating at 0.537-2.537, 3.04-5.78 and duck quacking at 1.164-3.164", "frequencyCaption": "sheep goat bleating two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4742.wav", "onoffCaption": "woman laughing at 3.527-6.165 and duck quacking at 5.318-7.318", "frequencyCaption": "woman laughing one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4835.wav", "onoffCaption": "cow mooing at 2.109-5.119 and gunshot at 7.477-9.477", "frequencyCaption": "cow mooing one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4860.wav", "onoffCaption": "thump thud at 0.222-2.561, 4.496-7.267", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4885.wav", "onoffCaption": "train horn at 0.303-3.663 and sheep goat bleating at 0.483-2.483 and cat meowing at 4.115-5.262, 7.386-8.533", "frequencyCaption": "train horn one times and sheep goat bleating one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2.wav", "onoffCaption": "gunshot at 3.979-5.979, 7.863-9.863", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2070.wav", "onoffCaption": "tapping clicking clanking at 0.312-3.752, 6.233-8.612", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": 
"data/multi_event_train/syn_2095.wav", "onoffCaption": "explosion at 0.659-3.531, 5.683-8.555", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2203.wav", "onoffCaption": "train horn at 0.751-3.991, 4.924-7.607 and burping belching at 1.081-3.842", "frequencyCaption": "train horn two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2256.wav", "onoffCaption": "whistling at 0.039-7.789", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2426.wav", "onoffCaption": "burping belching at 0.083-3.952, 4.847-8.716", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2496.wav", "onoffCaption": "burping belching at 0.734-2.841, 4.296-7.291 and duck quacking at 5.046-7.046, 7.787-9.787", "frequencyCaption": "burping belching two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2568.wav", "onoffCaption": "door knocking at 1.848-4.088, 5.701-7.941", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2600.wav", "onoffCaption": "spraying at 1.873-3.606, 5.628-7.361", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2655.wav", "onoffCaption": "door slamming at 3.365-5.728, 6.77-9.133 and woman laughing at 5.218-7.784", "frequencyCaption": "door slamming two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2839.wav", "onoffCaption": "train horn at 2.721-7.623", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2889.wav", "onoffCaption": "woman laughing at 0.784-3.389, 5.389-7.994", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2922.wav", "onoffCaption": "sheep goat bleating at 2.514-4.514, 6.364-9.354", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2977.wav", "onoffCaption": "door 
knocking at 3.285-6.838, 7.652-9.812", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2992.wav", "onoffCaption": "sneeze at 2.421-4.66, 5.402-7.641", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4033.wav", "onoffCaption": "door knocking at 1.626-5.922, 6.87-9.119", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4066.wav", "onoffCaption": "cat meowing at 0.594-2.73, 3.248-5.384, 7.175-9.311", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4083.wav", "onoffCaption": "door slamming at 2.678-3.911 and tapping clicking clanking at 6.349-9.789", "frequencyCaption": "door slamming one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4128.wav", "onoffCaption": "car horn honking at 0.637-5.149, 6.439-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4198.wav", "onoffCaption": "door knocking at 2.713-5.025, 6.532-9.144 and burping belching at 2.951-4.986, 6.772-8.807", "frequencyCaption": "door knocking two times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_4215.wav", "onoffCaption": "explosion at 0.955-3.949 and cat meowing at 2.343-3.759", "frequencyCaption": "explosion one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4240.wav", "onoffCaption": "train horn at 3.919-8.249", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4430.wav", "onoffCaption": "burping belching at 0.108-3.367, 4.557-7.03 and explosion at 3.841-8.841", "frequencyCaption": "burping belching two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4465.wav", "onoffCaption": "spraying at 1.705-2.213, 4.527-5.102", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4480.wav", "onoffCaption": "door slamming at 
0.287-2.052, 4.508-7.482", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4616.wav", "onoffCaption": "woman laughing at 2.164-4.769 and spraying at 6.976-7.976", "frequencyCaption": "woman laughing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4643.wav", "onoffCaption": "burping belching at 0.436-2.559 and explosion at 0.7-5.621 and spraying at 7.99-9.723", "frequencyCaption": "burping belching one times and explosion one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4758.wav", "onoffCaption": "explosion at 0.898-5.898 and train horn at 2.17-8.639 and duck quacking at 2.778-4.778, 5.802-7.802", "frequencyCaption": "explosion one times and train horn one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4934.wav", "onoffCaption": "duck quacking at 2.328-4.328, 5.244-7.244 and sneeze at 2.934-4.997, 5.606-7.669", "frequencyCaption": "duck quacking two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_4961.wav", "onoffCaption": "train horn at 0.419-3.219, 5.239-7.706", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4984.wav", "onoffCaption": "door knocking at 0.082-2.434, 4.087-6.099 and thump thud at 1.577-5.244 and dog barking at 5.732-7.732", "frequencyCaption": "door knocking two times and thump thud one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2025.wav", "onoffCaption": "cat meowing at 0.213-2.349, 3.579-5.163", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2031.wav", "onoffCaption": "tapping clicking clanking at 0.15-3.59, 4.176-7.053, 7.619-9.914", "frequencyCaption": "tapping clicking clanking three times"} +{"filepath": "data/multi_event_train/syn_2064.wav", "onoffCaption": "gunshot at 1.132-3.132", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2081.wav", 
"onoffCaption": "cow mooing at 0.092-3.074, 5.366-8.348", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2217.wav", "onoffCaption": "door knocking at 0.221-3.983, 4.815-8.577 and whistling at 0.271-8.282 and car horn honking at 1.033-5.94", "frequencyCaption": "door knocking two times and whistling one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2242.wav", "onoffCaption": "burping belching at 3.447-6.727, 7.825-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2359.wav", "onoffCaption": "spraying at 0.454-2.149 and whistling at 5.572-10.0", "frequencyCaption": "spraying one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2432.wav", "onoffCaption": "whistling at 0.178-9.045 and sneeze at 2.702-5.91", "frequencyCaption": "whistling one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2467.wav", "onoffCaption": "door slamming at 2.516-4.879, 5.771-8.745", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2473.wav", "onoffCaption": "explosion at 0.536-3.536 and door knocking at 3.283-5.595, 6.181-8.493", "frequencyCaption": "explosion one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2482.wav", "onoffCaption": "thump thud at 2.321-5.988, 6.902-9.402", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2529.wav", "onoffCaption": "cat meowing at 1.85-6.85, 7.75-10.0 and thump thud at 3.577-6.039", "frequencyCaption": "cat meowing two times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2599.wav", "onoffCaption": "sneeze at 0.377-1.611, 3.551-4.785", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2614.wav", "onoffCaption": "train horn at 0.234-2.701, 3.897-6.569", "frequencyCaption": "train horn two times"} +{"filepath": 
"data/multi_event_train/syn_2641.wav", "onoffCaption": "woman laughing at 0.106-3.206 and sheep goat bleating at 3.387-5.387, 6.381-8.381 and cat meowing at 5.944-6.971, 8.04-9.067", "frequencyCaption": "woman laughing one times and sheep goat bleating two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2878.wav", "onoffCaption": "sneeze at 2.708-4.621, 6.857-9.318", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2936.wav", "onoffCaption": "car horn honking at 2.339-6.588", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2963.wav", "onoffCaption": "duck quacking at 0.591-2.591, 3.535-5.535", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2986.wav", "onoffCaption": "thump thud at 0.431-2.931, 3.694-6.194, 6.919-9.854", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_4027.wav", "onoffCaption": "burping belching at 2.781-5.304, 7.447-9.97", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4072.wav", "onoffCaption": "gunshot at 0.659-2.659, 4.787-6.787", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4097.wav", "onoffCaption": "duck quacking at 2.279-4.279", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4169.wav", "onoffCaption": "thump thud at 1.232-5.682 and sheep goat bleating at 1.31-3.31", "frequencyCaption": "thump thud one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4201.wav", "onoffCaption": "thump thud at 0.928-4.595 and duck quacking at 1.99-3.99, 4.976-6.976", "frequencyCaption": "thump thud one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4254.wav", "onoffCaption": "burping belching at 0.617-4.117, 5.91-9.41", "frequencyCaption": "burping belching two times"} +{"filepath": 
"data/multi_event_train/syn_4424.wav", "onoffCaption": "explosion at 4.143-9.064", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4471.wav", "onoffCaption": "door knocking at 0.0-6.06 and gunshot at 2.217-4.258", "frequencyCaption": "door knocking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4494.wav", "onoffCaption": "spraying at 0.609-1.673 and duck quacking at 0.711-2.711, 3.85-5.85", "frequencyCaption": "spraying one times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4602.wav", "onoffCaption": "tapping clicking clanking at 2.77-6.21, 7.235-9.279 and door knocking at 3.111-7.621", "frequencyCaption": "tapping clicking clanking two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4657.wav", "onoffCaption": "car horn honking at 0.411-4.66, 5.261-7.726", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4719.wav", "onoffCaption": "sneeze at 3.096-5.041", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4920.wav", "onoffCaption": "sneeze at 0.108-2.722, 4.24-5.528, 6.997-9.077", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4975.wav", "onoffCaption": "tapping clicking clanking at 2.428-5.868, 7.423-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4990.wav", "onoffCaption": "cow mooing at 0.034-3.016 and train horn at 0.166-5.881", "frequencyCaption": "cow mooing one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2130.wav", "onoffCaption": "door slamming at 0.343-2.826, 4.639-7.122 and sheep goat bleating at 0.499-2.499, 4.127-6.127", "frequencyCaption": "door slamming two times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2165.wav", "onoffCaption": "door slamming at 1.723-3.264, 4.189-5.73", "frequencyCaption": 
"door slamming two times"} +{"filepath": "data/multi_event_train/syn_2180.wav", "onoffCaption": "gunshot at 0.152-2.152, 3.02-5.02, 5.995-8.496", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_2258.wav", "onoffCaption": "door knocking at 0.871-5.573", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2316.wav", "onoffCaption": "dog barking at 0.47-3.79 and car horn honking at 1.013-5.413 and door knocking at 3.196-5.384", "frequencyCaption": "dog barking one times and car horn honking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2343.wav", "onoffCaption": "dog barking at 0.514-2.514 and train horn at 2.989-8.704 and woman laughing at 5.063-7.701", "frequencyCaption": "dog barking one times and train horn one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2382.wav", "onoffCaption": "duck quacking at 1.839-3.839", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2428.wav", "onoffCaption": "sneeze at 2.249-4.863, 5.588-6.819, 7.797-10.0", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2498.wav", "onoffCaption": "sheep goat bleating at 3.531-5.531, 6.48-8.48", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2533.wav", "onoffCaption": "sheep goat bleating at 3.267-5.267", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2566.wav", "onoffCaption": "spraying at 0.111-0.611, 2.052-2.656, 4.625-7.015 and door knocking at 3.291-5.479, 7.437-9.625", "frequencyCaption": "spraying three times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2583.wav", "onoffCaption": "woman laughing at 1.036-3.404", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2715.wav", "onoffCaption": "sheep goat bleating at 
0.66-5.54, 7.267-9.267", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2740.wav", "onoffCaption": "train horn at 0.131-3.901, 6.116-9.886", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2837.wav", "onoffCaption": "whistling at 0.681-3.656, 5.848-8.823", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2862.wav", "onoffCaption": "dog barking at 0.087-2.525, 4.272-6.272", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2887.wav", "onoffCaption": "sheep goat bleating at 1.009-3.009, 4.409-6.409 and spraying at 2.342-4.778", "frequencyCaption": "sheep goat bleating two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2979.wav", "onoffCaption": "dog barking at 1.195-3.195, 4.515-6.515, 7.789-9.789 and woman laughing at 3.19-5.776 and door slamming at 3.266-5.486", "frequencyCaption": "dog barking three times and woman laughing one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4068.wav", "onoffCaption": "sneeze at 0.318-4.818, 5.939-7.265, 8.03-9.284", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4126.wav", "onoffCaption": "whistling at 2.345-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4173.wav", "onoffCaption": "sheep goat bleating at 0.475-2.475 and cat meowing at 0.576-1.661 and sneeze at 2.632-3.735", "frequencyCaption": "sheep goat bleating one times and cat meowing one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4196.wav", "onoffCaption": "gunshot at 3.737-5.737", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4300.wav", "onoffCaption": "car horn honking at 1.553-5.14", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4355.wav", "onoffCaption": "cow mooing at 
0.892-4.19, 5.52-7.717", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4501.wav", "onoffCaption": "whistling at 0.143-7.798", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4525.wav", "onoffCaption": "woman laughing at 0.406-2.631, 3.797-6.022", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4570.wav", "onoffCaption": "door slamming at 0.189-1.129, 3.598-4.538, 5.299-6.239", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4595.wav", "onoffCaption": "tapping clicking clanking at 0.081-3.521", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4618.wav", "onoffCaption": "explosion at 2.497-4.504 and door knocking at 7.28-10.0", "frequencyCaption": "explosion one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4703.wav", "onoffCaption": "woman laughing at 1.525-3.725, 5.878-8.086", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4756.wav", "onoffCaption": "cow mooing at 4.047-9.027", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4821.wav", "onoffCaption": "cat meowing at 2.463-4.423 and burping belching at 7.505-10.0", "frequencyCaption": "cat meowing one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4874.wav", "onoffCaption": "burping belching at 1.013-4.013, 4.734-6.857", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4891.wav", "onoffCaption": "explosion at 0.084-3.924 and sneeze at 3.631-5.314, 7.57-9.529", "frequencyCaption": "explosion one times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_2040.wav", "onoffCaption": "sneeze at 0.191-2.136, 2.684-4.629", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2114.wav", 
"onoffCaption": "gunshot at 0.672-2.672, 3.76-5.76 and tapping clicking clanking at 7.833-10.0", "frequencyCaption": "gunshot two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2141.wav", "onoffCaption": "whistling at 0.057-8.442", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2229.wav", "onoffCaption": "explosion at 1.961-4.961, 6.699-9.699 and sheep goat bleating at 2.522-4.522", "frequencyCaption": "explosion two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2299.wav", "onoffCaption": "door slamming at 0.233-1.748, 2.833-4.348", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2332.wav", "onoffCaption": "train horn at 1.612-4.932", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2367.wav", "onoffCaption": "burping belching at 2.641-5.006 and door slamming at 3.208-5.925 and dog barking at 4.614-6.614", "frequencyCaption": "burping belching one times and door slamming one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2416.wav", "onoffCaption": "whistling at 0.288-8.901", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2459.wav", "onoffCaption": "whistling at 1.055-6.636", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2517.wav", "onoffCaption": "dog barking at 2.923-4.923, 6.67-8.67", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2542.wav", "onoffCaption": "cow mooing at 0.253-3.263 and cat meowing at 0.397-1.707 and spraying at 1.526-2.393", "frequencyCaption": "cow mooing one times and cat meowing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2731.wav", "onoffCaption": "burping belching at 2.229-8.316", "frequencyCaption": "burping belching one times"} +{"filepath": 
"data/multi_event_train/syn_2764.wav", "onoffCaption": "tapping clicking clanking at 1.128-4.568, 5.185-7.486", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2781.wav", "onoffCaption": "spraying at 2.741-3.345 and dog barking at 2.749-4.749", "frequencyCaption": "spraying one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2809.wav", "onoffCaption": "dog barking at 1.625-3.625, 4.457-6.457", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2813.wav", "onoffCaption": "whistling at 0.073-8.084 and duck quacking at 0.917-2.917, 4.215-6.215 and sheep goat bleating at 2.786-4.786", "frequencyCaption": "whistling one times and duck quacking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2846.wav", "onoffCaption": "thump thud at 3.629-6.129", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2908.wav", "onoffCaption": "sheep goat bleating at 0.053-2.053, 4.129-6.129", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2912.wav", "onoffCaption": "thump thud at 0.997-3.497, 4.826-7.597", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4003.wav", "onoffCaption": "burping belching at 2.477-8.564", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4019.wav", "onoffCaption": "explosion at 1.534-5.534, 6.985-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4102.wav", "onoffCaption": "door knocking at 2.936-7.232 and door slamming at 5.741-8.458", "frequencyCaption": "door knocking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4157.wav", "onoffCaption": "tapping clicking clanking at 1.987-5.427, 6.586-8.643 and car horn honking at 3.357-6.576", "frequencyCaption": "tapping clicking 
clanking two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4270.wav", "onoffCaption": "gunshot at 0.201-2.201, 4.088-6.088", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4295.wav", "onoffCaption": "dog barking at 2.052-4.052", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4324.wav", "onoffCaption": "sneeze at 1.859-4.934, 6.814-9.889", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4371.wav", "onoffCaption": "spraying at 0.137-0.869, 2.847-3.911, 5.493-8.077", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4394.wav", "onoffCaption": "cat meowing at 0.311-1.586, 2.6-3.875 and tapping clicking clanking at 2.157-5.597", "frequencyCaption": "cat meowing two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4455.wav", "onoffCaption": "duck quacking at 0.261-2.261, 3.583-5.583, 6.193-8.193", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4554.wav", "onoffCaption": "car horn honking at 2.582-6.423, 7.928-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4626.wav", "onoffCaption": "burping belching at 2.098-5.642, 6.868-9.323", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4669.wav", "onoffCaption": "thump thud at 2.425-4.887, 5.471-8.074", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4727.wav", "onoffCaption": "tapping clicking clanking at 2.643-6.083, 7.748-9.849", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4772.wav", "onoffCaption": "sneeze at 0.269-1.5, 3.185-4.416", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4797.wav", "onoffCaption": "thump thud at 1.096-5.014", 
"frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4805.wav", "onoffCaption": "cat meowing at 0.861-3.765", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4850.wav", "onoffCaption": "whistling at 3.246-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2015.wav", "onoffCaption": "tapping clicking clanking at 0.308-3.748, 4.664-8.104 and spraying at 1.951-2.732, 4.676-5.457", "frequencyCaption": "tapping clicking clanking two times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2047.wav", "onoffCaption": "tapping clicking clanking at 3.433-6.873", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2233.wav", "onoffCaption": "train horn at 3.961-6.428", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2234.wav", "onoffCaption": "duck quacking at 0.296-2.296, 2.961-4.961", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2266.wav", "onoffCaption": "car horn honking at 0.862-4.449, 5.271-8.858", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2283.wav", "onoffCaption": "explosion at 0.487-2.58, 3.857-6.725", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2328.wav", "onoffCaption": "whistling at 0.999-6.047 and sheep goat bleating at 1.176-3.176, 3.966-5.966, 6.751-8.751", "frequencyCaption": "whistling one times and sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2398.wav", "onoffCaption": "door knocking at 1.309-4.418, 6.562-9.671 and woman laughing at 1.446-4.146, 4.759-6.771 and door slamming at 2.147-3.662, 5.397-6.397", "frequencyCaption": "door knocking two times and woman laughing two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2411.wav", "onoffCaption": "tapping 
clicking clanking at 0.115-3.555, 5.71-8.203", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2443.wav", "onoffCaption": "cat meowing at 2.254-4.004, 4.915-6.19 and train horn at 2.628-5.988, 6.935-9.02", "frequencyCaption": "cat meowing two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2558.wav", "onoffCaption": "whistling at 1.279-3.288", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2630.wav", "onoffCaption": "whistling at 0.199-9.864 and thump thud at 0.926-4.593", "frequencyCaption": "whistling one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2662.wav", "onoffCaption": "car horn honking at 2.538-6.938", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2665.wav", "onoffCaption": "door knocking at 0.383-5.085 and door slamming at 7.078-9.561", "frequencyCaption": "door knocking one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2680.wav", "onoffCaption": "cow mooing at 0.205-5.185, 7.154-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2687.wav", "onoffCaption": "woman laughing at 2.787-6.074, 6.661-8.766", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2779.wav", "onoffCaption": "explosion at 2.806-5.067, 6.494-8.755", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2915.wav", "onoffCaption": "sheep goat bleating at 0.649-2.649, 3.203-5.203, 5.869-7.869 and dog barking at 3.465-5.465", "frequencyCaption": "sheep goat bleating three times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2947.wav", "onoffCaption": "dog barking at 0.971-2.971 and thump thud at 5.698-10.0", "frequencyCaption": "dog barking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4004.wav", 
"onoffCaption": "tapping clicking clanking at 1.792-5.232, 7.002-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4056.wav", "onoffCaption": "duck quacking at 3.429-5.429, 6.15-8.15", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4118.wav", "onoffCaption": "sheep goat bleating at 3.47-5.47, 6.715-8.715", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4225.wav", "onoffCaption": "gunshot at 0.572-2.572 and cat meowing at 7.208-8.22", "frequencyCaption": "gunshot one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4277.wav", "onoffCaption": "thump thud at 2.987-6.034", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4292.wav", "onoffCaption": "explosion at 0.097-2.185, 4.513-6.926", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4389.wav", "onoffCaption": "burping belching at 0.173-6.117, 6.715-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4400.wav", "onoffCaption": "sheep goat bleating at 2.724-7.604 and thump thud at 3.747-6.247", "frequencyCaption": "sheep goat bleating one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4452.wav", "onoffCaption": "cat meowing at 3.312-4.856, 6.196-8.904", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4549.wav", "onoffCaption": "sneeze at 1.038-4.113, 5.958-7.665 and door slamming at 5.131-8.092", "frequencyCaption": "sneeze two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_4621.wav", "onoffCaption": "whistling at 0.065-5.24", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4673.wav", "onoffCaption": "tapping clicking clanking at 0.128-3.568", "frequencyCaption": "tapping clicking clanking one 
times"} +{"filepath": "data/multi_event_train/syn_4696.wav", "onoffCaption": "dog barking at 3.747-9.384", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4768.wav", "onoffCaption": "tapping clicking clanking at 1.367-4.807", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4904.wav", "onoffCaption": "train horn at 3.17-5.637, 7.827-9.933", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4951.wav", "onoffCaption": "sneeze at 2.27-3.434", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4956.wav", "onoffCaption": "cat meowing at 2.574-4.145, 4.744-6.315, 7.461-9.032", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2008.wav", "onoffCaption": "gunshot at 0.098-2.098 and burping belching at 0.614-3.816, 5.977-8.798", "frequencyCaption": "gunshot one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_2012.wav", "onoffCaption": "thump thud at 0.055-2.394, 3.578-6.349", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2109.wav", "onoffCaption": "sneeze at 1.144-4.792, 6.589-7.835", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2113.wav", "onoffCaption": "thump thud at 0.539-3.039, 4.952-7.18", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2146.wav", "onoffCaption": "car horn honking at 0.552-3.338, 5.746-8.532", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2261.wav", "onoffCaption": "car horn honking at 0.166-3.341 and dog barking at 2.611-4.611 and sheep goat bleating at 5.761-7.761", "frequencyCaption": "car horn honking one times and dog barking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2284.wav", "onoffCaption": "whistling at 
1.837-4.812, 5.973-8.948 and sheep goat bleating at 5.456-8.536", "frequencyCaption": "whistling two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2360.wav", "onoffCaption": "tapping clicking clanking at 2.241-5.681, 7.788-9.791", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2385.wav", "onoffCaption": "cat meowing at 2.894-3.921", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2444.wav", "onoffCaption": "gunshot at 0.397-2.397", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2510.wav", "onoffCaption": "gunshot at 0.453-2.693, 4.329-6.459", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2545.wav", "onoffCaption": "woman laughing at 0.559-3.145, 5.591-8.177", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2637.wav", "onoffCaption": "woman laughing at 0.151-3.438, 5.006-8.293", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2736.wav", "onoffCaption": "woman laughing at 0.176-3.752, 4.722-8.298 and explosion at 0.858-2.951 and door knocking at 4.024-6.376, 7.281-9.633", "frequencyCaption": "woman laughing two times and explosion one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2814.wav", "onoffCaption": "gunshot at 1.732-3.732, 6.222-8.222", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2841.wav", "onoffCaption": "burping belching at 0.477-3.477 and cat meowing at 2.608-4.358, 6.377-8.127", "frequencyCaption": "burping belching one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2940.wav", "onoffCaption": "door slamming at 0.759-3.476, 5.126-6.45, 7.827-9.13", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4051.wav", "onoffCaption": "dog 
barking at 0.092-2.092", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4150.wav", "onoffCaption": "whistling at 0.595-5.77, 7.744-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4222.wav", "onoffCaption": "spraying at 0.814-1.755, 3.619-4.369 and cow mooing at 2.19-5.2 and gunshot at 3.053-5.053, 6.329-8.329", "frequencyCaption": "spraying two times and cow mooing one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4238.wav", "onoffCaption": "whistling at 0.013-7.763 and thump thud at 0.796-3.296, 4.227-6.566 and sneeze at 4.06-6.005, 7.006-8.951", "frequencyCaption": "whistling one times and thump thud two times and sneeze two times"} +{"filepath": "data/multi_event_train/syn_4288.wav", "onoffCaption": "door knocking at 0.772-3.152 and gunshot at 0.952-2.952, 3.707-5.707, 6.781-8.781", "frequencyCaption": "door knocking one times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_4323.wav", "onoffCaption": "door knocking at 2.129-6.831", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4339.wav", "onoffCaption": "door knocking at 2.612-4.702", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4393.wav", "onoffCaption": "explosion at 0.73-3.73, 5.196-8.196", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4407.wav", "onoffCaption": "thump thud at 0.715-3.215 and spraying at 2.174-3.869, 5.959-6.534, 7.932-8.713", "frequencyCaption": "thump thud one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_4506.wav", "onoffCaption": "duck quacking at 2.427-4.427, 5.446-7.446", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4674.wav", "onoffCaption": "woman laughing at 0.359-3.459 and gunshot at 0.473-2.473, 4.596-6.596 and cat meowing at 1.726-3.914", "frequencyCaption": 
"woman laughing one times and gunshot two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4691.wav", "onoffCaption": "dog barking at 3.46-5.86", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4775.wav", "onoffCaption": "dog barking at 1.351-3.351, 5.849-7.849", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4790.wav", "onoffCaption": "spraying at 2.35-3.434, 3.962-5.046, 5.902-6.986 and car horn honking at 3.116-7.438", "frequencyCaption": "spraying three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4802.wav", "onoffCaption": "cow mooing at 0.621-3.59, 5.64-8.609 and woman laughing at 1.406-6.445, 7.25-9.348", "frequencyCaption": "cow mooing two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4818.wav", "onoffCaption": "burping belching at 1.606-8.774 and gunshot at 5.63-7.63", "frequencyCaption": "burping belching one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4903.wav", "onoffCaption": "train horn at 3.661-8.101", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4919.wav", "onoffCaption": "door knocking at 2.36-5.207, 6.01-8.74", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2079.wav", "onoffCaption": "gunshot at 2.234-4.234", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2137.wav", "onoffCaption": "cow mooing at 2.569-5.538", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2162.wav", "onoffCaption": "thump thud at 0.332-4.707, 5.963-8.463 and car horn honking at 2.236-4.701", "frequencyCaption": "thump thud two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2335.wav", "onoffCaption": "tapping clicking clanking at 0.144-3.584, 5.766-8.185", "frequencyCaption": "tapping clicking 
clanking two times"} +{"filepath": "data/multi_event_train/syn_2344.wav", "onoffCaption": "whistling at 3.642-8.126", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2534.wav", "onoffCaption": "burping belching at 2.018-4.049, 4.623-6.654, 7.545-9.576", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_2561.wav", "onoffCaption": "thump thud at 2.0-5.667, 6.541-8.88", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2584.wav", "onoffCaption": "explosion at 1.234-5.828, 6.719-10.0 and burping belching at 2.001-4.036", "frequencyCaption": "explosion two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2609.wav", "onoffCaption": "door knocking at 0.956-5.106", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2678.wav", "onoffCaption": "whistling at 2.467-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2712.wav", "onoffCaption": "car horn honking at 2.13-6.53, 7.444-9.957", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2763.wav", "onoffCaption": "sneeze at 2.235-3.466, 4.736-7.053, 7.596-10.0", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2786.wav", "onoffCaption": "thump thud at 0.526-3.297, 5.681-7.846 and whistling at 3.429-6.404", "frequencyCaption": "thump thud two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2830.wav", "onoffCaption": "burping belching at 1.964-4.167", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2865.wav", "onoffCaption": "sheep goat bleating at 0.352-2.352 and cow mooing at 1.577-4.559, 5.826-8.808", "frequencyCaption": "sheep goat bleating one times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2880.wav", "onoffCaption": "thump thud 
at 0.527-3.027 and spraying at 5.201-6.896, 9.043-9.543", "frequencyCaption": "thump thud one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_4105.wav", "onoffCaption": "thump thud at 2.466-6.916", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4174.wav", "onoffCaption": "train horn at 3.731-5.886 and cat meowing at 6.148-7.719", "frequencyCaption": "train horn one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4191.wav", "onoffCaption": "tapping clicking clanking at 1.128-4.568, 5.787-8.419", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4249.wav", "onoffCaption": "woman laughing at 3.737-6.809", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4307.wav", "onoffCaption": "woman laughing at 2.521-9.255", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4352.wav", "onoffCaption": "duck quacking at 2.31-4.31, 5.752-7.752 and door knocking at 6.221-8.684", "frequencyCaption": "duck quacking two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4376.wav", "onoffCaption": "door slamming at 3.066-4.39, 4.96-6.284, 7.039-8.363", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4439.wav", "onoffCaption": "spraying at 1.833-2.7, 3.222-4.089, 5.309-6.176", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4448.wav", "onoffCaption": "train horn at 2.19-8.25", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4522.wav", "onoffCaption": "door knocking at 0.198-2.575, 4.561-6.938 and woman laughing at 1.012-8.024", "frequencyCaption": "door knocking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4553.wav", "onoffCaption": "cat meowing at 0.582-2.576", "frequencyCaption": "cat 
meowing one times"} +{"filepath": "data/multi_event_train/syn_4720.wav", "onoffCaption": "sneeze at 0.726-4.374 and spraying at 1.105-2.352, 3.415-4.662, 5.838-7.085", "frequencyCaption": "sneeze one times and spraying three times"} +{"filepath": "data/multi_event_train/syn_4751.wav", "onoffCaption": "cow mooing at 0.27-3.28 and burping belching at 6.449-9.449", "frequencyCaption": "cow mooing one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4826.wav", "onoffCaption": "dog barking at 0.773-2.773 and cow mooing at 1.422-6.402, 7.024-10.0 and tapping clicking clanking at 3.237-6.677", "frequencyCaption": "dog barking one times and cow mooing two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4857.wav", "onoffCaption": "burping belching at 0.561-2.596, 3.115-5.15, 6.224-8.259", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_2036.wav", "onoffCaption": "whistling at 0.608-3.483, 4.251-6.383", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2063.wav", "onoffCaption": "cow mooing at 0.154-3.123 and car horn honking at 5.831-10.0", "frequencyCaption": "cow mooing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2086.wav", "onoffCaption": "thump thud at 2.044-4.815, 6.182-8.692", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2178.wav", "onoffCaption": "cow mooing at 2.45-7.43", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2187.wav", "onoffCaption": "thump thud at 2.111-4.339, 5.423-8.194", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2210.wav", "onoffCaption": "dog barking at 0.214-2.214, 3.126-5.126, 6.017-8.017 and burping belching at 1.459-5.003, 6.152-8.659", "frequencyCaption": "dog barking three times and burping belching two times"} +{"filepath": 
"data/multi_event_train/syn_2245.wav", "onoffCaption": "gunshot at 0.323-2.323", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2311.wav", "onoffCaption": "duck quacking at 0.64-2.64, 4.525-6.525, 7.803-9.803", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2435.wav", "onoffCaption": "gunshot at 2.92-4.92, 6.165-8.165", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2613.wav", "onoffCaption": "sneeze at 3.626-4.79", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2646.wav", "onoffCaption": "sneeze at 1.137-2.431, 3.879-6.36", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2708.wav", "onoffCaption": "woman laughing at 1.941-6.98", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2747.wav", "onoffCaption": "train horn at 2.336-6.536, 7.35-9.66", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2931.wav", "onoffCaption": "woman laughing at 2.301-4.495", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4020.wav", "onoffCaption": "dog barking at 2.093-4.093, 6.532-8.532 and gunshot at 5.252-7.252 and sneeze at 5.483-7.19", "frequencyCaption": "dog barking two times and gunshot one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4075.wav", "onoffCaption": "cow mooing at 0.295-3.277", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4090.wav", "onoffCaption": "sheep goat bleating at 3.411-5.411, 6.681-8.681", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4121.wav", "onoffCaption": "thump thud at 0.085-4.46, 5.692-8.463 and door slamming at 0.091-2.091", "frequencyCaption": "thump thud two times and door slamming one times"} +{"filepath": 
"data/multi_event_train/syn_4253.wav", "onoffCaption": "woman laughing at 3.8-7.087", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4348.wav", "onoffCaption": "sneeze at 2.276-3.564, 4.675-6.358", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4423.wav", "onoffCaption": "door slamming at 2.555-3.858 and spraying at 7.213-9.341", "frequencyCaption": "door slamming one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4476.wav", "onoffCaption": "burping belching at 0.592-4.928, 6.205-8.838", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4489.wav", "onoffCaption": "duck quacking at 1.562-3.562, 4.618-6.618, 7.146-9.146", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4493.wav", "onoffCaption": "burping belching at 1.073-4.332, 5.408-8.408 and cat meowing at 4.695-6.585, 7.258-9.148", "frequencyCaption": "burping belching two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4538.wav", "onoffCaption": "thump thud at 0.955-3.294, 4.777-7.332 and cat meowing at 1.094-2.989, 5.376-7.316", "frequencyCaption": "thump thud two times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4577.wav", "onoffCaption": "train horn at 0.185-4.366 and gunshot at 7.36-9.36", "frequencyCaption": "train horn one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4588.wav", "onoffCaption": "sneeze at 0.785-3.024, 5.081-7.194, 7.912-9.975", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4592.wav", "onoffCaption": "duck quacking at 2.429-4.429, 6.607-8.607", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4605.wav", "onoffCaption": "cat meowing at 0.056-1.056, 2.31-4.677, 5.816-7.352 and door slamming at 1.937-2.437, 3.086-3.586", "frequencyCaption": "cat meowing 
three times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4704.wav", "onoffCaption": "spraying at 2.106-2.681, 3.676-6.112 and cat meowing at 2.174-3.44, 4.07-5.336, 6.08-7.346", "frequencyCaption": "spraying two times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4869.wav", "onoffCaption": "gunshot at 2.43-4.43 and sneeze at 6.425-10.0", "frequencyCaption": "gunshot one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4873.wav", "onoffCaption": "woman laughing at 2.935-6.02, 6.817-9.1", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4896.wav", "onoffCaption": "cow mooing at 1.844-5.142, 6.268-9.566", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4927.wav", "onoffCaption": "cow mooing at 2.408-5.418, 7.326-9.414", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4968.wav", "onoffCaption": "duck quacking at 0.788-2.788, 4.242-6.242 and cow mooing at 2.553-5.522, 7.683-9.783", "frequencyCaption": "duck quacking two times and cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4972.wav", "onoffCaption": "gunshot at 0.697-2.697, 4.279-6.279", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4997.wav", "onoffCaption": "sneeze at 0.037-1.283 and dog barking at 4.35-6.35, 7.332-9.332", "frequencyCaption": "sneeze one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2051.wav", "onoffCaption": "burping belching at 0.475-3.296, 5.659-8.092", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2222.wav", "onoffCaption": "explosion at 3.458-5.551 and cat meowing at 5.386-6.386, 7.208-8.208", "frequencyCaption": "explosion one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2277.wav", "onoffCaption": "explosion at 1.115-3.208, 4.127-6.189", 
"frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2292.wav", "onoffCaption": "gunshot at 1.525-3.525", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2339.wav", "onoffCaption": "tapping clicking clanking at 3.039-6.479", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2389.wav", "onoffCaption": "explosion at 1.037-6.037, 6.705-9.214", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2407.wav", "onoffCaption": "duck quacking at 0.106-2.106, 3.694-5.694", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2452.wav", "onoffCaption": "gunshot at 2.289-4.289, 5.735-7.735", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2460.wav", "onoffCaption": "duck quacking at 0.55-2.55 and tapping clicking clanking at 4.483-7.923", "frequencyCaption": "duck quacking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2485.wav", "onoffCaption": "tapping clicking clanking at 2.678-6.118", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2549.wav", "onoffCaption": "thump thud at 1.047-4.094, 4.803-7.303", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2621.wav", "onoffCaption": "car horn honking at 1.926-4.839, 6.023-8.936", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2674.wav", "onoffCaption": "door knocking at 0.096-3.846, 5.923-9.673", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2691.wav", "onoffCaption": "door knocking at 0.614-4.91, 5.98-8.229 and cow mooing at 4.553-7.535", "frequencyCaption": "door knocking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2818.wav", "onoffCaption": "spraying at 
2.945-3.945, 5.674-6.674, 7.659-8.659", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2903.wav", "onoffCaption": "cow mooing at 2.138-5.436, 6.232-9.201", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2964.wav", "onoffCaption": "duck quacking at 3.185-5.185, 6.724-8.724", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2981.wav", "onoffCaption": "woman laughing at 0.149-2.349, 3.471-6.171", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4012.wav", "onoffCaption": "sheep goat bleating at 2.964-6.044", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4047.wav", "onoffCaption": "whistling at 1.46-9.065", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4109.wav", "onoffCaption": "gunshot at 2.838-4.838, 6.567-8.567", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4206.wav", "onoffCaption": "car horn honking at 1.392-6.299, 7.213-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4261.wav", "onoffCaption": "gunshot at 0.303-2.809, 5.036-7.036", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4284.wav", "onoffCaption": "spraying at 0.458-1.062 and cat meowing at 3.769-5.044, 5.973-7.248, 7.752-9.027", "frequencyCaption": "spraying one times and cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4411.wav", "onoffCaption": "train horn at 0.37-2.507", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4444.wav", "onoffCaption": "duck quacking at 0.591-2.591", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4637.wav", "onoffCaption": "cow mooing at 3.246-6.228", "frequencyCaption": "cow mooing one times"} +{"filepath": 
"data/multi_event_train/syn_4650.wav", "onoffCaption": "dog barking at 3.055-5.055", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4687.wav", "onoffCaption": "car horn honking at 2.758-4.758, 7.034-9.034", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4915.wav", "onoffCaption": "gunshot at 0.044-2.044, 2.611-4.611, 5.506-7.506", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4940.wav", "onoffCaption": "explosion at 0.203-3.203, 5.192-8.192", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2004.wav", "onoffCaption": "burping belching at 2.243-4.337, 5.194-8.075", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2105.wav", "onoffCaption": "gunshot at 0.096-2.336, 3.644-5.774, 6.28-8.28 and door slamming at 0.203-0.703", "frequencyCaption": "gunshot three times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2121.wav", "onoffCaption": "door knocking at 3.089-5.713, 7.505-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2150.wav", "onoffCaption": "tapping clicking clanking at 3.558-6.998 and duck quacking at 6.009-8.009", "frequencyCaption": "tapping clicking clanking one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_2174.wav", "onoffCaption": "dog barking at 0.653-2.653, 4.141-6.141", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2238.wav", "onoffCaption": "sheep goat bleating at 2.407-6.047, 6.802-10.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2249.wav", "onoffCaption": "spraying at 0.058-0.925", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2288.wav", "onoffCaption": "thump thud at 3.546-5.885 and spraying at 7.512-8.687", 
"frequencyCaption": "thump thud one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2323.wav", "onoffCaption": "door slamming at 0.137-2.137, 2.976-4.976, 6.469-8.469", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2352.wav", "onoffCaption": "whistling at 1.105-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2376.wav", "onoffCaption": "woman laughing at 2.081-5.368, 6.66-9.298 and gunshot at 3.24-5.24, 5.749-7.749", "frequencyCaption": "woman laughing two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2393.wav", "onoffCaption": "sneeze at 0.13-4.63, 5.693-10.0", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2439.wav", "onoffCaption": "thump thud at 1.519-5.186 and woman laughing at 3.888-5.97, 7.447-9.529", "frequencyCaption": "thump thud one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2448.wav", "onoffCaption": "whistling at 2.912-8.412 and tapping clicking clanking at 5.008-8.448 and gunshot at 7.107-9.107", "frequencyCaption": "whistling one times and tapping clicking clanking one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2489.wav", "onoffCaption": "tapping clicking clanking at 1.137-4.577, 6.143-8.158", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2506.wav", "onoffCaption": "tapping clicking clanking at 0.235-3.675, 5.904-9.344", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2522.wav", "onoffCaption": "thump thud at 0.022-2.793 and car horn honking at 1.65-6.557", "frequencyCaption": "thump thud one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2553.wav", "onoffCaption": "spraying at 0.053-0.628, 1.756-2.82, 3.538-4.795 and train horn at 0.586-5.026, 7.084-9.849", 
"frequencyCaption": "spraying three times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2577.wav", "onoffCaption": "whistling at 0.231-5.731, 6.91-8.919", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2592.wav", "onoffCaption": "explosion at 2.034-4.562, 5.841-8.369", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2704.wav", "onoffCaption": "spraying at 1.223-1.792, 2.643-3.393 and dog barking at 4.984-6.984", "frequencyCaption": "spraying two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2720.wav", "onoffCaption": "whistling at 2.643-4.872, 5.859-8.467", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_2775.wav", "onoffCaption": "sheep goat bleating at 2.154-4.154, 6.08-8.08", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2790.wav", "onoffCaption": "sneeze at 2.831-4.894, 6.753-8.816", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2802.wav", "onoffCaption": "whistling at 3.011-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2857.wav", "onoffCaption": "sneeze at 3.411-6.025, 6.998-9.45", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2873.wav", "onoffCaption": "cow mooing at 0.596-3.578, 6.07-9.052", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2896.wav", "onoffCaption": "explosion at 3.203-8.203", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2919.wav", "onoffCaption": "thump thud at 0.206-2.977, 3.705-6.476", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2956.wav", "onoffCaption": "woman laughing at 1.045-3.637, 4.725-7.363", "frequencyCaption": "woman laughing two times"} +{"filepath": 
"data/multi_event_train/syn_2968.wav", "onoffCaption": "sneeze at 0.034-1.36 and tapping clicking clanking at 5.197-8.637", "frequencyCaption": "sneeze one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4008.wav", "onoffCaption": "woman laughing at 1.326-3.52, 5.953-8.147", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4079.wav", "onoffCaption": "spraying at 0.063-1.549 and car horn honking at 3.496-6.422", "frequencyCaption": "spraying one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4113.wav", "onoffCaption": "car horn honking at 0.329-3.983, 4.793-7.611 and cow mooing at 5.071-8.04", "frequencyCaption": "car horn honking two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4146.wav", "onoffCaption": "sheep goat bleating at 3.618-5.618, 6.355-8.355", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4162.wav", "onoffCaption": "tapping clicking clanking at 1.893-5.333, 7.249-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4187.wav", "onoffCaption": "cow mooing at 0.198-3.496, 5.511-7.81", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4234.wav", "onoffCaption": "door knocking at 1.232-3.695 and duck quacking at 7.38-9.38", "frequencyCaption": "door knocking one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4311.wav", "onoffCaption": "gunshot at 0.178-2.178, 3.114-5.114, 5.762-8.002 and explosion at 2.45-7.45", "frequencyCaption": "gunshot three times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4335.wav", "onoffCaption": "sheep goat bleating at 0.499-2.499, 4.063-6.063, 7.651-9.651", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4360.wav", "onoffCaption": "train horn at 
2.318-6.648", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4385.wav", "onoffCaption": "burping belching at 0.088-5.088, 5.724-7.818", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4510.wav", "onoffCaption": "sneeze at 0.848-2.807 and car horn honking at 7.218-10.0", "frequencyCaption": "sneeze one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4534.wav", "onoffCaption": "duck quacking at 0.48-2.48, 3.415-5.415, 7.851-9.851", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4545.wav", "onoffCaption": "train horn at 1.683-4.883, 6.583-9.433", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4662.wav", "onoffCaption": "thump thud at 2.995-5.334, 5.838-8.209 and door knocking at 4.48-8.033", "frequencyCaption": "thump thud two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4678.wav", "onoffCaption": "cow mooing at 1.341-4.351, 5.937-8.017", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4736.wav", "onoffCaption": "door knocking at 2.538-7.24 and sheep goat bleating at 5.123-7.123", "frequencyCaption": "door knocking one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4747.wav", "onoffCaption": "train horn at 2.593-4.748, 7.117-9.772", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4763.wav", "onoffCaption": "door knocking at 0.231-3.981", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4779.wav", "onoffCaption": "cow mooing at 3.146-6.156 and door knocking at 3.765-6.265, 7.273-9.33", "frequencyCaption": "cow mooing one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_4786.wav", "onoffCaption": "train horn at 1.798-6.875 and dog barking at 5.284-7.284", 
"frequencyCaption": "train horn one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4814.wav", "onoffCaption": "woman laughing at 2.443-5.238, 7.538-10.0", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4830.wav", "onoffCaption": "spraying at 1.24-2.107, 4.396-5.263, 6.077-6.944", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4841.wav", "onoffCaption": "explosion at 0.673-5.673", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_7.wav", "onoffCaption": "sheep goat bleating at 1.687-3.687, 4.963-6.963", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2075.wav", "onoffCaption": "sneeze at 2.897-7.397", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_2090.wav", "onoffCaption": "cat meowing at 3.18-4.393, 6.703-7.916", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2191.wav", "onoffCaption": "door knocking at 1.462-3.839 and thump thud at 5.248-9.623", "frequencyCaption": "door knocking one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2206.wav", "onoffCaption": "sneeze at 2.72-4.008, 4.819-6.502, 7.747-9.073", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2253.wav", "onoffCaption": "sneeze at 0.069-1.161, 1.844-4.595, 5.283-7.491", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2307.wav", "onoffCaption": "train horn at 0.621-3.061, 5.358-8.033", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2348.wav", "onoffCaption": "door slamming at 2.104-3.869, 5.812-8.786 and dog barking at 2.325-4.325", "frequencyCaption": "door slamming two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2423.wav", "onoffCaption": "gunshot at 2.171-4.672, 
7.152-9.152", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_2538.wav", "onoffCaption": "door knocking at 2.954-6.329", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2605.wav", "onoffCaption": "sheep goat bleating at 2.344-4.344, 6.559-8.559", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2650.wav", "onoffCaption": "burping belching at 1.323-4.829, 5.63-7.737", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2751.wav", "onoffCaption": "cow mooing at 0.385-3.354, 5.307-7.518", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2826.wav", "onoffCaption": "thump thud at 2.171-6.546, 7.936-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2927.wav", "onoffCaption": "sheep goat bleating at 2.616-4.616, 5.942-7.942", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4036.wav", "onoffCaption": "door slamming at 1.098-3.294 and explosion at 2.32-5.32, 6.136-9.136 and sneeze at 8.145-9.309", "frequencyCaption": "door slamming one times and explosion two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4086.wav", "onoffCaption": "thump thud at 0.396-2.735, 3.807-6.146, 7.197-9.536", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_4137.wav", "onoffCaption": "explosion at 0.362-3.362, 5.024-8.024 and tapping clicking clanking at 3.969-7.409", "frequencyCaption": "explosion two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4245.wav", "onoffCaption": "car horn honking at 0.888-4.729, 6.844-9.24", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4344.wav", "onoffCaption": "gunshot at 2.648-4.648, 5.271-7.312", "frequencyCaption": "gunshot two 
times"} +{"filepath": "data/multi_event_train/syn_4460.wav", "onoffCaption": "gunshot at 2.04-4.04, 4.742-6.742", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4485.wav", "onoffCaption": "cat meowing at 0.363-2.303, 3.082-5.022", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4561.wav", "onoffCaption": "cat meowing at 0.256-5.256", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4584.wav", "onoffCaption": "thump thud at 0.001-4.451, 5.57-10.0 and sneeze at 0.165-1.665", "frequencyCaption": "thump thud two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4609.wav", "onoffCaption": "duck quacking at 0.204-2.204 and sneeze at 4.938-7.341", "frequencyCaption": "duck quacking one times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4613.wav", "onoffCaption": "cat meowing at 0.077-1.104, 1.625-2.652 and woman laughing at 5.845-8.94", "frequencyCaption": "cat meowing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4708.wav", "onoffCaption": "burping belching at 2.626-4.751, 5.854-7.979", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4712.wav", "onoffCaption": "sneeze at 0.454-2.066, 4.387-5.915 and sheep goat bleating at 4.63-6.63", "frequencyCaption": "sneeze two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4865.wav", "onoffCaption": "thump thud at 2.49-4.718, 6.791-9.291", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4880.wav", "onoffCaption": "sheep goat bleating at 0.069-2.069 and spraying at 1.372-2.122", "frequencyCaption": "sheep goat bleating one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_4931.wav", "onoffCaption": "door slamming at 2.992-4.516, 5.7-7.224", "frequencyCaption": "door slamming two times"} +{"filepath": 
"data/multi_event_train/syn_4964.wav", "onoffCaption": "door knocking at 0.461-4.9, 5.822-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4981.wav", "onoffCaption": "train horn at 0.592-8.792", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2020.wav", "onoffCaption": "burping belching at 0.805-4.805, 7.041-9.152", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2072.wav", "onoffCaption": "gunshot at 0.343-2.343, 4.093-6.093 and whistling at 0.77-6.27", "frequencyCaption": "gunshot two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2097.wav", "onoffCaption": "cat meowing at 0.499-2.07 and woman laughing at 4.3-6.382", "frequencyCaption": "cat meowing one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2169.wav", "onoffCaption": "thump thud at 1.556-4.327 and gunshot at 3.176-5.176", "frequencyCaption": "thump thud one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2201.wav", "onoffCaption": "door slamming at 3.56-4.365, 6.282-7.087, 8.662-9.467", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2424.wav", "onoffCaption": "cow mooing at 1.181-4.191 and explosion at 1.485-3.573 and gunshot at 2.08-4.08", "frequencyCaption": "cow mooing one times and explosion one times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_2471.wav", "onoffCaption": "tapping clicking clanking at 0.354-3.794 and car horn honking at 0.545-5.452", "frequencyCaption": "tapping clicking clanking one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2476.wav", "onoffCaption": "woman laughing at 2.307-4.59", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2493.wav", "onoffCaption": "gunshot at 2.205-4.205 and thump thud at 6.122-9.789", "frequencyCaption": 
"gunshot one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2588.wav", "onoffCaption": "sheep goat bleating at 0.077-4.957, 5.948-10.0", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2657.wav", "onoffCaption": "door slamming at 0.527-2.747, 3.423-4.947, 6.797-8.71", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2869.wav", "onoffCaption": "burping belching at 0.032-3.472, 4.014-6.414 and explosion at 0.35-3.35, 4.571-7.435 and whistling at 3.319-8.494", "frequencyCaption": "burping belching two times and explosion two times and whistling one times"} +{"filepath": "data/multi_event_train/syn_2920.wav", "onoffCaption": "burping belching at 1.183-5.183, 6.898-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2972.wav", "onoffCaption": "thump thud at 0.306-2.806, 3.519-6.279, 6.88-9.06", "frequencyCaption": "thump thud three times"} +{"filepath": "data/multi_event_train/syn_2997.wav", "onoffCaption": "burping belching at 1.791-5.791, 7.164-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4031.wav", "onoffCaption": "sheep goat bleating at 0.181-2.181 and door knocking at 0.401-3.457", "frequencyCaption": "sheep goat bleating one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4063.wav", "onoffCaption": "gunshot at 0.06-2.101, 3.276-5.317, 6.463-8.504", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4178.wav", "onoffCaption": "gunshot at 1.149-3.149, 3.925-5.925, 6.522-8.522 and tapping clicking clanking at 5.417-8.857", "frequencyCaption": "gunshot three times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4210.wav", "onoffCaption": "cow mooing at 2.521-5.49", "frequencyCaption": "cow mooing one times"} +{"filepath": 
"data/multi_event_train/syn_4242.wav", "onoffCaption": "duck quacking at 2.613-4.613", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4359.wav", "onoffCaption": "door slamming at 2.052-3.169, 3.806-4.923, 5.493-6.61 and woman laughing at 3.52-6.106", "frequencyCaption": "door slamming three times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4435.wav", "onoffCaption": "spraying at 4.083-4.934", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_4467.wav", "onoffCaption": "sheep goat bleating at 1.74-3.74 and thump thud at 6.828-9.328", "frequencyCaption": "sheep goat bleating one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4482.wav", "onoffCaption": "explosion at 2.5-7.5 and duck quacking at 4.221-6.221", "frequencyCaption": "explosion one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4599.wav", "onoffCaption": "sneeze at 0.066-1.16 and door knocking at 5.548-9.101", "frequencyCaption": "sneeze one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4614.wav", "onoffCaption": "duck quacking at 2.031-4.031 and train horn at 2.112-4.267, 5.729-8.564", "frequencyCaption": "duck quacking one times and train horn two times"} +{"filepath": "data/multi_event_train/syn_4646.wav", "onoffCaption": "sneeze at 0.795-4.443 and sheep goat bleating at 6.423-8.423", "frequencyCaption": "sneeze one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4878.wav", "onoffCaption": "dog barking at 0.779-2.779", "frequencyCaption": "dog barking one times"} +{"filepath": "data/multi_event_train/syn_4963.wav", "onoffCaption": "door knocking at 1.64-4.14, 5.135-7.775", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4986.wav", "onoffCaption": "explosion at 0.519-2.612, 3.807-5.9, 7.053-9.146", "frequencyCaption": "explosion three 
times"} +{"filepath": "data/multi_event_train/syn_2027.wav", "onoffCaption": "cat meowing at 2.271-5.512, 7.359-8.499", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2126.wav", "onoffCaption": "spraying at 1.5-3.628, 4.53-6.658, 7.824-9.952", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2254.wav", "onoffCaption": "spraying at 3.935-4.51, 5.563-6.344, 7.392-9.784", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2300.wav", "onoffCaption": "sneeze at 3.324-5.8, 6.929-9.405", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2355.wav", "onoffCaption": "duck quacking at 0.3-2.3, 3.841-5.841, 6.667-8.667", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_2494.wav", "onoffCaption": "car horn honking at 1.413-3.926, 4.97-7.349 and tapping clicking clanking at 2.467-5.907, 7.887-10.0", "frequencyCaption": "car horn honking two times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2570.wav", "onoffCaption": "door slamming at 0.573-1.876, 2.84-4.143", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_2595.wav", "onoffCaption": "woman laughing at 0.777-3.145, 5.247-7.615 and sheep goat bleating at 1.517-3.517", "frequencyCaption": "woman laughing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2602.wav", "onoffCaption": "burping belching at 3.235-6.235, 7.539-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2618.wav", "onoffCaption": "thump thud at 1.002-3.773", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2703.wav", "onoffCaption": "cow mooing at 0.447-5.427, 7.486-9.883", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2719.wav", 
"onoffCaption": "burping belching at 0.966-4.246, 5.414-7.779 and cat meowing at 3.195-4.812", "frequencyCaption": "burping belching two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2756.wav", "onoffCaption": "thump thud at 1.042-3.27, 4.464-6.692", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2874.wav", "onoffCaption": "cow mooing at 2.648-5.63, 6.95-9.932", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2891.wav", "onoffCaption": "explosion at 0.143-3.143", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2975.wav", "onoffCaption": "burping belching at 2.339-5.619, 6.718-9.701", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2990.wav", "onoffCaption": "burping belching at 0.11-2.141, 2.953-4.984 and woman laughing at 7.255-9.538", "frequencyCaption": "burping belching two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4064.wav", "onoffCaption": "whistling at 1.653-6.137, 7.449-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4081.wav", "onoffCaption": "door knocking at 0.283-4.722 and woman laughing at 1.01-3.71, 4.329-6.947", "frequencyCaption": "door knocking one times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4165.wav", "onoffCaption": "tapping clicking clanking at 1.022-4.462, 5.978-9.418 and gunshot at 1.274-3.274, 5.12-7.12", "frequencyCaption": "tapping clicking clanking two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4180.wav", "onoffCaption": "cow mooing at 1.464-6.444", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4217.wav", "onoffCaption": "dog barking at 1.46-3.46, 4.833-6.833 and cat meowing at 1.471-3.411, 5.693-7.237", "frequencyCaption": "dog barking two times and cat meowing two times"} 
+{"filepath": "data/multi_event_train/syn_4316.wav", "onoffCaption": "car horn honking at 0.347-3.165 and cow mooing at 5.694-10.0", "frequencyCaption": "car horn honking one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4428.wav", "onoffCaption": "door knocking at 1.117-3.338, 4.015-6.392, 7.322-9.807", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_4432.wav", "onoffCaption": "door knocking at 0.13-2.977 and train horn at 1.697-4.577, 5.557-8.437", "frequencyCaption": "door knocking one times and train horn two times"} +{"filepath": "data/multi_event_train/syn_4529.wav", "onoffCaption": "sheep goat bleating at 1.449-3.449, 4.761-6.761 and door knocking at 4.19-6.69, 7.357-9.857", "frequencyCaption": "sheep goat bleating two times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_4533.wav", "onoffCaption": "door slamming at 0.536-1.555, 2.537-3.537, 5.811-6.616", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4641.wav", "onoffCaption": "explosion at 0.631-5.631 and sheep goat bleating at 2.075-5.995", "frequencyCaption": "explosion one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4740.wav", "onoffCaption": "tapping clicking clanking at 0.306-3.746, 5.611-8.049", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4837.wav", "onoffCaption": "thump thud at 1.079-5.454", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4936.wav", "onoffCaption": "spraying at 0.214-2.798, 3.672-6.256, 7.074-9.658", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2019.wav", "onoffCaption": "dog barking at 0.089-2.089, 2.72-4.72, 5.569-7.569 and train horn at 4.343-7.877 and door slamming at 5.704-7.924", "frequencyCaption": "dog barking three times and train horn one times and door slamming 
one times"} +{"filepath": "data/multi_event_train/syn_2068.wav", "onoffCaption": "duck quacking at 0.047-2.047, 2.698-4.698, 5.474-7.474 and cat meowing at 2.237-3.377 and dog barking at 6.219-8.219", "frequencyCaption": "duck quacking three times and cat meowing one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2102.wav", "onoffCaption": "spraying at 1.298-3.06, 3.89-5.065, 6.352-8.788", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2173.wav", "onoffCaption": "car horn honking at 0.126-3.78 and cat meowing at 0.557-4.917, 6.481-7.628", "frequencyCaption": "car horn honking one times and cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2196.wav", "onoffCaption": "sneeze at 1.355-4.43, 5.374-8.449", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2371.wav", "onoffCaption": "cat meowing at 1.912-3.178, 4.339-5.605, 6.882-8.148", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2394.wav", "onoffCaption": "tapping clicking clanking at 2.112-5.552, 6.278-9.718", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2525.wav", "onoffCaption": "train horn at 0.234-3.768, 4.385-7.919 and sneeze at 5.081-8.729", "frequencyCaption": "train horn two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_2554.wav", "onoffCaption": "thump thud at 2.155-4.655", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2669.wav", "onoffCaption": "door knocking at 0.303-3.856 and train horn at 5.632-9.7", "frequencyCaption": "door knocking one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2727.wav", "onoffCaption": "train horn at 0.066-4.066, 4.98-8.98 and cat meowing at 1.727-2.727", "frequencyCaption": "train horn two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2772.wav", 
"onoffCaption": "burping belching at 1.808-4.808, 5.553-8.183 and car horn honking at 3.667-8.067", "frequencyCaption": "burping belching two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2821.wav", "onoffCaption": "door knocking at 0.226-3.073, 4.879-7.422", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2850.wav", "onoffCaption": "dog barking at 0.215-2.215, 3.162-5.162, 6.429-8.429", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_4130.wav", "onoffCaption": "dog barking at 0.005-2.005 and explosion at 3.331-6.387, 7.394-10.0", "frequencyCaption": "dog barking one times and explosion two times"} +{"filepath": "data/multi_event_train/syn_4141.wav", "onoffCaption": "burping belching at 1.866-6.202, 7.316-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4229.wav", "onoffCaption": "cow mooing at 2.388-5.398, 7.519-9.548", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4258.wav", "onoffCaption": "explosion at 2.276-5.276, 6.95-9.95", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4332.wav", "onoffCaption": "cat meowing at 1.898-3.482, 5.203-6.935", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_4343.wav", "onoffCaption": "sheep goat bleating at 0.153-2.153, 2.766-4.766", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4498.wav", "onoffCaption": "door slamming at 1.793-2.966, 4.605-6.129, 8.222-9.162", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4517.wav", "onoffCaption": "cat meowing at 0.438-1.578, 3.851-6.376 and door slamming at 2.388-4.388, 4.942-6.942", "frequencyCaption": "cat meowing two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4566.wav", 
"onoffCaption": "whistling at 2.552-7.727 and car horn honking at 3.898-8.41", "frequencyCaption": "whistling one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4583.wav", "onoffCaption": "spraying at 3.279-5.671, 6.189-7.056, 9.134-9.866", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4715.wav", "onoffCaption": "duck quacking at 1.168-3.168, 3.913-5.913, 7.163-9.163 and dog barking at 1.171-3.171", "frequencyCaption": "duck quacking three times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_4764.wav", "onoffCaption": "woman laughing at 1.085-3.368 and door slamming at 5.864-6.702, 7.848-8.987", "frequencyCaption": "woman laughing one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4781.wav", "onoffCaption": "woman laughing at 0.104-2.709, 3.749-5.831, 6.999-9.199", "frequencyCaption": "woman laughing three times"} +{"filepath": "data/multi_event_train/syn_4813.wav", "onoffCaption": "burping belching at 0.089-3.089, 4.311-7.192", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4862.wav", "onoffCaption": "cow mooing at 1.786-4.768, 5.558-8.527", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4887.wav", "onoffCaption": "woman laughing at 0.005-3.292", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4908.wav", "onoffCaption": "gunshot at 0.567-2.608, 3.838-5.879, 6.969-9.01", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4979.wav", "onoffCaption": "cow mooing at 0.323-4.752, 5.985-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2003.wav", "onoffCaption": "door slamming at 2.017-4.475 and dog barking at 6.941-8.941", "frequencyCaption": "door slamming one times and dog barking one times"} +{"filepath": 
"data/multi_event_train/syn_2056.wav", "onoffCaption": "whistling at 2.868-4.877 and burping belching at 2.97-6.149, 7.093-10.0", "frequencyCaption": "whistling one times and burping belching two times"} +{"filepath": "data/multi_event_train/syn_2118.wav", "onoffCaption": "door slamming at 1.955-3.104, 4.415-5.564, 6.925-8.074 and car horn honking at 4.279-8.679", "frequencyCaption": "door slamming three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2157.wav", "onoffCaption": "door slamming at 3.384-4.617, 5.78-7.013, 7.976-9.209", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2225.wav", "onoffCaption": "duck quacking at 2.673-4.673, 6.37-8.37 and spraying at 3.012-5.596", "frequencyCaption": "duck quacking two times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2324.wav", "onoffCaption": "cow mooing at 2.87-6.168, 7.884-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2400.wav", "onoffCaption": "cat meowing at 0.016-1.229", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2455.wav", "onoffCaption": "cat meowing at 1.766-2.778, 3.833-4.973, 5.712-7.444", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2501.wav", "onoffCaption": "gunshot at 0.124-2.124, 2.96-4.96 and door slamming at 0.289-1.438", "frequencyCaption": "gunshot two times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2673.wav", "onoffCaption": "woman laughing at 0.241-2.466", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2696.wav", "onoffCaption": "cow mooing at 2.987-6.285", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2768.wav", "onoffCaption": "sneeze at 0.124-4.653, 5.641-6.935", "frequencyCaption": "sneeze two times"} +{"filepath": 
"data/multi_event_train/syn_2797.wav", "onoffCaption": "sheep goat bleating at 0.044-2.044, 3.147-5.147, 6.598-8.598", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_2805.wav", "onoffCaption": "sneeze at 0.029-1.123, 3.204-5.521", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2904.wav", "onoffCaption": "whistling at 0.037-9.702 and dog barking at 0.082-2.082 and burping belching at 6.199-9.199", "frequencyCaption": "whistling one times and dog barking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2951.wav", "onoffCaption": "whistling at 2.043-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4015.wav", "onoffCaption": "car horn honking at 2.146-6.468, 7.709-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4114.wav", "onoffCaption": "sneeze at 0.7-3.103, 5.122-7.525", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_4233.wav", "onoffCaption": "whistling at 1.754-4.729 and woman laughing at 2.721-5.076", "frequencyCaption": "whistling one times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4266.wav", "onoffCaption": "burping belching at 1.694-5.563, 6.453-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4283.wav", "onoffCaption": "cow mooing at 0.564-3.862", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4299.wav", "onoffCaption": "car horn honking at 0.783-5.69, 7.139-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4367.wav", "onoffCaption": "train horn at 0.133-3.373, 5.09-7.557 and dog barking at 2.429-4.429, 6.478-8.878", "frequencyCaption": "train horn two times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_4382.wav", 
"onoffCaption": "gunshot at 0.052-2.553, 3.667-5.667, 6.354-8.354", "frequencyCaption": "gunshot three times"} +{"filepath": "data/multi_event_train/syn_4398.wav", "onoffCaption": "cat meowing at 0.031-1.18 and gunshot at 0.572-2.742, 5.068-7.238", "frequencyCaption": "cat meowing one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4443.wav", "onoffCaption": "whistling at 0.006-7.756", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4459.wav", "onoffCaption": "cat meowing at 0.808-2.363, 3.023-4.035 and tapping clicking clanking at 6.989-10.0", "frequencyCaption": "cat meowing two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4542.wav", "onoffCaption": "explosion at 0.903-4.03, 6.338-9.465", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4558.wav", "onoffCaption": "whistling at 1.603-9.258", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4630.wav", "onoffCaption": "cow mooing at 3.094-6.076, 7.671-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4665.wav", "onoffCaption": "burping belching at 4.194-7.453", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_4680.wav", "onoffCaption": "cow mooing at 0.292-3.302, 4.366-7.335 and duck quacking at 6.865-8.865", "frequencyCaption": "cow mooing two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4731.wav", "onoffCaption": "burping belching at 0.174-3.174, 4.471-7.471 and woman laughing at 1.095-3.733", "frequencyCaption": "burping belching two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4846.wav", "onoffCaption": "sheep goat bleating at 3.216-5.216, 7.445-9.445", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4947.wav", "onoffCaption": "cow mooing at 
0.181-3.15, 4.35-7.332 and gunshot at 2.445-4.946, 5.779-7.779 and whistling at 4.017-6.892, 7.993-10.0", "frequencyCaption": "cow mooing two times and gunshot two times and whistling two times"} +{"filepath": "data/multi_event_train/syn_2042.wav", "onoffCaption": "sneeze at 2.179-4.175 and thump thud at 2.763-5.81, 7.173-9.414", "frequencyCaption": "sneeze one times and thump thud two times"} +{"filepath": "data/multi_event_train/syn_2159.wav", "onoffCaption": "door knocking at 2.29-5.137, 6.138-8.906", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2231.wav", "onoffCaption": "cow mooing at 3.414-6.396, 7.031-9.087", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2264.wav", "onoffCaption": "car horn honking at 3.121-5.468", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2270.wav", "onoffCaption": "train horn at 1.497-5.678, 6.589-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2281.wav", "onoffCaption": "spraying at 0.544-2.306, 3.813-5.575 and train horn at 2.976-6.976", "frequencyCaption": "spraying two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_2295.wav", "onoffCaption": "spraying at 3.423-4.05, 5.308-5.912, 7.048-7.632", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_2414.wav", "onoffCaption": "sheep goat bleating at 1.213-3.213, 5.402-7.402", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2626.wav", "onoffCaption": "cow mooing at 0.604-3.902, 5.015-7.984", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2632.wav", "onoffCaption": "duck quacking at 2.26-4.26, 6.028-8.028", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2667.wav", "onoffCaption": "sheep goat bleating at 0.259-2.259, 3.658-5.658 and 
cat meowing at 2.779-4.511", "frequencyCaption": "sheep goat bleating two times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2682.wav", "onoffCaption": "cat meowing at 3.69-5.106, 7.025-8.965", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2729.wav", "onoffCaption": "woman laughing at 0.702-3.774, 4.528-7.6 and burping belching at 3.235-5.329", "frequencyCaption": "woman laughing two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2799.wav", "onoffCaption": "sheep goat bleating at 0.527-2.527", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2910.wav", "onoffCaption": "sneeze at 1.626-3.309, 3.833-5.516, 6.281-7.964", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2945.wav", "onoffCaption": "thump thud at 0.075-2.846, 3.421-6.192 and burping belching at 7.915-10.0", "frequencyCaption": "thump thud two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4001.wav", "onoffCaption": "train horn at 0.358-6.073, 7.942-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4040.wav", "onoffCaption": "door slamming at 1.741-4.522", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4054.wav", "onoffCaption": "explosion at 0.122-5.122", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4227.wav", "onoffCaption": "tapping clicking clanking at 0.449-3.889, 4.56-6.915, 7.848-9.866", "frequencyCaption": "tapping clicking clanking three times"} +{"filepath": "data/multi_event_train/syn_4272.wav", "onoffCaption": "sheep goat bleating at 0.103-2.103 and thump thud at 5.105-9.48", "frequencyCaption": "sheep goat bleating one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4297.wav", "onoffCaption": "door knocking at 1.869-3.891, 
4.557-6.579, 7.312-9.334", "frequencyCaption": "door knocking three times"} +{"filepath": "data/multi_event_train/syn_4328.wav", "onoffCaption": "door slamming at 0.071-2.5, 3.7-5.618, 6.407-8.32", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_4369.wav", "onoffCaption": "thump thud at 4.339-6.839", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4402.wav", "onoffCaption": "train horn at 3.277-6.597", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4416.wav", "onoffCaption": "sneeze at 0.054-1.641, 2.766-4.353, 5.195-6.782", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4457.wav", "onoffCaption": "tapping clicking clanking at 1.373-4.813, 5.85-8.035", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4519.wav", "onoffCaption": "burping belching at 2.319-8.999 and door slamming at 3.957-5.957, 7.089-10.0", "frequencyCaption": "burping belching one times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_4624.wav", "onoffCaption": "train horn at 0.444-4.884, 5.959-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4671.wav", "onoffCaption": "door knocking at 3.288-6.125", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_4694.wav", "onoffCaption": "thump thud at 2.224-4.724, 6.344-8.844", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4809.wav", "onoffCaption": "door slamming at 0.796-3.513, 5.446-6.424", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4848.wav", "onoffCaption": "spraying at 1.562-2.643, 3.178-4.259, 5.809-6.89", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4906.wav", "onoffCaption": "thump thud at 1.126-3.588 and cow mooing 
at 2.735-6.033", "frequencyCaption": "thump thud one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4912.wav", "onoffCaption": "thump thud at 1.033-5.408 and explosion at 6.642-9.514", "frequencyCaption": "thump thud one times and explosion one times"} +{"filepath": "data/multi_event_train/syn_4953.wav", "onoffCaption": "sneeze at 2.441-3.767 and dog barking at 2.683-4.683, 6.445-8.445", "frequencyCaption": "sneeze one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2017.wav", "onoffCaption": "duck quacking at 2.591-4.591, 6.473-8.473", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2029.wav", "onoffCaption": "thump thud at 0.12-4.57 and dog barking at 0.898-2.898, 4.555-6.555", "frequencyCaption": "thump thud one times and dog barking two times"} +{"filepath": "data/multi_event_train/syn_2058.wav", "onoffCaption": "door slamming at 0.54-1.713, 2.823-3.996, 5.038-6.211", "frequencyCaption": "door slamming three times"} +{"filepath": "data/multi_event_train/syn_2099.wav", "onoffCaption": "spraying at 1.117-1.625", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2116.wav", "onoffCaption": "sneeze at 3.4-4.634, 5.74-6.974", "frequencyCaption": "sneeze two times"} +{"filepath": "data/multi_event_train/syn_2132.wav", "onoffCaption": "tapping clicking clanking at 2.156-5.596", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2143.wav", "onoffCaption": "door slamming at 0.016-1.319, 2.87-4.173, 5.945-7.248 and spraying at 4.635-5.135, 7.255-7.882", "frequencyCaption": "door slamming three times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2167.wav", "onoffCaption": "tapping clicking clanking at 0.555-3.995, 4.982-7.959 and car horn honking at 2.45-4.915 and dog barking at 3.612-5.612", "frequencyCaption": "tapping clicking clanking two times and car horn honking one 
times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2182.wav", "onoffCaption": "tapping clicking clanking at 2.81-6.25", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2330.wav", "onoffCaption": "explosion at 0.7-3.572 and thump thud at 5.734-9.652", "frequencyCaption": "explosion one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_2341.wav", "onoffCaption": "duck quacking at 0.59-2.59 and burping belching at 4.988-7.611", "frequencyCaption": "duck quacking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2365.wav", "onoffCaption": "tapping clicking clanking at 1.395-4.835 and door knocking at 3.354-5.731", "frequencyCaption": "tapping clicking clanking one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2380.wav", "onoffCaption": "explosion at 3.921-6.915", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_2441.wav", "onoffCaption": "sneeze at 0.431-1.523, 2.574-3.666 and gunshot at 0.854-2.854, 4.67-6.67", "frequencyCaption": "sneeze two times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2515.wav", "onoffCaption": "dog barking at 3.882-5.882, 6.574-8.574", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2540.wav", "onoffCaption": "sheep goat bleating at 0.606-2.606, 5.052-7.052 and door slamming at 5.485-6.624, 8.528-9.781", "frequencyCaption": "sheep goat bleating two times and door slamming two times"} +{"filepath": "data/multi_event_train/syn_2564.wav", "onoffCaption": "burping belching at 1.28-3.387, 4.428-6.535, 7.767-9.874", "frequencyCaption": "burping belching three times"} +{"filepath": "data/multi_event_train/syn_2581.wav", "onoffCaption": "cat meowing at 0.248-1.388, 3.452-4.727, 6.932-8.926", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2628.wav", 
"onoffCaption": "thump thud at 0.371-4.038", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2698.wav", "onoffCaption": "explosion at 1.193-3.911, 5.087-7.427", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2717.wav", "onoffCaption": "door slamming at 0.151-2.151", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_2733.wav", "onoffCaption": "sheep goat bleating at 2.91-4.91", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2766.wav", "onoffCaption": "woman laughing at 1.206-3.288, 3.961-6.868 and car horn honking at 2.938-6.113", "frequencyCaption": "woman laughing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2783.wav", "onoffCaption": "door knocking at 2.496-4.736, 6.515-8.755", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2811.wav", "onoffCaption": "whistling at 3.028-8.203", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2835.wav", "onoffCaption": "whistling at 0.039-9.704 and spraying at 0.105-1.362, 3.426-4.33", "frequencyCaption": "whistling one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2844.wav", "onoffCaption": "thump thud at 3.178-6.225, 7.297-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2860.wav", "onoffCaption": "whistling at 0.213-3.088, 3.926-6.389 and door knocking at 3.404-7.554", "frequencyCaption": "whistling two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2885.wav", "onoffCaption": "duck quacking at 0.313-2.313, 4.547-6.547", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4100.wav", "onoffCaption": "tapping clicking clanking at 3.081-6.521", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": 
"data/multi_event_train/syn_4155.wav", "onoffCaption": "door slamming at 0.152-1.052, 1.604-2.584, 3.551-4.668 and woman laughing at 7.554-10.0", "frequencyCaption": "door slamming three times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4171.wav", "onoffCaption": "car horn honking at 0.875-3.661", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4194.wav", "onoffCaption": "duck quacking at 0.045-2.045, 4.189-6.189 and woman laughing at 0.11-7.122", "frequencyCaption": "duck quacking two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4219.wav", "onoffCaption": "whistling at 3.867-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4268.wav", "onoffCaption": "train horn at 0.779-3.246, 4.852-6.993, 7.924-10.0 and car horn honking at 4.157-7.376", "frequencyCaption": "train horn three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4302.wav", "onoffCaption": "whistling at 1.87-9.525", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4326.wav", "onoffCaption": "sheep goat bleating at 0.284-4.204, 5.202-9.122 and tapping clicking clanking at 2.879-6.319", "frequencyCaption": "sheep goat bleating two times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4357.wav", "onoffCaption": "car horn honking at 0.415-2.762 and spraying at 1.964-2.591, 3.444-3.944, 5.15-6.407 and duck quacking at 2.399-4.399", "frequencyCaption": "car horn honking one times and spraying three times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4373.wav", "onoffCaption": "woman laughing at 2.689-9.423", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4396.wav", "onoffCaption": "woman laughing at 2.564-5.202, 6.278-8.561", "frequencyCaption": "woman laughing two times"} +{"filepath": 
"data/multi_event_train/syn_4418.wav", "onoffCaption": "sneeze at 0.199-1.363, 2.216-3.38, 3.88-5.044", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4503.wav", "onoffCaption": "cat meowing at 2.088-3.115 and whistling at 2.403-5.278, 6.471-9.346", "frequencyCaption": "cat meowing one times and whistling two times"} +{"filepath": "data/multi_event_train/syn_4527.wav", "onoffCaption": "whistling at 1.299-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4556.wav", "onoffCaption": "sneeze at 0.693-2.193, 3.282-5.395, 6.632-9.449", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4701.wav", "onoffCaption": "cat meowing at 1.878-3.153", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4725.wav", "onoffCaption": "door slamming at 0.723-2.026 and whistling at 1.016-8.671 and thump thud at 4.221-8.671", "frequencyCaption": "door slamming one times and whistling one times and thump thud one times"} +{"filepath": "data/multi_event_train/syn_4754.wav", "onoffCaption": "car horn honking at 0.071-2.536, 4.312-6.777", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4770.wav", "onoffCaption": "cow mooing at 0.271-5.251, 6.816-9.785", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4795.wav", "onoffCaption": "sheep goat bleating at 1.536-3.536 and train horn at 7.424-10.0", "frequencyCaption": "sheep goat bleating one times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4807.wav", "onoffCaption": "whistling at 0.378-7.496 and duck quacking at 0.978-2.978", "frequencyCaption": "whistling one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4823.wav", "onoffCaption": "door knocking at 0.464-2.776, 3.809-6.112, 6.749-8.876", "frequencyCaption": "door knocking three times"} +{"filepath": 
"data/multi_event_train/syn_4852.wav", "onoffCaption": "cat meowing at 0.345-2.235 and door knocking at 5.757-7.978", "frequencyCaption": "cat meowing one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4938.wav", "onoffCaption": "woman laughing at 3.72-6.774", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4949.wav", "onoffCaption": "cat meowing at 2.909-3.994 and dog barking at 6.602-8.602", "frequencyCaption": "cat meowing one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2033.wav", "onoffCaption": "cow mooing at 3.185-7.614", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2061.wav", "onoffCaption": "door slamming at 1.789-4.668 and door knocking at 2.337-4.586, 6.731-9.083 and dog barking at 3.416-5.416", "frequencyCaption": "door slamming one times and door knocking two times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2066.wav", "onoffCaption": "woman laughing at 0.892-3.247 and spraying at 8.202-9.283", "frequencyCaption": "woman laughing one times and spraying one times"} +{"filepath": "data/multi_event_train/syn_2083.wav", "onoffCaption": "spraying at 0.188-1.092", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_2084.wav", "onoffCaption": "dog barking at 0.685-2.685 and tapping clicking clanking at 6.937-10.0", "frequencyCaption": "dog barking one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2128.wav", "onoffCaption": "tapping clicking clanking at 1.229-4.669 and spraying at 6.867-7.471, 8.495-9.099", "frequencyCaption": "tapping clicking clanking one times and spraying two times"} +{"filepath": "data/multi_event_train/syn_2198.wav", "onoffCaption": "dog barking at 0.7-2.7, 4.34-7.261 and gunshot at 2.054-4.073, 4.78-6.78, 7.354-9.354", "frequencyCaption": "dog barking two times and gunshot three times"} 
+{"filepath": "data/multi_event_train/syn_2215.wav", "onoffCaption": "whistling at 3.911-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2240.wav", "onoffCaption": "cat meowing at 0.114-3.144, 4.645-6.585 and sheep goat bleating at 0.204-2.204", "frequencyCaption": "cat meowing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2309.wav", "onoffCaption": "cow mooing at 1.54-5.969, 6.587-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2314.wav", "onoffCaption": "whistling at 0.043-2.052, 3.186-5.195, 6.106-8.115 and tapping clicking clanking at 1.55-4.99, 5.612-9.052", "frequencyCaption": "whistling three times and tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2430.wav", "onoffCaption": "spraying at 3.021-3.872, 4.802-5.706 and cow mooing at 7.834-10.0", "frequencyCaption": "spraying two times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2437.wav", "onoffCaption": "gunshot at 2.885-4.885 and door slamming at 7.153-8.093", "frequencyCaption": "gunshot one times and door slamming one times"} +{"filepath": "data/multi_event_train/syn_2465.wav", "onoffCaption": "cow mooing at 3.343-6.353", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2480.wav", "onoffCaption": "spraying at 1.693-2.544, 3.905-4.756", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2531.wav", "onoffCaption": "thump thud at 0.081-4.531, 6.844-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2616.wav", "onoffCaption": "whistling at 0.165-8.792 and door slamming at 0.317-1.168, 2.4-3.251, 4.02-4.871", "frequencyCaption": "whistling one times and door slamming three times"} +{"filepath": "data/multi_event_train/syn_2643.wav", "onoffCaption": "gunshot at 0.789-2.789, 3.662-5.662 and train horn at 2.218-6.399, 
7.69-10.0", "frequencyCaption": "gunshot two times and train horn two times"} +{"filepath": "data/multi_event_train/syn_2644.wav", "onoffCaption": "cat meowing at 0.168-1.195, 1.774-3.751, 4.349-6.416", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_2659.wav", "onoffCaption": "whistling at 0.1-5.275", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2742.wav", "onoffCaption": "train horn at 2.253-5.653, 7.221-10.0", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2758.wav", "onoffCaption": "car horn honking at 0.059-3.234, 5.075-8.25", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2828.wav", "onoffCaption": "whistling at 0.57-8.32", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2933.wav", "onoffCaption": "explosion at 0.276-3.148, 3.917-6.789", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_2934.wav", "onoffCaption": "burping belching at 0.338-7.315", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2961.wav", "onoffCaption": "spraying at 4.013-6.473, 8.516-9.266", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_2984.wav", "onoffCaption": "sheep goat bleating at 0.618-2.618 and gunshot at 5.151-7.151, 7.731-9.731", "frequencyCaption": "sheep goat bleating one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_4025.wav", "onoffCaption": "train horn at 3.629-6.096", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_4070.wav", "onoffCaption": "door slamming at 0.029-1.32 and cow mooing at 2.727-6.025, 7.22-9.384 and sneeze at 4.803-7.279", "frequencyCaption": "door slamming one times and cow mooing two times and sneeze one times"} +{"filepath": "data/multi_event_train/syn_4095.wav", "onoffCaption": 
"thump thud at 3.766-6.537", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_4124.wav", "onoffCaption": "duck quacking at 0.025-2.025", "frequencyCaption": "duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4203.wav", "onoffCaption": "spraying at 0.356-2.816, 4.552-5.202", "frequencyCaption": "spraying two times"} +{"filepath": "data/multi_event_train/syn_4251.wav", "onoffCaption": "woman laughing at 0.47-3.062, 5.365-7.586", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4256.wav", "onoffCaption": "sheep goat bleating at 0.135-2.135, 3.296-5.296, 6.567-8.567", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4318.wav", "onoffCaption": "gunshot at 3.162-5.162, 5.706-7.706 and duck quacking at 3.962-5.962, 7.3-9.3", "frequencyCaption": "gunshot two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4426.wav", "onoffCaption": "door knocking at 2.595-4.783, 5.891-8.728", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4469.wav", "onoffCaption": "gunshot at 0.545-2.545 and woman laughing at 0.563-3.358, 4.196-6.991 and sheep goat bleating at 1.829-3.829", "frequencyCaption": "gunshot one times and woman laughing two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4473.wav", "onoffCaption": "sneeze at 3.586-8.086", "frequencyCaption": "sneeze one times"} +{"filepath": "data/multi_event_train/syn_4496.wav", "onoffCaption": "explosion at 2.265-5.265, 6.072-8.944", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4568.wav", "onoffCaption": "cat meowing at 0.568-1.653, 2.506-3.591 and car horn honking at 7.41-10.0", "frequencyCaption": "cat meowing two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4572.wav", "onoffCaption": "duck quacking at 0.41-2.41, 
4.509-6.509 and gunshot at 1.916-3.916", "frequencyCaption": "duck quacking two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4597.wav", "onoffCaption": "cow mooing at 1.045-4.027, 4.606-7.588", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4600.wav", "onoffCaption": "explosion at 0.464-4.464, 6.669-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4607.wav", "onoffCaption": "cat meowing at 2.122-3.739, 4.759-6.069, 7.198-9.192", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4655.wav", "onoffCaption": "train horn at 0.649-3.889, 6.249-9.489", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_4839.wav", "onoffCaption": "door knocking at 3.565-6.94, 7.981-10.0", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_4876.wav", "onoffCaption": "sneeze at 0.389-3.499 and whistling at 0.703-5.878", "frequencyCaption": "sneeze one times and whistling one times"} +{"filepath": "data/multi_event_train/syn_4889.wav", "onoffCaption": "spraying at 2.837-3.412 and car horn honking at 6.908-10.0", "frequencyCaption": "spraying one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4893.wav", "onoffCaption": "door slamming at 3.845-4.864, 5.397-6.416 and duck quacking at 4.624-6.624", "frequencyCaption": "door slamming two times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4922.wav", "onoffCaption": "door slamming at 0.069-0.75", "frequencyCaption": "door slamming one times"} +{"filepath": "data/multi_event_train/syn_4977.wav", "onoffCaption": "duck quacking at 1.623-3.623, 4.63-6.63, 7.915-9.915", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4988.wav", "onoffCaption": "spraying at 2.075-2.807, 4.719-5.57", "frequencyCaption": "spraying two times"} 
+{"filepath": "data/multi_event_train/syn_4992.wav", "onoffCaption": "door knocking at 0.681-5.056 and gunshot at 3.1-5.1, 7.191-9.191", "frequencyCaption": "door knocking one times and gunshot two times"} +{"filepath": "data/multi_event_train/syn_2034.wav", "onoffCaption": "duck quacking at 0.504-2.504, 3.307-5.307", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2212.wav", "onoffCaption": "train horn at 2.927-6.697", "frequencyCaption": "train horn one times"} +{"filepath": "data/multi_event_train/syn_2247.wav", "onoffCaption": "dog barking at 0.081-2.081 and car horn honking at 0.117-4.366 and cat meowing at 6.32-7.904", "frequencyCaption": "dog barking one times and car horn honking one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2346.wav", "onoffCaption": "door knocking at 0.207-3.582", "frequencyCaption": "door knocking one times"} +{"filepath": "data/multi_event_train/syn_2462.wav", "onoffCaption": "tapping clicking clanking at 0.119-3.559, 4.558-6.763", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2487.wav", "onoffCaption": "burping belching at 0.648-7.328 and door knocking at 2.358-6.126, 7.457-9.846", "frequencyCaption": "burping belching one times and door knocking two times"} +{"filepath": "data/multi_event_train/syn_2563.wav", "onoffCaption": "door slamming at 0.181-1.472 and explosion at 3.64-6.168 and dog barking at 4.954-6.954", "frequencyCaption": "door slamming one times and explosion one times and dog barking one times"} +{"filepath": "data/multi_event_train/syn_2579.wav", "onoffCaption": "cat meowing at 0.093-2.087", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2611.wav", "onoffCaption": "tapping clicking clanking at 0.362-3.802, 6.013-9.453", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2710.wav", "onoffCaption": "duck 
quacking at 3.339-5.339, 7.228-9.228", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_2898.wav", "onoffCaption": "car horn honking at 2.151-6.473, 6.993-10.0", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_2966.wav", "onoffCaption": "train horn at 0.045-3.365, 4.328-7.648", "frequencyCaption": "train horn two times"} +{"filepath": "data/multi_event_train/syn_2983.wav", "onoffCaption": "gunshot at 0.554-2.554, 3.272-5.272 and train horn at 3.312-7.312", "frequencyCaption": "gunshot two times and train horn one times"} +{"filepath": "data/multi_event_train/syn_4022.wav", "onoffCaption": "car horn honking at 0.689-3.908", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4077.wav", "onoffCaption": "sheep goat bleating at 0.949-2.949, 3.937-5.937, 6.447-8.447", "frequencyCaption": "sheep goat bleating three times"} +{"filepath": "data/multi_event_train/syn_4088.wav", "onoffCaption": "spraying at 2.992-5.452", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_4092.wav", "onoffCaption": "explosion at 2.473-5.073", "frequencyCaption": "explosion one times"} +{"filepath": "data/multi_event_train/syn_4139.wav", "onoffCaption": "whistling at 0.518-8.173", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4176.wav", "onoffCaption": "car horn honking at 0.455-4.042", "frequencyCaption": "car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4189.wav", "onoffCaption": "cat meowing at 0.138-1.498, 2.544-3.904, 5.026-6.386", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4193.wav", "onoffCaption": "woman laughing at 0.153-5.192 and cow mooing at 3.326-7.755", "frequencyCaption": "woman laughing one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4204.wav", "onoffCaption": "train horn at 3.537-7.977 
and duck quacking at 6.259-8.259", "frequencyCaption": "train horn one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4421.wav", "onoffCaption": "cat meowing at 0.141-1.288 and sheep goat bleating at 0.715-4.035 and door knocking at 6.083-8.583", "frequencyCaption": "cat meowing one times and sheep goat bleating one times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_4474.wav", "onoffCaption": "burping belching at 2.963-5.378, 6.525-8.94", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4491.wav", "onoffCaption": "tapping clicking clanking at 2.783-6.223, 7.925-10.0", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_4520.wav", "onoffCaption": "dog barking at 1.345-4.665, 6.247-9.567", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_4652.wav", "onoffCaption": "thump thud at 0.178-2.678, 4.247-6.747 and gunshot at 0.859-2.859, 3.774-5.774, 6.922-8.922", "frequencyCaption": "thump thud two times and gunshot three times"} +{"filepath": "data/multi_event_train/syn_4749.wav", "onoffCaption": "duck quacking at 1.65-3.65, 5.955-7.955", "frequencyCaption": "duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4824.wav", "onoffCaption": "woman laughing at 0.292-2.408 and tapping clicking clanking at 4.885-8.325", "frequencyCaption": "woman laughing one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4925.wav", "onoffCaption": "spraying at 0.314-1.314 and cow mooing at 5.093-8.075", "frequencyCaption": "spraying one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4970.wav", "onoffCaption": "gunshot at 1.78-3.78, 5.766-7.766", "frequencyCaption": "gunshot two times"} +{"filepath": "data/multi_event_train/syn_4995.wav", "onoffCaption": "duck quacking at 1.467-3.467, 5.668-7.668", "frequencyCaption": "duck 
quacking two times"} +{"filepath": "data/multi_event_train/syn_9.wav", "onoffCaption": "gunshot at 0.635-2.635", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2135.wav", "onoffCaption": "cat meowing at 4.338-5.423, 7.709-8.794", "frequencyCaption": "cat meowing two times"} +{"filepath": "data/multi_event_train/syn_2160.wav", "onoffCaption": "car horn honking at 0.225-5.132, 6.96-9.007 and burping belching at 2.253-4.776", "frequencyCaption": "car horn honking two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_2185.wav", "onoffCaption": "thump thud at 2.942-7.392", "frequencyCaption": "thump thud one times"} +{"filepath": "data/multi_event_train/syn_2208.wav", "onoffCaption": "woman laughing at 1.114-8.559", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2279.wav", "onoffCaption": "thump thud at 1.323-5.773, 7.153-10.0", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2313.wav", "onoffCaption": "duck quacking at 1.898-3.898, 4.892-6.892 and sheep goat bleating at 4.573-6.573", "frequencyCaption": "duck quacking two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2478.wav", "onoffCaption": "dog barking at 0.087-2.087, 3.299-5.299, 6.614-8.614", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2536.wav", "onoffCaption": "cat meowing at 0.427-3.668, 5.917-9.158 and woman laughing at 0.858-3.627", "frequencyCaption": "cat meowing two times and woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2586.wav", "onoffCaption": "dog barking at 1.358-3.358, 5.489-7.489", "frequencyCaption": "dog barking two times"} +{"filepath": "data/multi_event_train/syn_2734.wav", "onoffCaption": "burping belching at 1.247-3.354 and spraying at 2.437-3.064, 4.131-5.215", "frequencyCaption": "burping belching one times and spraying two times"} 
+{"filepath": "data/multi_event_train/syn_2745.wav", "onoffCaption": "cow mooing at 0.875-3.857, 5.932-8.914", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2832.wav", "onoffCaption": "explosion at 0.339-2.341, 4.836-7.714 and car horn honking at 1.971-4.757", "frequencyCaption": "explosion two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2867.wav", "onoffCaption": "tapping clicking clanking at 0.598-4.038, 5.634-9.074", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2882.wav", "onoffCaption": "cow mooing at 2.804-7.784", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2929.wav", "onoffCaption": "tapping clicking clanking at 1.97-5.41 and sheep goat bleating at 2.461-4.461, 4.975-6.975", "frequencyCaption": "tapping clicking clanking one times and sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2999.wav", "onoffCaption": "thump thud at 0.135-2.597, 3.618-6.08", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4038.wav", "onoffCaption": "cow mooing at 2.757-7.186", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4123.wav", "onoffCaption": "thump thud at 0.25-4.7, 5.564-8.064", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_4305.wav", "onoffCaption": "whistling at 2.362-7.862", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_4350.wav", "onoffCaption": "burping belching at 3.881-6.207, 6.742-9.742", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4575.wav", "onoffCaption": "explosion at 2.14-7.14, 7.84-10.0", "frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4590.wav", "onoffCaption": "thump thud at 1.677-4.724", "frequencyCaption": "thump thud one 
times"} +{"filepath": "data/multi_event_train/syn_4648.wav", "onoffCaption": "gunshot at 3.838-6.078", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4706.wav", "onoffCaption": "explosion at 0.24-2.993, 4.501-7.501 and duck quacking at 2.746-4.746, 6.768-8.768", "frequencyCaption": "explosion two times and duck quacking two times"} +{"filepath": "data/multi_event_train/syn_4753.wav", "onoffCaption": "gunshot at 2.893-4.893", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_4871.wav", "onoffCaption": "car horn honking at 1.632-3.979, 5.537-8.355", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4894.wav", "onoffCaption": "gunshot at 1.001-3.001", "frequencyCaption": "gunshot one times"} +{"filepath": "data/multi_event_train/syn_2111.wav", "onoffCaption": "duck quacking at 0.295-2.295, 2.851-4.851 and woman laughing at 1.563-5.615, 7.148-10.0", "frequencyCaption": "duck quacking two times and woman laughing two times"} +{"filepath": "data/multi_event_train/syn_2144.wav", "onoffCaption": "dog barking at 0.587-2.587, 3.375-5.375, 6.652-8.652", "frequencyCaption": "dog barking three times"} +{"filepath": "data/multi_event_train/syn_2337.wav", "onoffCaption": "cow mooing at 0.134-5.114, 6.062-8.242", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2362.wav", "onoffCaption": "sheep goat bleating at 3.271-5.271, 7.111-9.111", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2387.wav", "onoffCaption": "sheep goat bleating at 2.984-4.984", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2409.wav", "onoffCaption": "cow mooing at 1.935-5.233, 6.077-9.059", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_2413.wav", "onoffCaption": "gunshot at 0.733-2.733", "frequencyCaption": "gunshot one times"} 
+{"filepath": "data/multi_event_train/syn_2512.wav", "onoffCaption": "whistling at 3.548-10.0", "frequencyCaption": "whistling one times"} +{"filepath": "data/multi_event_train/syn_2547.wav", "onoffCaption": "explosion at 0.219-2.221, 3.47-5.534 and sheep goat bleating at 6.765-8.765", "frequencyCaption": "explosion two times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2761.wav", "onoffCaption": "duck quacking at 2.498-4.498, 5.686-7.686 and explosion at 5.745-7.809", "frequencyCaption": "duck quacking two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2784.wav", "onoffCaption": "thump thud at 1.867-4.206, 5.36-7.699", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2816.wav", "onoffCaption": "door slamming at 2.818-5.599 and cow mooing at 2.905-7.885", "frequencyCaption": "door slamming one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2843.wav", "onoffCaption": "burping belching at 2.029-7.973", "frequencyCaption": "burping belching one times"} +{"filepath": "data/multi_event_train/syn_2958.wav", "onoffCaption": "spraying at 0.929-3.321", "frequencyCaption": "spraying one times"} +{"filepath": "data/multi_event_train/syn_4049.wav", "onoffCaption": "woman laughing at 0.476-7.921 and car horn honking at 3.279-5.779", "frequencyCaption": "woman laughing one times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4107.wav", "onoffCaption": "cow mooing at 0.839-5.268, 6.236-10.0", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4152.wav", "onoffCaption": "burping belching at 1.002-3.625, 6.031-8.654", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_4321.wav", "onoffCaption": "sneeze at 0.133-1.379, 2.353-3.965 and whistling at 1.027-7.317", "frequencyCaption": "sneeze two times and whistling one times"} +{"filepath": 
"data/multi_event_train/syn_4374.wav", "onoffCaption": "explosion at 0.919-3.672, 5.793-8.522 and burping belching at 5.774-8.976", "frequencyCaption": "explosion two times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4391.wav", "onoffCaption": "sheep goat bleating at 2.629-4.629", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4504.wav", "onoffCaption": "cat meowing at 0.188-1.374 and dog barking at 0.587-2.587, 3.86-5.86, 6.47-8.47", "frequencyCaption": "cat meowing one times and dog barking three times"} +{"filepath": "data/multi_event_train/syn_4551.wav", "onoffCaption": "woman laughing at 3.557-6.642", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_4639.wav", "onoffCaption": "car horn honking at 1.638-4.813 and burping belching at 7.602-9.633", "frequencyCaption": "car horn honking one times and burping belching one times"} +{"filepath": "data/multi_event_train/syn_4689.wav", "onoffCaption": "sheep goat bleating at 1.268-3.268, 5.556-7.556", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_4722.wav", "onoffCaption": "whistling at 1.885-7.385, 7.979-10.0", "frequencyCaption": "whistling two times"} +{"filepath": "data/multi_event_train/syn_4777.wav", "onoffCaption": "train horn at 1.37-5.81, 7.867-10.0 and gunshot at 2.589-4.589", "frequencyCaption": "train horn two times and gunshot one times"} +{"filepath": "data/multi_event_train/syn_4792.wav", "onoffCaption": "woman laughing at 1.201-3.395, 5.62-7.814", "frequencyCaption": "woman laughing two times"} +{"filepath": "data/multi_event_train/syn_4800.wav", "onoffCaption": "train horn at 0.444-3.924 and sneeze at 2.515-4.198 and duck quacking at 6.469-8.469", "frequencyCaption": "train horn one times and sneeze one times and duck quacking one times"} +{"filepath": "data/multi_event_train/syn_4855.wav", "onoffCaption": "door slamming at 0.924-3.353 
and tapping clicking clanking at 6.856-10.0", "frequencyCaption": "door slamming one times and tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_2010.wav", "onoffCaption": "thump thud at 0.321-2.783 and cat meowing at 4.756-9.756", "frequencyCaption": "thump thud one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_2045.wav", "onoffCaption": "door slamming at 0.243-2.469 and cow mooing at 4.409-7.707", "frequencyCaption": "door slamming one times and cow mooing one times"} +{"filepath": "data/multi_event_train/syn_2236.wav", "onoffCaption": "thump thud at 1.714-3.942, 6.215-8.443", "frequencyCaption": "thump thud two times"} +{"filepath": "data/multi_event_train/syn_2263.wav", "onoffCaption": "car horn honking at 2.479-6.801 and door slamming at 2.505-3.524, 5.637-6.656 and door knocking at 3.955-6.455", "frequencyCaption": "car horn honking one times and door slamming two times and door knocking one times"} +{"filepath": "data/multi_event_train/syn_2286.wav", "onoffCaption": "sneeze at 0.115-1.349, 1.861-3.155, 3.884-6.727", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_2378.wav", "onoffCaption": "duck quacking at 1.898-3.898, 4.706-6.706, 7.974-9.974 and car horn honking at 3.594-6.38", "frequencyCaption": "duck quacking three times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_2446.wav", "onoffCaption": "door knocking at 3.375-5.999, 7.052-9.129", "frequencyCaption": "door knocking two times"} +{"filepath": "data/multi_event_train/syn_2508.wav", "onoffCaption": "woman laughing at 1.307-3.532, 4.108-6.316 and explosion at 3.427-6.427", "frequencyCaption": "woman laughing two times and explosion one times"} +{"filepath": "data/multi_event_train/syn_2635.wav", "onoffCaption": "sheep goat bleating at 0.781-2.781", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_2660.wav", "onoffCaption": "woman 
laughing at 3.713-7.765", "frequencyCaption": "woman laughing one times"} +{"filepath": "data/multi_event_train/syn_2685.wav", "onoffCaption": "sheep goat bleating at 0.99-2.99, 5.032-7.088", "frequencyCaption": "sheep goat bleating two times"} +{"filepath": "data/multi_event_train/syn_2859.wav", "onoffCaption": "burping belching at 2.044-5.55, 7.897-10.0", "frequencyCaption": "burping belching two times"} +{"filepath": "data/multi_event_train/syn_2917.wav", "onoffCaption": "tapping clicking clanking at 2.146-5.586, 6.162-9.602", "frequencyCaption": "tapping clicking clanking two times"} +{"filepath": "data/multi_event_train/syn_2942.wav", "onoffCaption": "duck quacking at 0.672-2.672, 5.106-7.106, 7.837-9.837", "frequencyCaption": "duck quacking three times"} +{"filepath": "data/multi_event_train/syn_4006.wav", "onoffCaption": "tapping clicking clanking at 2.901-6.341", "frequencyCaption": "tapping clicking clanking one times"} +{"filepath": "data/multi_event_train/syn_4053.wav", "onoffCaption": "cat meowing at 0.589-1.599, 2.313-3.34, 4.373-5.52", "frequencyCaption": "cat meowing three times"} +{"filepath": "data/multi_event_train/syn_4148.wav", "onoffCaption": "spraying at 1.484-3.217, 3.795-5.052, 5.923-7.007", "frequencyCaption": "spraying three times"} +{"filepath": "data/multi_event_train/syn_4220.wav", "onoffCaption": "cow mooing at 0.893-4.191", "frequencyCaption": "cow mooing one times"} +{"filepath": "data/multi_event_train/syn_4275.wav", "onoffCaption": "train horn at 0.043-3.443 and sheep goat bleating at 5.656-7.656", "frequencyCaption": "train horn one times and sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4290.wav", "onoffCaption": "dog barking at 0.053-2.053 and sneeze at 1.421-3.008 and cat meowing at 1.901-3.087", "frequencyCaption": "dog barking one times and sneeze one times and cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4405.wav", "onoffCaption": "explosion at 0.051-3.051, 3.851-6.574", 
"frequencyCaption": "explosion two times"} +{"filepath": "data/multi_event_train/syn_4450.wav", "onoffCaption": "sneeze at 2.386-3.62, 4.447-6.154, 7.403-10.0", "frequencyCaption": "sneeze three times"} +{"filepath": "data/multi_event_train/syn_4623.wav", "onoffCaption": "cat meowing at 2.217-7.217", "frequencyCaption": "cat meowing one times"} +{"filepath": "data/multi_event_train/syn_4676.wav", "onoffCaption": "sheep goat bleating at 0.572-2.572", "frequencyCaption": "sheep goat bleating one times"} +{"filepath": "data/multi_event_train/syn_4693.wav", "onoffCaption": "cow mooing at 2.486-5.455, 7.47-9.667", "frequencyCaption": "cow mooing two times"} +{"filepath": "data/multi_event_train/syn_4738.wav", "onoffCaption": "door slamming at 3.65-4.628, 6.613-8.613", "frequencyCaption": "door slamming two times"} +{"filepath": "data/multi_event_train/syn_4788.wav", "onoffCaption": "explosion at 0.578-4.447, 5.213-9.082 and car horn honking at 0.746-3.564", "frequencyCaption": "explosion two times and car horn honking one times"} +{"filepath": "data/multi_event_train/syn_4901.wav", "onoffCaption": "car horn honking at 2.729-5.642, 6.454-8.801", "frequencyCaption": "car horn honking two times"} +{"filepath": "data/multi_event_train/syn_4954.wav", "onoffCaption": "cat meowing at 0.74-3.77", "frequencyCaption": "cat meowing one times"} diff --git a/picoaudio/models/controllable_dataset.py b/picoaudio/models/controllable_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..cb9468bcc3fb9f5e6c667c0dbf83ed010ddbe87e --- /dev/null +++ b/picoaudio/models/controllable_dataset.py @@ -0,0 +1,126 @@ +import numpy as np +import torch +import pandas as pd +from data.filter_data import get_event_list + + + +class Text_Onset_2_Audio_Dataset(torch.utils.data.Dataset): + def __init__(self, dataset, args): + + self.captions = list(dataset[args.text_column]) + self.audios = list(dataset[args.audio_column]) + self.onsets = list(dataset[args.onset_column]) + 
self.indices = list(range(len(self.captions))) + + self.mapper = {} + for index, audio, caption, onset in zip(self.indices, self.audios, self.captions, self.onsets): + self.mapper[index] = [audio, caption, onset] + + num_examples = args.num_examples + if num_examples != -1: + self.captions, self.audios, self.onsets = self.captions[:num_examples], self.audios[:num_examples], self.onsets[:num_examples] + self.indices = self.indices[:num_examples] + self.class2id = {event: idx for idx, event in enumerate(args.event_list)} + + def decode_data(self, line_onset_str): + # data { "location": audio_path, + # "captions" : "event1 n times and event2 n times", + # "onset_str": "event1__onset1-offset1_onset2-offset2--event2__onset1-offset1"} + + line_onset_index = np.zeros((32, 256)) + line_event = [] + for event_onset in line_onset_str.split('--'): + # event_onset : event1__onset1-offset1_onset2-offset2 + (event, instance) = event_onset.split('__') + line_event.append(event) + # instance : onset1-offset1_onset2-offset2 + for start_end in instance.split('_'): + (start, end) = start_end.split('-') + start, end = int(float(start)*250/10), int(float(end)*250/10) + if end > 255: break + line_onset_index[self.class2id[event], start: end] = 1 + line_event_str = " and ".join(line_event) + return line_onset_index, line_event_str + + def __len__(self): + return len(self.captions) + + def get_num_instances(self): + return len(self.captions) + + def __getitem__(self, index): + onset_str, filename, idx, caption = self.onsets[index], self.audios[index], self.indices[index], self.captions[index] + onset, _ = self.decode_data(onset_str) + #"onset_str": "event1__onset1-offset1_onset2-offset2--event2__onset1-offset1" + #assert len(onset_str.split("--")) == 1 + first_class_id = self.class2id[onset_str.split("__")[0]] + return idx, onset, first_class_id, filename, caption, onset_str + + def collate_fn(self, data): + dat = pd.DataFrame(data) + batch = [] + for i in dat: + if i==1: + 
batch.append(torch.tensor(np.array(dat[i].tolist()), dtype=torch.float32)) + elif i==2: + batch.append(torch.tensor(dat[i])) + else: + batch.append(dat[i].tolist()) + return batch + +class Clap_Onset_2_Audio_Dataset(Text_Onset_2_Audio_Dataset): + def __init__(self, dataset, args): + super().__init__(dataset, args) + import laion_clap + from laion_clap.clap_module.factory import load_state_dict as clap_load_state_dict + + self.clap_scorer = laion_clap.CLAP_Module(enable_fusion=False) + ckpt_path = 'miniconda3/envs/py3.10.11/lib/python3.10/site-packages/laion_clap/630k-audioset-best.pt' + ckpt = clap_load_state_dict(ckpt_path, skip_params=True) + del_parameter_key = ["text_branch.embeddings.position_ids"] + ckpt = {"model."+k:v for k, v in ckpt.items() if k not in del_parameter_key} + self.clap_scorer.load_state_dict(ckpt) + + def __getitem__(self, index): + onset_str, filename, idx, caption = self.onsets[index], self.audios[index], self.indices[index], self.captions[index] + onset, event = self.decode_data(onset_str) + with torch.no_grad(): + clap_embed = self.clap_scorer.get_text_embedding([event, ""], use_tensor=False)[0] + return idx, onset, clap_embed, filename, caption, onset_str + + def collate_fn(self, data): + dat = pd.DataFrame(data) + batch = [] + for i in dat: + if i==1 or i==2: + batch.append(torch.tensor(np.array(dat[i].tolist()), dtype=torch.float32)) + else: + batch.append(dat[i].tolist()) + return batch + + +if __name__ == "__main__": + import torch + from torch.utils.data import Dataset, DataLoader + import datasets + import argparse + import sys + + import models.controllable_dataset as ConDataset + from data_utils.filter_data import get_event_list + + + parser = argparse.ArgumentParser(description=".") + args = parser.parse_args() + args.event_list = get_event_list() + args.train_file = "" + + + extension = args.train_file.split(".")[-1] + raw_datasets = load_dataset(extension, data_files={"train": args.train_file}) + train_dataset = 
def build_pretrained_models(name):
    """Build the VAE and STFT front-end for the AudioLDM checkpoint ``name``.

    The checkpoint path is looked up through ``get_metadata()[name]["path"]``
    and the model/preprocessing settings through
    ``default_audioldm_config(name)``.

    Args:
        name: checkpoint identifier understood by ``get_metadata`` and
            ``default_audioldm_config``.

    Returns:
        ``(vae, fn_STFT)``, both switched to eval mode.
    """
    vae_prefix = "first_stage_model."

    # NOTE(review): ``torch.load`` unpickles the file; only point the
    # metadata at checkpoints from a trusted source.
    checkpoint = torch.load(get_metadata()[name]["path"], map_location="cpu")
    state_dict = checkpoint["state_dict"]
    scale_factor = state_dict["scale_factor"].item()

    # Keep only the VAE weights and strip the prefix. Matching with
    # ``startswith`` (instead of a substring test) guarantees that the
    # characters removed are exactly the prefix.
    vae_state_dict = {
        key[len(vae_prefix):]: value
        for key, value in state_dict.items()
        if key.startswith(vae_prefix)
    }

    config = default_audioldm_config(name)
    vae_config = config["model"]["params"]["first_stage_config"]["params"]
    vae_config["scale_factor"] = scale_factor

    vae = AutoencoderKL(**vae_config)
    vae.load_state_dict(vae_state_dict)

    preprocessing = config["preprocessing"]
    fn_STFT = TacotronSTFT(
        preprocessing["stft"]["filter_length"],
        preprocessing["stft"]["hop_length"],
        preprocessing["stft"]["win_length"],
        preprocessing["mel"]["n_mel_channels"],
        preprocessing["audio"]["sampling_rate"],
        preprocessing["mel"]["mel_fmin"],
        preprocessing["mel"]["mel_fmax"],
    )

    vae.eval()
    fn_STFT.eval()

    return vae, fn_STFT
+ self.tokenizer = AutoTokenizer.from_pretrained(self.text_encoder_name) + self.text_encoder = T5EncoderModel.from_pretrained(self.text_encoder_name) + """ + + def compute_snr(self, timesteps): + """ + Computes SNR as per https://github.com/TiankaiHang/Min-SNR-Diffusion-Training/blob/521b624bd70c67cee4bdf49225915f5945a872e3/guided_diffusion/gaussian_diffusion.py#L847-L849 + """ + alphas_cumprod = self.noise_scheduler.alphas_cumprod + sqrt_alphas_cumprod = alphas_cumprod**0.5 + sqrt_one_minus_alphas_cumprod = (1.0 - alphas_cumprod) ** 0.5 + + # Expand the tensors. + # Adapted from https://github.com/TiankaiHang/Min-SNR-Diffusion-Training/blob/521b624bd70c67cee4bdf49225915f5945a872e3/guided_diffusion/gaussian_diffusion.py#L1026 + sqrt_alphas_cumprod = sqrt_alphas_cumprod.to(device=timesteps.device)[timesteps].float() + while len(sqrt_alphas_cumprod.shape) < len(timesteps.shape): + sqrt_alphas_cumprod = sqrt_alphas_cumprod[..., None] + alpha = sqrt_alphas_cumprod.expand(timesteps.shape) + + sqrt_one_minus_alphas_cumprod = sqrt_one_minus_alphas_cumprod.to(device=timesteps.device)[timesteps].float() + while len(sqrt_one_minus_alphas_cumprod.shape) < len(timesteps.shape): + sqrt_one_minus_alphas_cumprod = sqrt_one_minus_alphas_cumprod[..., None] + sigma = sqrt_one_minus_alphas_cumprod.expand(timesteps.shape) + + # Compute SNR. 
+ snr = (alpha / sigma) ** 2 + return snr + + def encode_text(self, input_dict): + raise NotImplementedError + + def forward(self, input_dict): + raise NotImplementedError + + @torch.no_grad() + def inference(self, input_dict): + raise NotImplementedError + +class Text_Onset_2_Audio_Diffusion(BaseDiffusion): + def __init__(self, + scheduler_name, + unet_model_config_path=None, + snr_gamma=None, + freeze_text_encoder=True, + uncondition=False, + ): + super().__init__(scheduler_name, unet_model_config_path, snr_gamma, uncondition) + self.freeze_text_encoder = freeze_text_encoder + self.class_emb = nn.Embedding(24, 1024) + # self.channel_emb = nn.Linear(24, 16) + # _init_layer(self.channel_emb) + + def encode_channel(self, input): + # input [batch, 32, 256] -> [batch, 2, 256, 16] + return input.reshape(input.shape[0], 2, 16, 256).transpose(2, 3) + # return self.channel_emb(input).unsqueeze(1) + + def encode_text(self, input_dict): + device = self.device + + encoder_hidden_states = self.class_emb(input_dict["event_info"].unsqueeze(-1)) + boolean_encoder_mask = (torch.ones(len(encoder_hidden_states), 1) == 1).to(device) + + return encoder_hidden_states, boolean_encoder_mask + + def forward(self, input_dict, validation_mode=False): + device = self.device + latents = input_dict["latent"] + num_train_timesteps = self.noise_scheduler.num_train_timesteps + self.noise_scheduler.set_timesteps(num_train_timesteps, device=device) + + + # [batch, 1, 1024], [batch, 1] + + encoder_hidden_states, boolean_encoder_mask = self.encode_text(input_dict) + if self.uncondition: + mask_indices = [k for k in range(len(latents)) if random.random() < 0.1] + if len(mask_indices) > 0: + encoder_hidden_states[mask_indices] = 0 + + bsz = latents.shape[0] + if validation_mode: + timesteps = (self.noise_scheduler.num_train_timesteps//2) * torch.ones((bsz,), dtype=torch.int64, device=device) + else: + # Sample a random timestep for each instance + timesteps = torch.randint(0, 
self.noise_scheduler.num_train_timesteps, (bsz,), device=device) + timesteps = timesteps.long() + + noise = torch.randn_like(latents) + noisy_latents = self.noise_scheduler.add_noise(latents, noise, timesteps) + + onset_emb = self.encode_channel(input_dict["onset"]) + # [batch, channel:8, 256, 16] + [batch, onset:2, 256, 16] + onset_noisy_latents = torch.cat((onset_emb, noisy_latents), dim=1) + + # Get the target for loss depending on the prediction type + if self.noise_scheduler.config.prediction_type == "epsilon": + target = noise + elif self.noise_scheduler.config.prediction_type == "v_prediction": + target = self.noise_scheduler.get_velocity(latents, noise, timesteps) + else: + raise ValueError(f"Unknown prediction type {self.noise_scheduler.config.prediction_type}") + + model_pred = self.unet( + onset_noisy_latents, timesteps, encoder_hidden_states, + #encoder_attention_mask=boolean_encoder_mask + ).sample + + if self.snr_gamma is None: + loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean") + else: + # Compute loss-weights as per Section 3.4 of https://arxiv.org/abs/2303.09556. 
+ # Adaptef from huggingface/diffusers/blob/main/examples/text_to_image/train_text_to_image.py + snr = self.compute_snr(timesteps) + mse_loss_weights = ( + torch.stack([snr, self.snr_gamma * torch.ones_like(timesteps)], dim=1).min(dim=1)[0] / snr + ) + loss = F.mse_loss(model_pred.float(), target.float(), reduction="none") + loss = loss.mean(dim=list(range(1, len(loss.shape)))) * mse_loss_weights + loss = loss.mean() + + return loss + + def prepare_latents(self, batch_size, inference_scheduler, num_channels_latents, dtype, device): + shape = (batch_size, num_channels_latents, 256, 16) + latents = randn_tensor(shape, generator=None, device=device, dtype=dtype) + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * inference_scheduler.init_noise_sigma + return latents + + def encode_text_classifier_free(self, input_dict, num_samples_per_prompt): + device = self.device + prompt_embeds, boolean_prompt_mask = self.encode_text(input_dict) + prompt_embeds = prompt_embeds.repeat_interleave(num_samples_per_prompt, 0) + attention_mask = boolean_prompt_mask.repeat_interleave(num_samples_per_prompt, 0) + # get unconditional embeddings for classifier free guidance + negative_prompt_embeds = torch.zeros(prompt_embeds.shape).to(device) + uncond_attention_mask = (torch.ones(attention_mask.shape) == 1).to(device) + # negative_prompt_embeds = negative_prompt_embeds.repeat_interleave(num_samples_per_prompt, 0) + # uncond_attention_mask = uncond_attention_mask.repeat_interleave(num_samples_per_prompt, 0) + + # For classifier free guidance, we need to do two forward passes. 
+ # We concatenate the unconditional and text embeddings into a single batch to avoid doing two forward passes + prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds]) + prompt_mask = torch.cat([uncond_attention_mask, attention_mask]) + boolean_prompt_mask = (prompt_mask == 1).to(device) + + return prompt_embeds, boolean_prompt_mask + + @torch.no_grad() + def inference(self, input_dict, inference_scheduler, num_steps=20, guidance_scale=3, num_samples_per_prompt=1, + disable_progress=True): + prompt = input_dict["onset"] + device = self.device + classifier_free_guidance = guidance_scale > 1.0 + batch_size = len(prompt) * num_samples_per_prompt + + if classifier_free_guidance: + prompt_embeds, boolean_prompt_mask = self.encode_text_classifier_free(input_dict, num_samples_per_prompt) + else: + prompt_embeds, boolean_prompt_mask = self.encode_text(input_dict) + prompt_embeds = prompt_embeds.repeat_interleave(num_samples_per_prompt, 0) + boolean_prompt_mask = boolean_prompt_mask.repeat_interleave(num_samples_per_prompt, 0) + + inference_scheduler.set_timesteps(num_steps, device=device) + timesteps = inference_scheduler.timesteps + + num_channels_latents = self.unet.config.in_channels - 2 + latents = self.prepare_latents(batch_size, inference_scheduler, num_channels_latents, prompt_embeds.dtype, device) + onset_emb = self.encode_channel(input_dict["onset"]).repeat_interleave(num_samples_per_prompt, 0) + onset_latents = torch.cat((onset_emb, latents), dim=1) + + num_warmup_steps = len(timesteps) - num_steps * inference_scheduler.order + progress_bar = tqdm(range(num_steps), disable=disable_progress) + + for i, t in tqdm(enumerate(timesteps)): + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([onset_latents] * 2) if classifier_free_guidance else onset_latents + latent_model_input = inference_scheduler.scale_model_input(latent_model_input, t) + noise_pred = self.unet( + latent_model_input, t, 
encoder_hidden_states=prompt_embeds, + encoder_attention_mask=boolean_prompt_mask + ).sample + + # perform guidance + if classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + latents = inference_scheduler.step(noise_pred, t, latents).prev_sample + onset_latents = torch.cat((onset_emb, latents), dim=1) + + # call the callback, if provided + if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % inference_scheduler.order == 0): + progress_bar.update(1) + + + return latents + +class ClapText_Onset_2_Audio_Diffusion(Text_Onset_2_Audio_Diffusion): + def encode_text(self, input_dict): + device = self.device + + encoder_hidden_states = input_dict["event_info"].repeat_interleave(2, -1).unsqueeze(1) + boolean_encoder_mask = (torch.ones(len(encoder_hidden_states), 1) == 1).to(device) + + return encoder_hidden_states, boolean_encoder_mask + diff --git a/picoaudio/requirements.txt b/picoaudio/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..9eaf4207459bfa08ecd90d0350d954ce1a089f20 --- /dev/null +++ b/picoaudio/requirements.txt @@ -0,0 +1,30 @@ +torch==2.0.1 +torchaudio==2.0.2 +torchvision==0.15.2 +transformers==4.37.2 +accelerate==0.26.1 +datasets==2.16.1 +diffusers==0.18.2 +einops==0.7.0 +h5py==3.10.0 +huggingface_hub==0.20.3 +importlib_metadata==7.0.1 +librosa==0.10.1 +matplotlib==3.8.2 +numpy==1.23.5 +omegaconf==2.0.6 +packaging==23.2 +pandas==2.2.0 +progressbar33==2.4 +protobuf==3.20.* +resampy==0.4.2 +scikit_image==0.22.0 +scikit_learn==1.4.0 +scipy==1.12.0 +soundfile==0.12.1 +ssr_eval==0.0.7 +torchlibrosa==0.1.0 +tqdm==4.63.1 +laion-clap==1.1.4 +gradio +google-generativeai \ No newline at end of file diff --git a/picoaudio/runner/controllable_inference.py b/picoaudio/runner/controllable_inference.py new file mode 100644 index 
class dotdict(dict):
    """dot.notation access to dictionary attributes

    Reading a key that is absent returns ``None`` (``dict.get`` semantics).
    Double-underscore names are the exception: they raise ``AttributeError``
    so protocol probes such as ``__deepcopy__`` or ``__getstate__`` see
    "not defined" rather than a ``None`` they would then try to call.
    """

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return self.get(name)

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
def _load_clap_scorer():
    """Create the LAION-CLAP module and load its checkpoint for candidate ranking."""
    clap_scorer = laion_clap.CLAP_Module(enable_fusion=False)
    ckpt_path = 'miniconda3/envs/py3.10.11/lib/python3.10/site-packages/laion_clap/630k-audioset-best.pt'
    ckpt = clap_load_state_dict(ckpt_path, skip_params=True)
    del_parameter_key = ["text_branch.embeddings.position_ids"]
    ckpt = {"model."+k: v for k, v in ckpt.items() if k not in del_parameter_key}
    clap_scorer.load_state_dict(ckpt)
    return clap_scorer


def _best_candidate(clap_scorer, caption, candidates, audio_len):
    """Return the index of the candidate waveform most similar to ``caption``."""
    text_embed = clap_scorer.get_text_embedding([caption, ""], use_tensor=False)[:1]
    best_idx, best_score = 0, float('-inf')
    for candidate_idx, candidate in enumerate(candidates):
        # The waveform is divided by 32768 and resampled from 16 kHz to
        # 48 kHz before being embedded.
        wav_48k = librosa.core.resample(
            candidate.astype(np.float64)[:audio_len] / 32768, orig_sr=16000, target_sr=48000
        ).reshape(1, -1)
        audio_embed = clap_scorer.get_audio_embedding_from_data(x=wav_48k)
        pair_similarity = cosine_similarity(audio_embed, text_embed)[0][0]
        if pair_similarity > best_score:
            best_score = pair_similarity
            best_idx = candidate_idx
    return best_idx


def main():
    """Generate audio for every row of the test file and write one wav per row.

    With ``--num_samples 1`` the single generated waveform is written. With
    any other value, the candidates of each row are ranked with CLAP against
    the caption and the best one is written.
    """
    args = parse_args()
    # The first line of the summary file holds the training arguments.
    with open(args.original_args) as summary_file:
        train_args = dotdict(json.loads(summary_file.readline()))

    seed = args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Prepare Data #
    extension = args.test_file.split(".")[-1]
    raw_datasets = load_dataset(extension, data_files={"test": args.test_file})
    test_dataset = getattr(ConDataset, train_args.dataset_class)(raw_datasets["test"], train_args)
    test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=args.batch_size, collate_fn=test_dataset.collate_fn)

    # Load Models #
    print("\n------Load model")
    name = "audioldm-s-full"
    vae, stft = ConDiffusion.build_pretrained_models(name)
    vae, stft = vae.cuda(), stft.cuda()
    print(train_args.model_class)
    model = getattr(ConDiffusion, train_args.model_class)(
        scheduler_name=train_args.scheduler_name,
        unet_model_config_path=train_args.unet_model_config,
        snr_gamma=train_args.snr_gamma
    ).cuda().eval()

    # Load Trained Weight #
    device = vae.device()
    model.load_state_dict(torch.load(args.model_pt))
    scheduler = DDPMScheduler.from_pretrained(train_args.scheduler_name, subfolder="scheduler")

    # Generate #
    num_steps, guidance, num_samples = args.num_steps, args.guidance, args.num_samples
    audio_len = 16000 * 10
    # NOTE(review): the original referenced a CONTROLLABLE_PATH name that this
    # module never defines or imports, so it raised NameError. The root is now
    # taken from the CONTROLLABLE_PATH environment variable and falls back to
    # the experiment directory -- confirm the intended location.
    output_root = os.environ.get("CONTROLLABLE_PATH", args.exp_path)
    output_dir = os.path.join(output_root,
                              f"synthesized/{'-'.join(args.model_pt.split('/')[-3:-1])}_steps-{num_steps}_guidance-{guidance}_samples-{num_samples}_{args.test_file.split('/')[-1].split('.')[0]}/")
    os.makedirs(output_dir, exist_ok=True)
    print(f"------Write to files to {output_dir}")

    print("------Diffusion begin!")
    clap_scorer = None
    if num_samples != 1:
        print("Clap scorer filter")
        clap_scorer = _load_clap_scorer()

    for batch in tqdm(test_dataloader):
        # Column order: idx, onset, event_info, audios, caption, onset_str.
        idx, onset, event_info, _, caption, onset_str = batch
        with torch.no_grad():
            latents = model.inference({"onset": onset.to(device), "event_info": event_info.to(device)}, scheduler, num_steps, guidance, num_samples, disable_progress=True)
            mel = vae.decode_first_stage(latents)
            wave = vae.decode_to_waveform(mel)

        # Iterate over the rows actually present: the last batch may be
        # shorter than ``args.batch_size``.
        for j in range(len(onset_str)):
            first = j * num_samples
            if clap_scorer is None:
                chosen = wave[j]
            else:
                candidates = [wave[first + k] for k in range(num_samples)]
                chosen = candidates[_best_candidate(clap_scorer, caption[j], candidates, audio_len)]
            sf.write(f"{output_dir}/{idx[j]}--{onset_str[j]}.wav", chosen[:audio_len], samplerate=16000, subtype='PCM_16')
parser.add_argument( + "--learning_rate", '-lr', type=float, default=3e-5, + help="Initial learning rate (after the potential warmup period) to use.", + ) + parser.add_argument( + "--num_epochs", '-e', type=int, default=40, + help="Total number of training epochs to perform." + ) + parser.add_argument( + "--output_dir", '-o', type=str, default=None, + help="Where to store the final model." + ) + parser.add_argument( + "--model_class", '-m', type=str, default="ClapText_Onset_2_Audio_Diffusion", #TextOnset2AudioDiffusion + help="name of model_class" + ) + parser.add_argument( + "--dataset_class", '-dc', type=str, default="Clap_Onset_2_Audio_Dataset", #Text_Onset2AudioDataset + help="name of model_class" + ) + parser.add_argument( + "--duration", '-d', type=float, default=10, + help="Audio duration." + ) + parser.add_argument( + "--num_examples", '-n', type=int, default=-1, + help="How many examples to use for training.", + ) + parser.add_argument( + "--scheduler_name", type=str, + default="stabilityai/stable-diffusion-2-1", + help="Scheduler identifier.", + ) + parser.add_argument( + "--unet_model_config", type=str, default="utils/configs/frequency.json", + help="UNet model config json path.", + ) + parser.add_argument( + "--text_column", type=str, default="captions", + help="The name of the column in the datasets containing the input texts.", + ) + parser.add_argument( + "--onset_column", type=str, default="onset", + help="The name of the column in the datasets containing the osnet.", + ) + parser.add_argument( + "--audio_column", type=str, default="location", + help="The name of the column in the datasets containing the audio paths.", + ) + if True: + parser.add_argument( + "--augment", action="store_true", default=False, + help="Augment training data.", + ) + parser.add_argument( + "--uncondition", action="store_true", default=False, + help="10% uncondition for training.", + ) + parser.add_argument( + "--weight_decay", type=float, default=1e-8, + help="Weight 
decay to use." + ) + parser.add_argument( + "--snr_gamma", type=float, + #default=None, + default=5.0, + help="SNR weighting gamma to be used if rebalancing the loss. Recommended value is 5.0. " + "More details here: https://arxiv.org/abs/2303.09556.", + ) + parser.add_argument( + "--max_train_steps", type=int, default=None, + help="Total number of training steps to perform. If provided, overrides num_epochs.", + ) + parser.add_argument( + "--gradient_accumulation_steps", type=int, default=4, + help="Number of updates steps to accumulate before performing a backward/update pass.", + ) + parser.add_argument( + "--lr_scheduler_type", type=SchedulerType, default="linear", + help="The scheduler type to use.", + choices=["linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup"], + ) + parser.add_argument( + "--num_warmup_steps", type=int, default=0, + help="Number of steps for the warmup in the lr scheduler." + ) + parser.add_argument( + "--adam_beta1", type=float, default=0.9, + help="The beta1 parameter for the Adam optimizer." + ) + parser.add_argument( + "--adam_beta2", type=float, default=0.999, + help="The beta2 parameter for the Adam optimizer." + ) + parser.add_argument( + "--adam_weight_decay", type=float, default=1e-2, + help="Weight decay to use." + ) + parser.add_argument( + "--adam_epsilon", type=float, default=1e-08, + help="Epsilon value for the Adam optimizer" + ) + parser.add_argument( + "--seed", type=int, default=0, + help="A seed for reproducible training." + ) + parser.add_argument( + "--checkpointing_steps", type=str, default="best", + help="Whether the various states should be saved at the end of every 'epoch' or 'best' whenever validation loss decreases.", + ) + parser.add_argument( + "--save_every", type=int, default=40, + help="Save model after every how many epochs when checkpointing_steps is set to best." 
+ ) + parser.add_argument( + "--resume_from_checkpoint", type=str, default=None, + help="If the training should continue from a local checkpoint folder.", + ) + parser.add_argument( + "--with_tracking", action="store_true", + help="Whether to enable experiment trackers for logging.", + ) + parser.add_argument( + "--report_to", type=str, default="all", + help=( + 'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,' + ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.' + "Only applicable when `--with_tracking` is passed." + ), + ) + args = parser.parse_args() + + return args + + + +def main(): + args = parse_args() + args.event_list = get_event_list() + print(args) + accelerator_log_kwargs = {} + + if args.with_tracking: + accelerator_log_kwargs["log_with"] = args.report_to + accelerator_log_kwargs["logging_dir"] = args.output_dir + + accelerator = Accelerator(gradient_accumulation_steps=args.gradient_accumulation_steps, **accelerator_log_kwargs) + + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + level=logging.INFO, + ) + logger.info(accelerator.state, main_process_only=False) + + datasets.utils.logging.set_verbosity_error() + diffusers.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # If passed along, set the training seed now. 
def main():
    """Run the controllable-diffusion training loop driven by the CLI options.

    Steps, in order: parse arguments, build the ``Accelerator``, seed the RNGs,
    create the output directory and ``summary.jsonl`` (main process only),
    build the pretrained VAE/STFT and the diffusion model, build the dataset
    and dataloader, set up AdamW plus the LR scheduler, optionally resume from
    a checkpoint, then train for ``args.num_epochs`` epochs. After each epoch
    the main process appends a summary record and, when the epoch's mean train
    loss is the best so far and ``--checkpointing_steps best`` is set, writes
    ``best.pt`` into the output directory.
    """
    args = parse_args()
    args.event_list = get_event_list()
    print(args)

    accelerator_log_kwargs = {}
    if args.with_tracking:
        accelerator_log_kwargs["log_with"] = args.report_to
        accelerator_log_kwargs["logging_dir"] = args.output_dir

    accelerator = Accelerator(
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        **accelerator_log_kwargs,
    )

    # Make one log on every process with the configuration for debugging.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )
    logger.info(accelerator.state, main_process_only=False)

    datasets.utils.logging.set_verbosity_error()
    diffusers.utils.logging.set_verbosity_error()
    transformers.utils.logging.set_verbosity_error()

    # Seed every RNG in use so runs are reproducible.
    set_seed(args.seed)
    seed = args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Handle output directory creation (main process only).
    if accelerator.is_main_process:
        run_root = f"ckpts/{args.model_class}_{args.dataset_class}/"
        if args.output_dir is None or args.output_dir == "":
            args.output_dir = run_root + "base"
        else:
            args.output_dir = run_root + args.output_dir
        os.makedirs(args.output_dir, exist_ok=True)

        # "w": every run starts a fresh summary file with its configuration.
        with open("{}/summary.jsonl".format(args.output_dir), "w") as f:
            f.write(json.dumps(dict(vars(args))) + "\n\n")

        accelerator.project_configuration.automatic_checkpoint_naming = False

    accelerator.wait_for_everyone()

    # Initialize models
    pretrained_model_name = "audioldm-s-full"
    vae, stft = ConDiffusion.build_pretrained_models(pretrained_model_name)

    model = getattr(ConDiffusion, args.model_class)(
        scheduler_name=args.scheduler_name, unet_model_config_path=args.unet_model_config,
        snr_gamma=args.snr_gamma, uncondition=args.uncondition,
    )

    # Get the datasets; the file extension selects the `datasets` loader.
    extension = args.train_file.split(".")[-1]
    raw_datasets = load_dataset(extension, data_files={"train": args.train_file})
    with accelerator.main_process_first():
        train_dataset = getattr(ConDataset, args.dataset_class)(raw_datasets["train"], args)
        accelerator.print("Num instances in train: {}".format(train_dataset.get_num_instances()))

    train_dataloader = DataLoader(
        train_dataset, shuffle=True, batch_size=args.batch_size, collate_fn=train_dataset.collate_fn
    )

    # Optimizer: when the model has a text encoder it is frozen and only the
    # UNet parameters are optimised.
    optimizer_parameters = model.parameters()
    if hasattr(model, "text_encoder"):
        for param in model.text_encoder.parameters():
            param.requires_grad = False
        model.text_encoder.eval()
        optimizer_parameters = model.unet.parameters()
        accelerator.print("Optimizing UNet parameters.")

    num_trainable_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    accelerator.print("Num trainable parameters: {}".format(num_trainable_parameters))

    optimizer = torch.optim.AdamW(
        optimizer_parameters, lr=args.learning_rate,
        betas=(args.adam_beta1, args.adam_beta2),
        weight_decay=args.adam_weight_decay,
        eps=args.adam_epsilon,
    )

    # Scheduler and math around the number of training steps.
    overrode_max_train_steps = False
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    if args.max_train_steps is None:
        args.max_train_steps = args.num_epochs * num_update_steps_per_epoch
        overrode_max_train_steps = True

    lr_scheduler = get_scheduler(
        name=args.lr_scheduler_type,
        optimizer=optimizer,
        num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps,
        num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
    )

    # Prepare everything with our `accelerator`.
    vae, stft, model, optimizer, lr_scheduler = accelerator.prepare(
        vae, stft, model, optimizer, lr_scheduler
    )
    train_dataloader = accelerator.prepare(train_dataloader)

    # The dataloader length may have changed after `prepare`, so recompute.
    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
    if overrode_max_train_steps:
        args.max_train_steps = args.num_epochs * num_update_steps_per_epoch
    # Afterwards we recalculate our number of training epochs
    args.num_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)

    # Initialize the trackers and store the configuration.
    if args.with_tracking:
        experiment_config = vars(args)
        # TensorBoard cannot log Enums, need the raw value
        experiment_config["lr_scheduler_type"] = experiment_config["lr_scheduler_type"].value
        accelerator.init_trackers("text_to_audio_diffusion", experiment_config)

    # Train!
    total_batch_size = args.batch_size * accelerator.num_processes * args.gradient_accumulation_steps

    logger.info("***** Running training *****")
    logger.info(f" Num examples = {len(train_dataset)}")
    logger.info(f" Num Epochs = {args.num_epochs}")
    logger.info(f" Instantaneous batch size per device = {args.batch_size}")
    logger.info(f" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
    logger.info(f" Gradient Accumulation steps = {args.gradient_accumulation_steps}")
    logger.info(f" Total optimization steps = {args.max_train_steps}")

    # Only show the progress bar once on each machine.
    progress_bar = tqdm(range(args.max_train_steps), disable=not accelerator.is_local_main_process)

    completed_steps = 0
    starting_epoch = 0

    # Potentially load in the weights and states from a previous save.
    # The previous inner test (`is not None or != ""`) was always true, which
    # made its "most recent checkpoint" fallback unreachable; a single
    # truthiness check expresses what actually ran.
    if args.resume_from_checkpoint:
        accelerator.load_state(args.resume_from_checkpoint)
        accelerator.print(f"Resumed from local checkpoint: {args.resume_from_checkpoint}")

    # Duration of the audio clips in seconds
    duration, best_loss, best_epoch = args.duration, np.inf, 0
    # Number of spectrogram frames requested per clip (loop-invariant).
    target_length = int(duration * 102.4)

    for epoch in range(starting_epoch, args.num_epochs):
        model.train()
        total_loss = 0
        logger.info(f"train epoch {epoch} begin!")
        for step, batch in enumerate(train_dataloader):
            with accelerator.accumulate(model):
                device = model.device

                # batch layout: idx, onset, event_info, audios, caption, onset_str
                _, onset, event_info, audios, _, _ = batch
                with torch.no_grad():
                    unwrapped_vae = accelerator.unwrap_model(vae)
                    mel, _, waveform = torch_tools.wav_to_fbank(audios, target_length, stft)
                    mel = mel.unsqueeze(1).to(device)
                    true_latent = unwrapped_vae.get_first_stage_encoding(
                        unwrapped_vae.encode_first_stage(mel)
                    )

                loss = model(
                    {"latent": true_latent, "onset": onset, "event_info": event_info},
                    validation_mode=False,
                )
                total_loss += loss.detach().float()
                accelerator.backward(loss)
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()

            # Checks if the accelerator has performed an optimization step behind the scenes
            if accelerator.sync_gradients:
                progress_bar.update(1)
                completed_steps += 1

            if completed_steps >= args.max_train_steps:
                break
        logger.info(f"train epoch {epoch} finish!")
        # NOTE(review): this runs after every epoch and is never reverted, so
        # `--uncondition` appears to apply to the first epoch only - confirm
        # that this is intended.
        model.uncondition = False

        if accelerator.is_main_process:
            result = {
                # Fixed: a stray trailing comma used to store `(epoch,)` here.
                "epoch": epoch,
                "step": completed_steps,
                "train_loss": round(total_loss.item() / len(train_dataloader), 4),
            }

            if result["train_loss"] < best_loss:
                best_loss = result["train_loss"]
                best_epoch = epoch
                if args.checkpointing_steps == "best":
                    accelerator.save(
                        accelerator.unwrap_model(model).state_dict(),
                        f"{args.output_dir}/best.pt",
                    )

            # Key spelling ("best_eopch") kept as-is: it is part of the
            # emitted summary records.
            result["best_eopch"] = best_epoch
            logger.info(result)
            result["time"] = datetime.now().strftime("%y-%m-%d-%H-%M-%S")

            with open("{}/summary.jsonl".format(args.output_dir), "a") as f:
                f.write(json.dumps(result) + "\n\n")

            if args.with_tracking:
                accelerator.log(result, step=completed_steps)
def normalize_wav(waveform):
    """Remove the mean from *waveform* and rescale it to a peak of about 0.5.

    A small epsilon (1e-8) is added to the peak so that a constant signal
    does not cause a division by zero.
    """
    centered = waveform - torch.mean(waveform)
    peak = torch.max(torch.abs(centered))
    return centered / (peak + 1e-8) * 0.5
def pad_wav(waveform, segment_length):
    """Force a 1-D waveform to exactly ``segment_length`` samples.

    Longer inputs are truncated and shorter ones are zero-padded at the end.
    When ``segment_length`` is ``None`` the waveform is returned unchanged.
    """
    waveform_length = len(waveform)

    if segment_length is None or waveform_length == segment_length:
        return waveform
    if waveform_length > segment_length:
        return waveform[:segment_length]

    # Local renamed from `pad_wav` so it no longer shadows this function.
    padding = torch.zeros(segment_length - waveform_length).to(waveform.device)
    return torch.cat([waveform, padding])


def _pad_spec(fbank, target_length=1024):
    """Pad or crop a (batch, frames, channels) tensor to ``target_length`` frames.

    Zero frames are appended when the input is too short. If the channel
    count is odd, the last channel is dropped.
    """
    batch, n_frames, channels = fbank.shape
    p = target_length - n_frames
    if p > 0:
        pad = torch.zeros(batch, p, channels).to(fbank.device)
        fbank = torch.cat([fbank, pad], 1)
    elif p < 0:
        fbank = fbank[:, :target_length, :]

    if channels % 2 != 0:
        fbank = fbank[:, :, :-1]

    return fbank


def read_wav_file(filename, segment_length):
    """Load an audio file as a (1, segment_length) waveform with peak 0.5.

    The file is resampled to 16 kHz, only the first channel is kept, and the
    signal is normalised, padded/truncated and peak-scaled.
    """
    waveform, sr = torchaudio.load(filename)  # Faster!!!
    waveform = torchaudio.functional.resample(waveform, orig_freq=sr, new_freq=16000)[0]
    try:
        waveform = normalize_wav(waveform)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit. The best-effort fallback itself is unchanged.
        print("Exception normalizing:", filename)
        waveform = torch.ones(160000)
    waveform = pad_wav(waveform, segment_length).unsqueeze(0)

    # A silent clip has a peak of exactly 0; dividing by it produced 0/0 = NaN
    # for every sample. Skip the rescale in that case and keep the zeros.
    peak = torch.max(torch.abs(waveform))
    if peak != 0:
        waveform = waveform / peak
    return 0.5 * waveform


def get_mel_from_wav(audio, _stft):
    """Return ``_stft.mel_spectrogram`` of *audio* after sanitising it.

    The audio is clipped to [-1, 1] and NaNs are replaced before the call.
    The three values returned by ``_stft.mel_spectrogram`` are passed through
    as ``(melspec, log_magnitudes_stft, energy)``.
    """
    audio = torch.nan_to_num(torch.clip(audio, -1, 1))
    # Replaces the deprecated `torch.autograd.Variable(audio, requires_grad=False)`.
    audio = audio.detach()
    melspec, log_magnitudes_stft, energy = _stft.mel_spectrogram(audio)
    return melspec, log_magnitudes_stft, energy


def wav_to_fbank(paths, target_length=1024, fn_STFT=None):
    """Load the audio files in *paths* and compute their spectrogram features.

    Args:
        paths: iterable of audio file paths, one per batch item.
        target_length: number of frames to pad/crop the features to.
        fn_STFT: object exposing ``mel_spectrogram``; required.

    Returns:
        Tuple ``(fbank, log_magnitudes_stft, waveform)``; the first two are
        padded/cropped to ``target_length`` frames, the last is the
        concatenated batch of loaded waveforms.
    """
    assert fn_STFT is not None

    # hop size is 160, so each clip is loaded as target_length * 160 samples
    waveform = torch.cat([read_wav_file(path, target_length * 160) for path in paths], 0)

    fbank, log_magnitudes_stft, energy = get_mel_from_wav(waveform, fn_STFT)
    fbank = fbank.transpose(1, 2)
    log_magnitudes_stft = log_magnitudes_stft.transpose(1, 2)

    fbank = _pad_spec(fbank, target_length)
    log_magnitudes_stft = _pad_spec(log_magnitudes_stft, target_length)

    return fbank, log_magnitudes_stft, waveform
def uncapitalize(s):
    """Return *s* with its first character lower-cased ("" for falsy input)."""
    return s[:1].lower() + s[1:] if s else ""


def mix_wavs_and_captions(path1, path2, caption1, caption2, target_length=1024):
    """Mix two audio files and join their captions with "and".

    Returns:
        Tuple ``(mixed_sound, mixed_caption)`` where ``mixed_sound`` is
        reshaped to ``(1, -1)``.
    """
    # NOTE(review): `mix` is not imported in this module (the
    # `from utils.mix import mix` line is commented out), so this call looks
    # like it raises NameError - confirm where `mix` should come from.
    sound1 = read_wav_file(path1, target_length * 160)[0].numpy()
    sound2 = read_wav_file(path2, target_length * 160)[0].numpy()
    mixed_sound = mix(sound1, sound2, 0.5, 16000).reshape(1, -1)
    mixed_caption = "{} and {}".format(caption1, uncapitalize(caption2))
    return mixed_sound, mixed_caption


def augment(paths, texts, num_items=4, target_length=1024):
    """Build up to ``num_items`` random pairwise mixtures of the given clips.

    Index pairs are drawn from all 2-combinations of ``texts`` in shuffled
    order. Fewer than two items gives no pairs, in which case the final
    ``np.concatenate`` call raises.

    Returns:
        Tuple ``(waveform, mixed_captions)``; ``waveform`` stacks the mixtures
        and is scaled to a peak of 0.5.
    """
    combinations = list(itertools.combinations(range(len(texts)), 2))
    random.shuffle(combinations)
    # Slicing already handles the "fewer pairs than requested" case.
    selected_combinations = combinations[:num_items]

    mixed_sounds, mixed_captions = [], []
    for i, j in selected_combinations:
        new_sound, new_caption = mix_wavs_and_captions(
            paths[i], paths[j], texts[i], texts[j], target_length
        )
        mixed_sounds.append(new_sound)
        mixed_captions.append(new_caption)

    waveform = torch.tensor(np.concatenate(mixed_sounds, 0))
    # Skip the rescale for an all-zero batch: dividing by a zero peak filled
    # the whole tensor with NaN.
    peak = torch.max(torch.abs(waveform))
    if peak != 0:
        waveform = waveform / peak
    waveform = 0.5 * waveform

    return waveform, mixed_captions


def augment_wav_to_fbank(paths, texts, num_items=4, target_length=1024, fn_STFT=None):
    """Create mixed-audio augmentations and compute their spectrogram features.

    Args:
        paths: audio file paths.
        texts: captions aligned with ``paths``.
        num_items: maximum number of mixtures to create.
        target_length: number of frames to pad/crop the features to.
        fn_STFT: object exposing ``mel_spectrogram``; required.

    Returns:
        Tuple ``(fbank, log_magnitudes_stft, waveform, captions)``.
    """
    assert fn_STFT is not None

    # Fixed: `num_items` and `target_length` used to be dropped here, so
    # `augment` always ran with its own defaults (4 and 1024) whatever the
    # caller asked for.
    waveform, captions = augment(paths, texts, num_items, target_length)
    fbank, log_magnitudes_stft, energy = get_mel_from_wav(waveform, fn_STFT)
    fbank = fbank.transpose(1, 2)
    log_magnitudes_stft = log_magnitudes_stft.transpose(1, 2)

    fbank = _pad_spec(fbank, target_length)
    log_magnitudes_stft = _pad_spec(log_magnitudes_stft, target_length)

    return fbank, log_magnitudes_stft, waveform, captions