Tags: Feature Extraction · PyTorch · Bioacoustics
Commit bd87457 (verified) by ilyassmoummad · 1 parent: 2749826

Update README.md

Files changed (1): README.md (+6 -5)
README.md CHANGED
@@ -57,8 +57,9 @@ from cvt import cvt13 # Import model architecture
 from melspectrogram import MelSpectrogramProcessor # Import Mel spectrogram processor
 
 # Initialize the preprocessor and model
-preprocessor = MelSpectrogramProcessor()
-model = cvt13()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+preprocessor = MelSpectrogramProcessor(device=device)
+model = cvt13().to(device)
 
 # Load weights trained using Cross-Entropy
 model.load_state_dict(torch.load("ce.pth", map_location="cpu")['encoder'])
@@ -81,7 +82,7 @@ def load_waveform(file_path):
 # For example, using torchaudio to load and resample
 pass
 
-waveform = load_waveform("path/to/audio.wav") # Load your audio file here
+waveform = load_waveform("path/to/audio.wav").to(device) # Load your audio file here and convert it to a PyTorch tensor.
 
 # Ensure waveform is sampled at 16 kHz, then pad/chunk as needed for 6s length
 input_tensor = preprocessor.process(waveform).unsqueeze(0) # Add batch dimension
@@ -104,7 +105,7 @@ The following table presents the classification accuracy of various models on on
 | BirdAVES-bioxn-large | 300M | 7.59±0.8 | 27.2±3.6 | 13.7±2.9 | 12.5±3.6 | 10.0±1.4 | 14.5±3.2 | 14.2 |
 | BioLingual | 28M | 6.21±1.1 | 37.5±2.9 | 17.8±3.5 | 17.6±5.1 | 22.5±4.0 | 26.4±3.4 | 21.3 |
 | Perch | 80M | 9.10±5.3 | 42.4±4.9 | 19.8±5.0 | 26.7±9.8 | 22.3±3.3 | 29.1±5.9 | 24.9 |
-| CE (Ours) | 19M | 9.55±1.5 | 41.3±3.6 | 19.7±4.7 | 25.2±5.7 | 17.8±1.4 | 31.5±5.4 | 24.2 |
+| CE (Ours) | 23M | 9.55±1.5 | 41.3±3.6 | 19.7±4.7 | 25.2±5.7 | 17.8±1.4 | 31.5±5.4 | 24.2 |
 | SimCLR (Ours) | 19M | 7.85±1.1 | 31.2±2.4 | 14.9±2.9 | 19.0±3.8 | 10.6±1.1 | 24.0±4.1 | 17.9 |
 | SupCon (Ours) | 19M | 8.53±1.1 | 39.8±6.0 | 18.8±3.0 | 20.4±6.9 | 12.6±1.6 | 23.2±3.1 | 20.5 |
 | ProtoCLR (Ours) | 19M | 9.23±1.6 | 38.6±5.1 | 18.4±2.3 | 21.2±7.3 | 15.5±2.3 | 25.8±5.2 | 21.4 |
@@ -114,7 +115,7 @@ The following table presents the classification accuracy of various models on on
 | BirdAVES-bioxn-large | 300M | 15.0±0.9 | 42.6±2.7 | 23.7±3.8 | 28.4±2.4 | 18.3±1.8 | 27.3±2.3 | 25.8 |
 | BioLingual | 28M | 13.6±1.3 | 65.2±1.4 | 31.0±2.9 | 34.3±3.5 | 43.9±0.9 | 49.9±2.3 | 39.6 |
 | Perch | 80M | 21.2±1.2 | 71.7±1.5 | 39.5±3.0 | 52.5±5.9 | 48.0±1.9 | 59.7±1.8 | 48.7 |
-| CE (Ours) | 19M | 21.4±1.3 | 69.2±1.8 | 35.6±3.4 | 48.2±5.5 | 39.9±1.1 | 57.5±2.3 | 45.3 |
+| CE (Ours) | 23M | 21.4±1.3 | 69.2±1.8 | 35.6±3.4 | 48.2±5.5 | 39.9±1.1 | 57.5±2.3 | 45.3 |
 | SimCLR (Ours) | 19M | 15.4±1.0 | 54.0±1.8 | 23.0±2.3 | 32.8±4.0 | 22.0±1.2 | 40.7±2.4 | 31.3 |
 | SupCon (Ours) | 19M | 17.2±1.3 | 64.6±2.4 | 34.1±2.9 | 42.5±2.9 | 30.8±0.8 | 48.1±2.4 | 39.5 |
 | ProtoCLR (Ours) | 19M | 19.2±1.1 | 67.9±2.8 | 36.1±4.3 | 48.0±4.3 | 34.6±2.3 | 48.6±2.8 | 42.4 |
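For readers trying the updated snippet, the README leaves `load_waveform` unimplemented (`pass`) and only hints at using torchaudio. A minimal sketch of one way to fill it in, assuming torchaudio is available and following the 16 kHz / 6 s requirements stated in the comments, could look like the block below; the constant names and the pad/crop strategy are illustrative assumptions, not part of the repository.

```python
# Hypothetical helper, not part of the repository: load audio with torchaudio,
# mix to mono, resample to 16 kHz, and pad/crop to the 6 s the model expects.
import torch
import torchaudio

TARGET_SR = 16_000          # model expects 16 kHz audio (assumed constant name)
TARGET_LEN = 6 * TARGET_SR  # 6-second clips

def load_waveform(file_path):
    waveform, sr = torchaudio.load(file_path)    # (channels, samples)
    waveform = waveform.mean(dim=0)              # mix down to mono
    if sr != TARGET_SR:                          # resample to the target rate
        waveform = torchaudio.functional.resample(waveform, sr, TARGET_SR)
    if waveform.shape[-1] < TARGET_LEN:          # pad short clips with zeros...
        waveform = torch.nn.functional.pad(waveform, (0, TARGET_LEN - waveform.shape[-1]))
    else:                                        # ...or keep only the first 6 s
        waveform = waveform[:TARGET_LEN]
    return waveform
```

With such a helper, the lines added in this commit run as written: `waveform = load_waveform("path/to/audio.wav").to(device)` followed by `input_tensor = preprocessor.process(waveform).unsqueeze(0)`, so the waveform, preprocessor, and model all sit on the same device.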