ilyassmoummad
committed on
Update README.md
Browse files
README.md
CHANGED
@@ -57,8 +57,9 @@ from cvt import cvt13 # Import model architecture
|
|
57 |
from melspectrogram import MelSpectrogramProcessor # Import Mel spectrogram processor
|
58 |
|
59 |
# Initialize the preprocessor and model
|
60 |
-
|
61 |
-
|
|
|
62 |
|
63 |
# Load weights trained using Cross-Entropy
|
64 |
model.load_state_dict(torch.load("ce.pth", map_location="cpu")['encoder'])
|
@@ -81,7 +82,7 @@ def load_waveform(file_path):
|
|
81 |
# For example, using torchaudio to load and resample
|
82 |
pass
|
83 |
|
84 |
-
waveform = load_waveform("path/to/audio.wav") # Load your audio file here
|
85 |
|
86 |
# Ensure waveform is sampled at 16 kHz, then pad/chunk as needed for 6s length
|
87 |
input_tensor = preprocessor.process(waveform).unsqueeze(0) # Add batch dimension
|
@@ -104,7 +105,7 @@ The following table presents the classification accuracy of various models on on
|
|
104 |
| BirdAVES-bioxn-large | 300M | 7.59±0.8 | 27.2±3.6 | 13.7±2.9 | 12.5±3.6 | 10.0±1.4 | 14.5±3.2 | 14.2 |
|
105 |
| BioLingual | 28M | 6.21±1.1 | 37.5±2.9 | 17.8±3.5 | 17.6±5.1 | 22.5±4.0 | 26.4±3.4 | 21.3 |
|
106 |
| Perch | 80M | 9.10±5.3 | 42.4±4.9 | 19.8±5.0 | 26.7±9.8 | 22.3±3.3 | 29.1±5.9 | 24.9 |
|
107 |
-
| CE (Ours) |
|
108 |
| SimCLR (Ours) | 19M | 7.85±1.1 | 31.2±2.4 | 14.9±2.9 | 19.0±3.8 | 10.6±1.1 | 24.0±4.1 | 17.9 |
|
109 |
| SupCon (Ours) | 19M | 8.53±1.1 | 39.8±6.0 | 18.8±3.0 | 20.4±6.9 | 12.6±1.6 | 23.2±3.1 | 20.5 |
|
110 |
| ProtoCLR (Ours) | 19M | 9.23±1.6 | 38.6±5.1 | 18.4±2.3 | 21.2±7.3 | 15.5±2.3 | 25.8±5.2 | 21.4 |
|
@@ -114,7 +115,7 @@ The following table presents the classification accuracy of various models on on
|
|
114 |
| BirdAVES-bioxn-large | 300M | 15.0±0.9 | 42.6±2.7 | 23.7±3.8 | 28.4±2.4 | 18.3±1.8 | 27.3±2.3 | 25.8 |
|
115 |
| BioLingual | 28M | 13.6±1.3 | 65.2±1.4 | 31.0±2.9 | 34.3±3.5 | 43.9±0.9 | 49.9±2.3 | 39.6 |
|
116 |
| Perch | 80M | 21.2±1.2 | 71.7±1.5 | 39.5±3.0 | 52.5±5.9 | 48.0±1.9 | 59.7±1.8 | 48.7 |
|
117 |
-
| CE (Ours) |
|
118 |
| SimCLR (Ours) | 19M | 15.4±1.0 | 54.0±1.8 | 23.0±2.3 | 32.8±4.0 | 22.0±1.2 | 40.7±2.4 | 31.3 |
|
119 |
| SupCon (Ours) | 19M | 17.2±1.3 | 64.6±2.4 | 34.1±2.9 | 42.5±2.9 | 30.8±0.8 | 48.1±2.4 | 39.5 |
|
120 |
| ProtoCLR (Ours) | 19M | 19.2±1.1 | 67.9±2.8 | 36.1±4.3 | 48.0±4.3 | 34.6±2.3 | 48.6±2.8 | 42.4 |
|
|
|
57 |
from melspectrogram import MelSpectrogramProcessor # Import Mel spectrogram processor
|
58 |
|
59 |
# Initialize the preprocessor and model
|
60 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
61 |
+
preprocessor = MelSpectrogramProcessor(device=device)
|
62 |
+
model = cvt13().to(device)
|
63 |
|
64 |
# Load weights trained using Cross-Entropy
|
65 |
model.load_state_dict(torch.load("ce.pth", map_location="cpu")['encoder'])
|
|
|
82 |
# For example, using torchaudio to load and resample
|
83 |
pass
|
84 |
|
85 |
+
waveform = load_waveform("path/to/audio.wav").to(device) # Load your audio file here and convert it to a PyTorch tensor.
|
86 |
|
87 |
# Ensure waveform is sampled at 16 kHz, then pad/chunk as needed for 6s length
|
88 |
input_tensor = preprocessor.process(waveform).unsqueeze(0) # Add batch dimension
|
|
|
105 |
| BirdAVES-bioxn-large | 300M | 7.59±0.8 | 27.2±3.6 | 13.7±2.9 | 12.5±3.6 | 10.0±1.4 | 14.5±3.2 | 14.2 |
|
106 |
| BioLingual | 28M | 6.21±1.1 | 37.5±2.9 | 17.8±3.5 | 17.6±5.1 | 22.5±4.0 | 26.4±3.4 | 21.3 |
|
107 |
| Perch | 80M | 9.10±5.3 | 42.4±4.9 | 19.8±5.0 | 26.7±9.8 | 22.3±3.3 | 29.1±5.9 | 24.9 |
|
108 |
+
| CE (Ours) | 23M | 9.55±1.5 | 41.3±3.6 | 19.7±4.7 | 25.2±5.7 | 17.8±1.4 | 31.5±5.4 | 24.2 |
|
109 |
| SimCLR (Ours) | 19M | 7.85±1.1 | 31.2±2.4 | 14.9±2.9 | 19.0±3.8 | 10.6±1.1 | 24.0±4.1 | 17.9 |
|
110 |
| SupCon (Ours) | 19M | 8.53±1.1 | 39.8±6.0 | 18.8±3.0 | 20.4±6.9 | 12.6±1.6 | 23.2±3.1 | 20.5 |
|
111 |
| ProtoCLR (Ours) | 19M | 9.23±1.6 | 38.6±5.1 | 18.4±2.3 | 21.2±7.3 | 15.5±2.3 | 25.8±5.2 | 21.4 |
|
|
|
115 |
| BirdAVES-bioxn-large | 300M | 15.0±0.9 | 42.6±2.7 | 23.7±3.8 | 28.4±2.4 | 18.3±1.8 | 27.3±2.3 | 25.8 |
|
116 |
| BioLingual | 28M | 13.6±1.3 | 65.2±1.4 | 31.0±2.9 | 34.3±3.5 | 43.9±0.9 | 49.9±2.3 | 39.6 |
|
117 |
| Perch | 80M | 21.2±1.2 | 71.7±1.5 | 39.5±3.0 | 52.5±5.9 | 48.0±1.9 | 59.7±1.8 | 48.7 |
|
118 |
+
| CE (Ours) | 23M | 21.4±1.3 | 69.2±1.8 | 35.6±3.4 | 48.2±5.5 | 39.9±1.1 | 57.5±2.3 | 45.3 |
|
119 |
| SimCLR (Ours) | 19M | 15.4±1.0 | 54.0±1.8 | 23.0±2.3 | 32.8±4.0 | 22.0±1.2 | 40.7±2.4 | 31.3 |
|
120 |
| SupCon (Ours) | 19M | 17.2±1.3 | 64.6±2.4 | 34.1±2.9 | 42.5±2.9 | 30.8±0.8 | 48.1±2.4 | 39.5 |
|
121 |
| ProtoCLR (Ours) | 19M | 19.2±1.1 | 67.9±2.8 | 36.1±4.3 | 48.0±4.3 | 34.6±2.3 | 48.6±2.8 | 42.4 |
|