|
import torch |
|
import librosa |
|
import pickle |
|
import numpy as np |
|
import gradio as gr |
|
|
|
class ML_model: |
|
def __init__(self): |
|
self.ml_model = torch.load("support_file/resnet_carcrash_94.pth", map_location=torch.device('cpu')) |
|
self.ml_model.eval() |
|
with open('support_file/indtocat.pkl', 'rb') as f: |
|
self.i2c = pickle.load(f) |
|
|
|
def spec_to_image(self, spec, eps=1e-6): |
|
mean = spec.mean() |
|
std = spec.std() |
|
spec_norm = (spec - mean) / (std + eps) |
|
spec_min, spec_max = spec_norm.min(), spec_norm.max() |
|
spec_scaled = 255 * (spec_norm - spec_min) / (spec_max - spec_min) |
|
spec_scaled = spec_scaled.astype(np.uint8) |
|
return spec_scaled |
|
|
|
def get_melspectrogram_db(self, file_path): |
|
|
|
wav, sr = librosa.load(file_path, sr=None) |
|
sr= 44100 |
|
|
|
n_fft = 2048 |
|
hop_length = 512 |
|
n_mels = 128 |
|
fmin = 20 |
|
fmax = 8300 |
|
if wav.shape[0]<5*sr: |
|
wav=np.pad(wav,int(np.ceil((5*sr-wav.shape[0])/2)),mode='reflect') |
|
else: |
|
wav=wav[:5*sr] |
|
|
|
spec = librosa.feature.melspectrogram(y=wav, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels, fmin=fmin, fmax=fmax) |
|
|
|
|
|
spec_db = librosa.power_to_db(spec, ref=np.max) |
|
return spec_db |
|
|
|
def get_prediction(self, file_path): |
|
spec_db = self.get_melspectrogram_db(file_path) |
|
input_image = self.spec_to_image(spec_db) |
|
input_tensor = torch.tensor(input_image[np.newaxis, np.newaxis, ...], dtype=torch.float32).to('cpu') |
|
predictions = self.ml_model(input_tensor) |
|
predicted_index = predictions.argmax(dim=1).item() |
|
return self.i2c[predicted_index] |
|
|
|
def predict(file_path): |
|
ml_model = ML_model() |
|
prediction = ml_model.get_prediction(file_path) |
|
return prediction |
|
|
|
interface = gr.Interface( |
|
fn=predict, |
|
inputs=gr.Audio(type="filepath", label="Upload your audio file"), |
|
outputs="text", |
|
title="Car Crash Sound Detection", |
|
description="Upload a car crash sound clip and the model will identify the crash type.", |
|
examples=["input_fileszQ1QmqrakIA_5-talking.wav","car-crash-bentley.mp3","input_fileszQ1QmqrakIA_13-crash.wav"], |
|
cache_examples=False |
|
) |
|
|
|
interface.launch(share=True) |