File size: 1,774 Bytes
4a9476d f2557db 4a9476d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import torch
import torchaudio
import streamlit as st
import traceback
import psutil
# Lookup table: raw classifier label -> display name shown to the user.
# Keys are compared verbatim with the text label returned by the model, so
# they must not be "corrected" or re-cased independently of the model.
ACCENT_LABELS = {
    # North America
    "us": "American Accent",
    # British Isles / Oceania / South Asia (order kept from the original map)
    "england": "British Accent",
    "australia": "Australian Accent",
    "indian": "Indian Accent",
    "canada": "Canadian Accent",
    "bermuda": "Bermudian Accent",
    "scotland": "Scottish Accent",
    "african": "African Accent",
    "ireland": "Irish Accent",
    "newzealand": "New Zealand Accent",
    "wales": "Welsh Accent",
    "malaysia": "Malaysian Accent",
    "philippines": "Philippine Accent",
    "singapore": "Singaporean Accent",
    "hongkong": "Hong Kong Accent",
    # NOTE(review): "southatlandtic" looks like a typo, but it is presumably
    # the exact label string the model emits -- confirm before changing it.
    "southatlandtic": "South Atlantic Accent",
}
def analyze_accent(audio_tensor: torch.Tensor, sample_rate: int, model):
    """Classify a waveform and return the predicted English accent.

    The waveform is cast to float32 on the CPU, down-mixed to one channel,
    resampled to 16 kHz when ``sample_rate`` differs, and then passed to
    ``model.classify_batch``.

    Args:
        audio_tensor: Waveform tensor, either 1-D ``(samples,)`` or with the
            channel axis first (assumed ``(channels, samples)`` -- TODO
            confirm against the caller that loads the audio).
        sample_rate: Sampling rate of ``audio_tensor`` in Hz.
        model: Object exposing ``classify_batch(tensor)`` that returns a
            4-tuple; the second item is indexed as a score tensor and the
            fourth as a sequence of text labels.

    Returns:
        ``(label, score)``. ``label`` is the display name from
        ``ACCENT_LABELS``, or the title-cased raw label followed by
        ``" accent"`` when the label is not in the map. ``score`` is the
        model's score for the first batch item multiplied by 100 and rounded
        to two decimals (presumably a confidence percentage -- confirm the
        score scale against the model). On any failure the error and its
        traceback are shown through Streamlit and ``(None, None)`` is
        returned instead of raising.
    """
    target_rate = 16000

    try:
        # Cast and move first: ``mean`` below rejects integer tensors, and
        # the resampler's kernel lives on the CPU, so a non-CPU or integer
        # input would otherwise fail before reaching the model.
        audio_tensor = audio_tensor.to(device="cpu", dtype=torch.float32)

        # A bare 1-D waveform has no channel axis; without this, the
        # ``shape[0] > 1`` test below would mistake samples for channels and
        # average the whole clip down to a single value.
        if audio_tensor.dim() == 1:
            audio_tensor = audio_tensor.unsqueeze(0)

        if audio_tensor.numel() == 0:
            raise ValueError("Audio tensor is empty.")

        # Down-mix multi-channel audio to mono, keeping the leading axis so
        # the result has a leading axis of length 1 for the model call.
        if audio_tensor.shape[0] > 1:
            audio_tensor = audio_tensor.mean(dim=0, keepdim=True)

        # Resample to 16 kHz if needed.
        if sample_rate != target_rate:
            resampler = torchaudio.transforms.Resample(
                orig_freq=sample_rate, new_freq=target_rate
            )
            audio_tensor = resampler(audio_tensor)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            _, score, _, text_lab = model.classify_batch(audio_tensor)

        accent_label = text_lab[0]
        readable = ACCENT_LABELS.get(accent_label, accent_label.title() + " accent")
        return readable, round(score[0].item() * 100, 2)
    except Exception:
        # UI boundary: surface the failure in the Streamlit page rather than
        # crashing the app; callers detect failure via the (None, None) pair.
        st.error("Error during classification.")
        st.code(traceback.format_exc())
        return None, None
|