hear / app.py
walaa2022's picture
Create app.py
e07cefa verified
import gradio as gr
import torch
from transformers import AutoFeatureExtractor, AutoModel
import numpy as np
from sklearn.linear_model import LogisticRegression
# Load the HeAR model and its feature extractor once at import time; the
# request handlers below read these module-level objects on every call.
# NOTE(review): `from_pretrained` resolves MODEL_ID against the Hugging Face
# Hub -- confirm that this repo ships a transformers-compatible model config
# and preprocessor config, and whether access requires an auth token.
MODEL_ID = "google/hear"
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
model = AutoModel.from_pretrained(MODEL_ID)
# Dummy classifier (replace with your trained classifier).
# For demonstration only: the "fitted" attributes are filled in by hand with
# random weights so that predict()/predict_proba() can be called without
# training. In real use, fit a classifier on HeAR embeddings from a labeled
# dataset.
#
# The weights are drawn from a seeded, local generator so that the demo gives
# the same answer for the same recording across restarts (the global,
# unseeded RNG produced a different "model" on every launch).
# NOTE(review): EMBEDDING_DIM must equal the width of the vectors returned by
# extract_embedding(); 768 is assumed here -- confirm against the model config.
EMBEDDING_DIM = 768
_demo_rng = np.random.default_rng(0)
clf = LogisticRegression()
clf.classes_ = np.array(["Normal", "Abnormal"])
clf.coef_ = _demo_rng.standard_normal((1, EMBEDDING_DIM))
clf.intercept_ = _demo_rng.standard_normal(1)
def extract_embedding(audio):
    """Convert a Gradio audio payload into a pooled model embedding.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was provided.
            ``samples`` may be integer PCM or float, mono ``(n,)`` or
            multi-channel ``(n, channels)``.

    Returns:
        The model's ``last_hidden_state`` averaged over dimension 1, as a
        NumPy array, or ``None`` when ``audio`` is ``None`` or holds no
        samples.
    """
    if audio is None:
        return None
    sr, y = audio
    y = np.asarray(y)
    if y.size == 0:
        # An empty recording carries no signal; treat it like a missing input
        # instead of classifying two seconds of padding.
        return None

    # Integer PCM (what Gradio hands over for type="numpy") must become
    # floating point before resampling; scale it into [-1, 1].
    # Signed PCM is assumed -- unsigned formats would keep a DC offset.
    if np.issubdtype(y.dtype, np.integer):
        info = np.iinfo(y.dtype)
        y = y.astype(np.float32) / float(max(abs(info.min), info.max))
    else:
        y = y.astype(np.float32)

    # Downmix (samples, channels) to mono so the slicing, padding and
    # resampling below all operate on the time axis.
    if y.ndim > 1:
        y = y.reshape(y.shape[0], -1).mean(axis=1)

    # HeAR expects 2-second clips at 16kHz; resample, then pad/truncate.
    target_sr = 16000
    if sr != target_sr:
        # Imported lazily: only needed when the input is not already 16 kHz.
        import librosa
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)

    clip_len = target_sr * 2
    if len(y) > clip_len:
        y = y[:clip_len]
    else:
        y = np.pad(y, (0, clip_len - len(y)))

    inputs = feature_extractor(y, sampling_rate=target_sr, return_tensors="pt")
    with torch.no_grad():
        # Mean-pool over dimension 1 (presumably the token/frame axis -- TODO
        # confirm) to get one vector per clip.
        emb = model(**inputs).last_hidden_state.mean(dim=1).cpu().numpy()
    return emb
def predict(audio):
    """Classify an audio payload and format the result as Markdown.

    Args:
        audio: Value passed through to ``extract_embedding``.

    Returns:
        A Markdown string with the predicted label and the largest class
        probability, or a prompt asking for a file when no embedding could
        be produced.
    """
    embedding = extract_embedding(audio)
    if embedding is None:
        return "Please upload a heart or lung sound file."

    # Query the dummy classifier for both the label and the class
    # probabilities of the single embedded clip.
    label = clf.predict(embedding)[0]
    class_probs = clf.predict_proba(embedding)[0]
    confidence = max(class_probs)
    return f"Prediction: **{label}**\n\nConfidence: {confidence:.2%}"
# Markdown text handed to gr.Interface as its `description`.
description = """
# Heart & Lung Sound Classifier (Demo)
Upload a heart or lung sound (WAV, MP3, etc.).
This demo uses the [HeAR model](https://huggingface.co/google/hear) for health acoustic embeddings and a simple classifier for normal/abnormal prediction.
**Note:** For best results, use 2-second clips. For real diagnosis, a classifier trained on labeled heart/lung sound data should be used.
"""
# Build the input and output components first (audio before markdown, the
# same order in which they were constructed inline), then wire them to the
# prediction handler.
_audio_input = gr.Audio(
    sources=["upload", "microphone"],
    type="numpy",
    label="Upload Heart/Lung Sound",
)
_markdown_output = gr.Markdown()

# NOTE(review): newer Gradio releases may expect `flagging_mode` in place of
# `allow_flagging` -- confirm against the pinned Gradio version.
iface = gr.Interface(
    fn=predict,
    inputs=_audio_input,
    outputs=_markdown_output,
    title="Heart & Lung Sound Classifier",
    description=description,
    allow_flagging="never",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()