Spaces:

walaa2022
/

hear

Runtime error

App Files Files Community

hear / app.py

walaa2022

Create app.py

e07cefa verified 24 days ago

raw

history blame contribute delete

2.38 kB

	import gradio as gr
	import torch
	from transformers import AutoFeatureExtractor, AutoModel
	import numpy as np
	from sklearn.linear_model import LogisticRegression

	# Load the pretrained checkpoint and its matching feature extractor once, at
	# import time, so every request served by the app reuses the same objects.
	# NOTE(review): the hosting page reports "Runtime error"; confirm that this
	# repo id is accessible (it may be gated) and ships configs loadable through
	# AutoFeatureExtractor / AutoModel.
	MODEL_ID = "google/hear"
	feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
	model = AutoModel.from_pretrained(MODEL_ID)

	# Dummy classifier (replace with your trained classifier).
	# For demonstration, we simulate a fitted binary LogisticRegression by
	# assigning random weights directly to its fitted attributes. In real use,
	# train a classifier on HeAR embeddings using your labeled dataset.
	#
	# The weight vector must have exactly as many columns as the embedding that
	# extract_embedding() produces (the mean of `last_hidden_state` over dim 1),
	# otherwise clf.predict() fails with a shape mismatch. Read that width from
	# the loaded model's config instead of hard-coding it; fall back to the
	# previous value of 768 when the config does not expose `hidden_size`.
	# NOTE(review): assumes `hidden_size` equals the last dimension of
	# `last_hidden_state` for this checkpoint -- confirm against the model card.
	EMBEDDING_DIM = getattr(getattr(model, "config", None), "hidden_size", 768)

	clf = LogisticRegression()
	clf.classes_ = np.array(["Normal", "Abnormal"])
	clf.coef_ = np.random.randn(1, EMBEDDING_DIM)
	clf.intercept_ = np.random.randn(1)

	def extract_embedding(audio):
	    """Turn one Gradio audio sample into a pooled model embedding.

	    Args:
	        audio: ``None`` or a ``(sample_rate, samples)`` tuple as delivered by
	            ``gr.Audio(type="numpy")``.

	    Returns:
	        ``None`` when ``audio`` is ``None``; otherwise a NumPy array holding
	        the mean of the model's ``last_hidden_state`` over dimension 1.
	    """
	    if audio is None:
	        return None
	    sr, y = audio
	    y = np.asarray(y)

	    # Resampling needs floating-point input, but uploaded/recorded audio
	    # usually arrives as integer PCM. Scale integers by the dtype's maximum.
	    # NOTE(review): assumes signed PCM (e.g. int16) -- confirm if unsigned
	    # sources are possible.
	    if np.issubdtype(y.dtype, np.integer):
	        y = y.astype(np.float32) / np.iinfo(y.dtype).max
	    else:
	        y = y.astype(np.float32)

	    # Collapse multi-channel audio to mono *before* resampling/padding so
	    # both operate along the time axis.
	    # NOTE(review): assumes a (samples, channels) layout -- confirm.
	    if y.ndim > 1:
	        y = y.mean(axis=1)

	    # HeAR expects 2-second clips at 16kHz; pad/truncate as needed
	    target_sr = 16000
	    if sr != target_sr:
	        import librosa
	        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)

	    clip_len = target_sr * 2
	    if len(y) > clip_len:
	        y = y[:clip_len]
	    else:
	        # Right-pad short clips with zeros up to the fixed clip length.
	        y = np.pad(y, (0, clip_len - len(y)))

	    inputs = feature_extractor(y, sampling_rate=target_sr, return_tensors="pt")
	    with torch.no_grad():
	        # Average over dimension 1 to get one vector per clip.
	        emb = model(**inputs).last_hidden_state.mean(dim=1).cpu().numpy()
	    return emb

	def predict(audio):
	    """Classify an audio sample and format the result for display.

	    Args:
	        audio: Value forwarded unchanged to ``extract_embedding``.

	    Returns:
	        A prompt asking for a file when no embedding could be extracted;
	        otherwise a two-line string with the predicted class label and the
	        highest class probability formatted as a percentage.
	    """
	    embedding = extract_embedding(audio)
	    if embedding is not None:
	        # Both calls score the same single-row embedding with the dummy
	        # classifier; [0] selects that row's result.
	        label = clf.predict(embedding)[0]
	        probabilities = clf.predict_proba(embedding)[0]
	        return f"Prediction: {label}\n\nConfidence: {max(probabilities):.2%}"
	    return "Please upload a heart or lung sound file."

	# Markdown text passed as the interface description when the Gradio
	# Interface is built below. The text is runtime content.
	description = """
	# Heart & Lung Sound Classifier (Demo)
	Upload a heart or lung sound (WAV, MP3, etc.).
	This demo uses the [HeAR model](https://huggingface.co/google/hear) for health acoustic embeddings and a simple classifier for normal/abnormal prediction.
	Note: For best results, use 2-second clips. For real diagnosis, a classifier trained on labeled heart/lung sound data should be used.
	"""

	# Build the input and output components first (audio input, then Markdown
	# output), then wire them to predict() in a single Interface.
	_audio_input = gr.Audio(
	    sources=["upload", "microphone"],
	    type="numpy",
	    label="Upload Heart/Lung Sound",
	)
	_markdown_output = gr.Markdown()

	iface = gr.Interface(
	    fn=predict,
	    inputs=_audio_input,
	    outputs=_markdown_output,
	    title="Heart & Lung Sound Classifier",
	    description=description,
	    allow_flagging="never",
	)

	if __name__ == "__main__":
	    # Launch the app only when this file is run as a script, not on import.
	    iface.launch()