Spaces:

HareemFatima
/

HareemFatima-distilhubert-finetuned-stutterdetection

Runtime error

App Files Files Community

HareemFatima committed on Apr 30, 2024

Commit

0f0cd12

•

1 Parent(s): 82b8169

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -40

app.py CHANGED Viewed

@@ -1,24 +1,19 @@
-# install --upgrade transformers scipy
-# install tensorflow==2.16.1
 from transformers import pipeline
-# Load model directly
-from transformers import AutoProcessor, AutoModelForTextToWaveform
-import gradio as gr
-processor = AutoProcessor.from_pretrained("suno/bark-small")
 # Load audio classification model
 audio_classifier = pipeline(
     "audio-classification", model="HareemFatima/distilhubert-finetuned-stutterdetection"
 )
-# Load text-to-speech model
-tts_processor = AutoProcessor.from_pretrained("suno/bark-small")
-tts_model = AutoModelForTextToWaveform.from_pretrained("suno/bark-small")
-# Define therapy text for different stutter types (replace with your specific therapy content)
 therapy_text = {
     "Normal Speech": "Your speech sounds great! Keep practicing!",
     "Blocking": "Take a deep breath and try speaking slowly. You can do it!",
@@ -26,39 +21,26 @@ therapy_text = {
     # Add more stutter types and therapy text here
 }
-def predict_and_synthesize(audio):
-    """Predicts stutter type and synthesizes speech with therapy text.
-    Args:
-        audio (bytes): Audio data from the user.
-    Returns:
-        tuple: A tuple containing the predicted stutter type (string) and synthesized speech (bytes).
-    """
-    # Classify stuttering type using audio classification model
-    prediction = audio_classifier(audio)
     stutter_type = prediction[0]["label"]
-    # Retrieve therapy text based on predicted stutter type
     therapy = therapy_text.get(stutter_type, "General therapy tip: Practice slow, relaxed speech.")
-    # Generate synthesized speech with the therapy text
-    synthesized_speech = tts_model.generate(
-        tts_processor(therapy, return_tensors="pt").input_ids
-    )[0].squeeze().cpu().numpy()
-    return stutter_type, synthesized_speech
-# Create Gradio interface
-interface = gr.Interface(
-    fn=predict_and_synthesize,
-    inputs="microphone",
-    outputs=["text", "audio"],
-    title="Stuttering Therapy Assistant",
-    description="This app helps you identify stuttering types and provides personalized therapy suggestions. Upload an audio clip, and it will analyze the speech and generate audio with relevant therapy tips.",
-)
-interface.launch(debug=False)

+import streamlit as st
 from transformers import pipeline
 # Load audio classification model
 audio_classifier = pipeline(
     "audio-classification", model="HareemFatima/distilhubert-finetuned-stutterdetection"
 )
+# Load text-to-speech model (replace with your TTS model details)
+# Placeholder text-to-speech function (replace with your actual implementation)
+def tts(text):
+    # Replace this with your text-to-speech processing logic
+    # This is a placeholder to demonstrate the concept
+    return f"Synthesized speech for therapy: {text}"
+# Define therapy text for different stutter types (replace with your specific content)
 therapy_text = {
     "Normal Speech": "Your speech sounds great! Keep practicing!",
     "Blocking": "Take a deep breath and try speaking slowly. You can do it!",
     # Add more stutter types and therapy text here
 }
+st.title("Stuttering Therapy Assistant")
+st.write("This app helps you identify stuttering types and provides personalized therapy suggestions.")
+uploaded_audio = st.file_uploader("Upload Audio Clip")
+if uploaded_audio is not None:
+    # Read audio data
+    audio_bytes = uploaded_audio.read()
+    # Classify stuttering type
+    prediction = audio_classifier(audio_bytes)
     stutter_type = prediction[0]["label"]
+    # Retrieve therapy text
     therapy = therapy_text.get(stutter_type, "General therapy tip: Practice slow, relaxed speech.")
+    # Generate synthesized speech (placeholder for now)
+    synthesized_speech = tts(therapy)
+    st.write(f"Predicted Stutter Type: {stutter_type}")
+    st.write(f"Therapy Tip: {therapy}")
+    st.audio(synthesized_speech)  # Placeholder audio output (replace with actual synthesized speech)