HareemFatima committed on
Commit
0f0cd12
1 Parent(s): 82b8169

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -40
app.py CHANGED
@@ -1,24 +1,19 @@
1
- # install --upgrade transformers scipy
2
-
3
- # install tensorflow==2.16.1
4
  from transformers import pipeline
5
- # Load model directly
6
- from transformers import AutoProcessor, AutoModelForTextToWaveform
7
-
8
- import gradio as gr
9
 
10
-
11
- processor = AutoProcessor.from_pretrained("suno/bark-small")
12
  # Load audio classification model
13
  audio_classifier = pipeline(
14
  "audio-classification", model="HareemFatima/distilhubert-finetuned-stutterdetection"
15
  )
16
 
17
- # Load text-to-speech model
18
- tts_processor = AutoProcessor.from_pretrained("suno/bark-small")
19
- tts_model = AutoModelForTextToWaveform.from_pretrained("suno/bark-small")
 
 
 
20
 
21
- # Define therapy text for different stutter types (replace with your specific therapy content)
22
  therapy_text = {
23
  "Normal Speech": "Your speech sounds great! Keep practicing!",
24
  "Blocking": "Take a deep breath and try speaking slowly. You can do it!",
@@ -26,39 +21,26 @@ therapy_text = {
26
  # Add more stutter types and therapy text here
27
  }
28
 
 
 
29
 
30
- def predict_and_synthesize(audio):
31
- """Predicts stutter type and synthesizes speech with therapy text.
32
-
33
- Args:
34
- audio (bytes): Audio data from the user.
35
 
36
- Returns:
37
- tuple: A tuple containing the predicted stutter type (string) and synthesized speech (bytes).
38
- """
39
 
40
- # Classify stuttering type using audio classification model
41
- prediction = audio_classifier(audio)
42
  stutter_type = prediction[0]["label"]
43
 
44
- # Retrieve therapy text based on predicted stutter type
45
  therapy = therapy_text.get(stutter_type, "General therapy tip: Practice slow, relaxed speech.")
46
 
47
- # Generate synthesized speech with the therapy text
48
- synthesized_speech = tts_model.generate(
49
- tts_processor(therapy, return_tensors="pt").input_ids
50
- )[0].squeeze().cpu().numpy()
51
 
52
- return stutter_type, synthesized_speech
53
-
54
-
55
- # Create Gradio interface
56
- interface = gr.Interface(
57
- fn=predict_and_synthesize,
58
- inputs="microphone",
59
- outputs=["text", "audio"],
60
- title="Stuttering Therapy Assistant",
61
- description="This app helps you identify stuttering types and provides personalized therapy suggestions. Upload an audio clip, and it will analyze the speech and generate audio with relevant therapy tips.",
62
- )
63
 
64
- interface.launch(debug=False)
 
1
+ import streamlit as st
 
 
2
  from transformers import pipeline
 
 
 
 
3
 
 
 
4
  # Load audio classification model
5
  audio_classifier = pipeline(
6
  "audio-classification", model="HareemFatima/distilhubert-finetuned-stutterdetection"
7
  )
8
 
9
+ # Load text-to-speech model (replace with your TTS model details)
10
+ # Placeholder text-to-speech function (replace with your actual implementation)
11
+ def tts(text):
12
+ # Replace this with your text-to-speech processing logic
13
+ # This is a placeholder to demonstrate the concept
14
+ return f"Synthesized speech for therapy: {text}"
15
 
16
+ # Define therapy text for different stutter types (replace with your specific content)
17
  therapy_text = {
18
  "Normal Speech": "Your speech sounds great! Keep practicing!",
19
  "Blocking": "Take a deep breath and try speaking slowly. You can do it!",
 
21
  # Add more stutter types and therapy text here
22
  }
23
 
24
+ st.title("Stuttering Therapy Assistant")
25
+ st.write("This app helps you identify stuttering types and provides personalized therapy suggestions.")
26
 
27
+ uploaded_audio = st.file_uploader("Upload Audio Clip")
 
 
 
 
28
 
29
+ if uploaded_audio is not None:
30
+ # Read audio data
31
+ audio_bytes = uploaded_audio.read()
32
 
33
+ # Classify stuttering type
34
+ prediction = audio_classifier(audio_bytes)
35
  stutter_type = prediction[0]["label"]
36
 
37
+ # Retrieve therapy text
38
  therapy = therapy_text.get(stutter_type, "General therapy tip: Practice slow, relaxed speech.")
39
 
40
+ # Generate synthesized speech (placeholder for now)
41
+ synthesized_speech = tts(therapy)
 
 
42
 
43
+ st.write(f"Predicted Stutter Type: {stutter_type}")
44
+ st.write(f"Therapy Tip: {therapy}")
45
+ st.audio(synthesized_speech) # Placeholder audio output (replace with actual synthesized speech)
 
 
 
 
 
 
 
 
46