Files changed (1) hide show
  1. app.py +47 -93
app.py CHANGED
@@ -4,10 +4,10 @@ import numpy as np
4
  import os
5
  import hashlib
6
  from datetime import datetime
 
7
  import soundfile as sf
8
  import torch
9
  from tenacity import retry, stop_after_attempt, wait_fixed
10
- from transformers import pipeline
11
 
12
  # Initialize local models with retry logic
13
  @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
@@ -15,7 +15,7 @@ def load_whisper_model():
15
  try:
16
  model = pipeline(
17
  "automatic-speech-recognition",
18
- model="openai/whisper-tiny", # Multilingual model
19
  device=-1, # CPU; use device=0 for GPU if available
20
  model_kwargs={"use_safetensors": True}
21
  )
@@ -65,7 +65,7 @@ try:
65
  except Exception as e:
66
  print(f"Symptom model initialization failed after retries: {str(e)}")
67
  symptom_classifier = None
68
- is_fallback_model = True
69
 
70
  def compute_file_hash(file_path):
71
  """Compute MD5 hash of a file to check uniqueness."""
@@ -75,7 +75,7 @@ def compute_file_hash(file_path):
75
  hash_md5.update(chunk)
76
  return hash_md5.hexdigest()
77
 
78
- def transcribe_audio(audio_file, language="en"):
79
  """Transcribe audio using local Whisper model."""
80
  if not whisper:
81
  return "Error: Whisper model not loaded. Check logs for details or ensure sufficient compute resources."
@@ -85,15 +85,15 @@ def transcribe_audio(audio_file, language="en"):
85
  if len(audio) < 1600: # Less than 0.1s
86
  return "Error: Audio too short. Please provide audio of at least 1 second."
87
  if np.max(np.abs(audio)) < 1e-4: # Too quiet
88
- return "Error: Audio too quiet. Please provide clear audio describing symptoms."
89
 
90
  # Save as WAV for Whisper
91
  temp_wav = f"/tmp/{os.path.basename(audio_file)}.wav"
92
  sf.write(temp_wav, audio, sr)
93
 
94
- # Transcribe with beam search and language
95
  with torch.no_grad():
96
- result = whisper(temp_wav, generate_kwargs={"num_beams": 5, "language": language})
97
  transcription = result.get("text", "").strip()
98
  print(f"Transcription: {transcription}")
99
 
@@ -104,7 +104,7 @@ def transcribe_audio(audio_file, language="en"):
104
  pass
105
 
106
  if not transcription:
107
- return "Transcription empty. Please provide clear audio describing symptoms."
108
  # Check for repetitive transcription
109
  words = transcription.split()
110
  if len(words) > 5 and len(set(words)) < len(words) / 2:
@@ -134,20 +134,10 @@ def analyze_symptoms(text):
134
  except Exception as e:
135
  return f"Error analyzing symptoms: {str(e)}", 0.0
136
 
137
- def handle_health_query(query, language="en"):
138
- """Handle health-related queries with a general response."""
139
- if not query:
140
- return "Please provide a valid health query."
141
- # Placeholder for Q&A logic (could integrate a model like BERT for Q&A)
142
- restricted_terms = ["medicine", "treatment", "drug", "prescription"]
143
- if any(term in query.lower() for term in restricted_terms):
144
- return "This tool does not provide medication or treatment advice. Please ask about symptoms or general health information (e.g., 'What are symptoms of asthma?')."
145
- return f"Response to query '{query}': For accurate health information, consult a healthcare provider."
146
-
147
- def analyze_voice(audio_file, language="en"):
148
- """Analyze voice for health indicators and handle queries."""
149
  try:
150
- # Ensure unique file name
151
  unique_path = f"/tmp/gradio/{datetime.now().strftime('%Y%m%d%H%M%S%f')}_{os.path.basename(audio_file)}"
152
  os.rename(audio_file, unique_path)
153
  audio_file = unique_path
@@ -161,43 +151,29 @@ def analyze_voice(audio_file, language="en"):
161
  print(f"Audio shape: {audio.shape}, Sampling rate: {sr}, Duration: {len(audio)/sr:.2f}s, Mean: {np.mean(audio):.4f}, Std: {np.std(audio):.4f}")
162
 
163
  # Transcribe audio
164
- transcription = transcribe_audio(audio_file, language)
165
  if "Error transcribing" in transcription:
166
  return transcription
167
 
168
- # Split transcription into symptom and query parts
169
- symptom_text = transcription
170
- query_text = None
171
- restricted_terms = ["medicine", "treatment", "drug", "prescription"]
172
- for term in restricted_terms:
173
- if term in transcription.lower():
174
- # Split at the first restricted term
175
- split_index = transcription.lower().find(term)
176
- symptom_text = transcription[:split_index].strip()
177
- query_text = transcription[split_index:].strip()
178
- break
179
 
180
- feedback = ""
 
 
 
181
 
182
- # Analyze symptoms if present
183
- if symptom_text:
184
- prediction, score = analyze_symptoms(symptom_text)
185
- if "Error analyzing" in prediction:
186
- feedback += prediction + "\n"
187
- elif prediction == "No health condition predicted":
188
- feedback += "No significant health indicators detected.\n"
189
- else:
190
- feedback += f"Possible health condition: {prediction} (confidence: {score:.4f}). Consult a doctor.\n"
191
  else:
192
- feedback += "No symptoms detected in the audio.\n"
193
 
194
- # Handle query if present
195
- if query_text:
196
- feedback += f"\nQuery detected: '{query_text}'\n"
197
- feedback += handle_health_query(query_text, language) + "\n"
198
-
199
- # Add debug info and disclaimer
200
- feedback += f"\n**Debug Info**: Transcription = '{transcription}', File Hash = {file_hash}"
201
  feedback += "\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice."
202
 
203
  # Clean up temporary audio file
@@ -211,48 +187,26 @@ def analyze_voice(audio_file, language="en"):
211
  except Exception as e:
212
  return f"Error processing audio: {str(e)}"
213
 
 
 
 
 
 
 
 
 
 
 
 
214
  # Gradio interface
215
- def create_gradio_interface():
216
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
217
- gr.Markdown(
218
- """
219
- # Health Voice Analyzer
220
- Record or upload a voice sample describing symptoms in English, Spanish, Hindi, or Mandarin (e.g., 'I have a fever').
221
- Ask health questions in the text box below (e.g., 'What are symptoms of asthma?').
222
- **Note**: Do not ask for medication or treatment advice; focus on symptoms or general health questions.
223
- **Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice.
224
- **Text-to-Speech**: Available in the web frontend (Salesforce Sites) using the browser's Web Speech API.
225
- """
226
- )
227
- with gr.Row():
228
- language = gr.Dropdown(
229
- choices=["en", "es", "hi", "zh"],
230
- label="Select Language",
231
- value="en"
232
- )
233
- with gr.Row():
234
- audio_input = gr.Audio(type="filepath", label="Record or Upload Voice")
235
- with gr.Row():
236
- query_input = gr.Textbox(label="Ask a Health Question (e.g., 'What are symptoms of asthma?')")
237
- with gr.Row():
238
- output = gr.Textbox(label="Health Assessment Feedback")
239
- with gr.Row():
240
- analyze_button = gr.Button("Analyze Voice")
241
- query_button = gr.Button("Submit Query")
242
-
243
- analyze_button.click(
244
- fn=analyze_voice,
245
- inputs=[audio_input, language],
246
- outputs=output
247
- )
248
- query_button.click(
249
- fn=handle_health_query,
250
- inputs=[query_input, language],
251
- outputs=output
252
- )
253
-
254
- return demo
255
 
256
  if __name__ == "__main__":
257
- demo = create_gradio_interface()
258
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
4
  import os
5
  import hashlib
6
  from datetime import datetime
7
+ from transformers import pipeline
8
  import soundfile as sf
9
  import torch
10
  from tenacity import retry, stop_after_attempt, wait_fixed
 
11
 
12
  # Initialize local models with retry logic
13
  @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
 
15
  try:
16
  model = pipeline(
17
  "automatic-speech-recognition",
18
+ model="openai/whisper-tiny.en",
19
  device=-1, # CPU; use device=0 for GPU if available
20
  model_kwargs={"use_safetensors": True}
21
  )
 
65
  except Exception as e:
66
  print(f"Symptom model initialization failed after retries: {str(e)}")
67
  symptom_classifier = None
68
+ is_fallback_model = True # Track if fallback model is used
69
 
70
  def compute_file_hash(file_path):
71
  """Compute MD5 hash of a file to check uniqueness."""
 
75
  hash_md5.update(chunk)
76
  return hash_md5.hexdigest()
77
 
78
+ def transcribe_audio(audio_file):
79
  """Transcribe audio using local Whisper model."""
80
  if not whisper:
81
  return "Error: Whisper model not loaded. Check logs for details or ensure sufficient compute resources."
 
85
  if len(audio) < 1600: # Less than 0.1s
86
  return "Error: Audio too short. Please provide audio of at least 1 second."
87
  if np.max(np.abs(audio)) < 1e-4: # Too quiet
88
+ return "Error: Audio too quiet. Please provide clear audio describing symptoms in English."
89
 
90
  # Save as WAV for Whisper
91
  temp_wav = f"/tmp/{os.path.basename(audio_file)}.wav"
92
  sf.write(temp_wav, audio, sr)
93
 
94
+ # Transcribe with beam search
95
  with torch.no_grad():
96
+ result = whisper(temp_wav, generate_kwargs={"num_beams": 5})
97
  transcription = result.get("text", "").strip()
98
  print(f"Transcription: {transcription}")
99
 
 
104
  pass
105
 
106
  if not transcription:
107
+ return "Transcription empty. Please provide clear audio describing symptoms in English."
108
  # Check for repetitive transcription
109
  words = transcription.split()
110
  if len(words) > 5 and len(set(words)) < len(words) / 2:
 
134
  except Exception as e:
135
  return f"Error analyzing symptoms: {str(e)}", 0.0
136
 
137
+ def analyze_voice(audio_file):
138
+ """Analyze voice for health indicators."""
 
 
 
 
 
 
 
 
 
 
139
  try:
140
+ # Ensure unique file name to avoid Gradio reuse
141
  unique_path = f"/tmp/gradio/{datetime.now().strftime('%Y%m%d%H%M%S%f')}_{os.path.basename(audio_file)}"
142
  os.rename(audio_file, unique_path)
143
  audio_file = unique_path
 
151
  print(f"Audio shape: {audio.shape}, Sampling rate: {sr}, Duration: {len(audio)/sr:.2f}s, Mean: {np.mean(audio):.4f}, Std: {np.std(audio):.4f}")
152
 
153
  # Transcribe audio
154
+ transcription = transcribe_audio(audio_file)
155
  if "Error transcribing" in transcription:
156
  return transcription
157
 
158
+ # Check for medication-related queries
159
+ if "medicine" in transcription.lower() or "treatment" in transcription.lower():
160
+ feedback = "Error: This tool does not provide medication or treatment advice. Please describe symptoms only (e.g., 'I have a fever')."
161
+ feedback += f"\n\n**Debug Info**: Transcription = '{transcription}', File Hash = {file_hash}"
162
+ feedback += "\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice."
163
+ return feedback
 
 
 
 
 
164
 
165
+ # Analyze symptoms
166
+ prediction, score = analyze_symptoms(transcription)
167
+ if "Error analyzing" in prediction:
168
+ return prediction
169
 
170
+ # Generate feedback
171
+ if prediction == "No health condition predicted":
172
+ feedback = "No significant health indicators detected."
 
 
 
 
 
 
173
  else:
174
+ feedback = f"Possible health condition: {prediction} (confidence: {score:.4f}). Consult a doctor."
175
 
176
+ feedback += f"\n\n**Debug Info**: Transcription = '{transcription}', Prediction = {prediction}, Confidence = {score:.4f}, File Hash = {file_hash}"
 
 
 
 
 
 
177
  feedback += "\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice."
178
 
179
  # Clean up temporary audio file
 
187
  except Exception as e:
188
  return f"Error processing audio: {str(e)}"
189
 
190
def test_with_sample_audio():
    """Run ``analyze_voice`` on the bundled sample recordings and collect the output.

    Each sample is copied into a scratch directory and the *copy* is analyzed.
    ``analyze_voice`` renames the file it receives (moving it under
    ``/tmp/gradio/``), so handing it the bundled path directly would remove the
    sample from ``audio_samples/`` and make every later run report
    "Sample not found".

    Returns:
        str: One newline-separated entry per sample: the text returned by
        ``analyze_voice``, ``"Sample not found: <path>"`` when the sample file
        is missing, or an ``"Error preparing sample ..."`` line when the
        scratch copy could not be created.
    """
    # Local imports: only this smoke test needs them.
    import shutil
    import tempfile

    samples = ["audio_samples/sample.wav", "audio_samples/common_voice_en.wav"]
    results = []
    for sample in samples:
        # isfile (not exists) so a stray directory with this name is reported
        # as missing instead of failing inside the copy.
        if not os.path.isfile(sample):
            results.append(f"Sample not found: {sample}")
            continue

        scratch_dir = tempfile.mkdtemp(prefix="sample_audio_")
        try:
            # copy2 returns the destination path; the basename is preserved.
            working_copy = shutil.copy2(sample, scratch_dir)
            results.append(analyze_voice(working_copy))
        except OSError as exc:
            # Keep the report best-effort: one bad sample must not abort the rest.
            results.append(f"Error preparing sample {sample}: {exc}")
        finally:
            # analyze_voice may already have moved/removed the copy; ignore that.
            shutil.rmtree(scratch_dir, ignore_errors=True)
    return "\n".join(results)
200
+
201
  # Gradio interface
202
# analyze_voice operates on a path on disk (it renames the file it is given),
# so the audio component is configured to hand over a file path.
_voice_input = gr.Audio(type="filepath", label="Record or Upload Voice")
_feedback_output = gr.Textbox(label="Health Assessment Feedback")

iface = gr.Interface(
    fn=analyze_voice,
    inputs=_voice_input,
    outputs=_feedback_output,
    title="Health Voice Analyzer",
    description="Record or upload a voice sample describing symptoms (e.g., 'I have a fever') for preliminary health assessment. Supports English only. Use clear audio (WAV, 16kHz). Do not ask for medication or treatment advice.",
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
if __name__ == "__main__":
    # Print the sample-audio report first, then start serving the UI.
    sample_report = test_with_sample_audio()
    print(sample_report)
    # Bind on all interfaces, port 7860.
    iface.launch(server_port=7860, server_name="0.0.0.0")