AmitSharma99 committed on
Commit
499194e
·
verified ·
1 Parent(s): 4afadee

Upload gradio_app.py

Browse files
Files changed (1) hide show
  1. gradio_app.py +302 -0
gradio_app.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # # if you dont use pipenv uncomment the following:
2
+ # # # from dotenv import load_dotenv
3
+ # # # load_dotenv()
4
+
5
+ # # #VoiceBot UI with Gradio
6
+ # # import os
7
+ # # import gradio as gr
8
+
9
+ # # from brain_of_the_doctor import encode_image, analyze_image_with_query
10
+ # # from voice_of_the_patient import record_audio, transcribe_with_groq
11
+ # # from voice_of_the_doctor import text_to_speech_with_gtts, text_to_speech_with_elevenlabs
12
+
13
+ # # #load_dotenv()
14
+
15
+ # # system_prompt="""You have to act as a professional doctor, i know you are not but this is for learning purpose.
16
+ # # What's in this image?. Do you find anything wrong with it medically?
17
+ # # If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
18
+ # # your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
19
+ # # Donot say 'In the image I see' but say 'With what I see, I think you have ....'
20
+ # # Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
21
+ # # Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""
22
+
23
+
24
+ # # def process_inputs(audio_filepath, image_filepath):
25
+ # # speech_to_text_output = transcribe_with_groq(GROQ_API_KEY=os.environ.get("GROQ_API_KEY"),
26
+ # # audio_filepath=audio_filepath,
27
+ # # stt_model="whisper-large-v3")
28
+
29
+ # # # Handle the image input
30
+ # # if image_filepath:
31
+ # # doctor_response = analyze_image_with_query(query=system_prompt+speech_to_text_output, encoded_image=encode_image(image_filepath), model="meta-llama/llama-4-scout-17b-16e-instruct") #model="meta-llama/llama-4-maverick-17b-128e-instruct")
32
+ # # else:
33
+ # # doctor_response = "No image provided for me to analyze"
34
+
35
+ # # # voice_of_doctor = text_to_speech_with_elevenlabs(input_text=doctor_response, output_filepath="final.mp3")
36
+ # # voice_of_doctor = text_to_speech_with_gtts(input_text=doctor_response, output_filepath="final.mp3")
37
+
38
+ # # return speech_to_text_output, doctor_response, voice_of_doctor
39
+
40
+
41
+ # # # Create the interface
42
+ # # iface = gr.Interface(
43
+ # # fn=process_inputs,
44
+ # # inputs=[
45
+ # # gr.Audio(sources=["microphone"], type="filepath"),
46
+ # # gr.Image(type="filepath")
47
+ # # ],
48
+ # # outputs=[
49
+ # # gr.Textbox(label="Speech to Text"),
50
+ # # gr.Textbox(label="Doctor's Response"),
51
+ # # gr.Audio("Temp.mp3")
52
+ # # ],
53
+ # # title="AI Doctor with Vision and Voice"
54
+ # # )
55
+
56
+ # # iface.launch(debug=True)
57
+
58
+ # # #http://127.0.0.1:7860
59
+
60
+
61
+
62
+ # # if you dont use pipenv uncomment the following:
63
+ # # from dotenv import load_dotenv
64
+ # # load_dotenv()
65
+
66
+
67
+
68
+
69
+
70
+ # # ---------------------------------------------------------
71
+
72
+
73
+
74
+
75
+ # # VoiceBot UI with Gradio
76
+ # import os
77
+ # import gradio as gr
78
+
79
+ # from brain_of_the_doctor import encode_image, analyze_image_with_query
80
+ # from voice_of_the_patient import record_audio, transcribe_with_groq
81
+ # from voice_of_the_doctor import text_to_speech_with_gtts, text_to_speech_with_elevenlabs
82
+
83
+ # # load_dotenv()
84
+
85
+ # system_prompt = """
86
+ # You have to act as a professional doctor, i know you are not but this is for learning purpose.
87
+ # What's in this image? Do you find anything wrong with it medically?
88
+ # If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in your response.
89
+ # Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
90
+ # Donot say 'In the image I see' but say 'With what I see, I think you have ....'
91
+ # Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
92
+ # Keep your answer concise (max 2 sentences). No preamble, start your answer right away please
93
+ # """
94
+
95
+ # def process_inputs(audio_filepath, image_filepath):
96
+ # # Step 1: Speech to Text
97
+ # speech_to_text_output = transcribe_with_groq(
98
+ # GROQ_API_KEY=os.environ.get("GROQ_API_KEY"),
99
+ # audio_filepath=audio_filepath,
100
+ # stt_model="whisper-large-v3"
101
+ # )
102
+
103
+ # # Step 2: Vision + Reasoning
104
+ # if image_filepath:
105
+ # doctor_response = analyze_image_with_query(
106
+ # query=system_prompt + speech_to_text_output,
107
+ # encoded_image=encode_image(image_filepath),
108
+ # model="meta-llama/llama-4-scout-17b-16e-instruct"
109
+ # )
110
+ # else:
111
+ # doctor_response = "No image provided for me to analyze."
112
+
113
+ # # Step 3: Text to Speech (Doctor’s Voice)
114
+ # output_path = "final.mp3"
115
+ # text_to_speech_with_gtts(input_text=doctor_response, output_filepath=output_path)
116
+ # # Or you can switch to ElevenLabs if available:
117
+ # # text_to_speech_with_elevenlabs(input_text=doctor_response, output_filepath=output_path)
118
+
119
+ # # Step 4: Return outputs for Gradio
120
+ # return speech_to_text_output, doctor_response, output_path
121
+
122
+
123
+ # # Step 5: Gradio Interface
124
+ # iface = gr.Interface(
125
+ # fn=process_inputs,
126
+ # inputs=[
127
+ # gr.Audio(sources=["microphone"], type="filepath", label="Speak Your Symptoms"),
128
+ # gr.Image(type="filepath", label="Upload an Affected Area Image")
129
+ # ],
130
+ # outputs=[
131
+ # gr.Textbox(label="Speech to Text (What You Said)"),
132
+ # gr.Textbox(label="Doctor's Response"),
133
+ # gr.Audio(label="Doctor's Voice Output")
134
+ # ],
135
+ # title="🩺 AI Doctor with Vision and Voice",
136
+ # description="Speak your symptoms and upload an image — get a voice and text response from your AI Doctor.",
137
+ # theme="default"
138
+ # )
139
+
140
+ # if __name__ == "__main__":
141
+ # iface.launch(debug=True)
142
+
143
+
# --------------------------------------------------------------------------------------------------------------------------
import gradio as gr
import os
import pickle
import numpy as np

# -------------------
# Load Models
# -------------------
# Resolve model paths relative to this file so the app works no matter which
# working directory it is launched from.
working_dir = os.path.dirname(os.path.abspath(__file__))

# Load the pickled classifiers with context managers so the file handles are
# closed deterministically (the original passed open(...) straight into
# pickle.load and relied on garbage collection to close the files).
with open(f"{working_dir}/models/diabetes.pkl", "rb") as _model_file:
    diabetes_model = pickle.load(_model_file)
with open(f"{working_dir}/models/heart.pkl", "rb") as _model_file:
    heart_model = pickle.load(_model_file)

# TEMPORARY: Tumor model disabled for deployment
# from tensorflow.keras.models import load_model
# tumor_model = load_model(f"{working_dir}/models/model.h5")
# -------------------
# AI Doctor Function
# -------------------
# Companion-module helpers: image encoding + vision query, speech-to-text,
# and text-to-speech, respectively.
from brain_of_the_doctor import encode_image, analyze_image_with_query
from voice_of_the_patient import transcribe_with_groq
from voice_of_the_doctor import text_to_speech_with_gtts

# Instruction prefix that is prepended to the patient's transcribed question
# before the combined text is sent to the vision model.
system_prompt = """
You have to act as a professional doctor, i know you are not but this is for learning purpose.
What's in this image? Do you find anything wrong with it medically?
If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in your response.
Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
Donot say 'In the image I see' but say 'With what I see, I think you have ....'
Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
Keep your answer concise (max 2 sentences). No preamble, start your answer right away please
"""
def ai_doctor(audio_filepath, image_filepath):
    """Run the full voice-doctor pipeline for one Gradio submission.

    Parameters
    ----------
    audio_filepath : str | None
        Path to the recorded patient audio; None when nothing was recorded.
    image_filepath : str | None
        Path to the uploaded image; None when nothing was uploaded.

    Returns
    -------
    tuple[str, str, str]
        (transcribed speech, doctor's text response, path to the mp3 with
        the spoken response).
    """
    # Step 1: speech-to-text. Guard against a missing recording so the UI
    # shows its normal outputs instead of a stack trace when the user clicks
    # the button without speaking (the original passed None straight through).
    if audio_filepath:
        speech_to_text_output = transcribe_with_groq(
            GROQ_API_KEY=os.environ.get("GROQ_API_KEY"),
            audio_filepath=audio_filepath,
            stt_model="whisper-large-v3"
        )
    else:
        speech_to_text_output = ""

    # Step 2: vision + reasoning over the uploaded image, prefixed with the
    # doctor persona prompt.
    if image_filepath:
        doctor_response = analyze_image_with_query(
            query=system_prompt + speech_to_text_output,
            encoded_image=encode_image(image_filepath),
            model="meta-llama/llama-4-scout-17b-16e-instruct"
        )
    else:
        doctor_response = "No image provided for analysis."

    # Step 3: synthesize the doctor's reply as speech (overwrites final.mp3
    # on every call).
    output_path = "final.mp3"
    text_to_speech_with_gtts(input_text=doctor_response, output_filepath=output_path)
    return speech_to_text_output, doctor_response, output_path
198
+
199
+
200
+ # -------------------
201
+ # ML Prediction Functions
202
+ # -------------------
203
+ def diabetes_predict(Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DPF, Age):
204
+ user_input = [float(x) for x in [Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DPF, Age]]
205
+ pred = diabetes_model.predict([user_input])[0]
206
+ return "Diabetic" if pred == 1 else "Not Diabetic"
def heart_predict(age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal, model=None):
    """Classify heart-disease risk from thirteen Cleveland-style features.

    All inputs arrive as strings from ``gr.Textbox`` widgets and are coerced
    to float; a ValueError propagates if a field is non-numeric.

    ``model`` optionally injects an alternative estimator with a
    scikit-learn-style ``predict`` method (defaults to the module-level
    ``heart_model``), keeping the function testable without the pickled
    model file. Existing callers are unaffected.

    Returns "Heart Disease" when the model predicts class 1, else
    "No Heart Disease".
    """
    if model is None:
        model = heart_model
    features = [float(x) for x in (age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal)]
    prediction = model.predict([features])[0]
    return "Heart Disease" if prediction == 1 else "No Heart Disease"
# Placeholder until the tumor model is re-exported (TensorFlow removed).
def tumor_predict(image):
    """Stub classifier: always reports that the tumor model is offline.

    The ``image`` argument is accepted (and ignored) so the Gradio wiring
    stays unchanged when the real model is restored.
    """
    message = "Tumor model disabled temporarily – ONNX version will be added soon."
    return message
# -------------------
# Gradio Blocks App UI
# -------------------
# Shared page styling, hoisted to a constant for readability.
_APP_CSS = """
body {background-color: #e6f2ff; font-family: 'Arial', sans-serif;}
.gr-button {background: linear-gradient(to right, #4CAF50, #45a049) !important; color: white !important; font-weight: bold;}
.gr-textbox {background-color: #ffffff !important; border-radius: 10px; padding: 8px; box-shadow: 0px 2px 5px rgba(0,0,0,0.1);}
.gr-label {font-weight: bold; font-size: 14px; color: #333;}
.gr-tabs-header {font-weight: bold; font-size: 16px; color: #333;}
"""

with gr.Blocks(css=_APP_CSS) as demo:

    with gr.Tabs():

        # --- Tab 1: multimodal AI doctor (voice + image in, text + voice out) ---
        with gr.TabItem("AI Doctor"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Speak your symptoms & upload image")
                    audio_input = gr.Audio(sources=["microphone"], type="filepath")
                    image_input = gr.Image(type="filepath")
                    doctor_button = gr.Button("Get Doctor Response")
                with gr.Column(scale=1):
                    st_text_output = gr.Textbox(label="Speech to Text", interactive=False)
                    doctor_text_output = gr.Textbox(label="Doctor's Response", interactive=False)
                    doctor_voice_output = gr.Audio(label="Doctor Voice", interactive=False)
            doctor_button.click(
                ai_doctor,
                inputs=[audio_input, image_input],
                outputs=[st_text_output, doctor_text_output, doctor_voice_output],
            )

        # --- Tab 2: diabetes classifier over eight tabular features ---
        with gr.TabItem("Diabetes Prediction"):
            with gr.Row():
                with gr.Column():
                    Pregnancies = gr.Textbox(label="Pregnancies")
                    Glucose = gr.Textbox(label="Glucose")
                    BloodPressure = gr.Textbox(label="Blood Pressure")
                    SkinThickness = gr.Textbox(label="Skin Thickness")
                    Insulin = gr.Textbox(label="Insulin")
                    BMI = gr.Textbox(label="BMI")
                    DPF = gr.Textbox(label="Diabetes Pedigree Function")
                    Age = gr.Textbox(label="Age")
                    diabetes_button = gr.Button("Check Diabetes")
                with gr.Column():
                    diabetes_output = gr.Textbox(label="Result", interactive=False)
            diabetes_button.click(
                diabetes_predict,
                inputs=[Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DPF, Age],
                outputs=diabetes_output,
            )

        # --- Tab 3: heart-disease classifier over thirteen tabular features ---
        with gr.TabItem("Heart Prediction"):
            with gr.Row():
                with gr.Column():
                    age = gr.Textbox(label="Age")
                    sex = gr.Textbox(label="Sex")
                    cp = gr.Textbox(label="Chest Pain Types")
                    trestbps = gr.Textbox(label="Resting BP")
                    chol = gr.Textbox(label="Cholesterol")
                    fbs = gr.Textbox(label="Fasting Blood Sugar")
                    restecg = gr.Textbox(label="Resting ECG")
                    thalach = gr.Textbox(label="Max Heart Rate")
                    exang = gr.Textbox(label="Exercise Induced Angina")
                    oldpeak = gr.Textbox(label="ST Depression")
                    slope = gr.Textbox(label="Slope of ST Segment")
                    ca = gr.Textbox(label="Major Vessels")
                    thal = gr.Textbox(label="Thalassemia")
                    heart_button = gr.Button("Check Heart Disease")
                with gr.Column():
                    heart_output = gr.Textbox(label="Result", interactive=False)
            heart_button.click(
                heart_predict,
                inputs=[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal],
                outputs=heart_output,
            )

        # --- Tab 4: tumor prediction (currently a disabled stub) ---
        with gr.TabItem("Tumor Prediction"):
            with gr.Row():
                with gr.Column():
                    tumor_image = gr.Image(type="filepath")
                    tumor_button = gr.Button("Check Tumor")
                with gr.Column():
                    tumor_output = gr.Textbox(label="Result", interactive=False)
            tumor_button.click(tumor_predict, inputs=tumor_image, outputs=tumor_output)

if __name__ == "__main__":
    demo.launch()