Update app.py
app.py CHANGED
@@ -1,5 +1,6 @@
 import torch
 from transformers import BitsAndBytesConfig, pipeline
+
 import whisper
 import gradio as gr
 import time
@@ -19,8 +20,6 @@ import base64
 import io
 from PIL import Image
 
-
-
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_compute_dtype=torch.float16
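For context on the hunk above: `load_in_4bit=True` stores the checkpoint weights in 4-bit precision via bitsandbytes, while `bnb_4bit_compute_dtype=torch.float16` keeps the actual matrix multiplications in fp16. The call is cut off by the diff context; here is a minimal sketch of a complete config, where the two extra knobs (`bnb_4bit_quant_type`, `bnb_4bit_use_double_quant`) are optional additions of mine, not part of this commit:

```python
import torch
from transformers import BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # weights stored in 4-bit precision
    bnb_4bit_compute_dtype=torch.float16,  # matmuls still run in fp16
    # Optional extras (not in this commit): NF4 quantization and nested
    # (double) quantization, which squeeze out a bit more memory.
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)
```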
@@ -31,7 +30,6 @@ pipe = pipeline("image-to-text",
                 model=model_id,
                 model_kwargs={"quantization_config": quantization_config})
 
-
 torch.cuda.is_available()
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using torch {torch.__version__} ({DEVICE})")
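Two notes on this hunk: the `pipeline(...)` call is truncated by the diff context (`model_id` is assigned earlier in app.py, outside these hunks), and the standalone `torch.cuda.is_available()` line discards its result, so only the ternary below it does any work. A hedged sketch of the full call, with a placeholder checkpoint name that is my assumption rather than the commit's:

```python
import torch
from transformers import BitsAndBytesConfig, pipeline

# Hypothetical placeholder; the real model_id is set earlier in app.py,
# outside this diff's context.
model_id = "llava-hf/llava-1.5-7b-hf"

quantization_config = BitsAndBytesConfig(load_in_4bit=True,
                                         bnb_4bit_compute_dtype=torch.float16)

pipe = pipeline("image-to-text",
                model=model_id,
                model_kwargs={"quantization_config": quantization_config})

# The standalone torch.cuda.is_available() in the hunk is a no-op;
# this ternary is what actually selects the device string.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using torch {torch.__version__} ({DEVICE})")
```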
@@ -53,7 +51,7 @@ def img2txt(input_text, input_image):
     image = Image.open(input_image)
 
     writehistory(f"Input text: {input_text} - Type: {type(input_text)} - Dir: {dir(input_text)}")
-    #creating a default promt for the model if user does not provide one.
+    #creating a default promt for the model if user does not provide one.
     if type(input_text) == tuple:
         prompt_instructions = """
 Describe the medical condition shown in the image using as much detail as possible and provide a treatment plan for the medical condition
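The `type(input_text) == tuple` guard above is how the app substitutes a default prompt when the user asks nothing by voice; Gradio can hand the function a tuple in that case. A sketch of the same fallback as a standalone helper (`default_prompt` is a hypothetical name of mine; `isinstance` is the more idiomatic spelling of the check):

```python
def default_prompt(input_text):
    """Hypothetical helper restating the fallback in the hunk above."""
    # Gradio may pass a tuple when the user typed/said nothing; fall back
    # to a canned instruction. isinstance() is the idiomatic type check.
    if isinstance(input_text, tuple):
        return (
            "Describe the medical condition shown in the image using as much "
            "detail as possible and provide a treatment plan for the medical "
            "condition"
        )
    return input_text
```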
@@ -108,7 +106,6 @@ def transcribe(audio):
 
     return result_text
 
-
 #transforming the text to speech
 def text_to_speech(text, file_path):
     language = 'en'
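`text_to_speech` is cut off after `language = 'en'`. Assuming the app uses gTTS, which is a guess consistent with the `language` variable and the MP3 output elsewhere in this diff rather than anything these hunks show, the body plausibly continues like this:

```python
from gtts import gTTS  # assumption: the TTS library is not visible in these hunks

def text_to_speech(text, file_path):
    language = "en"
    # Synthesize the reply and write it to an MP3 that Gradio can play back.
    audioobj = gTTS(text=text, lang=language, slow=False)
    audioobj.save(file_path)
    return file_path
```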
@@ -137,7 +134,7 @@ def process_inputs(audio_path, image_path):
 
     return speech_to_text_output, chatgpt_output, processed_audio_path
 
-#Create the interface
+# Create the interface
 iface = gr.Interface(
     fn=process_inputs,
     inputs=[
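`gr.Interface` requires `fn` to return one value per output component, in order, so the three-element return above must line up with the two Textboxes and the Audio output in the next hunk. A hedged reconstruction of the wiring, where the intermediate calls are assumptions based on function names visible elsewhere in this diff:

```python
def process_inputs(audio_path, image_path):
    # Assumed wiring: each step's function is defined elsewhere in app.py.
    speech_to_text_output = transcribe(audio_path)                     # Whisper STT
    chatgpt_output = img2txt(speech_to_text_output, image_path)        # image Q&A
    processed_audio_path = text_to_speech(chatgpt_output, "Temp.mp3")  # TTS reply
    # One return value per output component, in order.
    return speech_to_text_output, chatgpt_output, processed_audio_path
```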
@@ -149,9 +146,9 @@ iface = gr.Interface(
         gr.Textbox(label="ChatGPT Output"),
         gr.Audio("Temp.mp3")
     ],
-    title="
-    description="Upload an image and interact via voice input and audio
+    title="(Beta) Medical Research Model with Voice-to-Text Feature",
+    description="Upload an image and interact via voice input and audio.(Must give microphone permission)"
 )
 
 # Launch the interface
-iface.launch(inline=False)
+iface.launch(inline=False, share=True)
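On the final change: `share=True` makes Gradio tunnel the local app through a temporary public `*.gradio.live` URL, and `inline=False` keeps the UI from rendering inline in a notebook cell.

```python
# share=True serves the app through a temporary public *.gradio.live URL;
# inline=False skips embedding the UI inline when run inside a notebook.
iface.launch(inline=False, share=True)
```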