Johan014 committed on
Commit f1c7dc3
1 Parent(s): fd2fdd5

Update app.py

Files changed (1)
  1. app.py +6 -9
app.py CHANGED
@@ -1,5 +1,6 @@
 import torch
 from transformers import BitsAndBytesConfig, pipeline
+
 import whisper
 import gradio as gr
 import time
@@ -19,8 +20,6 @@ import base64
 import io
 from PIL import Image
 
-
-
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_compute_dtype=torch.float16
@@ -31,7 +30,6 @@ pipe = pipeline("image-to-text",
                 model=model_id,
                 model_kwargs={"quantization_config": quantization_config})
 
-
 torch.cuda.is_available()
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using torch {torch.__version__} ({DEVICE})")
@@ -53,7 +51,7 @@ def img2txt(input_text, input_image):
     image = Image.open(input_image)
 
     writehistory(f"Input text: {input_text} - Type: {type(input_text)} - Dir: {dir(input_text)}")
-    #creating a default promt for the model if user does not provide one.
+    #creating a default promt for the model if user does not provide one.
     if type(input_text) == tuple:
         prompt_instructions = """
         Describe the medical condition shown in the image using as much detail as possible and provide a treatment plan for the medical condition
@@ -108,7 +106,6 @@ def transcribe(audio):
 
     return result_text
 
-
 #transforming the text to speech
 def text_to_speech(text, file_path):
     language = 'en'
@@ -137,7 +134,7 @@ def process_inputs(audio_path, image_path):
 
     return speech_to_text_output, chatgpt_output, processed_audio_path
 
-#Create the interface
+# Create the interface
 iface = gr.Interface(
     fn=process_inputs,
     inputs=[
@@ -149,9 +146,9 @@ iface = gr.Interface(
         gr.Textbox(label="ChatGPT Output"),
         gr.Audio("Temp.mp3")
     ],
-    title="Learn OpenAI Whisper: Image processing with Whisper and Llava",
-    description="Upload an image and interact via voice input and audio response."
+    title="(Beta) Medical Research Model with Voice-to-Text Feature",
+    description="Upload an image and interact via voice input and audio.(Must give microphone permission)"
 )
 
 # Launch the interface
-iface.launch(inline=False)
+iface.launch(inline=False, share=True)
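
For context, a minimal sketch of the 4-bit setup these hunks are editing: BitsAndBytesConfig and the "image-to-text" pipeline call come straight from the diff, but the value of model_id is not shown in this commit, so "llava-hf/llava-1.5-7b-hf" below is only an assumed example (the old title mentions LLaVA), and the sample call at the end is illustrative rather than code from app.py.

import torch
from transformers import BitsAndBytesConfig, pipeline

# 4-bit weights with float16 compute, as configured in the diff
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)

model_id = "llava-hf/llava-1.5-7b-hf"  # assumed; the diff does not show this value
pipe = pipeline("image-to-text",
                model=model_id,
                model_kwargs={"quantization_config": quantization_config})

# Illustrative call: LLaVA-style prompts wrap the question around an <image> placeholder
outputs = pipe("example.jpg",
               prompt="USER: <image>\nDescribe the medical condition shown.\nASSISTANT:",
               generate_kwargs={"max_new_tokens": 200})
print(outputs[0]["generated_text"])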
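
A sketch of the Gradio wiring as it reads after this commit: the three outputs, the new title/description, and the launch flags are taken from the diff, while the input components, the labels marked "assumed", and the stub process_inputs are not visible in these hunks and are filled in only so the snippet is self-contained (exact Audio/Image kwargs depend on the installed Gradio version).

import gradio as gr

def process_inputs(audio_path, image_path):
    # stub; the real implementation (Whisper transcription + image model + TTS) lives in app.py
    return "transcript...", "model answer...", "Temp.mp3"

iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath"),  # assumed; this is why mic permission is needed
        gr.Image(type="filepath")                           # assumed
    ],
    outputs=[
        gr.Textbox(label="Speech to Text"),                 # assumed label
        gr.Textbox(label="ChatGPT Output"),
        gr.Audio("Temp.mp3")
    ],
    title="(Beta) Medical Research Model with Voice-to-Text Feature",
    description="Upload an image and interact via voice input and audio.(Must give microphone permission)"
)

# inline=False keeps the UI out of notebook cell output; share=True additionally opens a
# temporary public *.gradio.live URL so the demo can be reached from outside the local host.
iface.launch(inline=False, share=True)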