Johan014 committed on
Commit f1c7dc3
1 Parent(s): fd2fdd5

Update app.py

Files changed (1)
  1. app.py +6 -9
app.py CHANGED
@@ -1,5 +1,6 @@
 import torch
 from transformers import BitsAndBytesConfig, pipeline
+
 import whisper
 import gradio as gr
 import time
@@ -19,8 +20,6 @@ import base64
 import io
 from PIL import Image
 
-
-
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_compute_dtype=torch.float16
@@ -31,7 +30,6 @@ pipe = pipeline("image-to-text",
                 model=model_id,
                 model_kwargs={"quantization_config": quantization_config})
 
-
 torch.cuda.is_available()
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using torch {torch.__version__} ({DEVICE})")
@@ -53,7 +51,7 @@ def img2txt(input_text, input_image):
     image = Image.open(input_image)
 
     writehistory(f"Input text: {input_text} - Type: {type(input_text)} - Dir: {dir(input_text)}")
-    #creating a default promt for the model if user does not provide one.
+    #creating a default promt for the model if user does not provide one.
     if type(input_text) == tuple:
         prompt_instructions = """
         Describe the medical condition shown in the image using as much detail as possible and provide a treatment plan for the medical condition
@@ -108,7 +106,6 @@ def transcribe(audio):
 
     return result_text
 
-
 #transforming the text to speech
 def text_to_speech(text, file_path):
     language = 'en'
@@ -137,7 +134,7 @@ def process_inputs(audio_path, image_path):
 
     return speech_to_text_output, chatgpt_output, processed_audio_path
 
-#Create the interface
+# Create the interface
 iface = gr.Interface(
     fn=process_inputs,
     inputs=[
@@ -149,9 +146,9 @@ iface = gr.Interface(
         gr.Textbox(label="ChatGPT Output"),
         gr.Audio("Temp.mp3")
     ],
-    title="Learn OpenAI Whisper: Image processing with Whisper and Llava",
-    description="Upload an image and interact via voice input and audio response."
+    title="(Beta) Medical Research Model with Voice-to-Text Feature",
+    description="Upload an image and interact via voice input and audio.(Must give microphone permission)"
 )
 
 # Launch the interface
-iface.launch(inline=False)
+iface.launch(inline=False, share=True)
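
For context, a minimal sketch of the 4-bit setup these hunks are editing: BitsAndBytesConfig and the "image-to-text" pipeline call come straight from the diff, but the value of model_id is not shown in this commit, so "llava-hf/llava-1.5-7b-hf" below is only an assumed example (the old title mentions LLaVA), and the sample call at the end is illustrative rather than code from app.py.

import torch
from transformers import BitsAndBytesConfig, pipeline

# 4-bit weights with float16 compute, as configured in the diff
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)

model_id = "llava-hf/llava-1.5-7b-hf"  # assumed; the diff does not show this value
pipe = pipeline("image-to-text",
                model=model_id,
                model_kwargs={"quantization_config": quantization_config})

# Illustrative call: LLaVA-style prompts wrap the question around an <image> placeholder
outputs = pipe("example.jpg",
               prompt="USER: <image>\nDescribe the medical condition shown.\nASSISTANT:",
               generate_kwargs={"max_new_tokens": 200})
print(outputs[0]["generated_text"])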
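
A sketch of the Gradio wiring as it reads after this commit: the three outputs, the new title/description, and the launch flags are taken from the diff, while the input components, the labels marked "assumed", and the stub process_inputs are not visible in these hunks and are filled in only so the snippet is self-contained (exact Audio/Image kwargs depend on the installed Gradio version).

import gradio as gr

def process_inputs(audio_path, image_path):
    # stub; the real implementation (Whisper transcription + image model + TTS) lives in app.py
    return "transcript...", "model answer...", "Temp.mp3"

iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath"),  # assumed; this is why mic permission is needed
        gr.Image(type="filepath")                           # assumed
    ],
    outputs=[
        gr.Textbox(label="Speech to Text"),                 # assumed label
        gr.Textbox(label="ChatGPT Output"),
        gr.Audio("Temp.mp3")
    ],
    title="(Beta) Medical Research Model with Voice-to-Text Feature",
    description="Upload an image and interact via voice input and audio.(Must give microphone permission)"
)

# inline=False keeps the UI out of notebook cell output; share=True additionally opens a
# temporary public *.gradio.live URL so the demo can be reached from outside the local host.
iface.launch(inline=False, share=True)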