MingDoan committed on
Commit
63f6e61
1 Parent(s): 4d5b0bf

feat: Add Image to Text

Browse files
controllers/img2text.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ from controllers.utils import image_to_bytes
3
+ from services.api_service import post_data
4
+ from .fw import get_fw_query_params
5
+
6
+
7
def img2text_controller(image: Image.Image, fw_option: str = 'none'):
    """Request a caption for ``image`` from the image-to-text endpoint.

    The image is converted with ``image_to_bytes`` and posted as the
    ``image`` file field to ``/api/img2text/`` followed by whatever
    ``get_fw_query_params(fw_option)`` returns.

    Args:
        image: Image to caption. Annotated as ``Image.Image`` because the
            name ``Image`` imported from PIL is the module, not the class.
        fw_option: Value forwarded to ``get_fw_query_params``; defaults to
            ``'none'``.

    Returns:
        The ``'caption'`` entry of the response, or ``None`` when
        ``post_data`` returns ``None``.
    """
    request_data_files = {
        "image": image_to_bytes(image),
    }
    response = post_data(
        f"/api/img2text/{get_fw_query_params(fw_option)}",
        files=request_data_files,
    )
    # post_data signals "no usable response" with None; pass that through
    # unchanged so the caller sees None instead of an exception.
    if response is None:
        return None
    return response['caption']
controllers/vqa.py CHANGED
@@ -13,7 +13,6 @@ def vqa_controller(image: Image, question: str, fw_option: str = 'none'):
13
  }
14
  response = post_data(
15
  f"/api/vqa/{get_fw_query_params(fw_option)}", files=request_data_files, data=request_data_form)
16
- print(response)
17
  if response is None:
18
  return None
19
  return response['answer']
 
13
  }
14
  response = post_data(
15
  f"/api/vqa/{get_fw_query_params(fw_option)}", files=request_data_files, data=request_data_form)
 
16
  if response is None:
17
  return None
18
  return response['answer']
views/__init__.py CHANGED
@@ -3,6 +3,7 @@ import gradio as gr
3
  from .rembg import rembg_interface
4
  from .fd import fd_interface
5
  from .chat import chat_interface
 
6
  from .vqa import vqa_interface
7
  from .config import config_interface
8
 
@@ -11,12 +12,14 @@ def create_interface():
11
  rembg_ui = rembg_interface()
12
  fd_ui = fd_interface()
13
  chat_ui = chat_interface()
14
- config_ui = config_interface()
15
  vqa_ui = vqa_interface()
 
16
 
17
  return gr.TabbedInterface(
18
  title="GDSC AI Service Inference",
19
- interface_list=[rembg_ui, fd_ui, chat_ui.queue(), vqa_ui, config_ui],
 
20
  tab_names=[rembg_ui.title, fd_ui.title,
21
- chat_ui.title, vqa_ui.title, config_ui.title],
22
  )
 
3
  from .rembg import rembg_interface
4
  from .fd import fd_interface
5
  from .chat import chat_interface
6
+ from .img2text import img2text_interface
7
  from .vqa import vqa_interface
8
  from .config import config_interface
9
 
 
12
  rembg_ui = rembg_interface()
13
  fd_ui = fd_interface()
14
  chat_ui = chat_interface()
15
+ img2text_ui = img2text_interface()
16
  vqa_ui = vqa_interface()
17
+ config_ui = config_interface()
18
 
19
  return gr.TabbedInterface(
20
  title="GDSC AI Service Inference",
21
+ interface_list=[rembg_ui, fd_ui,
22
+ chat_ui.queue(), img2text_ui, vqa_ui, config_ui],
23
  tab_names=[rembg_ui.title, fd_ui.title,
24
+ chat_ui.title, img2text_ui.title, vqa_ui.title, config_ui.title],
25
  )
views/img2text.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from services.state import get_state
3
+ from controllers.img2text import img2text_controller
4
+ from utilities.constants import FW_DEFAULT_OPTION
5
+
6
+
7
def img2text_interface():
    """Build the Gradio interface for the "Image to Text" tab.

    The interface takes a single PIL image input and shows the result of
    ``img2text_controller`` in a textbox labelled "Caption". The
    ``fw_option`` passed to the controller is read via ``get_state`` on
    every call, with ``FW_DEFAULT_OPTION`` as the fallback.

    Returns:
        The configured ``gr.Interface`` instance.
    """
    image_input = gr.Image(type="pil", label="Input Image")
    caption_output = gr.Textbox(label="Caption")

    return gr.Interface(
        title="Image to Text",
        # State is looked up inside the lambda so that each request uses the
        # current 'fw_option' rather than the value at construction time.
        fn=lambda image: img2text_controller(
            image, get_state('fw_option', FW_DEFAULT_OPTION)),
        inputs=[image_input],
        outputs=[caption_output],
        flagging_options=[],
    )