jiuface committed on
Commit
caa3c61
1 Parent(s): f7e0c7d
Files changed (2) hide show
  1. app.py +21 -29
  2. requirements.txt +2 -1
app.py CHANGED
@@ -5,6 +5,9 @@ import spaces
5
  import supervision as sv
6
  import torch
7
  from PIL import Image
 
 
 
8
 
9
  from utils.florence import load_florence_model, run_florence_inference, \
10
  FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
@@ -26,7 +29,7 @@ SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
26
  @spaces.GPU(duration=20)
27
  @torch.inference_mode()
28
  @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
29
- def process_image(image_input, task_prompt, text_input) -> Optional[Image.Image]:
30
  if not image_input:
31
  gr.Info("Please upload an image.")
32
  return None
@@ -36,6 +39,14 @@ def process_image(image_input, task_prompt, text_input) -> Optional[Image.Image]
36
  if not text_input:
37
  gr.Info("Please enter a text prompt.")
38
  return None
 
 
 
 
 
 
 
 
39
  _, result = run_florence_inference(
40
  model=FLORENCE_MODEL,
41
  processor=FLORENCE_PROCESSOR,
@@ -67,38 +78,19 @@ with gr.Blocks() as demo:
67
  image = gr.Image(type='pil', label='Upload image')
68
  image_url = gr.Textbox( label='Image url', placeholder='Enter text prompts (Optional)')
69
  task_prompt = gr.Dropdown(
70
- [
71
- "<CAPTION>",
72
- "<DETAILED_CAPTION>",
73
- "<MORE_DETAILED_CAPTION>",
74
- "<CAPTION_TO_PHRASE_GROUNDING>",
75
- "<OPEN_VOCABULARY_DETECTION>",
76
- '<DENSE_REGION_CAPTION>'
77
- ], value="<CAPTION_TO_PHRASE_GROUNDING>", label="Task Prompt", info="task prompts"
78
- ),
79
  text_prompt = gr.Textbox(label='Text prompt', placeholder='Enter text prompts')
80
  submit_button = gr.Button(value='Submit', variant='primary')
81
  with gr.Column():
82
- image_gallery = gr.Gallery(label="Generated images")
83
-
84
- text_prompt.sumbit(
85
- fn=process_image,
86
- inputs=[
87
- image,
88
- task_prompt,
89
- text_prompt
90
- ],
91
- outputs=image_gallery
92
- )
93
  submit_button.click(
94
- fn=process_image,
95
- inputs=[
96
- image,
97
- task_prompt,
98
- text_prompt
99
- ],
100
- outputs=image_gallery
101
  )
102
-
103
 
104
  demo.launch(debug=True, show_error=True)
 
5
  import supervision as sv
6
  import torch
7
  from PIL import Image
8
+ from io import BytesIO
9
+ import PIL.Image
10
+ import requests
11
 
12
  from utils.florence import load_florence_model, run_florence_inference, \
13
  FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
 
29
  @spaces.GPU(duration=20)
30
  @torch.inference_mode()
31
  @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
32
+ def process_image(image_input, image_url, task_prompt, text_input) -> Optional[Image.Image]:
33
  if not image_input:
34
  gr.Info("Please upload an image.")
35
  return None
 
39
  if not text_input:
40
  gr.Info("Please enter a text prompt.")
41
  return None
42
+
43
+ if image_url:
44
+ print("start to fetch image from url", image_url)
45
+ response = requests.get(image_url)
46
+ response.raise_for_status()
47
+ image_input = PIL.Image.open(BytesIO(response.content))
48
+ print("fetch image success")
49
+
50
  _, result = run_florence_inference(
51
  model=FLORENCE_MODEL,
52
  processor=FLORENCE_PROCESSOR,
 
78
  image = gr.Image(type='pil', label='Upload image')
79
  image_url = gr.Textbox( label='Image url', placeholder='Enter text prompts (Optional)')
80
  task_prompt = gr.Dropdown(
81
+ ["<CAPTION>", "<DETAILED_CAPTION>", "<MORE_DETAILED_CAPTION>", "<CAPTION_TO_PHRASE_GROUNDING>", "<OPEN_VOCABULARY_DETECTION>", "<DENSE_REGION_CAPTION>"], value="<CAPTION_TO_PHRASE_GROUNDING>", label="Task Prompt", info="task prompts"
82
+ )
 
 
 
 
 
 
 
83
  text_prompt = gr.Textbox(label='Text prompt', placeholder='Enter text prompts')
84
  submit_button = gr.Button(value='Submit', variant='primary')
85
  with gr.Column():
86
+ image_gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="gallery", columns=[3], rows=[1], object_fit="contain", height="auto")
87
+ print(image, image_url, task_prompt, text_prompt, image_gallery)
 
 
 
 
 
 
 
 
 
88
  submit_button.click(
89
+ fn = process_image,
90
+ inputs = [image, image_url, task_prompt, text_prompt],
91
+ outputs = [image_gallery,],
92
+ show_api=False
 
 
 
93
  )
94
+
95
 
96
  demo.launch(debug=True, show_error=True)
requirements.txt CHANGED
@@ -7,4 +7,5 @@ samv2
7
  gradio
8
  supervision
9
  opencv-python
10
- pytest
 
 
7
  gradio
8
  supervision
9
  opencv-python
10
+ pytest
11
+ requests