capjamesg committed on
Commit
8db0533
·
1 Parent(s): 9fc6c1f

add space code

Browse files
Files changed (2) hide show
  1. app.py +61 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from autodistill_gpt_4v import GPT4V
3
+ from autodistill.detection import CaptionOntology
4
+ from autodistill_grounded_sam import GroundedSAM
5
+ from autodistill.utils import plot
6
+ import cv2
7
+
8
+ from autodistill.core.custom_detection_model import CustomDetectionModel
9
+
10
# Markdown banner rendered at the top of the Gradio UI (see gr.Markdown below).
MARKDOWN = """
# Grounded SAM-GPT4V

Use Grounding DINO, Meta AI's Segment Anything (SAM) and GPT-4V to label specific objects.

Visit [awesome-openai-vision-api-experiments](https://github.com/roboflow/awesome-openai-vision-api-experiments)
repository to find more OpenAI Vision API experiments or contribute your own."""
17
+
18
def respond(api_key, input_image, dino_prompt, gpt_prompt):
    """Detect regions with Grounded SAM, then classify them with GPT-4V.

    Args:
        api_key: OpenAI API key forwarded to the GPT-4V classifier.
        input_image: image as a numpy array from the Gradio image component
            (Gradio's "numpy" type delivers RGB channel order).
        dino_prompt: single caption used as the Grounded SAM detection prompt.
        gpt_prompt: comma-separated class names for GPT-4V classification.

    Returns:
        The annotated image (numpy array) produced by ``plot(..., raw=True)``.
    """
    # The channel swap is symmetric, so COLOR_BGR2RGB performs the needed
    # RGB -> BGR conversion before handing the image to OpenCV for writing.
    input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)

    # BUG FIX: the converted image was never persisted, so predict() and
    # imread() below operated on a stale or missing "input.jpg" instead of
    # the uploaded image. Write it to disk first.
    cv2.imwrite("input.jpg", input_image)

    # Compose a two-stage model: Grounded SAM proposes regions, GPT-4V
    # assigns each region one of the user-supplied class names.
    dino_gpt = CustomDetectionModel(
        detection_model=GroundedSAM(
            CaptionOntology({dino_prompt: dino_prompt})
        ),
        classification_model=GPT4V(
            CaptionOntology({k: k for k in gpt_prompt.split(", ")}),
            api_key=api_key,
        ),
    )

    results = dino_gpt.predict("input.jpg")

    # raw=True makes plot() return the annotated image instead of showing it.
    return plot(
        image=cv2.imread("input.jpg"),
        detections=results,
        classes=gpt_prompt.split(", "),
        raw=True,
    )
41
+
42
# Build the Gradio UI: prompts and API key on the left, result on the right.
# NOTE(review): indentation reconstructed from a diff with mangled whitespace —
# widget nesting (submit button inside the output column) is the presumed
# layout; verify against the running app.
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        with gr.Column():
            # type="password" masks the key in the browser input.
            api_key_textbox = gr.Textbox(
                label="OpenAI API KEY", type="password")
            dino_prompt = gr.Textbox(label="Grounded SAM Prompt")
            gpt_prompt = gr.Textbox(label="GPT-4V Prompt")
            # type="numpy" delivers the upload as a numpy array to respond().
            input_image = gr.Image(type="numpy", label="Input Image")
        with gr.Column():
            output_image = gr.Image(type="numpy", label="Output Image")
            submit_button = gr.Button()

    # Wire the button: respond() consumes the four inputs and renders its
    # returned image into the output column.
    submit_button.click(
        fn=respond,
        inputs=[api_key_textbox, input_image, dino_prompt, gpt_prompt],
        outputs=[output_image]
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ numpy
2
+ opencv-python
3
+ requests
4
+ gradio==3.50.2
5
+ autodistill
6
+ autodistill_segment_anything
+ autodistill_grounded_sam
7
+ autodistill_gpt_4v
8
+ roboflow
9
+ openai