Update README.md
Browse files
README.md
CHANGED
@@ -4,4 +4,104 @@ datasets:
|
|
4 |
- yiye2023/GUIChat
|
5 |
- yiye2023/GUIAct
|
6 |
pipeline_tag: visual-question-answering
|
7 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
- yiye2023/GUIChat
|
5 |
- yiye2023/GUIAct
|
6 |
pipeline_tag: visual-question-answering
|
7 |
+
---
|
8 |
+
|
9 |
+
|
10 |
+
```python
|
11 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
12 |
+
import torch
|
13 |
+
|
14 |
+
from PIL import Image, ImageDraw, ImageFont
|
15 |
+
import re
|
16 |
+
|
17 |
+
|
18 |
+
def draw_circle(draw, center, radius=10, width=2, outline_color=(0, 255, 0), is_fill=False, bg_color=(0, 255, 0), transparency=80):
    """Draw a circle of the given radius centred at *center* onto *draw*.

    When ``is_fill`` is true the interior is filled with ``bg_color`` at the
    requested ``transparency`` percentage (0-100); otherwise only the outline
    is drawn.
    """
    cx, cy = center
    # Bounding box of the circle, as expected by ImageDraw.ellipse.
    bbox = (cx - radius, cy - radius, cx + radius, cy + radius)

    if not is_fill:
        draw.ellipse(bbox, width=width, outline=outline_color)
        return

    # Convert the transparency percentage into an 8-bit alpha channel value
    # and append it to the background colour.
    alpha = int((1 - transparency / 100) * 255)
    fill_color = tuple(bg_color) + (alpha,)
    draw.ellipse(bbox, width=width, outline=outline_color, fill=fill_color)
37 |
+
|
38 |
+
def draw_point(draw, center, radius1=3, radius2=6, color=(0, 255, 0)):
    """Mark *center* with two concentric circle outlines (inner then outer)."""
    for r in (radius1, radius2):
        draw_circle(draw, center, radius=r, outline_color=color)
|
41 |
+
|
42 |
+
def draw_rectangle(draw, box_coords, width=2, outline_color=(0, 255, 0), is_fill=False, bg_color=(0, 255, 0), transparency=80):
    """Draw the rectangle *box_coords* (x1, y1, x2, y2) onto *draw*.

    When ``is_fill`` is true the interior is filled with ``bg_color`` at the
    requested ``transparency`` percentage (0-100); otherwise only the outline
    is drawn.
    """
    if not is_fill:
        draw.rectangle(box_coords, width=width, outline=outline_color)
        return

    # Translate the transparency percentage into an alpha byte and build an
    # RGBA fill colour from the background colour.
    alpha = int((1 - transparency / 100) * 255)
    fill_color = tuple(bg_color) + (alpha,)
    draw.rectangle(box_coords, width=width, outline=outline_color, fill=fill_color)
53 |
+
|
54 |
+
def draw(path, out_path, response):
    """Render the ``<box>`` / ``<point>`` annotations in *response* onto an image.

    Args:
        path: Path of the source image to annotate.
        out_path: Path where the annotated image is saved.
        response: Model output containing ``<box>(x1,y1),(x2,y2)</box>`` and/or
            ``<point>(x,y)</point>`` tags, with coordinates normalised to a
            1000x1000 grid.
    """
    img = Image.open(path).convert("RGB")
    # Renamed from `draw` — the original local shadowed this function's name.
    canvas = ImageDraw.Draw(img)

    # Coordinates come in 0-1000 space; rescale to this image's pixel space.
    sx = img.width / 1000
    sy = img.height / 1000

    for box in re.findall(r"<box>(.*?)</box>", response):
        # FIX: the original bare `except:` swallowed every exception,
        # including SystemExit/KeyboardInterrupt; catch only parse errors.
        try:
            x1, y1, x2, y2 = box.replace("(", "").replace(")", "").split(",")
            draw_rectangle(canvas, (float(x1) * sx, float(y1) * sy, float(x2) * sx, float(y2) * sy))
        except (ValueError, TypeError):
            # Malformed coordinates: report and keep processing the rest.
            print("There were some errors while parsing the bounding box.")

    for point in re.findall(r"<point>(.*?)</point>", response):
        try:
            x, y = point.replace("(", "").replace(")", "").split(",")
            draw_point(canvas, (float(x) * sx, float(y) * sy))
        except (ValueError, TypeError):
            print("There were some errors while parsing the bounding point.")

    img.save(out_path)
|
77 |
+
|
78 |
+
def load_model_and_tokenizer(path, device):
    """Load the checkpoint at *path*, placing the model on *device* in eval mode.

    Returns:
        A ``(model, tokenizer)`` pair.
    """
    tok = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
    mdl = AutoModelForCausalLM.from_pretrained(
        path, device_map=device, trust_remote_code=True
    ).eval()
    return mdl, tok
|
82 |
+
|
83 |
+
|
84 |
+
def infer(model, tokenizer, image_path, text):
    """Run a single image+text query through the model and return its reply."""
    prompt = tokenizer.from_list_format([{'image': image_path}, {'text': text}])
    # History is not threaded between calls; each query starts fresh.
    response, _history = model.chat(tokenizer, query=prompt, history=None)
    return response
|
91 |
+
|
92 |
+
if __name__ == "__main__":
    device = "cuda:0"
    model_path = "<your_model_path>"
    model, tokenizer = load_model_and_tokenizer(model_path, device)

    # Simple interactive loop: type "stop" at either prompt to exit.
    while True:
        image_path = input("image path >>>>> ")
        if image_path == "stop":
            break
        query = input("Human:")
        if query == "stop":
            break

        # BUG FIX: the original referenced the undefined name `img_path`
        # here (a NameError at runtime); the variable is `image_path`.
        response = infer(model, tokenizer, image_path, query)
        draw(image_path, "1.jpg", response)
|
107 |
+
```
|