Upload caption_aitw_v2.py with huggingface_hub
caption_aitw_v2.py +95 -0
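For reference, an upload like the one in this commit can be done with the huggingface_hub client; a minimal sketch (the repo id below is a placeholder, not taken from this commit):

from huggingface_hub import HfApi

api = HfApi()  # authenticates via huggingface-cli login or the HF_TOKEN environment variable
api.upload_file(
    path_or_fileobj="caption_aitw_v2.py",
    path_in_repo="caption_aitw_v2.py",
    repo_id="your-username/your-repo",  # placeholder, not the actual repository of this commit
    commit_message="Upload caption_aitw_v2.py with huggingface_hub",
)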
caption_aitw_v2.py
ADDED
@@ -0,0 +1,95 @@
import os
import torch
import json
from PIL import Image
import pprint
from tqdm import tqdm
from multiprocessing import Pool, cpu_count


from chat import MiniCPMVChat, img2base64



def read_json(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data

def write_json(file_path, data):
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)

def preprocess_data(data, path_base):
    """Replace each image path with its base64 encoding to avoid repeated I/O."""
    for item in data:
        img_path = os.path.join(path_base, item['image'])
        item['image_base64'] = img2base64(img_path)
    return data



def chat_minicpm_application(image_path):

    qs = """
    List the names and locations of all interactive applications in the image, as well as their functionality and potential applications.
    """
    # qs = f'''{context}. The green frame in the picture represents the situation of clicking, need to explain why click in the corresponding area.
    # '''
    im_64 = img2base64(image_path)
    msgs = [{"role": "user", "content": qs}]
    inputs = {"image": im_64, "question": json.dumps(msgs)}
    answer = chat_model.chat(inputs)
    return answer


def chat_minicpm_content(image_path):

    qs = """
    Describe the content of this image.
    """

    im_64 = img2base64(image_path)
    msgs = [{"role": "user", "content": qs}]
    inputs = {"image": im_64, "question": json.dumps(msgs)}
    answer = chat_model.chat(inputs)
    return answer

def chat_minicpm_mind(image_path):

    qs = """
    The green frame in the picture represents the situation of clicking, need to explain why click in the corresponding area. Answer template: The green box ....
    """

    im_64 = img2base64(image_path)
    msgs = [{"role": "user", "content": qs}]
    inputs = {"image": im_64, "question": json.dumps(msgs)}
    answer = chat_model.chat(inputs)
    return answer



torch.manual_seed(0)
chat_model = MiniCPMVChat('/code/Model/MiniCPM-Llama3-V-2_5')  # load the vision-language model once; shared by all prompt helpers
path_base = '/code/Auto-GUI/dataset/'


data = read_json("/code/Auto-GUI/dataset/mind/general_blip_train_llava_coco.json")
data = [line for line in data if line['action_type'] == '#DUAL_POINT#'][17370:]  # keep dual-point actions only, skipping the first 17370 entries



for idx, i in enumerate(tqdm(data), 1):  # count from 1 so the periodic-save check below stays simple
    img_path = path_base + i['image']
    # context = data[idx]['conversations'][0]['value']
    i['application'] = chat_minicpm_application(img_path)
    i['content'] = chat_minicpm_content(img_path)
    i['mind'] = chat_minicpm_mind(img_path)

    # Save a checkpoint every 100 items.
    if idx % 100 == 0:
        write_json('/code/MiniCPM-V/general_blip_train_llava_coco_caption_mind2.json', data)

# Final save so items beyond the last multiple of 100 are also written out.
write_json('/code/MiniCPM-V/general_blip_train_llava_coco_caption_mind2.json', data)
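Note that preprocess_data, Pool, and cpu_count are defined or imported above but never used. A minimal sketch of how those unused pieces could be wired together to precompute the base64 images in parallel before the captioning loop (the _encode helper is hypothetical and assumes the same data and path_base variables):

from functools import partial
from multiprocessing import Pool, cpu_count

def _encode(item, path_base):
    # Hypothetical helper: attach the base64-encoded screenshot to each record.
    item['image_base64'] = img2base64(os.path.join(path_base, item['image']))
    return item

if __name__ == '__main__':
    with Pool(processes=cpu_count()) as pool:
        data = pool.map(partial(_encode, path_base=path_base), data)

The hard-coded [17370:] slice and the fixed output path suggest a manual resume after an interruption; deriving the resume offset from the length of the existing checkpoint file would make restarts less error-prone.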