|
import numpy as np |
|
import random |
|
from xtuner.utils import DEFAULT_IMAGE_TOKEN |
|
|
|
# Pool of human prompts for the grounded-caption task. Every entry is the
# image placeholder token immediately followed by an instruction asking for a
# brief description with interleaved segmentation masks.
# NOTE(review): a few prompts contain grammatical slips ("of the this image",
# "an brief explanation", "a briefly analysis"). They are runtime text, so
# they are left untouched here; changing them changes the generated data.
GCG_QUESTIONS = [
    DEFAULT_IMAGE_TOKEN + prompt
    for prompt in (
        'Could you please give me a brief description of the image? '
        'Please respond with interleaved segmentation masks for the '
        'corresponding parts of the answer.',
        'Can you provide a brief description of the this image? '
        'Please output with interleaved segmentation masks for the '
        'corresponding phrases.',
        'Please briefly describe the contents of the image. '
        'Please respond with interleaved segmentation masks for the '
        'corresponding parts of the answer.',
        'Could you give a brief explanation of what can be found within '
        'this picture? '
        'Please output with interleaved segmentation masks for the '
        'corresponding phrases.',
        'Could you give me an brief explanation of this picture? '
        'Please respond with interleaved segmentation masks for the '
        'corresponding phrases.',
        'Could you provide me with a briefly analysis of this photo? '
        'Please output with interleaved segmentation masks for the '
        'corresponding parts of the answer.',
    )
]
|
|
|
def grand_parse_annotations(example):
    """Collect the caption and per-phrase grounding data of one raw record.

    Args:
        example: Mapping that provides ``example['dense_caption']`` (with
            ``'caption'`` and ``'details'``), ``example['objects']`` and
            ``example['floating_objects']``. Each object entry must carry
            ``'id'`` and ``'segmentation'``; each detail entry must carry
            ``'ids'``, ``'tokens_positive'`` and ``'phrase'``.

    Returns:
        A new dict with four keys:
        ``'caption'`` - the caption with surrounding double quotes and
        whitespace stripped;
        ``'masks'`` - one list per kept phrase holding the ``'segmentation'``
        value of every object id the phrase refers to;
        ``'tokens_positive'`` - the ``'tokens_positive'`` value of each kept
        phrase;
        ``'labels'`` - the ``'phrase'`` value of each kept phrase.
        Detail entries whose ``'tokens_positive'`` is ``None`` are dropped.

    Raises:
        KeyError: If a required key is missing or a phrase references an
            object id that appears in neither object list.
    """
    dense_caption = example['dense_caption']
    caption_text = dense_caption['caption'].strip('"').strip()
    phrase_entries = dense_caption['details']

    # Single id -> object lookup over both lists. "floating_objects" is read
    # second, so on an id clash its entry replaces the one from "objects".
    objects_by_id = {}
    for source_key in ("objects", "floating_objects"):
        for seg_object in example[source_key]:
            objects_by_id[seg_object['id']] = seg_object

    labels = []
    spans = []
    mask_groups = []
    for entry in phrase_entries:
        member_ids = entry["ids"]
        span = entry["tokens_positive"]
        if span is None:
            # Phrase has no location in the caption; skip it.
            continue
        labels.append(entry["phrase"])
        spans.append(span)
        mask_groups.append(
            [objects_by_id[obj_id]['segmentation'] for obj_id in member_ids])

    return {
        'caption': caption_text,
        'masks': mask_groups,
        'tokens_positive': spans,
        'labels': labels,
    }
|
|
|
def grand_conversation(caption, tokens_positive):
    """Build a two-turn conversation for a grounded caption.

    Args:
        caption: Caption text to be tagged.
        tokens_positive: Iterable of ``(start, end)`` offsets into
            ``caption``, one pair per grounded phrase.

    Returns:
        A list of two dicts: a ``'human'`` turn holding a randomly chosen,
        stripped entry of ``GCG_QUESTIONS``, and a ``'gpt'`` turn holding the
        caption in which every span ``caption[start:end]`` is rewritten as
        ``<p> phrase </p> [SEG]``.
    """
    prompt = random.choice(GCG_QUESTIONS).strip()

    # Insert the markers from the span with the largest start offset down to
    # the smallest, so text inserted for one span never shifts the offsets of
    # spans that are still to be processed.
    # NOTE(review): this presumes the spans do not overlap - confirm.
    tagged = caption
    spans_right_to_left = sorted(
        tokens_positive, key=lambda span: span[0], reverse=True)
    for begin, stop in spans_right_to_left:
        head = tagged[:begin]
        phrase = tagged[begin:stop]
        tail = tagged[stop:]
        tagged = f"{head}<p> {phrase} </p> [SEG]{tail}"

    return [
        {'from': 'human', 'value': prompt},
        {'from': 'gpt', 'value': tagged},
    ]
|
|
|
def grand_preprocess(example):
    """Order the phrase annotations by caption position and add the dialogue.

    Args:
        example: Dict with ``'labels'``, ``'masks'``, ``'caption'`` and
            ``'tokens_positive'``; the three per-phrase lists are indexed in
            parallel.

    Returns:
        The same ``example`` object, updated in place: ``'labels'``,
        ``'masks'`` and ``'tokens_positive'`` are replaced by copies sorted by
        the start offset of each phrase, and ``'conversations'`` holds the
        result of ``grand_conversation`` for the caption and sorted spans.
    """
    labels = example['labels']
    mask_groups = example['masks']
    caption = example['caption']
    spans = example['tokens_positive']

    # Permutation that sorts phrases by start offset. The sort is stable, so
    # phrases with equal start offsets keep their original relative order.
    order = [
        idx
        for idx, _ in sorted(enumerate(spans), key=lambda pair: pair[1][0])
    ]

    def reorder(items):
        # Apply the same permutation to any list parallel to ``spans``.
        return [items[idx] for idx in order]

    mask_groups = reorder(mask_groups)
    labels = reorder(labels)
    spans = reorder(spans)

    example.update(
        conversations=grand_conversation(caption, spans),
        labels=labels,
        masks=mask_groups,
        tokens_positive=spans,
    )
    return example
|
|
|
def glamm_grand_map_fn(example):
    """Convert one raw record into input/output conversation pairs.

    The record is passed through ``grand_parse_annotations`` and then
    ``grand_preprocess``; the resulting ``'conversations'`` turns are then
    folded into ``{'input': ..., 'output': ...}`` pairs.

    Args:
        example: Raw record accepted by ``grand_parse_annotations``.

    Returns:
        The dict produced by the two preprocessing steps, with an extra
        ``'conversation'`` key holding the list of input/output pairs.

    Raises:
        NotImplementedError: If a turn's ``'from'`` field is neither
            ``'human'`` nor ``'gpt'``.
    """
    example = grand_preprocess(grand_parse_annotations(example))

    turns = example['conversations']

    # Leading 'gpt' turns have no preceding prompt, so they are dropped.
    first_kept = 0
    while first_kept < len(turns) and turns[first_kept]['from'] == 'gpt':
        first_kept += 1

    pending_prompt = ''
    pairs = []
    for turn in turns[first_kept:]:
        speaker = turn['from']

        if speaker == 'gpt':
            # A reply closes the pair and resets the accumulated prompt.
            pairs.append({'input': pending_prompt, 'output': turn['value']})
            pending_prompt = ''
            continue

        if speaker != 'human':
            raise NotImplementedError

        text = turn['value']
        if DEFAULT_IMAGE_TOKEN in text:
            # Normalise to a single image token at the very start, followed by
            # a newline and the remaining text. The turn itself is rewritten,
            # so example['conversations'] reflects the change as well.
            remainder = text.replace(DEFAULT_IMAGE_TOKEN, '').strip()
            text = (DEFAULT_IMAGE_TOKEN + '\n' + remainder).strip()
            turn['value'] = text
        # Consecutive human turns are concatenated into one prompt.
        pending_prompt += text

    example['conversation'] = pairs
    return example
|
|
|
|
|
|
|
|
|
|