import json def load_jsonl(json_file): with open(json_file) as f: lines = f.readlines() data = [] for line in lines: data.append(json.loads(line)) return data json_data = "/mnt/bn/xiangtai-training-data/project/VLM/data/SOLO_SFT/all_data.jsonl" image_data = "/mnt/bn/xiangtai-training-data/project/VLM/data/SOLO_SFT/images" a = load_jsonl(json_data) for index, i in enumerate(a): conversations = i['conversations'] image_name = i['image'] for msg in conversations: if "role" in msg.keys(): print(i) print(index) exit() elif 'from' in msg.keys(): continue elif 'value' in msg.keys(): continue else: print(msg.keys) # if msg['from'] == 'human' or msg['from'] == 'user' or msg['role'] == 'user': # continue # elif msg['from'] == 'gpt' or msg['from'] == 'model' or msg['role'] == 'assistant': # continue # for item in conversations: # if type(item) is str: # print(conversations)