import json


def read_json_file(file_path):
    """Load and return the JSON document stored at *file_path*.

    The file is read as UTF-8. Raises ``OSError`` if the file cannot be
    opened and ``json.JSONDecodeError`` if its content is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)


def write_json_file(file_path, data):
    """Write *data* to *file_path* as pretty-printed UTF-8 JSON.

    Non-ASCII characters are written as-is (``ensure_ascii=False``) and the
    output is indented by two spaces.
    """
    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(data, file, ensure_ascii=False, indent=2)


def join_turns(value):
    """Collapse a list of text fragments into one space-separated string.

    A value that is already a string is returned unchanged: joining a string
    would insert a space between every character, which would corrupt a
    record that has already been converted.

    Raises ``TypeError`` if *value* is an iterable containing non-string
    items (the behaviour of ``str.join``).
    """
    if isinstance(value, str):
        return value
    return " ".join(value)


def convert_jsonl(input_path, output_path):
    """Rewrite a JSONL file so "input" and "output" are plain strings.

    Each non-blank line of *input_path* is parsed as a JSON object, its
    "input" and "output" fields are flattened with :func:`join_turns`, and
    the object is written to *output_path* as one JSON line. All other keys
    are passed through untouched.

    Returns the number of records written.

    Raises ``KeyError`` if a record lacks "input" or "output", and
    ``json.JSONDecodeError`` if a non-blank line is not valid JSON.
    """
    written = 0
    with open(input_path, "r", encoding="utf-8") as src:
        with open(output_path, "w", encoding="utf-8") as dst:
            for line in src:
                # Blank lines (e.g. a trailing newline at end of file) are
                # not records; json.loads would raise on them.
                if not line.strip():
                    continue
                record = json.loads(line)
                record["input"] = join_turns(record["input"])
                record["output"] = join_turns(record["output"])
                dst.write(json.dumps(record, ensure_ascii=False) + "\n")
                written += 1
    return written


if __name__ == "__main__":
    # Source and destination are both JSON Lines files (one object per line).
    input_file_path = "caixinyu/vicuna/instruct_chat_50k.jsonl/instruct_chat_50k.jsonl"
    output_file_path = "caixinyu/vicuna/instruct_chat_50k.jsonl/instruct_chat_50knew.jsonl"
    convert_jsonl(input_file_path, output_file_path)