# File size: 2,046 Bytes
# fa6856c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import json

# Read a JSON file.
def read_json_file(file_path):
    """Return the parsed contents of the JSON document stored at *file_path*.

    The file is opened in text mode and decoded as UTF-8.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        return json.load(handle)

# Write a JSON file.
def write_json_file(file_path, data):
    """Serialize *data* as JSON into the file at *file_path*.

    The file is opened for writing as UTF-8 text. The output is indented
    by two spaces and non-ASCII characters are written literally rather
    than as ``\\uXXXX`` escapes.
    """
    with open(file_path, "w", encoding="utf-8") as sink:
        json.dump(data, sink, indent=2, ensure_ascii=False)

def _join_segments(value):
    """Return *value* as one string, joining a sequence of strings with spaces."""
    # A value that is already a string is passed through unchanged:
    # " ".join("abc") would yield "a b c", corrupting data that has
    # already been flattened.
    if isinstance(value, str):
        return value
    return " ".join(value)


def flatten_jsonl_fields(input_path, output_path, fields=("input", "output")):
    """Rewrite a JSONL file so the given fields hold single strings.

    Each non-blank line of *input_path* is parsed as a JSON object. Every
    key named in *fields* is replaced by its items joined with single
    spaces (string values are kept as they are), and the object is written
    as one line to *output_path*. Non-ASCII characters are written
    literally.

    Args:
        input_path: Path of the UTF-8 JSONL file to read.
        output_path: Path of the UTF-8 JSONL file to create or overwrite.
        fields: Keys whose values are flattened; each must be present in
            every record.

    Raises:
        KeyError: If a record lacks one of *fields*.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(input_path, "r", encoding="utf-8") as source, \
            open(output_path, "w", encoding="utf-8") as sink:
        for line in source:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # record and would make json.loads fail.
            if not line.strip():
                continue
            record = json.loads(line)
            for field in fields:
                record[field] = _join_segments(record[field])
            sink.write(json.dumps(record, ensure_ascii=False) + "\n")


if __name__ == "__main__":
    # Source and destination JSONL paths for this conversion run.
    input_file_path = "caixinyu/vicuna/instruct_chat_50k.jsonl/instruct_chat_50k.jsonl"
    output_file_path = "caixinyu/vicuna/instruct_chat_50k.jsonl/instruct_chat_50knew.jsonl"

    # The conversion runs only when executed as a script, so importing this
    # module no longer touches the filesystem or fails on undefined paths.
    flatten_jsonl_fields(input_file_path, output_file_path)

# # Read the original JSON file
# with open(input_file_path, 'r', encoding='utf-8') as json_file:
#     data = json.load(json_file)

# # Write the data out as a JSONL file
# with open(output_file_path, 'w', encoding='utf-8') as jsonl_file:
#     for item in data:
#         jsonl_file.write(json.dumps(item, ensure_ascii=False) + '\n')


    # # Read the JSON file
    # original_data = read_json_file(input_file_path)

    # # # Convert: strip the redundant [] wrapper
    # # corrected_data = [dialog[0] for dialog in original_data]
    # processed_data = []
    # for item in original_data:
    #     processed_item = {
    #         "input": item["instruction"],
    #         "output": item["output"]
    #     }
    #     processed_data.append(processed_item)
    # # Save the converted data to a new JSON file
    # write_json_file(output_file_path, processed_data)

    # # print("数据转换完成,并保存到corrected_data.json文件中。")