File size: 690 Bytes
b9a8411
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from datasets import load_dataset
import pandas as pd
from translate import Translator

# Load the dataset identified by "iamtarun/python_code_instructions_18k_alpaca".
dataset = load_dataset("iamtarun/python_code_instructions_18k_alpaca")

# Take rows 900 through 999 of the 'train' split. The slice is then indexed
# by column name below, pulling out the three columns that get exported.
train_dataset = dataset['train'][900:1000]
instruction_list, input_list, output_list = (
    train_dataset[column_name]
    for column_name in ('instruction', 'input', 'output')
)

# NOTE: the step that translates each instruction to Chinese ("zh") is
# currently disabled; the instructions are exported untranslated.
# translator = Translator(to_lang="zh")
# for idx in range(len(instruction_list)):
#     instruction_list[idx] = translator.translate(instruction_list[idx])

# Assemble the three columns into a single table, one column per field.
columns = {
    'instruction': instruction_list,
    'input': input_list,
    'output': output_list,
}
init_df = pd.DataFrame(columns)

# Write the table to an Excel workbook; index=True also writes the row index.
excel_path = '/Users/yangweipeng/code/excel/python_code_instructions_18k_alpaca.xlsx'
init_df.to_excel(excel_path, index=True)