# Spaces: No application file
# %%
import pandas as pd

# Load the two line-item NER task exports. Both files are read as
# tab-separated (sep='\t') even though they carry a .csv extension.
# NOTE(review): `display` is not imported or defined in this file; presumably
# it is the IPython/Jupyter builtin (the `# %%` markers suggest cell-by-cell
# execution) -- confirm before running this as a plain script.
df_i = pd.read_csv('/home/tosi-n/ark/data/jack_line_item_ner_task_v2.csv', sep='\t')
df_ii = pd.read_csv('/home/tosi-n/ark/data/jack_line_item_ner_task.csv', sep='\t')
display(df_i.head())
display(df_ii.head())
# %%
# Keep only the three columns used downstream, stack both exports into one
# frame, and rename 'context' -> 'input' and 'response' -> 'output'
# ('instruction' keeps its name).
df_i = df_i[['context', 'instruction', 'response']]
df_ii = df_ii[['context', 'instruction', 'response']]
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# the row labels of both inputs are kept and therefore repeat.
df = pd.concat([df_i, df_ii], ignore_index=True)
df = df.rename(columns={'context': 'input', 'response': 'output'})
display(df.head())
# %%
# check for nan values (per-column count; shown as the cell's result)
df.isna().sum()
# %%
# drop every row that has a nan in any of the three columns
df = df.dropna()
# %%
# Write the cleaned rows as a JSON array of {column: value} objects;
# orient='records' does not include the index in the output.
df.to_json('/home/tosi-n/ark/data/line_item_and_alm_data_v1.json', orient='records')
# %%