Spaces:
No application file
No application file
# %%
# Load both exports of the line-item NER task and preview them.
# NOTE(review): `display` is not imported in this script; presumably it is
# supplied by the interactive (IPython/Jupyter) session -- confirm.
import pandas as pd

df_i = pd.read_csv('/home/tosi-n/ark/data/jack_line_item_ner_task_v2.csv', sep='\t')
df_ii = pd.read_csv('/home/tosi-n/ark/data/jack_line_item_ner_task.csv', sep='\t')
for _frame in (df_i, df_ii):
    display(_frame.head())

# %%
# Keep only the three columns of interest from each frame, stack the two
# frames, and rename 'context' -> 'input' and 'response' -> 'output'.
_keep_columns = ['context', 'instruction', 'response']
df_i, df_ii = (_frame[_keep_columns] for _frame in (df_i, df_ii))
df = pd.concat([df_i, df_ii]).rename(
    columns={'context': 'input', 'response': 'output'}
)
display(df.head())

# %%
# Count the missing values in each column.
df.isna().sum()

# %%
# Discard every row that contains at least one missing value.
df = df.dropna()

# %%
# Write the cleaned data as a JSON array with one object per row.
df.to_json('/home/tosi-n/ark/data/line_item_and_alm_data_v1.json', orient='records')

# %%