|
|
import pandas as pd |
|
|
|
|
|
|
|
|
def process_Kit_Composition_and_relation(
    output_csv_path: str = 'data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type_and_id.csv',
    input_csv_path: str = 'data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv',
) -> pd.DataFrame:
    """Flatten the kit composition CSV into one row per kit reference.

    The input file is expected to hold the columns "Master Kit",
    "Master Kit Description", "Sub kit", "Sub kit description", "Prepack"
    and "Prepack Description". The three (name, description) column pairs
    are stacked on top of each other (masters first, then sub kits, then
    prepacks) and tagged with a ``kit_type`` of "master", "subkit" or
    "prepack".

    ``line_type`` is then assigned as follows:

    * every prepack row -> "mini load"
    * every subkit row -> "long line"
    * master rows whose ``kit_name`` occurs exactly once in the stacked
      table -> "long line"
    * all other master rows are left empty (NaN)

    ``line_id`` is 7 for "mini load", 6 for "long line" and NaN otherwise.

    Rows are NOT de-duplicated: the "occurs exactly once" rule relies on
    repeated rows being present, so the output has three rows per input row.

    Args:
        output_csv_path: Where the resulting table is written (without the
            index).
        input_csv_path: Location of the kit composition CSV to read.

    Returns:
        The stacked table with columns ``kit_name``, ``kit_description``,
        ``kit_type``, ``line_type`` and ``line_id``. Its index keeps the
        row labels of the input file, so labels repeat once per kit level.
    """
    df = pd.read_csv(input_csv_path)

    # (name column, description column, kit_type label) for each kit level.
    # Note the lowercase "d" in "Sub kit description": it mirrors the header
    # used by the source file.
    kit_levels = (
        ("Master Kit", "Master Kit Description", "master"),
        ("Sub kit", "Sub kit description", "subkit"),
        ("Prepack", "Prepack Description", "prepack"),
    )

    # rename()/assign() return new frames, so nothing is ever written into a
    # slice of `df` (which is what triggers SettingWithCopyWarning).
    frames = [
        df[[name_col, description_col]]
        .rename(columns={name_col: "kit_name", description_col: "kit_description"})
        .assign(kit_type=kit_type)
        for name_col, description_col, kit_type in kit_levels
    ]
    cleaned_df = pd.concat(frames)

    # Names that show up on exactly one row of the stacked table. Rows with a
    # missing kit_name are ignored by value_counts() and never match isin().
    occurrences = cleaned_df["kit_name"].value_counts()
    standalone_names = occurrences.index[occurrences == 1]
    is_standalone_master = (
        (cleaned_df["kit_type"] == "master")
        & cleaned_df["kit_name"].isin(standalone_names)
    )

    # Prepacks and sub kits get a fixed line type; masters start out empty
    # and only the standalone ones are filled in afterwards.
    cleaned_df["line_type"] = cleaned_df["kit_type"].map(
        {"prepack": "mini load", "subkit": "long line"}
    )
    cleaned_df.loc[is_standalone_master, "line_type"] = "long line"

    # Kept as float64 because rows without a line type hold NaN; this also
    # keeps the CSV representation ("6.0"/"7.0") stable.
    line_ids = {"mini load": 7, "long line": 6}
    cleaned_df["line_id"] = cleaned_df["line_type"].map(line_ids).astype("float64")

    cleaned_df.to_csv(output_csv_path, index=False)
    return cleaned_df