Spaces:
Runtime error
Runtime error
File size: 2,196 Bytes
60e8b74 53950c0 60e8b74 303845a 60e8b74 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import os
import numpy as np
import gradio as gr
import pandas as pd
from zipfile import ZipFile
def zip_two_files(data1, data2, zip_path='my_csvs.zip'):
    """Write two DataFrames as CSV members of a single zip archive.

    Args:
        data1: DataFrame stored in the archive as ``primary_data.csv``.
        data2: DataFrame stored in the archive as ``secondary_data.csv``.
        zip_path: Where to write the archive. Defaults to ``my_csvs.zip``
            relative to the current working directory. The archive is opened
            in ``'w'`` mode, so an existing file at that path is replaced.

    Returns:
        ``zip_path``, the location of the archive that was written.
    """
    with ZipFile(zip_path, 'w') as csv_zip:
        # index=False: the row index is not written as a CSV column.
        csv_zip.writestr("primary_data.csv", data1.to_csv(index=False))
        csv_zip.writestr("secondary_data.csv", data2.to_csv(index=False))
    return zip_path
def _parse_column_list(text):
    """Split a comma-separated string into stripped, non-empty, unique names."""
    stripped = (part.strip() for part in (text or "").split(","))
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(name for name in stripped if name))


def get_split(csv_file, target_columns, primary_cols, combination_of):
    """Split a CSV into a primary and a secondary table and zip both.

    The secondary table holds one row per distinct combination of the
    ``combination_of`` columns (the first row seen for each combination),
    restricted to the columns that are not primary columns, plus a 1-based
    ``Id_Apres`` identifier. The primary table holds the primary columns and
    the target column of every input row, plus the ``Id_Apres`` of the
    combination that row belongs to.

    Args:
        csv_file: The uploaded CSV; either an object whose ``name`` attribute
            is the file path, or the path itself.
        target_columns: Name of a single target column; it is kept in the
            primary table.
        primary_cols: Comma-separated names of the primary columns.
        combination_of: Comma-separated names of the columns whose joint
            value identifies a secondary row. They must not be primary
            columns, because they have to appear in the secondary table.

    Returns:
        Whatever ``zip_two_files`` returns for the two resulting tables.

    Raises:
        ValueError: If ``combination_of`` names no column.
        KeyError: If a requested column is missing from the CSV, or a
            combination column is also a primary/target column.
    """
    csv_path = getattr(csv_file, "name", csv_file)
    df = pd.read_csv(csv_path, delimiter=",")

    combination_cols = _parse_column_list(combination_of)
    primary_names = _parse_column_list(primary_cols)
    target = (target_columns or "").strip()
    if target and target not in primary_names:
        primary_names.append(target)

    if not combination_cols:
        raise ValueError("At least one combination column is required.")
    missing = [
        col for col in primary_names + combination_cols if col not in df.columns
    ]
    if missing:
        raise KeyError(f"Columns not found in the CSV file: {missing}")
    overlapping = [col for col in combination_cols if col in primary_names]
    if overlapping:
        raise KeyError(
            f"Combination columns must not be primary/target columns: {overlapping}"
        )

    # Keep the CSV's own column order; a set difference would make the
    # secondary table's column order vary from run to run.
    primary_set = set(primary_names)
    secondary_cols = [col for col in df.columns if col not in primary_set]

    # One string key per row, built from the combination columns.
    df["Comb"] = df[combination_cols].astype(str).agg(",".join, axis=1)
    unique_keys = df["Comb"].unique()

    secondary_df = pd.DataFrame(
        {"Id_Apres": range(1, len(unique_keys) + 1), "Comb": unique_keys}
    )
    secondary_df = (
        secondary_df.merge(df[["Comb"] + secondary_cols], on="Comb")
        .drop_duplicates(subset=["Comb"])  # keep the first row per key
        .drop(columns=["Comb"])
        .reset_index(drop=True)
    )

    # Only the join keys and the identifier are needed to tag primary rows.
    id_lookup = secondary_df[combination_cols + ["Id_Apres"]]
    primary_df = df.merge(id_lookup, on=combination_cols)
    primary_df = primary_df[primary_names + ["Id_Apres"]]
    # reset_index() without drop=True keeps the row number as an "index" column.
    primary_df = primary_df.reset_index()

    return zip_two_files(primary_df, secondary_df)
# Web UI: one CSV upload and three text fields are passed, in this order, to
# get_split; the value it returns is offered back as a file download.
# Components come from the top-level gradio namespace because the older
# gr.inputs / gr.outputs namespaces are deprecated.
iface = gr.Interface(
    fn=get_split,
    inputs=[
        gr.File(label='CSV file'),
        gr.Textbox(label='Target Column'),
        gr.Textbox(label='Primary Column'),
        gr.Textbox(label='Combination of Column'),
    ],
    outputs=['file'],
    title='Data Splitter ',
    description="Split your data into 2 parts. Apres.io © 2022 All rights reserved.",
)
iface.launch(debug=True)