Spaces:
Sleeping
Sleeping
File size: 5,374 Bytes
a0a58e9 f6d135d a0a58e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import sys
import os
import csv
import streamlit as st
import pandas as pd
from datetime import date
import pandasql as psql
import base64
################################
######### Variables ############
################################
# -- Loading Variables
script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
# -- Loading Session Data
if 'project_data' not in st.session_state:
st.session_state.project_data = pd.read_csv(script_directory+'/data/project.csv')
if 'mapping_df' not in st.session_state:
st.session_state.mapping_df = pd.DataFrame(columns=["Sno","DestinationColumn","SourceColumn","Type","Expression"])
################################
####### GenericFunctions #######
################################
# -- Load Mapping File
def load_mapping_file():
if 'project_name' in st.session_state:
try:
# print("project_name - "+st.session_state.project_name)
cond = (st.session_state.project_data['Project'] == st.session_state.project_name)
file_name = script_directory+'/data/'+str(st.session_state.project_data[cond].Id.values[0])+"_"+st.session_state.project_data[cond].Source.values[0]+"_"+st.session_state.project_data[cond].Destination.values[0]+'.csv'
# print("file_name - "+file_name)
st.session_state.mapping_df = pd.read_csv(file_name,sep="|",quoting=csv.QUOTE_NONE)
except Exception as e:
st.session_state.mapping_df = pd.DataFrame(columns=["Sno","DestinationColumn","SourceColumn","Type","Expression"])
st.error(f"Unable to load mapping file - {e}")
################################
####### Display of data ########
################################
# -- Streamlit Settings
st.set_page_config(layout='wide')
st.title("Data Generation")
# -- Add Project Dropdown
st.text("")
st.text("")
st.text("")
col1, col2, col3 = st.columns(3)
project_name = col1.selectbox(
'Select Project',
st.session_state.project_data['Project'],
key="project_name",
on_change=load_mapping_file()
)
# -- Upload Data
if len(st.session_state.mapping_df)>0:
st.text("")
st.text("")
st.text("")
col1, col2, col3 = st.columns(3)
cond = (st.session_state.project_data['Project'] == st.session_state.project_name)
result = st.session_state.project_data[cond].Source.values[0]
with col1:
source_data_file = st.file_uploader(
"Source data file name - "+str(result)+".csv",
type="csv",
key="source_data_file",
accept_multiple_files=True
)
# -- Button Show Data
st.text("")
st.text("")
col1, col2, col3 = st.columns([0.3,0.5,2.2])
if col1.button("Show Data"):
if source_data_file is not None:
for file in source_data_file:
df = pd.read_csv(file)
# Update dataframe with Pandas Mapping Fields
for index, row in st.session_state.mapping_df.iterrows():
if row['Type'] == 'Pandas':
column_name = row['DestinationColumn']
expression = row['Expression'].replace("'", "")
df[column_name] = eval(expression)
# Creating SQL Statement
sql_statement = "SELECT "
for index, row in st.session_state.mapping_df.iterrows():
destination_column = row['DestinationColumn']
source_column = row['SourceColumn']
column_type = row['Type']
expression = row['Expression'] if 'Expression' in row else None
if column_type == 'Constant':
# Create a dummy column with the provided expression
sql_statement += str(expression) + ' AS "' + str(destination_column) + '",'
elif column_type == 'Pandas':
sql_statement += '"' + str(destination_column) + '" AS "' + str(destination_column) + '",'
else:
# Use the source column as-is
sql_statement += '"' + str(source_column) + '" AS "' + str(destination_column) + '",'
# Remove the trailing comma and space
sql_statement = sql_statement[:-1]+" from df"
# st.write(sql_statement+" from df")
st.session_state.df = df
st.session_state.sql_statement = sql_statement
# Display Data
st.dataframe(df)
# -- Button Generate Data
if col2.button("Generate Data"):
df = st.session_state.df
if len(df) == 0 :
st.error("No records available to run query, click on Show Data")
else:
sql_query = st.text_area(label="Sql Query", value=st.session_state.sql_statement, key="sql_query", height=200)
try:
result_df = psql.sqldf(sql_query, locals())
st.write("Query Result")
st.dataframe(result_df)
csv_data = result_df.to_csv(index=False,header=False,sep="|",quoting=csv.QUOTE_NONE)
b64 = base64.b64encode(csv_data.encode()).decode()
st.markdown(f'<a href="data:file/csv;base64,{b64}" download="result.csv">Download Result CSV</a>', unsafe_allow_html=True)
except Exception as e:
st.error(f"Error executing SQL query: {str(e)}") |