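"""Streamlit app for generating destination data from an uploaded source CSV.

A project selected from project.csv determines a pipe-separated mapping file that
describes how each destination column is built: copied from a source column,
filled with a constant expression, or computed with a Pandas expression. The
mapping is turned into a SQL SELECT that is run against the uploaded data with
pandasql, and the result can be downloaded as a pipe-separated CSV.
"""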
import sys
import os
import csv
import streamlit as st
import pandas as pd
from datetime import date
import pandasql as psql
import base64

################################
######### Variables ############
################################
# -- Loading Variables
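# Resolve data file paths relative to the script's own location so the app works
# regardless of the current working directory.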
script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))

# -- Loading Session Data
if 'project_data' not in st.session_state:
    st.session_state.project_data = pd.read_csv(script_directory+'/data/project.csv')

if 'mapping_df' not in st.session_state:
    st.session_state.mapping_df = pd.DataFrame(columns=["Sno","DestinationColumn","SourceColumn","Type","Expression"])
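# Mapping files are pipe-separated CSVs with the columns above. Each row maps one
# destination column from a source column (the default), a constant expression
# (Type == 'Constant'), or a Pandas expression evaluated against the source
# dataframe (Type == 'Pandas'), e.g. (hypothetical row):
#   1|age_months|age|Pandas|df["age"] * 12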

################################
####### GenericFunctions #######
################################
# -- Load Mapping File
def load_mapping_file():
    if 'project_name' in st.session_state:
        try:
            cond = (st.session_state.project_data['Project'] == st.session_state.project_name)
            proj = st.session_state.project_data[cond]
            file_name = f"{script_directory}/data/{proj.Id.values[0]}_{proj.Source.values[0]}_{proj.Destination.values[0]}.csv"
            st.session_state.mapping_df = pd.read_csv(file_name, sep="|", quoting=csv.QUOTE_NONE)
        except Exception as e:
            st.session_state.mapping_df = pd.DataFrame(columns=["Sno","DestinationColumn","SourceColumn","Type","Expression"])
            st.error(f"Unable to load mapping file - {e}")

################################
####### Display of data ########
################################
# -- Streamlit Settings
st.set_page_config(layout='wide')
st.title("Data Generation")

# -- Add Project Dropdown
st.text("")
st.text("")
st.text("")
col1, col2, col3 = st.columns(3)
project_name = col1.selectbox(
                                'Select Project',
                                st.session_state.project_data['Project'],
                                key="project_name",
                                # Pass the callback itself, not its return value
                                on_change=load_mapping_file
                             )
# The callback only fires on changes, so also load the initially selected mapping
if len(st.session_state.mapping_df) == 0:
    load_mapping_file()

# -- Upload Data
if len(st.session_state.mapping_df)>0:
    st.text("")
    st.text("")
    st.text("")
    col1, col2, col3 = st.columns(3)

    cond = (st.session_state.project_data['Project'] == st.session_state.project_name)
    result = st.session_state.project_data[cond].Source.values[0]
    with col1:
        source_data_file = st.file_uploader(
            "Source data file name - "+str(result)+".csv",
            type="csv",
            key="source_data_file",
            accept_multiple_files=True
        )

    # -- Button Show Data
    st.text("")
    st.text("")
    col1, col2, col3 = st.columns([0.3,0.5,2.2])

    if col1.button("Show Data"):
        # file_uploader with accept_multiple_files=True returns a (possibly empty) list
        if not source_data_file:
            st.error("Please upload a source data file first")
            st.stop()
        # Combine all uploaded files into a single dataframe
        df = pd.concat([pd.read_csv(file) for file in source_data_file], ignore_index=True)

        # Update dataframe with Pandas mapping fields
        for index, row in st.session_state.mapping_df.iterrows():
            if row['Type'] == 'Pandas':
                column_name = row['DestinationColumn']
                expression = row['Expression'].replace("'", "")
                # Note: eval() runs arbitrary code from the mapping file; only use trusted mappings
                df[column_name] = eval(expression)

        # Creating SQL Statement
        sql_statement = "SELECT "
        for index, row in st.session_state.mapping_df.iterrows():
            destination_column = row['DestinationColumn']
            source_column = row['SourceColumn']
            column_type = row['Type']
            expression = row['Expression'] if 'Expression' in row else None

            if column_type == 'Constant':
                # Create a dummy column with the provided expression
                sql_statement += str(expression) + ' AS "' + str(destination_column) + '",'
            elif column_type == 'Pandas':
                sql_statement += '"' + str(destination_column) + '" AS "' + str(destination_column) + '",'
            else:
                # Use the source column as-is
                sql_statement += '"' + str(source_column) + '" AS "' + str(destination_column) + '",'

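        # Example (hypothetical mapping): a Constant row (Expression 123, destination "price")
        # and a direct row (source "first_name", destination "name") would build:
        #   SELECT 123 AS "price","first_name" AS "name" from df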
        
        # Remove the trailing comma and append the FROM clause
        sql_statement = sql_statement[:-1] + " from df"

        st.session_state.df = df
        st.session_state.sql_statement = sql_statement
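        # df and sql_statement are kept in session state so "Generate Data" can reuse them
        # on a later rerun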

        # Display Data
        st.dataframe(df)

    # -- Button Generate Data
    if col2.button("Generate Data"):
        # "Show Data" must have been clicked first so df and sql_statement exist in session state
        if 'df' not in st.session_state or len(st.session_state.df) == 0:
            st.error("No records available to run query, click on Show Data")
        else:
            df = st.session_state.df
            sql_query = st.text_area(label="Sql Query", value=st.session_state.sql_statement, key="sql_query", height=200)
            try:
                result_df = psql.sqldf(sql_query, locals())
                st.write("Query Result")
                st.dataframe(result_df)
                csv_data = result_df.to_csv(index=False,header=False,sep="|",quoting=csv.QUOTE_NONE)
                b64 = base64.b64encode(csv_data.encode()).decode()
                st.markdown(f'<a href="data:file/csv;base64,{b64}" download="result.csv">Download Result CSV</a>', unsafe_allow_html=True)
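                # st.download_button is an alternative to this hand-built base64 data-URI link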
            except Exception as e:
                st.error(f"Error executing SQL query: {str(e)}")