pratham0011 committed on
Commit
d11c1ab
·
verified ·
1 Parent(s): e2498d7

Upload 6 files

Browse files
Files changed (6) hide show
  1. employee.csv +51 -0
  2. main.py +98 -0
  3. requirements.txt +7 -0
  4. run_app.py +11 -0
  5. streamlit_app.py +204 -0
  6. student.db +0 -0
employee.csv ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ EMPLOYEE_ID,FIRST_NAME,LAST_NAME,EMAIL,PHONE_NUMBER,HIRE_DATE,JOB_ID,SALARY,COMMISSION_PCT,MANAGER_ID,DEPARTMENT_ID
2
+ 198,Donald,OConnell,DOCONNEL,650.507.9833,21-JUN-07,SH_CLERK,2600, - ,124,50
3
+ 199,Douglas,Grant,DGRANT,650.507.9844,13-JAN-08,SH_CLERK,2600, - ,124,50
4
+ 200,Jennifer,Whalen,JWHALEN,515.123.4444,17-SEP-03,AD_ASST,4400, - ,101,10
5
+ 201,Michael,Hartstein,MHARTSTE,515.123.5555,17-FEB-04,MK_MAN,13000, - ,100,20
6
+ 202,Pat,Fay,PFAY,603.123.6666,17-AUG-05,MK_REP,6000, - ,201,20
7
+ 203,Susan,Mavris,SMAVRIS,515.123.7777,07-JUN-02,HR_REP,6500, - ,101,40
8
+ 204,Hermann,Baer,HBAER,515.123.8888,07-JUN-02,PR_REP,10000, - ,101,70
9
+ 205,Shelley,Higgins,SHIGGINS,515.123.8080,07-JUN-02,AC_MGR,12008, - ,101,110
10
+ 206,William,Gietz,WGIETZ,515.123.8181,07-JUN-02,AC_ACCOUNT,8300, - ,205,110
11
+ 100,Steven,King,SKING,515.123.4567,17-JUN-03,AD_PRES,24000, - , - ,90
12
+ 101,Neena,Kochhar,NKOCHHAR,515.123.4568,21-SEP-05,AD_VP,17000, - ,100,90
13
+ 102,Lex,De Haan,LDEHAAN,515.123.4569,13-JAN-01,AD_VP,17000, - ,100,90
14
+ 103,Alexander,Hunold,AHUNOLD,590.423.4567,03-JAN-06,IT_PROG,9000, - ,102,60
15
+ 104,Bruce,Ernst,BERNST,590.423.4568,21-MAY-07,IT_PROG,6000, - ,103,60
16
+ 105,David,Austin,DAUSTIN,590.423.4569,25-JUN-05,IT_PROG,4800, - ,103,60
17
+ 106,Valli,Pataballa,VPATABAL,590.423.4560,05-FEB-06,IT_PROG,4800, - ,103,60
18
+ 107,Diana,Lorentz,DLORENTZ,590.423.5567,07-FEB-07,IT_PROG,4200, - ,103,60
19
+ 108,Nancy,Greenberg,NGREENBE,515.124.4569,17-AUG-02,FI_MGR,12008, - ,101,100
20
+ 109,Daniel,Faviet,DFAVIET,515.124.4169,16-AUG-02,FI_ACCOUNT,9000, - ,108,100
21
+ 110,John,Chen,JCHEN,515.124.4269,28-SEP-05,FI_ACCOUNT,8200, - ,108,100
22
+ 111,Ismael,Sciarra,ISCIARRA,515.124.4369,30-SEP-05,FI_ACCOUNT,7700, - ,108,100
23
+ 112,Jose Manuel,Urman,JMURMAN,515.124.4469,07-MAR-06,FI_ACCOUNT,7800, - ,108,100
24
+ 113,Luis,Popp,LPOPP,515.124.4567,07-DEC-07,FI_ACCOUNT,6900, - ,108,100
25
+ 114,Den,Raphaely,DRAPHEAL,515.127.4561,07-DEC-02,PU_MAN,11000, - ,100,30
26
+ 115,Alexander,Khoo,AKHOO,515.127.4562,18-MAY-03,PU_CLERK,3100, - ,114,30
27
+ 116,Shelli,Baida,SBAIDA,515.127.4563,24-DEC-05,PU_CLERK,2900, - ,114,30
28
+ 117,Sigal,Tobias,STOBIAS,515.127.4564,24-JUL-05,PU_CLERK,2800, - ,114,30
29
+ 118,Guy,Himuro,GHIMURO,515.127.4565,15-NOV-06,PU_CLERK,2600, - ,114,30
30
+ 119,Karen,Colmenares,KCOLMENA,515.127.4566,10-AUG-07,PU_CLERK,2500, - ,114,30
31
+ 120,Matthew,Weiss,MWEISS,650.123.1234,18-JUL-04,ST_MAN,8000, - ,100,50
32
+ 121,Adam,Fripp,AFRIPP,650.123.2234,10-APR-05,ST_MAN,8200, - ,100,50
33
+ 122,Payam,Kaufling,PKAUFLIN,650.123.3234,01-MAY-03,ST_MAN,7900, - ,100,50
34
+ 123,Shanta,Vollman,SVOLLMAN,650.123.4234,10-OCT-05,ST_MAN,6500, - ,100,50
35
+ 124,Kevin,Mourgos,KMOURGOS,650.123.5234,16-NOV-07,ST_MAN,5800, - ,100,50
36
+ 125,Julia,Nayer,JNAYER,650.124.1214,16-JUL-05,ST_CLERK,3200, - ,120,50
37
+ 126,Irene,Mikkilineni,IMIKKILI,650.124.1224,28-SEP-06,ST_CLERK,2700, - ,120,50
38
+ 127,James,Landry,JLANDRY,650.124.1334,14-JAN-07,ST_CLERK,2400, - ,120,50
39
+ 128,Steven,Markle,SMARKLE,650.124.1434,08-MAR-08,ST_CLERK,2200, - ,120,50
40
+ 129,Laura,Bissot,LBISSOT,650.124.5234,20-AUG-05,ST_CLERK,3300, - ,121,50
41
+ 130,Mozhe,Atkinson,MATKINSO,650.124.6234,30-OCT-05,ST_CLERK,2800, - ,121,50
42
+ 131,James,Marlow,JAMRLOW,650.124.7234,16-FEB-05,ST_CLERK,2500, - ,121,50
43
+ 132,TJ,Olson,TJOLSON,650.124.8234,10-APR-07,ST_CLERK,2100, - ,121,50
44
+ 133,Jason,Mallin,JMALLIN,650.127.1934,14-JUN-04,ST_CLERK,3300, - ,122,50
45
+ 134,Michael,Rogers,MROGERS,650.127.1834,26-AUG-06,ST_CLERK,2900, - ,122,50
46
+ 135,Ki,Gee,KGEE,650.127.1734,12-DEC-07,ST_CLERK,2400, - ,122,50
47
+ 136,Hazel,Philtanker,HPHILTAN,650.127.1634,06-FEB-08,ST_CLERK,2200, - ,122,50
48
+ 137,Renske,Ladwig,RLADWIG,650.121.1234,14-JUL-03,ST_CLERK,3600, - ,123,50
49
+ 138,Stephen,Stiles,SSTILES,650.121.2034,26-OCT-05,ST_CLERK,3200, - ,123,50
50
+ 139,John,Seo,JSEO,650.121.2019,12-FEB-06,ST_CLERK,2700, - ,123,50
51
+ 140,Joshua,Patel,JPATEL,650.121.1834,06-APR-06,ST_CLERK,2500, - ,123,50
main.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ import sqlite3
4
+ import pandas as pd
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import google.generativeai as genai
8
+
9
# ASGI application object; the /query route below is registered on it.
app = FastAPI()

# Read settings from a local .env file into the process environment, then
# hand the Google API key to the Gemini client library.
load_dotenv()
genai.configure(api_key=os.environ.get('GOOGLE_API_KEY'))
14
+
15
class Query(BaseModel):
    """Request body accepted by the /query endpoint."""

    # Natural-language question to be translated into a query.
    question: str
    # Which backend to run against: "SQL Database" selects the SQLite
    # database; any other value is handled as the employee CSV.
    data_source: str
18
+
19
def get_gemini_response(question, prompt):
    """Ask the 'gemini-pro' model to translate ``question`` into a query.

    The instruction ``prompt`` is sent first, followed by the user's
    ``question``; the text of the model's reply is returned unchanged.
    """
    gemini = genai.GenerativeModel('gemini-pro')
    parts = [prompt, question]
    return gemini.generate_content(parts).text
23
+
24
def get_csv_columns():
    """Return the column names of ``employee.csv`` as a list of strings.

    Only the header row is parsed (``nrows=0``), so the cost does not grow
    with the number of data rows in the file.
    """
    header_only = pd.read_csv('employee.csv', nrows=0)
    return header_only.columns.tolist()
27
+
28
# Column names of employee.csv, read once at import time; used in the CSV
# prompt below and echoed back in the /query response.
csv_columns = get_csv_columns()

# Instruction prompt sent to Gemini ahead of the user's question when the
# SQL data source is selected. The reply is executed verbatim, so the prompt
# asks for bare SQL with no code-fence wrapper.
sql_prompt = """
You are an expert in converting English questions to SQL code!
The SQL database has the name STUDENT and has the following Columns - NAME, CLASS, SECTION

For example:
- How many entries of records are present? SQL command: SELECT COUNT(*) FROM STUDENT;
- Tell me all the students studying in Data Science class? SQL command: SELECT * FROM STUDENT where CLASS="Data Science";

Also, the SQL code should not have ``` or ''' in the beginning or at the end, and SQL word in output.
Ensure that you only generate valid SQL queries, not pandas or Python code.
"""

# Instruction prompt for the CSV data source. The examples use column names
# that exist in employee.csv and literal values only, because the reply is
# evaluated as-is: an unknown column or an undefined placeholder name would
# fail at evaluation time.
csv_prompt = f"""
You are an expert in analyzing CSV data and converting English questions to pandas query syntax.
The CSV file is named 'employee.csv' and contains employee information.
The available columns in the CSV file are: {', '.join(csv_columns)}

For example:
- How many employees are there? Pandas query: len(df)
- List all employees in department 50. Pandas query: df[df['DEPARTMENT_ID'] == 50]
- Show the employee with ID 100. Pandas query: df[df['EMPLOYEE_ID'] == 100]

Provide only the pandas query syntax without any additional explanation or markdown formatting.
Do not include 'df = ' or any variable assignment in your response.
Make sure to use only the columns that are available in the CSV file.
Ensure that you only generate valid pandas queries, not SQL or other types of code.
"""
57
+
58
def execute_sql_query(query):
    """Run ``query`` against ``student.db`` and return every result row.

    The database is opened read-only because the SQL text is generated by a
    language model from user input; a statement that tries to modify the
    database is rejected by SQLite instead of being applied.

    Args:
        query: SQL text containing a single statement.

    Returns:
        The rows produced by the statement, as a list of tuples.

    Raises:
        HTTPException: status 400 carrying the SQLite message when the
            database cannot be opened or the statement fails.
    """
    conn = None
    try:
        # mode=ro also prevents SQLite from silently creating an empty
        # student.db when the file is missing.
        conn = sqlite3.connect('file:student.db?mode=ro', uri=True)
        return conn.execute(query).fetchall()
    except (sqlite3.Error, sqlite3.Warning) as e:
        # sqlite3.Warning is what some Python versions raise for
        # multi-statement input; it is not a subclass of sqlite3.Error.
        raise HTTPException(status_code=400, detail=f"SQL Error: {str(e)}") from e
    finally:
        if conn is not None:
            conn.close()
69
+
70
def execute_pandas_query(query):
    """Evaluate a pandas expression against ``employee.csv``.

    The expression is evaluated with ``df`` bound to the loaded CSV and
    ``pd`` bound to pandas. The result is converted to plain Python
    containers so it can be serialized in the HTTP response.

    Args:
        query: A single Python expression, e.g. ``len(df)``.

    Returns:
        A list of row dicts for a DataFrame, a dict for a Series, the
        ``tolist()`` form for numpy scalars/arrays and pandas indexes, or
        the raw value otherwise.

    Raises:
        HTTPException: status 400 when evaluation or conversion fails.
    """
    df = pd.read_csv('employee.csv')
    try:
        # SECURITY(review): eval() executes arbitrary Python. The expression
        # comes from a language model driven by user input, so this is a
        # code-execution risk; builtins are still reachable from this
        # namespace. Replace with a restricted evaluator before exposing
        # this endpoint to untrusted users.
        result = eval(query, {'df': df, 'pd': pd})
        if isinstance(result, pd.DataFrame):
            return result.to_dict(orient='records')
        if isinstance(result, pd.Series):
            return result.to_dict()
        if hasattr(result, 'tolist'):
            # numpy scalars (e.g. from .max()/.mean()), numpy arrays (e.g.
            # from .unique()) and pandas Index objects are not JSON
            # serializable as-is; tolist() yields native Python values.
            return result.tolist()
        return result
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Pandas Error: {str(e)}") from e
82
+
83
@app.post("/query")
async def process_query(query: Query):
    """Translate a natural-language question into a query and run it.

    ``query.data_source == "SQL Database"`` routes to the SQLite database;
    any other value is treated as the employee CSV.

    Returns:
        A dict with the generated ``query`` text and its ``result``; CSV
        responses additionally carry the available ``columns``.

    Raises:
        HTTPException: status 400 when the generated query fails to run.
    """
    # Local import keeps this block self-contained; asyncio is stdlib.
    import asyncio

    use_sql = query.data_source == "SQL Database"
    prompt = sql_prompt if use_sql else csv_prompt

    # The Gemini call and the query execution are blocking; run them in a
    # worker thread so this coroutine does not stall the event loop (and
    # every other request with it) while they are in progress.
    ai_response = await asyncio.to_thread(get_gemini_response, query.question, prompt)
    try:
        if use_sql:
            result = await asyncio.to_thread(execute_sql_query, ai_response)
            return {"query": ai_response, "result": result}
        result = await asyncio.to_thread(execute_pandas_query, ai_response)
        return {"query": ai_response, "result": result, "columns": csv_columns}
    except HTTPException as e:
        source_label = "SQL" if use_sql else "pandas"
        raise HTTPException(
            status_code=400,
            detail=f"Error in {source_label} query: {e.detail}",
        ) from e
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ streamlit
4
+ pandas
5
+ python-dotenv
6
+ google-generativeai
7
+ requests
run_app.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+
3
# Start Streamlit (the UI defined in streamlit_app.py)
streamlit_process = subprocess.Popen(["streamlit", "run", "streamlit_app.py"])

# Start Uvicorn (the FastAPI backend, main:app). If it cannot be launched,
# stop the already-running Streamlit process instead of leaving it orphaned.
try:
    uvicorn_process = subprocess.Popen(["uvicorn", "main:app", "--reload"])
except OSError:
    streamlit_process.terminate()
    streamlit_process.wait()
    raise

# Wait for the processes to finish. On Ctrl-C (or any other exit path) make
# sure neither child process is left running in the background.
try:
    streamlit_process.wait()
    uvicorn_process.wait()
except KeyboardInterrupt:
    pass
finally:
    for process in (streamlit_process, uvicorn_process):
        if process.poll() is None:
            process.terminate()
            process.wait()
streamlit_app.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import streamlit as st
2
+ # import requests
3
+ # import json
4
+
5
+ # st.set_page_config(page_title="QueryMate: Text to SQL & CSV")
6
+
7
+ # st.markdown("# QueryMate: Text to SQL & CSV πŸ’¬πŸŒπŸ—„οΈ")
8
+ # st.description('''Welcome to QueryMate, your friendly assistant for converting natural language queries into SQL statements and CSV outputs!
9
+ # Let's get started with your data queries!''')
10
+ # # Load chat history
11
+ # def load_chat_history():
12
+ # try:
13
+ # with open('chat_history.json', 'r') as f:
14
+ # return json.load(f)
15
+ # except FileNotFoundError:
16
+ # return []
17
+
18
+ # def save_chat_history(history):
19
+ # with open('chat_history.json', 'w') as f:
20
+ # json.dump(history, f)
21
+
22
+ # chat_history = load_chat_history()
23
+
24
+ # # Data source selection
25
+ # data_source = st.radio("Select Data Source:", ('SQL Database', 'Employee CSV'))
26
+
27
+ # # Predefined queries
28
+ # predefined_queries = {
29
+ # 'SQL Database': [
30
+ # 'Print all students',
31
+ # 'Count total number of students',
32
+ # 'List students in Data Science class'
33
+ # ],
34
+ # 'Employee CSV': [
35
+ # 'Print employees having the department id equal to 100',
36
+ # 'Count total number of employees',
37
+ # 'List Top 5 employees according to salary in descending order'
38
+ # ]
39
+ # }
40
+
41
+ # st.markdown(f"### Predefined Queries for {data_source}")
42
+
43
+ # # Create buttons for predefined queries
44
+ # for query in predefined_queries[data_source]:
45
+ # if st.button(query):
46
+ # st.session_state.predefined_query = query
47
+
48
+ # st.markdown("### Enter Your Question")
49
+ # question = st.text_input("Input: ", key="input", value=st.session_state.get('predefined_query', ''))
50
+
51
+ # # Submit button
52
+ # submit = st.button("Submit")
53
+
54
+ # if submit:
55
+ # # Send request to FastAPI backend
56
+ # response = requests.post("http://localhost:8000/query",
57
+ # json={"question": question, "data_source": data_source})
58
+ # if response.status_code == 200:
59
+ # data = response.json()
60
+ # st.markdown(f"## Generated {'SQL' if data_source == 'SQL Database' else 'Pandas'} Query")
61
+ # st.code(data['query'])
62
+
63
+ # st.markdown("## Query Results")
64
+ # st.write(data['result'])
65
+
66
+ # if data_source == 'Employee CSV':
67
+ # st.markdown("## Available CSV Columns")
68
+ # st.write(data['columns'])
69
+
70
+ # # Update chat history
71
+ # chat_history.append(f"User ({data_source}): {question}")
72
+ # chat_history.append(f"AI: {data['query']}")
73
+ # save_chat_history(chat_history)
74
+ # else:
75
+ # st.error(f"Error processing your request: {response.text}")
76
+
77
+ # # Clear the predefined query from session state
78
+ # st.session_state.pop('predefined_query', None)
79
+
80
+ # # Display chat history
81
+ # st.markdown("## Chat History")
82
+ # for message in chat_history:
83
+ # st.text(message)
84
+
85
+ # # Option to clear chat history
86
+ # if st.button("Clear Chat History"):
87
+ # chat_history.clear()
88
+ # save_chat_history(chat_history)
89
+ # st.success("Chat history cleared!")
90
+
91
+
92
+
93
+
94
+
95
+ import streamlit as st
96
+ import requests
97
+ import json
98
+ import pandas as pd
99
+
100
# Browser-tab title for the app.
st.set_page_config(page_title="QueryMate: Text to SQL & CSV")

# Page heading and welcome text. The emoji are written as real Unicode
# characters; they had been corrupted into mis-decoded byte sequences.
st.markdown("# QueryMate: Text to SQL & CSV 💬🗄️")
st.markdown('''Welcome to QueryMate, your friendly assistant for converting natural language queries into SQL statements and CSV outputs!
Let's get started with your data queries!''')
105
+
106
+ # Load chat history
107
def load_chat_history():
    """Return the chat messages stored in ``chat_history.json``.

    Returns:
        The stored list of messages, or an empty list when the file is
        missing, empty, not valid JSON, or does not contain a JSON list
        (callers append to the returned value).
    """
    try:
        with open('chat_history.json', 'r', encoding='utf-8') as f:
            history = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # A missing or corrupt history file should not stop the app from
        # starting; begin with a clean history instead.
        return []
    return history if isinstance(history, list) else []
113
+
114
def save_chat_history(history):
    """Overwrite ``chat_history.json`` with ``history`` serialized as JSON."""
    with open('chat_history.json', mode='w') as sink:
        json.dump(history, fp=sink)
117
+
118
# Messages persisted by earlier runs of the app.
chat_history = load_chat_history()

# Let the user choose which backend the question is aimed at.
data_source = st.radio("Select Data Source:", ('SQL Database', 'Employee CSV'))

# One-click example questions, keyed by the data source they apply to.
predefined_queries = {
    'SQL Database': [
        'Print all students',
        'Count total number of students',
        'List students in Data Science class',
    ],
    'Employee CSV': [
        'Print employees having the department id equal to 100',
        'Count total number of employees',
        'List Top 5 employees according to salary in descending order',
    ],
}

st.markdown(f"### Predefined Queries for {data_source}")

# One button per example; a click stores the example in session state so it
# can pre-fill the text box below.
for suggestion in predefined_queries[data_source]:
    clicked = st.button(suggestion)
    if clicked:
        st.session_state.predefined_query = suggestion

st.markdown("### Enter Your Question")
prefill = st.session_state.get('predefined_query', '')
question = st.text_input("Input: ", key="input", value=prefill)

# Submit button
submit = st.button("Submit")
149
+
150
if submit:
    # Send the question to the FastAPI backend. A timeout keeps the page
    # from hanging forever, and connection failures are reported in the UI
    # instead of surfacing as an unhandled exception.
    try:
        response = requests.post(
            "http://localhost:8000/query",
            json={"question": question, "data_source": data_source},
            timeout=60,
        )
    except requests.RequestException as exc:
        response = None
        st.error(f"Error processing your request: {exc}")

    if response is not None and response.status_code == 200:
        data = response.json()
        st.markdown(f"## Generated {'SQL' if data_source == 'SQL Database' else 'Pandas'} Query")
        st.code(data['query'])

        st.markdown("## Query Results")
        result = data['result']

        if isinstance(result, list) and result:
            if isinstance(result[0], (dict, list)):
                # CSV queries return a list of row dicts; SQL queries return
                # a list of row lists. Both render directly as a table.
                st.dataframe(pd.DataFrame(result))
            else:
                # Flat list of values: show it as a single column.
                st.dataframe(pd.DataFrame(result, columns=['Result']))
        elif isinstance(result, dict):
            # Mapping result (e.g. a pandas Series converted to a dict).
            st.table(result)
        else:
            # Scalar results or empty results
            st.write(result)

        if data_source == 'Employee CSV':
            st.markdown("## Available CSV Columns")
            st.write(data['columns'])

        # Update chat history (emoji restored from mis-decoded bytes).
        chat_history.append(f"👨‍💻({data_source}): {question}")
        chat_history.append(f"🤖: {data['query']}")
        save_chat_history(chat_history)
    elif response is not None:
        st.error(f"Error processing your request: {response.text}")

# Clear the predefined query from session state
st.session_state.pop('predefined_query', None)
194
+
195
# Show every stored message in the order it was saved.
st.markdown("## Chat History")
for entry in chat_history:
    st.text(entry)

# Let the user wipe the stored history, both in memory and on disk.
clear_requested = st.button("Clear Chat History")
if clear_requested:
    del chat_history[:]
    save_chat_history(chat_history)
    st.success("Chat history cleared!")
student.db ADDED
Binary file (8.19 kB). View file