Spaces:
Sleeping
Sleeping
Upload 11 files
Browse files- app.py +58 -73
- requirements.txt +5 -4
app.py
CHANGED
@@ -9,6 +9,7 @@ from langchain.prompts import PromptTemplate
|
|
9 |
from utils.extarct_db import extract_name_and_colums , read_excel_query , read_sql_query
|
10 |
from typing import List
|
11 |
import numpy as np
|
|
|
12 |
|
13 |
import matplotlib.pyplot as plt
|
14 |
|
@@ -40,8 +41,6 @@ def get_gemini_response(question: str, table_name: str, column_names: List[str])
|
|
40 |
the SQL command will be something like this SELECT COUNT(*) FROM {table_name} ;
|
41 |
\nExample 2 - Tell me all the students studying in Data Science class?,
|
42 |
the SQL command will be something like this SELECT * FROM {table_name}
|
43 |
-
where CLASS="Data Science";
|
44 |
-
and remove "_" underscore between colum names show like Full Name
|
45 |
also the sql code should not have ``` in beginning or end and sql word in output
|
46 |
|
47 |
"""
|
@@ -63,14 +62,16 @@ if uploaded_file is not None:
|
|
63 |
|
64 |
|
65 |
file_type = uploaded_file.type
|
66 |
-
allowed_file_extensions = ["application/octet-stream"]
|
67 |
|
68 |
|
69 |
if file_type in allowed_file_extensions:
|
70 |
|
71 |
print(uploaded_file.name)
|
72 |
question=st.text_input("Input Prompt: " , key="input")
|
73 |
-
submit = st.button("Query")
|
|
|
|
|
74 |
if file_type == "application/octet-stream":
|
75 |
|
76 |
|
@@ -108,92 +109,76 @@ if uploaded_file is not None:
|
|
108 |
df = pd.DataFrame(data)
|
109 |
st.subheader("The Response is ")
|
110 |
st.table(df)
|
111 |
-
# for row in response:
|
112 |
-
# print(row)
|
113 |
-
# # st.header(row)
|
114 |
-
|
115 |
-
|
116 |
|
117 |
-
|
118 |
-
|
|
|
|
|
|
|
|
|
119 |
|
120 |
-
|
121 |
-
|
122 |
-
# chart_data = pd.DataFrame(df, columns=column_names)
|
123 |
-
|
124 |
-
# st.bar_chart(chart_data)
|
125 |
|
126 |
|
127 |
|
128 |
|
129 |
|
130 |
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
# if file_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
|
146 |
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
|
159 |
|
160 |
-
|
161 |
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
|
166 |
-
|
167 |
|
168 |
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
|
173 |
-
|
174 |
-
|
175 |
-
# # print(response)
|
176 |
-
# formatted_response=model.generate_content(f"Format this {response} in the table format")
|
177 |
-
# response_text = formatted_response.candidates[0].content.parts[0].text
|
178 |
-
# # Splitting the response text into lines
|
179 |
-
# lines = response_text.strip().split('\n')
|
180 |
-
|
181 |
-
# # Extracting column names and data
|
182 |
-
# columns = [col.strip() for col in lines[0].split('|') if col.strip()]
|
183 |
-
# data = [dict(zip(columns, [item.strip() for item in line.split('|') if item.strip()])) for line in lines[2:]]
|
184 |
-
|
185 |
-
# # Creating DataFrame
|
186 |
-
# df = pd.DataFrame(data)
|
187 |
-
# st.subheader("The Response is ")
|
188 |
-
# st.table(df)
|
189 |
-
# # for row in response:
|
190 |
-
# # print(row)
|
191 |
-
# # # st.header(row)
|
192 |
-
|
193 |
|
194 |
-
|
195 |
-
|
196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
|
199 |
|
|
|
9 |
from utils.extarct_db import extract_name_and_colums , read_excel_query , read_sql_query
|
10 |
from typing import List
|
11 |
import numpy as np
|
12 |
+
import plotly.express as px
|
13 |
|
14 |
import matplotlib.pyplot as plt
|
15 |
|
|
|
41 |
the SQL command will be something like this SELECT COUNT(*) FROM {table_name} ;
|
42 |
\nExample 2 - Tell me all the students studying in Data Science class?,
|
43 |
the SQL command will be something like this SELECT * FROM {table_name}
|
|
|
|
|
44 |
also the sql code should not have ``` in beginning or end and sql word in output
|
45 |
|
46 |
"""
|
|
|
62 |
|
63 |
|
64 |
file_type = uploaded_file.type
|
65 |
+
allowed_file_extensions = ["application/octet-stream", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"]
|
66 |
|
67 |
|
68 |
if file_type in allowed_file_extensions:
|
69 |
|
70 |
print(uploaded_file.name)
|
71 |
question=st.text_input("Input Prompt: " , key="input")
|
72 |
+
submit = st.button("Query")
|
73 |
+
|
74 |
+
|
75 |
if file_type == "application/octet-stream":
|
76 |
|
77 |
|
|
|
109 |
df = pd.DataFrame(data)
|
110 |
st.subheader("The Response is ")
|
111 |
st.table(df)
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
+
try:
|
114 |
+
# Example chart with Plotly
|
115 |
+
fig = px.bar(df, x=columns[0], y=columns[1], title="Visualize")
|
116 |
+
st.plotly_chart(fig)
|
117 |
+
except:
|
118 |
+
st.success("No Enough data available to do analysis")
|
119 |
|
120 |
+
|
|
|
|
|
|
|
|
|
121 |
|
122 |
|
123 |
|
124 |
|
125 |
|
126 |
|
127 |
+
if file_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
+
# converting excel file to db format then deleting excel file
|
130 |
+
db_name = "data.db"
|
131 |
+
file_path = os.path.join(os.getcwd(), uploaded_file.name)
|
132 |
+
with open(file_path, "wb") as f:
|
133 |
+
f.write(uploaded_file.getbuffer())
|
134 |
+
|
135 |
+
df = pd.read_excel(file_path)
|
136 |
+
conn = sqlite3.connect(db_name)
|
137 |
+
df.to_sql("excel_data", conn, index=False, if_exists="replace")
|
138 |
+
conn.close()
|
139 |
+
# os.remove(file_path)
|
140 |
|
141 |
|
142 |
+
# extracting DB Name & colum names
|
143 |
|
144 |
+
db_info = extract_name_and_colums(db_name)
|
145 |
+
table_name = db_info['table_name'][0]
|
146 |
+
column_names = db_info['colum_names']
|
147 |
|
148 |
+
print(column_names)
|
149 |
|
150 |
|
151 |
+
if submit:
|
152 |
+
response = get_gemini_response(question, table_name, column_names)
|
153 |
+
print(response)
|
154 |
|
155 |
+
response = read_sql_query(response, db_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
+
# print(response)
|
158 |
+
formatted_response=model.generate_content(f"Format this {response} in the table format")
|
159 |
+
response_text = formatted_response.candidates[0].content.parts[0].text
|
160 |
+
# Splitting the response text into lines
|
161 |
+
lines = response_text.strip().split('\n')
|
162 |
+
|
163 |
+
# Extracting column names and data
|
164 |
+
columns = [col.strip() for col in lines[0].split('|') if col.strip()]
|
165 |
+
data = [dict(zip(columns, [item.strip() for item in line.split('|') if item.strip()])) for line in lines[2:]]
|
166 |
+
|
167 |
+
# Creating DataFrame
|
168 |
+
df = pd.DataFrame(data)
|
169 |
+
st.subheader("The Response is ")
|
170 |
+
st.table(df)
|
171 |
+
|
172 |
+
try:
|
173 |
+
# Example chart with Plotly
|
174 |
+
fig = px.bar(df, x=columns[0], y=columns[1], title="Visualize")
|
175 |
+
st.plotly_chart(fig)
|
176 |
+
except:
|
177 |
+
st.success("No Enough data available to do analysis")
|
178 |
+
|
179 |
+
else:
|
180 |
+
st.error("File type is not allowed. Please upload a .db or .xlsx file.")
|
181 |
+
|
182 |
|
183 |
|
184 |
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
-
google-generativeai
|
2 |
-
streamlit
|
3 |
-
python-dotenv
|
4 |
-
langchain
|
|
|
|
1 |
+
google-generativeai
|
2 |
+
streamlit
|
3 |
+
python-dotenv
|
4 |
+
langchain
|
5 |
+
plotly
|