Improve query functions

#41
Files changed (2)
  1. functions/query_functions.py +29 -10
  2. templates/sql_db.py +1 -1
functions/query_functions.py CHANGED
@@ -30,9 +30,10 @@ class SQLiteQuery:
30
  for query in queries:
31
  result = pd.read_sql(query, self.connection)
32
  result.to_csv(f'{dir_path}/file_upload/query.csv', index=False)
 
33
  results.append(f"{result}")
34
  self.connection.close()
35
- return {"results": results, "queries": queries}
36
 
37
 
38
 
@@ -43,7 +44,9 @@ def sqlite_query_func(queries: List[str], session_hash, **kwargs):
43
  result = sql_query.run(queries, session_hash)
44
  if len(result["results"][0]) > 1000:
45
  print("QUERY TOO LARGE")
46
- return {"reply": "query result too large to be processed by llm, the query results are in our query.csv file. If you need to display the results directly, perhaps use the table_generation_func function."}
 
 
47
  else:
48
  return {"reply": result["results"][0]}
49
 
@@ -75,9 +78,10 @@ class PostgreSQLQuery:
75
  print(query)
76
  result = pd.read_sql_query(query, self.connection)
77
  result.to_csv(f'{dir_path}/sql/query.csv', index=False)
 
78
  results.append(f"{result}")
79
  self.connection.close()
80
- return {"results": results, "queries": queries}
81
 
82
 
83
 
@@ -89,7 +93,9 @@ def sql_query_func(queries: List[str], session_hash, args, **kwargs):
89
  print(result)
90
  if len(result["results"][0]) > 1000:
91
  print("QUERY TOO LARGE")
92
- return {"reply": "query result too large to be processed by llm, the query results are in our query.csv file. If you need to display the results directly, perhaps use the table_generation_func function."}
 
 
93
  else:
94
  return {"reply": result["results"][0]}
95
 
@@ -143,10 +149,11 @@ class DocDBQuery:
143
  docs = collection.aggregate_pandas_all(query_list)
144
  print("DATA FRAME COMPLETE")
145
  docs.to_csv(f'{dir_path}/doc_db/query.csv', index=False)
 
146
  print("CSV COMPLETE")
147
  results.append(f"{docs}")
148
  self.client.close()
149
- return {"results": results, "queries": aggregation_pipeline}
150
 
151
 
152
 
@@ -157,7 +164,9 @@ def doc_db_query_func(aggregation_pipeline: List[str], db_collection: AnyStr, se
157
  print("RESULT")
158
  if len(result["results"][0]) > 1000:
159
  print("QUERY TOO LARGE")
160
- return {"reply": "query result too large to be processed by llm, the query results are in our query.csv file. If you need to display the results directly, perhaps use the table_generation_func function."}
 
 
161
  else:
162
  return {"reply": result["results"][0]}
163
 
@@ -200,9 +209,10 @@ class GraphQLQuery:
200
  #print(response_frame)
201
 
202
  response_frame.to_csv(f'{dir_path}/graphql/query.csv', index=False)
 
203
  print("CSV COMPLETE")
204
  results.append(f"{response_frame}")
205
- return {"results": results, "queries": graphql_query}
206
 
207
 
208
 
@@ -213,7 +223,9 @@ def graphql_query_func(graphql_query: AnyStr, session_hash, args, **kwargs):
213
  print("RESULT")
214
  if len(result["results"][0]) > 1000:
215
  print("QUERY TOO LARGE")
216
- return {"reply": "query result too large to be processed by llm, the query results are in our query.csv file. If you need to display the results directly, perhaps use the table_generation_func function."}
 
 
217
  else:
218
  return {"reply": result["results"][0]}
219
 
@@ -256,9 +268,16 @@ def graphql_csv_query(csv_query: AnyStr, session_hash, **kwargs):
256
  print("GRAPHQL CSV QUERY")
257
  queried_df = sqldf(csv_query, locals())
258
  print(queried_df)
 
259
  queried_df.to_csv(f'{dir_path}/graphql/query.csv', index=False)
260
 
261
- return {"reply": "The new query results are in our query.csv file. If you need to display the results directly, perhaps use the table_generation_func function."}
 
 
 
 
 
 
262
 
263
  except Exception as e:
264
  reply = f"""There was an error querying our query.csv file with the query:{csv_query}
@@ -266,4 +285,4 @@ def graphql_csv_query(csv_query: AnyStr, session_hash, **kwargs):
266
  You should probably try again.
267
  """
268
  print(reply)
269
- return {"reply": reply}
 
30
  for query in queries:
31
  result = pd.read_sql(query, self.connection)
32
  result.to_csv(f'{dir_path}/file_upload/query.csv', index=False)
33
+ column_names = list(result.columns)
34
  results.append(f"{result}")
35
  self.connection.close()
36
+ return {"results": results, "queries": queries, "csv_columns": column_names}
37
 
38
 
39
 
 
44
  result = sql_query.run(queries, session_hash)
45
  if len(result["results"][0]) > 1000:
46
  print("QUERY TOO LARGE")
47
+ return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
48
+ The column names of this query.csv file are: {result["csv_columns"]}.
49
+ If you need to display the results directly, perhaps use the table_generation_func function."""}
50
  else:
51
  return {"reply": result["results"][0]}
52
 
 
78
  print(query)
79
  result = pd.read_sql_query(query, self.connection)
80
  result.to_csv(f'{dir_path}/sql/query.csv', index=False)
81
+ column_names = list(result.columns)
82
  results.append(f"{result}")
83
  self.connection.close()
84
+ return {"results": results, "queries": queries, "csv_columns": column_names}
85
 
86
 
87
 
 
93
  print(result)
94
  if len(result["results"][0]) > 1000:
95
  print("QUERY TOO LARGE")
96
+ return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
97
+ The column names of this query.csv file are: {result["csv_columns"]}.
98
+ If you need to display the results directly, perhaps use the table_generation_func function."""}
99
  else:
100
  return {"reply": result["results"][0]}
101
 
 
149
  docs = collection.aggregate_pandas_all(query_list)
150
  print("DATA FRAME COMPLETE")
151
  docs.to_csv(f'{dir_path}/doc_db/query.csv', index=False)
152
+ column_names = list(docs.columns)
153
  print("CSV COMPLETE")
154
  results.append(f"{docs}")
155
  self.client.close()
156
+ return {"results": results, "queries": aggregation_pipeline, "csv_columns": column_names}
157
 
158
 
159
 
 
164
  print("RESULT")
165
  if len(result["results"][0]) > 1000:
166
  print("QUERY TOO LARGE")
167
+ return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
168
+ The column names of this query.csv file are: {result["csv_columns"]}.
169
+ If you need to display the results directly, perhaps use the table_generation_func function."""}
170
  else:
171
  return {"reply": result["results"][0]}
172
 
 
209
  #print(response_frame)
210
 
211
  response_frame.to_csv(f'{dir_path}/graphql/query.csv', index=False)
212
+ column_names = list(response_frame.columns)
213
  print("CSV COMPLETE")
214
  results.append(f"{response_frame}")
215
+ return {"results": results, "queries": graphql_query, "csv_columns": column_names}
216
 
217
 
218
 
 
223
  print("RESULT")
224
  if len(result["results"][0]) > 1000:
225
  print("QUERY TOO LARGE")
226
+ return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
227
+ The column names of this query.csv file are: {result["csv_columns"]}.
228
+ If you need to display the results directly, perhaps use the table_generation_func function."""}
229
  else:
230
  return {"reply": result["results"][0]}
231
 
 
268
  print("GRAPHQL CSV QUERY")
269
  queried_df = sqldf(csv_query, locals())
270
  print(queried_df)
271
+ column_names = list(queried_df.columns)
272
  queried_df.to_csv(f'{dir_path}/graphql/query.csv', index=False)
273
 
274
+ if len(queried_df) > 1000:
275
+ print("CSV QUERY TOO LARGE")
276
+ return {"reply": f"""The new query results are in our query.csv file.
277
+ The column names of this query.csv file are: {column_names}.
278
+ If you need to display the results directly, perhaps use the table_generation_func function."""}
279
+ else:
280
+ return {"reply": str(queried_df)}
281
 
282
  except Exception as e:
283
  reply = f"""There was an error querying our query.csv file with the query:{csv_query}
 
285
  You should probably try again.
286
  """
287
  print(reply)
288
+ return {"reply": reply}
templates/sql_db.py CHANGED
@@ -49,7 +49,7 @@ with gr.Blocks() as demo:
49
  ["Describe the dataset"],
50
  ["What is the total revenue generated by each store?"],
51
  ["Can you generate and display a bar chart of film category to number of films in that category?"],
52
- ["Can you generate a pie chart showing the top 10 most rented films by revenue vs all other films?"],
53
  ["Can you generate a line chart of rental revenue over time?"],
54
  ["What is the relationship between film length and rental frequency?"]
55
  ]
 
49
  ["Describe the dataset"],
50
  ["What is the total revenue generated by each store?"],
51
  ["Can you generate and display a bar chart of film category to number of films in that category?"],
52
+ ["Can you generate a pie chart showing the top 10 most rented films by revenue?"],
53
  ["Can you generate a line chart of rental revenue over time?"],
54
  ["What is the relationship between film length and rental frequency?"]
55
  ]