Condense query functions #42
functions/__init__.py CHANGED
@@ -1,9 +1,9 @@
- from .query_functions import SQLiteQuery, sqlite_query_func, sql_query_func, doc_db_query_func, graphql_query_func, graphql_schema_query, graphql_csv_query
+ from .query_functions import graphql_schema_query, graphql_csv_query, query_func
  from .chart_functions import table_generation_func, scatter_chart_generation_func, \
  line_chart_generation_func, bar_chart_generation_func, pie_chart_generation_func, histogram_generation_func, scatter_chart_fig
  from .chat_functions import example_question_generator, chatbot_func
  from .stat_functions import regression_func

- __all__ = ["SQLiteQuery","sqlite_query_func","sql_query_func","doc_db_query_func","graphql_query_func","graphql_schema_query","graphql_csv_query","table_generation_func","scatter_chart_generation_func",
+ __all__ = ["query_func","graphql_schema_query","graphql_csv_query","table_generation_func","scatter_chart_generation_func",
  "line_chart_generation_func","bar_chart_generation_func","regression_func", "pie_chart_generation_func", "histogram_generation_func",
  "scatter_chart_fig","example_question_generator","chatbot_func"]
functions/chat_functions.py CHANGED
@@ -62,7 +62,8 @@ def example_question_generator(session_hash, data_source, name, titles, schema):
  return example_response["replies"][0].text

  def system_message(data_source, titles, schema=""):
-
+ print("TITLES")
+ print(titles)
  system_message_dict = {
  'file_upload' : f"""You are a helpful and knowledgeable agent who has access to an SQLite database which has a table called 'data_source' that contains the following columns: {titles}.
  You also have access to a function, called table_generation_func, that can take a query.csv file generated from our sql query and returns an iframe that we should display in our chat window.
@@ -111,13 +112,12 @@ def system_message(data_source, titles, schema=""):
  return system_message_dict[data_source]

  def chatbot_func(message, history, session_hash, data_source, titles, schema, *args):
- from functions import sqlite_query_func, table_generation_func, regression_func, scatter_chart_generation_func, \
- sql_query_func, doc_db_query_func, graphql_query_func, graphql_schema_query, graphql_csv_query, \
+ from functions import table_generation_func, regression_func, scatter_chart_generation_func, \
+ query_func, graphql_schema_query, graphql_csv_query, \
  line_chart_generation_func,bar_chart_generation_func,pie_chart_generation_func,histogram_generation_func
  import tools.tools as tools

- available_functions = {"sqlite_query_func": sqlite_query_func,"sql_query_func": sql_query_func,"doc_db_query_func": doc_db_query_func,
- "graphql_query_func": graphql_query_func,"graphql_schema_query": graphql_schema_query,"graphql_csv_query": graphql_csv_query,
+ available_functions = {"query_func":query_func,"graphql_schema_query": graphql_schema_query,"graphql_csv_query": graphql_csv_query,
  "table_generation_func":table_generation_func,
  "line_chart_generation_func":line_chart_generation_func,"bar_chart_generation_func":bar_chart_generation_func,
  "scatter_chart_generation_func":scatter_chart_generation_func, "pie_chart_generation_func":pie_chart_generation_func,
functions/query_functions.py CHANGED
@@ -35,28 +35,6 @@ class SQLiteQuery:
  self.connection.close()
  return {"results": results, "queries": queries, "csv_columns": column_names}

-
-
- def sqlite_query_func(queries: List[str], session_hash, **kwargs):
- dir_path = TEMP_DIR / str(session_hash)
- sql_query = SQLiteQuery(f'{dir_path}/file_upload/data_source.db')
- try:
- result = sql_query.run(queries, session_hash)
- if len(result["results"][0]) > 1000:
- print("QUERY TOO LARGE")
- return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
- The column names of this query.csv file are: {result["csv_columns"]}.
- If you need to display the results directly, perhaps use the table_generation_func function."""}
- else:
- return {"reply": result["results"][0]}
-
- except Exception as e:
- reply = f"""There was an error running the SQL Query = {queries}
- The error is {e},
- You should probably try again.
- """
- return {"reply": reply}
-
  @component
  class PostgreSQLQuery:

@@ -82,30 +60,6 @@ class PostgreSQLQuery:
  results.append(f"{result}")
  self.connection.close()
  return {"results": results, "queries": queries, "csv_columns": column_names}
-
-
-
- def sql_query_func(queries: List[str], session_hash, args, **kwargs):
- sql_query = PostgreSQLQuery(args[0], args[1], args[2], args[3], args[4])
- try:
- result = sql_query.run(queries, session_hash)
- print("RESULT")
- print(result)
- if len(result["results"][0]) > 1000:
- print("QUERY TOO LARGE")
- return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
- The column names of this query.csv file are: {result["csv_columns"]}.
- If you need to display the results directly, perhaps use the table_generation_func function."""}
- else:
- return {"reply": result["results"][0]}
-
- except Exception as e:
- reply = f"""There was an error running the SQL Query = {queries}
- The error is {e},
- You should probably try again.
- """
- print(reply)
- return {"reply": reply}

  @component
  class DocDBQuery:
@@ -155,29 +109,6 @@ class DocDBQuery:
  self.client.close()
  return {"results": results, "queries": aggregation_pipeline, "csv_columns": column_names}

-
-
- def doc_db_query_func(aggregation_pipeline: List[str], db_collection: AnyStr, session_hash, args, **kwargs):
- doc_db_query = DocDBQuery(args[0], args[1])
- try:
- result = doc_db_query.run(aggregation_pipeline, db_collection, session_hash)
- print("RESULT")
- if len(result["results"][0]) > 1000:
- print("QUERY TOO LARGE")
- return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
- The column names of this query.csv file are: {result["csv_columns"]}.
- If you need to display the results directly, perhaps use the table_generation_func function."""}
- else:
- return {"reply": result["results"][0]}
-
- except Exception as e:
- reply = f"""There was an error running the NoSQL (Mongo) Query = {aggregation_pipeline}
- The error is {e},
- You should probably try again.
- """
- print(reply)
- return {"reply": reply}
-
  @component
  class GraphQLQuery:

@@ -214,12 +145,23 @@ class GraphQLQuery:
  results.append(f"{response_frame}")
  return {"results": results, "queries": graphql_query, "csv_columns": column_names}

-
-
- def graphql_query_func(graphql_query: AnyStr, session_hash, args, **kwargs):
- graphql_object = GraphQLQuery()
+ def query_func(queries:List[str], session_hash, session_folder, args, **kwargs):
  try:
- result = graphql_object.run(graphql_query, args[0], args[1], args[2], session_hash)
+ print("QUERY")
+ print(queries)
+ if session_folder == "file_upload":
+ dir_path = TEMP_DIR / str(session_hash)
+ sql_query = SQLiteQuery(f'{dir_path}/file_upload/data_source.db')
+ result = sql_query.run(queries, session_hash)
+ elif session_folder == "sql":
+ sql_query = PostgreSQLQuery(args[0], args[1], args[2], args[3], args[4])
+ result = sql_query.run(queries, session_hash)
+ elif session_folder == 'doc_db':
+ doc_db_query = DocDBQuery(args[0], args[1])
+ result = doc_db_query.run(queries, kwargs['db_collection'], session_hash)
+ elif session_folder == 'graphql':
+ graphql_object = GraphQLQuery()
+ result = graphql_object.run(queries, args[0], args[1], args[2], session_hash)
  print("RESULT")
  if len(result["results"][0]) > 1000:
  print("QUERY TOO LARGE")
@@ -230,7 +172,7 @@ def graphql_query_func(graphql_query: AnyStr, session_hash, args, **kwargs):
  return {"reply": result["results"][0]}

  except Exception as e:
- reply = f"""There was an error running the GraphQL Query = {graphql_query}
+ reply = f"""There was an error running the {session_folder} Query = {queries}
  The error is {e},
  You should probably try again.
  """
@@ -266,6 +208,7 @@ def graphql_csv_query(csv_query: AnyStr, session_hash, **kwargs):
  query = pd.read_csv(f'{dir_path}/graphql/query.csv')
  query.Name = 'query'
  print("GRAPHQL CSV QUERY")
+ print(csv_query)
  queried_df = sqldf(csv_query, locals())
  print(queried_df)
  column_names = list(queried_df.columns)
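The four per-backend wrappers (`sqlite_query_func`, `sql_query_func`, `doc_db_query_func`, `graphql_query_func`) collapse into a single `query_func` that branches on `session_folder`. A minimal usage sketch against the new signature, with placeholder session values:

```python
from functions import query_func

# session_folder picks the backend: "file_upload" (SQLite), "sql" (PostgreSQL),
# "doc_db" (MongoDB), or "graphql"; the values below are placeholders.
reply = query_func(
    ["SELECT AVG(balance) FROM data_source GROUP BY education"],  # queries
    session_hash="demo-session",     # locates TEMP_DIR/<session_hash>/file_upload/data_source.db
    session_folder="file_upload",
    args=[],                         # connection details; unused for the SQLite path
)
print(reply["reply"])
```

For the MongoDB path the collection name now travels through `**kwargs` as `db_collection`, while the PostgreSQL and GraphQL paths still take their connection details positionally from `args`.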
templates/data_file.py CHANGED
@@ -1,136 +1,136 @@
- import gradio as gr
- from functions import example_question_generator, chatbot_func
- from data_sources import process_data_upload
- from utils import message_dict
- import ast
-
- def run_example(input):
- return input
-
- def example_display(input):
- if input == None:
- display = True
- else:
- display = False
- return [gr.update(visible=display),gr.update(visible=display),gr.update(visible=display),gr.update(visible=display)]
-
- with gr.Blocks() as demo:
- description = gr.HTML("""
- <!-- Header -->
- <div class="max-w-4xl mx-auto mb-12 text-center">
- <div class="bg-blue-50 border border-blue-200 rounded-lg max-w-2xl mx-auto">
- <h2 class="font-semibold text-blue-800 ">
- <i class="fas fa-info-circle mr-2"></i>Supported Files
- </h2>
- <div class="flex flex-wrap justify-center gap-3 pb-4 text-blue-700">
- <span class="tooltip">
- <i class="fas fa-file-csv mr-1"></i>CSV
- <span class="tooltip-text">Comma-separated values</span>
- </span>
- <span class="tooltip">
- <i class="fas fa-file-alt mr-1"></i>TSV
- <span class="tooltip-text">Tab-separated values</span>
- </span>
- <span class="tooltip">
- <i class="fas fa-file-alt mr-1"></i>TXT
- <span class="tooltip-text">Text files</span>
- </span>
- <span class="tooltip">
- <i class="fas fa-file-excel mr-1"></i>XLS/XLSX
- <span class="tooltip-text">Excel spreadsheets</span>
- </span>
- <span class="tooltip">
- <i class="fas fa-file-code mr-1"></i>XML
- <span class="tooltip-text">XML documents</span>
- </span>
- <span class="tooltip">
- <i class="fas fa-file-code mr-1"></i>JSON
- <span class="tooltip-text">JSON data files</span>
- </span>
- </div>
- </div>
- </div>
- """, elem_classes="description_component")
- example_file_1 = gr.File(visible=False, value="samples/bank_marketing_campaign.csv")
- example_file_2 = gr.File(visible=False, value="samples/online_retail_data.csv")
- example_file_3 = gr.File(visible=False, value="samples/tb_illness_data.csv")
- with gr.Row():
- example_btn_1 = gr.Button(value="Try Me: bank_marketing_campaign.csv", elem_classes="sample-btn bg-gradient-to-r from-purple-500 to-indigo-600 text-white p-6 rounded-lg text-left hover:shadow-lg", size="md", variant="primary")
- example_btn_2 = gr.Button(value="Try Me: online_retail_data.csv", elem_classes="sample-btn bg-gradient-to-r from-purple-500 to-indigo-600 text-white p-6 rounded-lg text-left hover:shadow-lg", size="md", variant="primary")
- example_btn_3 = gr.Button(value="Try Me: tb_illness_data.csv", elem_classes="sample-btn bg-gradient-to-r from-purple-500 to-indigo-600 text-white p-6 rounded-lg text-left hover:shadow-lg", size="md", variant="primary")
-
- file_output = gr.File(label="Data File (CSV, TSV, TXT, XLS, XLSX, XML, JSON)", show_label=True, elem_classes="file_marker drop-zone border-2 border-dashed border-gray-300 rounded-lg hover:border-primary cursor-pointer bg-gray-50 hover:bg-blue-50 transition-colors duration-300", file_types=['.csv','.xlsx','.txt','.json','.ndjson','.xml','.xls','.tsv'])
- example_btn_1.click(fn=run_example, inputs=example_file_1, outputs=file_output)
- example_btn_2.click(fn=run_example, inputs=example_file_2, outputs=file_output)
- example_btn_3.click(fn=run_example, inputs=example_file_3, outputs=file_output)
- file_output.change(fn=example_display, inputs=file_output, outputs=[example_btn_1, example_btn_2, example_btn_3, description])
-
- @gr.render(inputs=file_output)
- def data_options(filename, request: gr.Request):
- print(filename)
- if request.session_hash not in message_dict:
- message_dict[request.session_hash] = {}
- message_dict[request.session_hash]['file_upload'] = None
- if filename:
- process_message = process_upload(filename, request.session_hash)
- gr.HTML(value=process_message[1], padding=False)
- if process_message[0] == "success":
- if "bank_marketing_campaign" in filename:
- example_questions = [
- ["Describe the dataset"],
- ["What levels of education have the highest and lowest average balance?"],
- ["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
- ["Can you generate a bar chart of education vs. average balance?"],
- ["Can you generate a table of levels of education versus average balance, percent married, percent with a loan, and percent in default?"],
- ["Can we predict the relationship between the number of contacts performed before this campaign and the average balance?"],
- ["Can you plot the number of contacts performed before this campaign versus the duration and use balance as the size in a bubble chart?"]
- ]
- elif "online_retail_data" in filename:
- example_questions = [
- ["Describe the dataset"],
- ["What month had the highest revenue?"],
- ["Is revenue higher in the morning or afternoon?"],
- ["Can you generate a line graph of revenue per month?"],
- ["Can you generate a table of revenue per month?"],
- ["Can we predict how time of day affects transaction value in this data set?"],
- ["Can you plot revenue per month with size being the number of units sold that month in a bubble chart?"]
- ]
- else:
- try:
- generated_examples = ast.literal_eval(example_question_generator(request.session_hash, 'file_upload', '', process_message[1], ''))
- example_questions = [
- ["Describe the dataset"]
- ]
- for example in generated_examples:
- example_questions.append([example])
- except Exception as e:
- print("DATA FILE QUESTION GENERATION ERROR")
- print(e)
- example_questions = [
- ["Describe the dataset"],
- ["List the columns in the dataset"],
- ["What could this data be used for?"],
- ]
- session_hash = gr.Textbox(visible=False, value=request.session_hash)
- data_source = gr.Textbox(visible=False, value='file_upload')
- schema = gr.Textbox(visible=False, value='')
- titles = gr.Textbox(value=process_message[2], interactive=False, visible=False)
- bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
- chat = gr.ChatInterface(
- fn=chatbot_func,
- type='messages',
- chatbot=bot,
- title="Chat with your data file",
- concurrency_limit=None,
- examples=example_questions,
- additional_inputs=[session_hash, data_source, titles, schema]
- )
-
- def process_upload(upload_value, session_hash):
- if upload_value:
- process_message = process_data_upload(upload_value, session_hash)
- return process_message
-
-
- if __name__ == "__main__":
+ import gradio as gr
+ from functions import example_question_generator, chatbot_func
+ from data_sources import process_data_upload
+ from utils import message_dict
+ import ast
+
+ def run_example(input):
+ return input
+
+ def example_display(input):
+ if input == None:
+ display = True
+ else:
+ display = False
+ return [gr.update(visible=display),gr.update(visible=display),gr.update(visible=display),gr.update(visible=display)]
+
+ with gr.Blocks() as demo:
+ description = gr.HTML("""
+ <!-- Header -->
+ <div class="max-w-4xl mx-auto mb-12 text-center">
+ <div class="bg-blue-50 border border-blue-200 rounded-lg max-w-2xl mx-auto">
+ <h2 class="font-semibold text-blue-800 ">
+ <i class="fas fa-info-circle mr-2"></i>Supported Files
+ </h2>
+ <div class="flex flex-wrap justify-center gap-3 pb-4 text-blue-700">
+ <span class="tooltip">
+ <i class="fas fa-file-csv mr-1"></i>CSV
+ <span class="tooltip-text">Comma-separated values</span>
+ </span>
+ <span class="tooltip">
+ <i class="fas fa-file-alt mr-1"></i>TSV
+ <span class="tooltip-text">Tab-separated values</span>
+ </span>
+ <span class="tooltip">
+ <i class="fas fa-file-alt mr-1"></i>TXT
+ <span class="tooltip-text">Text files</span>
+ </span>
+ <span class="tooltip">
+ <i class="fas fa-file-excel mr-1"></i>XLS/XLSX
+ <span class="tooltip-text">Excel spreadsheets</span>
+ </span>
+ <span class="tooltip">
+ <i class="fas fa-file-code mr-1"></i>XML
+ <span class="tooltip-text">XML documents</span>
+ </span>
+ <span class="tooltip">
+ <i class="fas fa-file-code mr-1"></i>JSON
+ <span class="tooltip-text">JSON data files</span>
+ </span>
+ </div>
+ </div>
+ </div>
+ """, elem_classes="description_component")
+ example_file_1 = gr.File(visible=False, value="samples/bank_marketing_campaign.csv")
+ example_file_2 = gr.File(visible=False, value="samples/online_retail_data.csv")
+ example_file_3 = gr.File(visible=False, value="samples/tb_illness_data.csv")
+ with gr.Row():
+ example_btn_1 = gr.Button(value="Try Me: bank_marketing_campaign.csv", elem_classes="sample-btn bg-gradient-to-r from-purple-500 to-indigo-600 text-white p-6 rounded-lg text-left hover:shadow-lg", size="md", variant="primary")
+ example_btn_2 = gr.Button(value="Try Me: online_retail_data.csv", elem_classes="sample-btn bg-gradient-to-r from-purple-500 to-indigo-600 text-white p-6 rounded-lg text-left hover:shadow-lg", size="md", variant="primary")
+ example_btn_3 = gr.Button(value="Try Me: tb_illness_data.csv", elem_classes="sample-btn bg-gradient-to-r from-purple-500 to-indigo-600 text-white p-6 rounded-lg text-left hover:shadow-lg", size="md", variant="primary")
+
+ file_output = gr.File(label="Data File (CSV, TSV, TXT, XLS, XLSX, XML, JSON)", show_label=True, elem_classes="file_marker drop-zone border-2 border-dashed border-gray-300 rounded-lg hover:border-primary cursor-pointer bg-gray-50 hover:bg-blue-50 transition-colors duration-300", file_types=['.csv','.xlsx','.txt','.json','.ndjson','.xml','.xls','.tsv'])
+ example_btn_1.click(fn=run_example, inputs=example_file_1, outputs=file_output)
+ example_btn_2.click(fn=run_example, inputs=example_file_2, outputs=file_output)
+ example_btn_3.click(fn=run_example, inputs=example_file_3, outputs=file_output)
+ file_output.change(fn=example_display, inputs=file_output, outputs=[example_btn_1, example_btn_2, example_btn_3, description])
+
+ @gr.render(inputs=file_output)
+ def data_options(filename, request: gr.Request):
+ print(filename)
+ if request.session_hash not in message_dict:
+ message_dict[request.session_hash] = {}
+ message_dict[request.session_hash]['file_upload'] = None
+ if filename:
+ process_message = process_upload(filename, request.session_hash)
+ gr.HTML(value=process_message[1], padding=False)
+ if process_message[0] == "success":
+ if "bank_marketing_campaign" in filename:
+ example_questions = [
+ ["Describe the dataset"],
+ ["What levels of education have the highest and lowest average balance?"],
+ ["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
+ ["Can you generate a bar chart of education vs. average balance?"],
+ ["Can you generate a table of levels of education versus average balance, percent married, percent with a loan, and percent in default?"],
+ ["Can we predict the relationship between the number of contacts performed before this campaign and the average balance?"],
+ ["Can you plot the number of contacts performed before this campaign versus the duration and use balance as the size in a bubble chart?"]
+ ]
+ elif "online_retail_data" in filename:
+ example_questions = [
+ ["Describe the dataset"],
+ ["What month had the highest revenue?"],
+ ["Is revenue higher in the morning or afternoon?"],
+ ["Can you generate a line graph of revenue per month?"],
+ ["Can you generate a table of revenue per month?"],
+ ["Can we predict how time of day affects transaction value in this data set?"],
+ ["Can you plot revenue per month with size being the number of units sold that month in a bubble chart?"]
+ ]
+ else:
+ try:
+ generated_examples = ast.literal_eval(example_question_generator(request.session_hash, 'file_upload', '', process_message[1], ''))
+ example_questions = [
+ ["Describe the dataset"]
+ ]
+ for example in generated_examples:
+ example_questions.append([example])
+ except Exception as e:
+ print("DATA FILE QUESTION GENERATION ERROR")
+ print(e)
+ example_questions = [
+ ["Describe the dataset"],
+ ["List the columns in the dataset"],
+ ["What could this data be used for?"],
+ ]
+ session_hash = gr.Textbox(visible=False, value=request.session_hash)
+ data_source = gr.Textbox(visible=False, value='file_upload')
+ schema = gr.Textbox(visible=False, value='')
+ titles = gr.Textbox(value=process_message[2], interactive=False, visible=False)
+ bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
+ chat = gr.ChatInterface(
+ fn=chatbot_func,
+ type='messages',
+ chatbot=bot,
+ title="Chat with your data file",
+ concurrency_limit=None,
+ examples=example_questions,
+ additional_inputs=[session_hash, data_source, titles, schema]
+ )
+
+ def process_upload(upload_value, session_hash):
+ if upload_value:
+ process_message = process_data_upload(upload_value, session_hash)
+ return process_message
+
+
+ if __name__ == "__main__":
  demo.launch()
tools/tools.py CHANGED
@@ -10,7 +10,7 @@ def tools_call(session_hash, data_source, titles):
  {
  "type": "function",
  "function": {
- "name": "sqlite_query_func",
+ "name": "query_func",
  "description": f"""This is a tool useful to query a SQLite table called 'data_source' with the following Columns: {titles_string}.
  There may also be more columns in the table if the number of columns is too large to process.
  This function also saves the results of the query to csv file called query.csv.""",
@@ -34,7 +34,7 @@ def tools_call(session_hash, data_source, titles):
  {
  "type": "function",
  "function": {
- "name": "sql_query_func",
+ "name": "query_func",
  "description": f"""This is a tool useful to query a PostgreSQL database with the following tables, {titles_string}.
  There may also be more tables in the database if the number of tables is too large to process.
  This function also saves the results of the query to csv file called query.csv.""",
@@ -58,14 +58,14 @@ def tools_call(session_hash, data_source, titles):
  {
  "type": "function",
  "function": {
- "name": "doc_db_query_func",
+ "name": "query_func",
  "description": f"""This is a tool useful to build an aggregation pipeline to query a MongoDB NoSQL document database with the following collections, {titles_string}.
  There may also be more collections in the database if the number of tables is too large to process.
  This function also saves the results of the query to a csv file called query.csv.""",
  "parameters": {
  "type": "object",
  "properties": {
- "aggregation_pipeline": {
+ "queries": {
  "type": "string",
  "description": "The MongoDB aggregation pipeline to use in the search. Infer this from the user's message. It should be a question or a statement."
  },
@@ -74,7 +74,7 @@ def tools_call(session_hash, data_source, titles):
  "description": "The MongoDB collection to use in the search. Infer this from the user's message. It should be a question or a statement.",
  }
  },
- "required": ["aggregation_pipeline","db_collection"],
+ "required": ["queries","db_collection"],
  },
  },
  },
@@ -83,19 +83,19 @@ def tools_call(session_hash, data_source, titles):
  {
  "type": "function",
  "function": {
- "name": "graphql_query_func",
+ "name": "query_func",
  "description": f"""This is a tool useful to build a GraphQL query for a GraphQL API endpoint with the following types, {titles_string}.
  There may also be more types in the GraphQL endpoint if the number of types is too large to process.
  This function also saves the results of the query to a csv file called query.csv.""",
  "parameters": {
  "type": "object",
  "properties": {
- "graphql_query": {
+ "queries": {
  "type": "string",
  "description": "The GraphQL query to use in the search. Infer this from the user's message. It should be a question or a statement."
  }
  },
- "required": ["graphql_query"],
+ "required": ["queries"],
  },
  },
  },
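With every data source now advertising the same tool name, the model always emits a `query_func` call whose `queries` argument carries the SQL statement, aggregation pipeline, or GraphQL query. Condensed, the GraphQL entry now reads roughly as below (description abridged; the full f-string wording is in the hunk above):

```python
# Condensed sketch of the unified GraphQL tool entry after this PR
# (description abridged from the f-string shown in the diff above).
graphql_tool = {
    "type": "function",
    "function": {
        "name": "query_func",
        "description": "Build a GraphQL query for the endpoint; results are also saved to query.csv.",
        "parameters": {
            "type": "object",
            "properties": {
                "queries": {
                    "type": "string",
                    "description": "The GraphQL query to use in the search, inferred from the user's message.",
                },
            },
            "required": ["queries"],
        },
    },
}
```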