Spaces:

nolanzandi
/

virtual-data-analyst

Running

App Files Community

nolanzandi committed on Apr 12

Commit

3d87c18

verified ·

1 Parent(s): 1647e02

viz-refactor-12apr (#23)

Browse files

- visualization updates and refactor for future release (f5b1bff28e943b27af3486e62b019f098e6283c0)

Files changed (8) hide show

app.py +62 -97
assets/styles.css +168 -0
data_file.py +126 -0
data_sources/upload_file.py +1 -1
functions/chat_functions.py +5 -6
tools.py → tools/chart_tools.py +13 -93
tools/stats_tools.py +44 -0
tools/tools.py +79 -0

app.py CHANGED Viewed

@@ -1,18 +1,13 @@
-from data_sources import process_data_upload
-from functions import example_question_generator, chatbot_with_fc
 from utils import TEMP_DIR, message_dict
 import gradio as gr
-import ast
 import os
 from getpass import getpass
 from dotenv import load_dotenv
 load_dotenv()
-if "OPENAI_API_KEY" not in os.environ:
-    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")
 def delete_db(req: gr.Request):
     import shutil
     dir_path = TEMP_DIR / str(req.session_hash)
@@ -20,102 +15,72 @@ def delete_db(req: gr.Request):
         shutil.rmtree(dir_path)
         message_dict[req.session_hash] = None
-def run_example(input):
-    return input
-def example_display(input):
-    if input == None:
-        display = True
-    else:
-        display = False
-    return [gr.update(visible=display),gr.update(visible=display)]
 css= ".file_marker .large{min-height:50px !important;} .example_btn{max-width:300px;} .padding{padding:0;}"
-with gr.Blocks(css=css, delete_cache=(3600,3600)) as demo:
-    title = gr.HTML("<h1 style='text-align:center;'>Virtual Data Analyst</h1>")
-    description = gr.HTML("""<p style='text-align:center;'>A helpful tool for data analysis, visualizations, regressions, and more.
-                          Upload a data file and chat with our virtual data analyst to get insights on your data set.
-                          Try a sample file to get started!</p>
-                          <ul style="margin:auto;max-width: 500px;">
-                          <li style="margin:0;line-height:1;">Currently accepts CSV, TSV, TXT, XLS, XLSX, XML, and JSON files.</li>
-                          <li style="margin:0;line-height:1;">Can run SQL queries, linear regressions, and analyze the results.</li>
-                          <li style="margin:0;line-height:1;">Can generate scatter plots, line charts, pie charts, bar graphs, histograms, time series, and more.
-                          New visualizations types added regularly.</li>
-                          </ul>
-                          <p style='text-align:center;'>This application is under active development. If you experience bugs with use,
-                          open a discussion in the community tab and I will respond.</p>""")
-    example_file_1 = gr.File(visible=False, value="samples/bank_marketing_campaign.csv")
-    example_file_2 = gr.File(visible=False, value="samples/online_retail_data.csv")
-    with gr.Row():
-        example_btn_1 = gr.Button(value="Try Me: bank_marketing_campaign.csv", elem_classes="example_btn", size="md", variant="primary")
-        example_btn_2 = gr.Button(value="Try Me: online_retail_data.csv", elem_classes="example_btn", size="md", variant="primary")
-    file_output = gr.File(label="Data File (CSV, TSV, TXT, XLS, XLSX, XML, JSON)", show_label=True, elem_classes="file_marker", file_types=['.csv','.xlsx','.txt','.json','.ndjson','.xml','.xls','.tsv'])
-    example_btn_1.click(fn=run_example, inputs=example_file_1, outputs=file_output)
-    example_btn_2.click(fn=run_example, inputs=example_file_2, outputs=file_output)
-    file_output.change(fn=example_display, inputs=file_output, outputs=[example_btn_1, example_btn_2])
-    @gr.render(inputs=file_output)
-    def data_options(filename, request: gr.Request):
-        print(filename)
-        message_dict[request.session_hash] = None
-        if filename:
-            process_message = process_upload(filename, request.session_hash)
-            gr.HTML(value=process_message[1], padding=False)
-            if process_message[0] == "success":
-                if "bank_marketing_campaign" in filename:
-                    example_questions = [
-                                            ["Describe the dataset"],
-                                            ["What levels of education have the highest and lowest average balance?"],
-                                            ["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
-                                            ["Can you generate a bar chart of education vs. average balance?"],
-                                            ["Can you generate a table of levels of education versus average balance, percent married, percent with a loan, and percent in default?"],
-                                            ["Can we predict the relationship between the number of contacts performed before this campaign and the average balance?"],
-                                            ["Can you plot the number of contacts performed before this campaign versus the duration and use balance as the size in a bubble chart?"]
-                                        ]
-                elif "online_retail_data" in filename:
-                    example_questions = [
-                                            ["Describe the dataset"],
-                                            ["What month had the highest revenue?"],
-                                            ["Is revenue higher in the morning or afternoon?"],
-                                            ["Can you generate a line graph of revenue per month?"],
-                                            ["Can you generate a table of revenue per month?"],
-                                            ["Can we predict how time of day affects transaction value in this data set?"],
-                                            ["Can you plot revenue per month with size being the number of units sold that month in a bubble chart?"]
-                                        ]
-                else:
-                    try:
-                        generated_examples = ast.literal_eval(example_question_generator(request.session_hash))
-                        example_questions = [
-                                                ["Describe the dataset"]
-                                            ]
-                        for example in generated_examples:
-                            example_questions.append([example])
-                    except:
-                        example_questions = [
-                                            ["Describe the dataset"],
-                                            ["List the columns in the dataset"],
-                                            ["What could this data be used for?"],
-                                        ]
-                parameters = gr.Textbox(visible=False, value=request.session_hash)
-                bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
-                chat = gr.ChatInterface(
-                                    fn=chatbot_with_fc,
-                                    type='messages',
-                                    chatbot=bot,
-                                    title="Chat with your data file",
-                                    concurrency_limit=None,
-                                    examples=example_questions,
-                                    additional_inputs=parameters
-                                    )
-    def process_upload(upload_value, session_hash):
-        if upload_value:
-            process_message = process_data_upload(upload_value, session_hash)
-        return process_message
     demo.unload(delete_db)
 ## Uncomment the line below to launch the chat app with UI
-demo.launch(debug=True, allowed_paths=["temp/"])

 from utils import TEMP_DIR, message_dict
 import gradio as gr
+import data_file, sql_db
 import os
 from getpass import getpass
 from dotenv import load_dotenv
 load_dotenv()
 def delete_db(req: gr.Request):
     import shutil
     dir_path = TEMP_DIR / str(req.session_hash)
         shutil.rmtree(dir_path)
         message_dict[req.session_hash] = None
+if "OPENAI_API_KEY" not in os.environ:
+    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key:")
 css= ".file_marker .large{min-height:50px !important;} .example_btn{max-width:300px;} .padding{padding:0;}"
+head = """<meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Virtual Data Analyst</title>
+    <!-- Tailwind CSS -->
+    <script src="https://cdn.tailwindcss.com"></script>
+    <!-- Google Fonts -->
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
+    <!-- Font Awesome -->
+    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
+    <!-- Custom Styles -->
+    <link rel="stylesheet" href="/gradio_api/file=assets/styles.css">
+    """
+theme = gr.themes.Base(primary_hue="sky", secondary_hue="slate",font=[gr.themes.GoogleFont("Inter"), "Inter", "sans-serif"]).set(
+    button_primary_background_fill="#3B82F6",
+    button_secondary_background_fill="#6B7280",
+)
+from pathlib import Path
+gr.set_static_paths(paths=[Path.cwd().absolute()/"assets"])
+with gr.Blocks(theme=theme, css=css, head=head, delete_cache=(3600,3600)) as demo:
+    header = gr.HTML("""
+                        <!-- Header -->
+                        <header class="max-w-4xl mx-auto mb-12 text-center">
+                            <h1 class="text-4xl font-bold text-gray-900 mb-4">Virtual Data Analyst</h1>
+                            <p class="text-lg text-gray-600 mb-6">
+                                A powerful tool for data analysis, visualizations, and insights
+                            </p>
+                        </header>
+                        <!-- Main Content -->
+                        <main class="max-w-4xl mx-auto">
+                            <!-- Features Preview -->
+                            <div class="mt-12 grid md:grid-cols-3 gap-6" style="margin-bottom:3px !important;">
+                                <div class="feature-card bg-white p-6 rounded-lg shadow-md">
+                                    <i class="feature-icon fas fa-chart-line text-primary text-2xl mb-4"></i>
+                                    <h3 class="font-semibold text-gray-800 mb-2">Advanced Analytics</h3>
+                                    <p class="text-gray-600 text-sm">Run SQL queries, perform regressions, and analyze results with ease</p>
+                                </div>
+                                <div class="feature-card bg-white p-6 rounded-lg shadow-md">
+                                    <i class="feature-icon fas fa-chart-pie text-primary text-2xl mb-4"></i>
+                                    <h3 class="font-semibold text-gray-800 mb-2">Rich Visualizations</h3>
+                                    <p class="text-gray-600 text-sm">Create scatter plots, line charts, pie charts, and more</p>
+                                </div>
+                                <div class="feature-card bg-white p-6 rounded-lg shadow-md">
+                                    <i class="feature-icon fas fa-magic text-primary text-2xl mb-4"></i>
+                                    <h3 class="font-semibold text-gray-800 mb-2">Automated Insights</h3>
+                                    <p class="text-gray-600 text-sm">Get instant insights and recommendations for your data</p>
+                                </div>
+                            </div>
+                        </main>""")
+    #with gr.Tab("Data File"):
+    data_file.demo.render()
+    #with gr.Tab("SQL Database"):
+    #    sql_db.demo.render()
+    footer = gr.HTML("""<!-- Footer -->
+        <footer class="max-w-4xl mx-auto mt-12 text-center text-gray-500 text-sm">
+            <p>This application is under active development. For bugs or feedback, please open a discussion in the community tab.</p>
+        </footer>""")
     demo.unload(delete_db)
 ## Uncomment the line below to launch the chat app with UI
+demo.launch(debug=True, allowed_paths=["temp/","assets/"])

assets/styles.css ADDED Viewed

	@@ -0,0 +1,168 @@

+/* Loading Animation */
+.loading-spinner {
+    display: none;
+    width: 50px;
+    height: 50px;
+    border: 5px solid #f3f3f3;
+    border-top: 5px solid #3B82F6;
+    border-radius: 50%;
+    animation: spin 1s linear infinite;
+    margin: 0 auto;
+}
+@keyframes spin {
+    0% { transform: rotate(0deg); }
+    100% { transform: rotate(360deg); }
+}
+/* File Upload Progress */
+.progress-bar {
+    width: 100%;
+    height: 6px;
+    background-color: #e5e7eb;
+    border-radius: 3px;
+    overflow: hidden;
+    display: none;
+    margin: 1rem auto;
+    max-width: 300px;
+}
+.progress-bar-fill {
+    height: 100%;
+    background-color: #3B82F6;
+    width: 0%;
+    transition: width 0.3s ease;
+}
+/* Tooltip */
+.tooltip {
+    position: relative;
+    display: inline-block;
+}
+.tooltip .tooltip-text {
+    visibility: hidden;
+    background-color: #1f2937;
+    color: white;
+    text-align: center;
+    padding: 8px 12px;
+    border-radius: 6px;
+    position: absolute;
+    z-index: 1;
+    bottom: 125%;
+    left: 50%;
+    transform: translateX(-50%);
+    opacity: 0;
+    transition: opacity 0.3s;
+    font-size: 0.875rem;
+    white-space: nowrap;
+    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
+}
+.tooltip:hover .tooltip-text {
+    visibility: visible;
+    opacity: 1;
+}
+/* File Type Icons */
+.file-type-icon {
+    font-size: 1.5rem;
+    margin-right: 0.5rem;
+    color: #3B82F6;
+}
+/* Success Animation */
+@keyframes checkmark {
+    0% { transform: scale(0); opacity: 0; }
+    50% { transform: scale(1.2); opacity: 0.8; }
+    100% { transform: scale(1); opacity: 1; }
+}
+.success-checkmark {
+    display: none;
+    color: #10B981;
+    animation: checkmark 0.5s ease-in-out forwards;
+}
+/* Sample Data Cards */
+.sample-btn {
+    transition: all 0.3s ease;
+    position: relative;
+    overflow: hidden;
+}
+.sample-btn::after {
+    content: '';
+    position: absolute;
+    top: 0;
+    left: 0;
+    width: 100%;
+    height: 100%;
+    background: linear-gradient(rgba(255,255,255,0.1), rgba(255,255,255,0));
+    transform: translateY(-100%);
+    transition: transform 0.3s ease;
+}
+.sample-btn:hover::after {
+    transform: translateY(0);
+}
+.sample-btn:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 8px 15px rgba(0,0,0,0.1);
+}
+/* Drop Zone Enhancements */
+.drop-zone {
+    transition: all 0.3s ease;
+    position: relative;
+    overflow: hidden;
+}
+.drop-zone::before {
+    content: '';
+    position: absolute;
+    top: 0;
+    left: 0;
+    right: 0;
+    bottom: 0;
+    border-radius: 8px;
+    border: 2px dashed #3B82F6;
+    opacity: 0;
+    transition: opacity 0.3s ease;
+}
+.drop-zone:hover::before {
+    opacity: 1;
+}
+/* File Info Card */
+#fileInfo {
+    background: linear-gradient(to right, #f8fafc, #f1f5f9);
+    border: 1px solid #e2e8f0;
+    transition: all 0.3s ease;
+}
+#fileInfo:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
+}
+/* Features Section */
+.feature-card {
+    transition: all 0.3s ease;
+}
+.feature-card:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 8px 15px rgba(0,0,0,0.1);
+}
+.feature-icon {
+    transition: all 0.3s ease;
+}
+.feature-card:hover .feature-icon {
+    transform: scale(1.1);
+    color: #2563eb;
+}

data_file.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import gradio as gr
+from functions import example_question_generator, chatbot_with_fc
+from data_sources import process_data_upload
+from utils import message_dict
+import ast
+def run_example(input):
+    return input
+def example_display(input):
+    if input == None:
+        display = True
+    else:
+        display = False
+    return [gr.update(visible=display),gr.update(visible=display),gr.update(visible=display)]
+with gr.Blocks() as demo:
+    description = gr.HTML("""
+                        <!-- Header -->
+                        <div class="max-w-4xl mx-auto mb-12 text-center">
+                            <div class="bg-blue-50 border border-blue-200 rounded-lg max-w-2xl mx-auto">
+                                <h2 class="font-semibold text-blue-800 ">
+                                    <i class="fas fa-info-circle mr-2"></i>Supported Files
+                                </h2>
+                                <div class="flex flex-wrap justify-center gap-3 pb-4 text-blue-700">
+                                    <span class="tooltip">
+                                        <i class="fas fa-file-csv mr-1"></i>CSV
+                                        <span class="tooltip-text">Comma-separated values</span>
+                                    </span>
+                                    <span class="tooltip">
+                                        <i class="fas fa-file-alt mr-1"></i>TSV
+                                        <span class="tooltip-text">Tab-separated values</span>
+                                    </span>
+                                    <span class="tooltip">
+                                        <i class="fas fa-file-alt mr-1"></i>TXT
+                                        <span class="tooltip-text">Text files</span>
+                                    </span>
+                                    <span class="tooltip">
+                                        <i class="fas fa-file-excel mr-1"></i>XLS/XLSX
+                                        <span class="tooltip-text">Excel spreadsheets</span>
+                                    </span>
+                                    <span class="tooltip">
+                                        <i class="fas fa-file-code mr-1"></i>XML
+                                        <span class="tooltip-text">XML documents</span>
+                                    </span>
+                                    <span class="tooltip">
+                                        <i class="fas fa-file-code mr-1"></i>JSON
+                                        <span class="tooltip-text">JSON data files</span>
+                                    </span>
+                                </div>
+                            </div>
+                        </div>
+                          """)
+    example_file_1 = gr.File(visible=False, value="samples/bank_marketing_campaign.csv")
+    example_file_2 = gr.File(visible=False, value="samples/online_retail_data.csv")
+    with gr.Row():
+        example_btn_1 = gr.Button(value="Try Me: bank_marketing_campaign.csv", elem_classes="example_btn sample-btn bg-gradient-to-r from-purple-500 to-indigo-600 text-white p-6 rounded-lg text-left hover:shadow-lg", size="md", variant="primary")
+        example_btn_2 = gr.Button(value="Try Me: online_retail_data.csv", elem_classes="example_btn sample-btn bg-gradient-to-r from-purple-500 to-indigo-600 text-white p-6 rounded-lg text-left hover:shadow-lg", size="md", variant="primary")
+    file_output = gr.File(label="Data File (CSV, TSV, TXT, XLS, XLSX, XML, JSON)", show_label=True, elem_classes="file_marker drop-zone border-2 border-dashed border-gray-300 rounded-lg hover:border-primary cursor-pointer bg-gray-50 hover:bg-blue-50 transition-colors duration-300", file_types=['.csv','.xlsx','.txt','.json','.ndjson','.xml','.xls','.tsv'])
+    example_btn_1.click(fn=run_example, inputs=example_file_1, outputs=file_output)
+    example_btn_2.click(fn=run_example, inputs=example_file_2, outputs=file_output)
+    file_output.change(fn=example_display, inputs=file_output, outputs=[example_btn_1, example_btn_2, description])
+    @gr.render(inputs=file_output)
+    def data_options(filename, request: gr.Request):
+        print(filename)
+        message_dict[request.session_hash] = None
+        if filename:
+            process_message = process_upload(filename, request.session_hash)
+            gr.HTML(value=process_message[1], padding=False)
+            if process_message[0] == "success":
+                if "bank_marketing_campaign" in filename:
+                    example_questions = [
+                                            ["Describe the dataset"],
+                                            ["What levels of education have the highest and lowest average balance?"],
+                                            ["What job is most and least common for a yes response from the individuals, not counting 'unknown'?"],
+                                            ["Can you generate a bar chart of education vs. average balance?"],
+                                            ["Can you generate a table of levels of education versus average balance, percent married, percent with a loan, and percent in default?"],
+                                            ["Can we predict the relationship between the number of contacts performed before this campaign and the average balance?"],
+                                            ["Can you plot the number of contacts performed before this campaign versus the duration and use balance as the size in a bubble chart?"]
+                                        ]
+                elif "online_retail_data" in filename:
+                    example_questions = [
+                                            ["Describe the dataset"],
+                                            ["What month had the highest revenue?"],
+                                            ["Is revenue higher in the morning or afternoon?"],
+                                            ["Can you generate a line graph of revenue per month?"],
+                                            ["Can you generate a table of revenue per month?"],
+                                            ["Can we predict how time of day affects transaction value in this data set?"],
+                                            ["Can you plot revenue per month with size being the number of units sold that month in a bubble chart?"]
+                                        ]
+                else:
+                    try:
+                        generated_examples = ast.literal_eval(example_question_generator(request.session_hash))
+                        example_questions = [
+                                                ["Describe the dataset"]
+                                            ]
+                        for example in generated_examples:
+                            example_questions.append([example])
+                    except:
+                        example_questions = [
+                                            ["Describe the dataset"],
+                                            ["List the columns in the dataset"],
+                                            ["What could this data be used for?"],
+                                        ]
+                parameters = gr.Textbox(visible=False, value=request.session_hash)
+                bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
+                chat = gr.ChatInterface(
+                                    fn=chatbot_with_fc,
+                                    type='messages',
+                                    chatbot=bot,
+                                    title="Chat with your data file",
+                                    concurrency_limit=None,
+                                    examples=example_questions,
+                                    additional_inputs=parameters
+                                    )
+    def process_upload(upload_value, session_hash):
+        if upload_value:
+            process_message = process_data_upload(upload_value, session_hash)
+        return process_message
+if __name__ == "__main__":
+    demo.launch()

data_sources/upload_file.py CHANGED Viewed

@@ -68,7 +68,7 @@ def process_data_upload(data_file, session_hash):
                 pattern = 'year|month|date|day|time'
                 if re.search(pattern, column.lower()):
                     try:
-                        df[column] = pd.to_datetime(df[column], infer_datetime_format=True)
                     except:
                         pass
             if df[column].dtype == 'object' and isinstance(df[column].iloc[0], list):

                 pattern = 'year|month|date|day|time'
                 if re.search(pattern, column.lower()):
                     try:
+                        df[column] = pd.to_datetime(df[column])
                     except:
                         pass
             if df[column].dtype == 'object' and isinstance(df[column].iloc[0], list):

functions/chat_functions.py CHANGED Viewed

@@ -38,7 +38,7 @@ def example_question_generator(session_hash):
 def chatbot_with_fc(message, history, session_hash):
     from functions import sqlite_query_func, table_generation_func, regression_func, scatter_chart_generation_func, \
         line_chart_generation_func,bar_chart_generation_func,pie_chart_generation_func,histogram_generation_func
-    import tools
     available_functions = {"sql_query_func": sqlite_query_func,"table_generation_func":table_generation_func,
                            "line_chart_generation_func":line_chart_generation_func,"bar_chart_generation_func":bar_chart_generation_func,
@@ -64,7 +64,7 @@ def chatbot_with_fc(message, history, session_hash):
         messages.append(ChatMessage.from_user(message))
         message_dict[session_hash] = messages
-    response = chat_generator.run(messages=message_dict[session_hash], generation_kwargs={"tools": tools.tools_call(session_hash)})
     while True:
         # if OpenAI response is a tool call
@@ -82,12 +82,11 @@ def chatbot_with_fc(message, history, session_hash):
                 print(function_name)
                 ## Append function response to the messages list using `ChatMessage.from_tool`
                 message_dict[session_hash].append(ChatMessage.from_tool(tool_result=function_response['reply'], origin=function_call))
-                response = chat_generator.run(messages=message_dict[session_hash], generation_kwargs={"tools": tools.tools_call(session_hash)})
         # Regular Conversation
         else:
             message_dict[session_hash].append(response["replies"][0])
             break
-    return response["replies"][0].text

 def chatbot_with_fc(message, history, session_hash):
     from functions import sqlite_query_func, table_generation_func, regression_func, scatter_chart_generation_func, \
         line_chart_generation_func,bar_chart_generation_func,pie_chart_generation_func,histogram_generation_func
+    import tools.tools as tools
     available_functions = {"sql_query_func": sqlite_query_func,"table_generation_func":table_generation_func,
                            "line_chart_generation_func":line_chart_generation_func,"bar_chart_generation_func":bar_chart_generation_func,
         messages.append(ChatMessage.from_user(message))
         message_dict[session_hash] = messages
+    response = chat_generator.run(messages=message_dict[session_hash], generation_kwargs={"tools": tools.data_file_tools_call(session_hash)})
     while True:
         # if OpenAI response is a tool call
                 print(function_name)
                 ## Append function response to the messages list using `ChatMessage.from_tool`
                 message_dict[session_hash].append(ChatMessage.from_tool(tool_result=function_response['reply'], origin=function_call))
+                response = chat_generator.run(messages=message_dict[session_hash], generation_kwargs={"tools": tools.data_file_tools_call(session_hash)})
         # Regular Conversation
         else:
             message_dict[session_hash].append(response["replies"][0])
             break
+    return response["replies"][0].text

tools.py → tools/chart_tools.py RENAMED Viewed

@@ -1,43 +1,5 @@
-import sqlite3
-from utils import TEMP_DIR
-def tools_call(session_hash):
-    dir_path = TEMP_DIR / str(session_hash)
-    connection = sqlite3.connect(f'{dir_path}/data_source.db')
-    print("Querying Database in Tools.py");
-    cur=connection.execute('select * from data_source')
-    columns = [i[0] for i in cur.description]
-    print("COLUMNS 2")
-    print(columns)
-    cur.close()
-    connection.close()
-    column_string = (columns[:625] + '..') if len(columns) > 625 else columns
-    return [
-        {
-            "type": "function",
-            "function": {
-                "name": "sql_query_func",
-                "description": f"""This is a tool useful to query a SQLite table called 'data_source' with the following Columns: {column_string}.
-                There may also be more columns in the table if the number of columns is too large to process.
-                This function also saves the results of the query to csv file called query.csv.""",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "queries": {
-                            "type": "array",
-                            "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
-                            "items": {
-                                "type": "string",
-                            }
-                        }
-                    },
-                    "required": ["queries"],
-                },
-            },
-        },
-        {
         "type": "function",
             "function": {
                 "name": "scatter_chart_generation_func",
@@ -84,9 +46,9 @@ def tools_call(session_hash):
                         "trendline": {
                             "type": "string",
                             "description": f"""An optional field to specify the type of plotly trendline we wish to use in the scatter plot.
-                             This trendline value can be one of ['ols','lowess','rolling','ewm','expanding'].
-                             Do not send any values outside of this array as the function will fail.
-                             Infer this from the user's message.""",
                             "items": {
                                 "type": "string",
                             }
@@ -103,9 +65,9 @@ def tools_call(session_hash):
                         "marginal_x": {
                             "type": "string",
                             "description": f"""The type of marginal distribution plot we'd like to specify for the plotly scatter plot for the x axis.
-                             This marginal_x value can be one of ['histogram','rug','box','violin'].
-                             Do not send any values outside of this array as the function will fail.
-                             Infer this from the user's message.""",
                             "items": {
                                 "type": "string",
                             }
@@ -113,9 +75,9 @@ def tools_call(session_hash):
                         "marginal_y": {
                             "type": "string",
                             "description": f"""The type of marginal distribution plot we'd like to specify for the plotly scatter plot for the y axis.
-                             This marginal_y value can be one of ['histogram','rug','box','violin'].
-                             Do not send any values outside of this array as the function will fail.
-                             Infer this from the user's message.""",
                             "items": {
                                 "type": "string",
                             }
@@ -376,7 +338,7 @@ def tools_call(session_hash):
                             "type": "string",
                             "description": f"""An optional value that represents the function of data to compute the function which is used on the optional y column.
                             This histfunc value can be one of ['avg','sum','count'].
-                             Do not send any values outside of this array as the function will fail.""",
                             "items": {
                                 "type": "string",
                             }
@@ -405,47 +367,5 @@ def tools_call(session_hash):
                 from the table_generation_func function in any way and always display the iframe fully to the user in the chat window.""",
                 "parameters": {},
             },
-        },
-        {
-        "type": "function",
-            "function": {
-                "name": "regression_func",
-                "description": f"""This a tool to calculate regressions on our SQLite table called 'data_source'.
-                We can run queries with our 'sql_query_func' function and they will be available to use in this function via the query.csv file that is generated.
-                Returns a dictionary of values that includes a regression_summary and a regression chart (which is an iframe displaying the
-                linear regression in chart form and should be shown to the user).""",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "independent_variables": {
-                            "type": "array",
-                            "description": f"""An array of strings that states the independent variables in our data set which should be column names in our query.csv file that is generated
-                            in the 'sql_query_func' function. This will allow us to identify the data to use for our independent variables.
-                            Infer this from the user's message.""",
-                            "items": {
-                                "type": "string",
-                            }
-                        },
-                        "dependent_variable": {
-                            "type": "string",
-                            "description": f"""A string that states the dependent variables in our data set which should be a column name in our query.csv file that is generated
-                            in the 'sql_query_func' function. This will allow us to identify the data to use for our dependent variables.
-                            Infer this from the user's message.""",
-                            "items": {
-                                "type": "string",
-                            }
-                        },
-                        "category": {
-                            "type": "string",
-                            "description": f"""An optional column in our query.csv file that contain a parameter that will define the category for the data.
-                            Do not send value if no category is needed or specified. This category must be present in our query.csv file to be valid.""",
-                            "items": {
-                                "type": "string",
-                            }
-                        }
-                    },
-                    "required": ["independent_variables","dependent_variable"],
-                },
-            },
         }
-    ]

+chart_tools = [
+    {
         "type": "function",
             "function": {
                 "name": "scatter_chart_generation_func",
                         "trendline": {
                             "type": "string",
                             "description": f"""An optional field to specify the type of plotly trendline we wish to use in the scatter plot.
+                                This trendline value can be one of ['ols','lowess','rolling','ewm','expanding'].
+                                Do not send any values outside of this array as the function will fail.
+                                Infer this from the user's message.""",
                             "items": {
                                 "type": "string",
                             }
                         "marginal_x": {
                             "type": "string",
                             "description": f"""The type of marginal distribution plot we'd like to specify for the plotly scatter plot for the x axis.
+                                This marginal_x value can be one of ['histogram','rug','box','violin'].
+                                Do not send any values outside of this array as the function will fail.
+                                Infer this from the user's message.""",
                             "items": {
                                 "type": "string",
                             }
                         "marginal_y": {
                             "type": "string",
                             "description": f"""The type of marginal distribution plot we'd like to specify for the plotly scatter plot for the y axis.
+                                This marginal_y value can be one of ['histogram','rug','box','violin'].
+                                Do not send any values outside of this array as the function will fail.
+                                Infer this from the user's message.""",
                             "items": {
                                 "type": "string",
                             }
                             "type": "string",
                             "description": f"""An optional value that represents the function of data to compute the function which is used on the optional y column.
                             This histfunc value can be one of ['avg','sum','count'].
+                                Do not send any values outside of this array as the function will fail.""",
                             "items": {
                                 "type": "string",
                             }
                 from the table_generation_func function in any way and always display the iframe fully to the user in the chat window.""",
                 "parameters": {},
             },
         }
+]

tools/stats_tools.py ADDED Viewed

	@@ -0,0 +1,44 @@

# Tool definitions for statistical analysis. Each entry is a dict of the form
# {"type": "function", "function": {...}} describing one callable tool and the
# JSON-schema of its parameters. Currently only `regression_func` is defined.
#
# NOTE: the continuation lines of the multi-line descriptions keep their
# original leading whitespace so the text sent at runtime is unchanged.
stats_tools = [
    {
        "type": "function",
        "function": {
            "name": "regression_func",
            "description": """This a tool to calculate regressions on our SQLite table called 'data_source'.
                We can run queries with our 'sql_query_func' function and they will be available to use in this function via the query.csv file that is generated.
                Returns a dictionary of values that includes a regression_summary and a regression chart (which is an iframe displaying the
                linear regression in chart form and should be shown to the user).""",
            "parameters": {
                "type": "object",
                "properties": {
                    # The only array-typed parameter, hence the only one that
                    # needs an "items" sub-schema.
                    "independent_variables": {
                        "type": "array",
                        "description": """An array of strings that states the independent variables in our data set which should be column names in our query.csv file that is generated
                            in the 'sql_query_func' function. This will allow us to identify the data to use for our independent variables.
                            Infer this from the user's message.""",
                        "items": {
                            "type": "string",
                        },
                    },
                    # Plain string parameters: "items" only applies to arrays,
                    # so it is intentionally absent here.
                    "dependent_variable": {
                        "type": "string",
                        "description": """A string that states the dependent variables in our data set which should be a column name in our query.csv file that is generated
                            in the 'sql_query_func' function. This will allow us to identify the data to use for our dependent variables.
                            Infer this from the user's message.""",
                    },
                    "category": {
                        "type": "string",
                        "description": """An optional column in our query.csv file that contain a parameter that will define the category for the data.
                            Do not send value if no category is needed or specified. This category must be present in our query.csv file to be valid.""",
                    },
                },
                "required": ["independent_variables", "dependent_variable"],
            },
        },
    },
]

tools/tools.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import sqlite3
+from .stats_tools import stats_tools
+from .chart_tools import chart_tools
+from utils import TEMP_DIR
def data_file_tools_call(session_hash):
    """Build the tool definitions for a session backed by an uploaded data file.

    Opens the SQLite database at ``TEMP_DIR/<session_hash>/data_source.db``,
    reads the column names of the ``data_source`` table from the cursor
    description and embeds them in the description of the ``sql_query_func``
    tool. The shared chart and statistics tool definitions are then added.

    Args:
        session_hash: Session identifier; it is passed through ``str()`` and
            used as the directory name under ``TEMP_DIR``.

    Returns:
        list: The ``sql_query_func`` definition followed by every entry of
        ``chart_tools`` and then ``stats_tools``.

    Raises:
        sqlite3.Error: If the database cannot be opened or the
            ``data_source`` table cannot be queried.
    """
    from contextlib import closing

    # Upper bound on how many column names are embedded in the description.
    max_columns = 625

    dir_path = TEMP_DIR / str(session_hash)

    # `closing` guarantees the cursor and the connection are released even
    # when the query raises (e.g. the table does not exist yet).
    with closing(sqlite3.connect(f'{dir_path}/data_source.db')) as connection:
        print("Querying Database in Tools.py")
        with closing(connection.execute('select * from data_source')) as cur:
            columns = [col[0] for col in cur.description]
    print("COLUMNS 2")
    print(columns)

    # `columns` is a list, so the truncation marker must be appended as a list
    # element; concatenating a str to a list raises TypeError.
    if len(columns) > max_columns:
        column_string = columns[:max_columns] + ['..']
    else:
        column_string = columns

    tools_calls = [
        {
            "type": "function",
            "function": {
                "name": "sql_query_func",
                "description": f"""This is a tool useful to query a SQLite table called 'data_source' with the following Columns: {column_string}.
                There may also be more columns in the table if the number of columns is too large to process.
                This function also saves the results of the query to csv file called query.csv.""",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "queries": {
                            "type": "array",
                            "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
                            "items": {
                                "type": "string",
                            }
                        }
                    },
                    "required": ["queries"],
                },
            },
        },
    ]

    # Merge (not nest) the shared definitions so the result stays a flat list.
    tools_calls.extend(chart_tools)
    tools_calls.extend(stats_tools)
    return tools_calls
def graphql_tools_call(sessions_hash, columns=None):
    """Build the tool definitions for a session backed by a GraphQL endpoint.

    The result is a flat list holding the ``graphql_query_func`` definition
    followed by every entry of ``chart_tools`` and then ``stats_tools``.

    Args:
        sessions_hash: Session identifier. It is accepted for signature parity
            with ``data_file_tools_call`` but is not used by this function.
        columns: Optional sequence of column/field names to advertise in the
            tool description. When omitted or empty the description does not
            list any columns.
            NOTE(review): the original body interpolated a ``column_string``
            that was never defined in this function (NameError on every
            call); this optional argument is the backward-compatible way to
            supply it - confirm where the GraphQL schema should come from.

    Returns:
        list: The tool definitions described above.
    """
    # Same cap on advertised column names as the data-file variant.
    max_columns = 625

    if columns:
        names = list(columns)
        if len(names) > max_columns:
            column_string = names[:max_columns] + ['..']
        else:
            column_string = names
        description = f"""This is a tool useful to query a GraphQL endpoint with the following Columns: {column_string}.
                There may also be more columns in the table if the number of columns is too large to process.
                This function also saves the results of the query to csv file called query.csv."""
    else:
        description = """This is a tool useful to query a GraphQL endpoint.
                This function also saves the results of the query to csv file called query.csv."""

    tools_calls = [
        {
            "type": "function",
            "function": {
                "name": "graphql_query_func",
                "description": description,
                "parameters": {
                    "type": "object",
                    "properties": {
                        "queries": {
                            "type": "array",
                            "description": "The graphQL query to use in the search. Infer this from the user's message. It should be a question or a statement",
                            "items": {
                                "type": "string",
                            }
                        }
                    },
                    "required": ["queries"],
                },
            },
        },
    ]

    # `extend` keeps the list flat; `append` would insert the two shared lists
    # as nested elements instead of individual tool definitions.
    tools_calls.extend(chart_tools)
    tools_calls.extend(stats_tools)

    # Return the assembled list (a bare `return` hands the caller None).
    return tools_calls