NEXAS committed
Commit 182219d • 1 Parent(s): 496ca86

Upload 22 files

DV-AGENT/Student.csv ADDED
The diff for this file is too large to render. See raw diff
 
DV-AGENT/Student_copy.csv ADDED
The diff for this file is too large to render. See raw diff
 
DV-AGENT/__pycache__/agent_chain.cpython-310.pyc ADDED
Binary file (2.11 kB). View file
 
DV-AGENT/__pycache__/default_text.cpython-310.pyc ADDED
Binary file (193 Bytes). View file
 
DV-AGENT/__pycache__/generate_plot.cpython-310.pyc ADDED
Binary file (4.05 kB). View file
 
DV-AGENT/__pycache__/markup.cpython-310.pyc ADDED
Binary file (2.6 kB). View file
 
DV-AGENT/__pycache__/memory.cpython-310.pyc ADDED
Binary file (459 Bytes). View file
 
DV-AGENT/__pycache__/modules.cpython-310.pyc ADDED
Binary file (1.08 kB). View file
 
DV-AGENT/__pycache__/tools.cpython-310.pyc ADDED
Binary file (3.23 kB). View file
 
DV-AGENT/agent_chain.py ADDED
@@ -0,0 +1,74 @@
+ from langchain.agents import AgentExecutor, create_react_agent
+ from langchain.prompts import PromptTemplate
+ from memory import memory
+ from tools import zeroshot_tools
+ import pandas as pd
+ import os
+ import streamlit as st
+ #from langchain_community.llms import HuggingFaceHub
+ from typing import List
+ from langchain_groq import ChatGroq
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ groq_api_key = os.getenv("GROQ_API_KEY")
+
+ llm1 = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768")
+
+ def read_first_3_rows():
+     dataset_path = "dataset.csv"
+     try:
+         df = pd.read_csv(dataset_path)
+         first_3_rows = df.head(3).to_string(index=False)
+     except FileNotFoundError:
+         first_3_rows = "Error: Dataset file not found."
+
+     return first_3_rows
+
+
+ def get_agent_chain():
+     dataset_first_3_rows = read_first_3_rows()
+
+     prompt = PromptTemplate(
+         input_variables=['agent_scratchpad', 'chat_history', 'input', 'tool_names', 'tools'],
+         template=(
+             f"""
+ You are a helpful assistant that can help users explore a dataset.
+ First 3 rows of the dataset:
+ {dataset_first_3_rows}
+ ===="""
+             """
+ TOOLS:
+ ------
+ You have access to the following tools:
+
+ {tools}
+
+ To use a tool, please use the following format:
+
+ Thought: Do I need to use a tool? Yes
+ Action: the action to take, should be one of [{tool_names}]
+ Action Input: the input to the action
+ Observation: the result of the action
+
+ When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:
+
+ Thought: Do I need to use a tool? No
+ Final Answer: [your response here]
+
+ Begin!
+
+ New input: {input}
+ {agent_scratchpad}"""
+         )
+     )
+
+     conversational_agent_llm = llm1
+     #conversational_agent_llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=temperature, streaming=True)
+     conversational_agent = create_react_agent(conversational_agent_llm, zeroshot_tools, prompt)
+     room_selection_chain = AgentExecutor(agent=conversational_agent, tools=zeroshot_tools, verbose=True, memory=memory, handle_parsing_errors=True, max_iterations=4)
+     return room_selection_chain
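
Note: a minimal usage sketch (not part of this commit) of the executor returned by get_agent_chain(); the sample question is illustrative and assumes dataset.csv and GROQ_API_KEY are in place, as above.

from agent_chain import get_agent_chain

chain = get_agent_chain()
# invoke() takes the "input" key declared in the prompt and returns a dict with "output"
result = chain.invoke({"input": "How many rows are in the dataset?"})
print(result["output"])
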
DV-AGENT/app.py ADDED
@@ -0,0 +1,105 @@
+ import streamlit as st
+ from streamlit_option_menu import option_menu
+ from memory import memory_storage
+ from agent_chain import get_agent_chain
+ from default_text import default_text4
+ from generate_plot import generate_plot, retry_generate_plot
+ from markup import app_intro, how_use_intro
+ from modules import replace_default_dataset, save_uploaded_dataset
+ import pandas as pd
+ import os
+
+ if 'error' not in st.session_state:
+     st.session_state['error'] = []
+
+ def tab1():
+     col1, col2 = st.columns([1, 2])
+     with col1:
+         st.image("image.jpg", use_column_width=True)
+     with col2:
+         st.markdown(app_intro(), unsafe_allow_html=True)
+     st.markdown(how_use_intro(), unsafe_allow_html=True)
+
+     #st.markdown("<p style='font-size: 14px; color: #777;'>Disclaimer: This app is a proof-of-concept and may not be suitable for real-world decisions. During the Hackathon period, usage information is recorded using LangSmith</p>", unsafe_allow_html=True)
+
+
+ def tab2():
+     dataset_option = st.radio("Select Dataset Option", ("Default", "Upload"))
+
+     if dataset_option == "Default":
+         if st.button("Use Default Dataset"):
+             replace_default_dataset()
+     else:
+         uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
+         if uploaded_file:
+             save_uploaded_dataset(uploaded_file)
+
+     st.header("🗣️ Chat")
+
+     for i, msg in enumerate(memory_storage.messages):
+         name = "user" if i % 2 == 0 else "assistant"
+         st.chat_message(name).markdown(msg.content)
+
+     if user_input := st.chat_input("User Input"):
+         with st.chat_message("user"):
+             st.markdown(user_input)
+
+         with st.spinner("Generating Response..."):
+             with st.chat_message("assistant"):
+                 zeroshot_agent_chain = get_agent_chain()
+                 response = zeroshot_agent_chain({"input": user_input})
+
+                 answer = response['output']
+                 st.markdown(answer)
+
+     if st.sidebar.button("Clear Chat History"):
+         memory_storage.clear()
+
+
+ def tab4():
+     try:
+         df = pd.read_csv("dataset.csv")
+
+         st.header("Dataset Content")
+         st.dataframe(df)
+
+     except FileNotFoundError:
+         st.error("File 'dataset.csv' not found in the current directory.")
+
+     except pd.errors.EmptyDataError:
+         st.error("File 'dataset.csv' is empty.")
+
+     except pd.errors.ParserError:
+         st.error("File 'dataset.csv' could not be parsed as a CSV file.")
+
+ def main():
+     st.set_page_config(page_title="NaturalViz", page_icon=":memo:", layout="wide")
+
+     #os.environ['LANGCHAIN_TRACING_V2'] = "true"
+     #os.environ['LANGCHAIN_API_KEY'] = st.secrets['LANGCHAIN_API_KEY']
+
+     tabs = ["Intro", "Chat", "View Dataset"]
+
+     with st.sidebar:
+         current_tab = option_menu("Select a Tab", tabs, menu_icon="cast")
+
+     tab_functions = {
+         "Intro": tab1,
+         "Chat": tab2,
+         "View Dataset": tab4,
+     }
+
+     if current_tab in tab_functions:
+         tab_functions[current_tab]()
+
+ if __name__ == "__main__":
+     main()
DV-AGENT/app2.py ADDED
@@ -0,0 +1,21 @@
+ import altair as alt
+ import pandas as pd
+ import streamlit as st
+
+ # Read the dataset
+ df = pd.read_csv('dataset.csv')
+
+ # Calculate the sum of calories
+ calories_sum = df['calories'].sum()
+
+ # Create a dataframe with the sum of calories
+ calories_df = pd.DataFrame({'calories': [calories_sum]}, index=['Total'])
+
+ # Create the chart
+ chart = alt.Chart(calories_df).mark_bar().encode(
+     x='calories:Q',
+     y='sum(calories):Q'
+ )
+
+ # Display the chart
+ st.altair_chart(chart, use_container_width=True)
DV-AGENT/default_text.py ADDED
@@ -0,0 +1 @@
+ default_text4 = """Total number of rows"""
DV-AGENT/generate_plot.py ADDED
@@ -0,0 +1,121 @@
+ from langchain.prompts import PromptTemplate
+ from langchain.chains import LLMChain
+ import pandas as pd
+ import os
+ import streamlit as st
+ #from langchain_community.llms import HuggingFaceHub
+ from typing import List
+ from langchain_groq import ChatGroq
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ groq_api_key = os.getenv("GROQ_API_KEY")
+
+ llm1 = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768")
+
+
+ def read_first_3_rows():
+     dataset_path = "dataset.csv"
+     try:
+         df = pd.read_csv(dataset_path)
+         first_3_rows = df.head(3).to_string(index=False)
+     except FileNotFoundError:
+         first_3_rows = "Error: Dataset file not found."
+
+     return first_3_rows
+
+ def generate_plot(question):
+     dataset_first_3_rows = read_first_3_rows()
+
+     GENERATE_PLOT_TEMPLATE_PREFIX = """You are a highly skilled visualization assistant that can modify a provided visualization code based on a set of instructions. You MUST return a full program. DO NOT include any preamble text. Do not include explanations or prose.
+ First 3 rows of the dataset:"""
+
+     DATASET = f"{dataset_first_3_rows}"
+
+     GENERATE_PLOT_TEMPLATE_SUFFIX = """
+ Question:
+ {question}
+
+ # comment Example for protein count of different products:
+
+ import altair as alt
+ import pandas as pd
+ import streamlit as st
+
+ # comment Read the dataset
+ df = pd.read_csv('dataset.csv')
+
+ # comment Calculate the protein count of different products
+ product_protein = df.groupby('name')['protein'].sum().reset_index()
+
+ # comment Create the chart
+ chart = alt.Chart(product_protein).mark_bar().encode(
+     x=alt.X('name:N', title='Product Name'),
+     y=alt.Y('protein:Q', title='Protein Count')
+ )
+
+ # comment Display the chart
+ st.altair_chart(chart, use_container_width=True)
+
+ """
+
+     template = GENERATE_PLOT_TEMPLATE_PREFIX + DATASET + GENERATE_PLOT_TEMPLATE_SUFFIX
+     prompt = PromptTemplate(template=template, input_variables=['question'])
+     llm_chain = LLMChain(prompt=prompt, llm=llm1)
+     response = llm_chain.predict(question=question)
+     return response
+
+
+ def retry_generate_plot(question, error_message, error_code):
+     dataset_first_3_rows = read_first_3_rows()
+     RETRY_TEMPLATE_PREFIX = """You are a highly skilled visualization assistant that can modify a provided visualization code based on a set of instructions. You MUST return a full program. DO NOT include any preamble text. Do not include explanations or prose.
+ The current code attempts to create a visualization of dataset.csv to meet the objective, but it has encountered the given error. Provide corrected code. If you add comments or explanations, they should start with #.
+
+ #Example:
+ import altair as alt
+ import pandas as pd
+ import streamlit as st
+
+ # Read the dataset
+ df = pd.read_csv('dataset.csv')
+
+ # Calculate the total social media followers for each region
+ region_followers = df.groupby('Region of Focus')[['X (Twitter) Follower #', 'Facebook Follower #', 'Instagram Follower #', 'Threads Follower #', 'YouTube Subscriber #', 'TikTok Subscriber #']].sum().reset_index()
+
+ # Melt the dataframe to convert it into long format
+ region_followers = region_followers.melt(id_vars='Region of Focus', var_name='Social Media', value_name='Total Followers')
+
+ # Create the chart
+ chart = alt.Chart(region_followers).mark_bar().encode(
+     x=alt.X('Region of Focus:N', title='Region of Focus'),
+     y=alt.Y('Total Followers:Q', title='Total Followers'),
+     color=alt.Color('Social Media:N', title='Social Media')
+ )
+
+ # Display the chart
+ st.altair_chart(chart, use_container_width=True)
+
+ First 3 rows of the dataset:"""
+     DATASET = f"{dataset_first_3_rows}"
+
+     RETRY_TEMPLATE_SUFFIX = """
+ Objective: {question}
+
+ Current Code:
+ {error_code}
+
+ Error Message:
+ {error_message}
+
+ Corrected Code:
+ """
+
+     retry_template = RETRY_TEMPLATE_PREFIX + DATASET + RETRY_TEMPLATE_SUFFIX
+     retry_prompt = PromptTemplate(template=retry_template, input_variables=["question", "error_message", "error_code"])
+
+     llm_chain = LLMChain(prompt=retry_prompt, llm=llm1)
+     response = llm_chain.predict(question=question, error_message=error_message, error_code=error_code)
+     return response
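
For context, a hypothetical sketch (not in this commit) of how generate_plot and retry_generate_plot could be chained: run the generated Streamlit/Altair code and fall back to a corrected version if it raises. exec() of model output is shown for illustration only and is unsafe outside a sandbox; the question text is made up.

from generate_plot import generate_plot, retry_generate_plot

question = "Average final grade per school"  # illustrative query against dataset.csv
code = generate_plot(question)
try:
    exec(code)  # the model is instructed to return a full Streamlit/Altair program
except Exception as err:
    # feed the failing code and error back in for one correction attempt
    fixed_code = retry_generate_plot(question, str(err), code)
    exec(fixed_code)
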
DV-AGENT/image.jpg ADDED
DV-AGENT/markup.py ADDED
@@ -0,0 +1,37 @@
+ def app_intro():
+     return """
+ <div style='text-align: left; padding: 20px; border-radius: 10px;'>
+ <h1 style='text-align: center; color: #333;'>Student Performance Query System</h1>
+ <h2 style='text-align: center; color: #666;'>Query Student Performance Data with NLP</h2>
+
+ <p style='font-size: 18px; color: #444;'>Welcome to the Student Performance Query System! This application enables you to query student performance data using natural language processing (NLP) techniques. Originally designed with OpenAI functions, it has now been fully converted to use Mixtral-8x7B-Instruct.</p>
+
+ <h3 style='color: #737373;'>Key Features:</h3>
+ <ul>
+ <li>Translate natural language queries into actionable insights! ✨</li>
+ <li>Engage in conversational queries to explore student performance data.</li>
+ <li>Powered by Mixtral-8x7B-Instruct.</li>
+ </ul>
+
+ <h3 style='color: #737373;'>In the Black Box: ⚙️</h3>
+ <p style='font-size: 16px;'>This application uses the <a href="https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1">Mixtral-8x7B-Instruct-v0.1 LLM</a> to interpret your queries and provide insights into student performance data.</p>
+
+ <h3 style='color: #737373;'>Get Started:</h3>
+ <p style='font-size: 16px;'>Ask questions about student performance in plain language and let the system handle the rest! 🎓 The bot is here to assist you. Dataset used: <a href="https://archive.ics.uci.edu/dataset/320/student+performance">Student Performance Data</a>.</p>
+ </div>
+ """
+
+
+ def how_use_intro():
+     return """
+ <div style='text-align: left; padding: 20px; border-radius: 10px;'>
+ <h2 style='text-align: center; color: #333;'>Getting Started with the Student Performance Query System! 📊🔍</h2>
+ <br>
+ <h3 style='color: #777;'>How to Use:</h3>
+ <ul style='font-size: 16px; color: #555;'>
+ <li><b>Query Student Performance:</b> Enter your questions about student performance in the provided input box using natural language. Click "Submit" to get answers and insights.</li>
+ <li><b>Interact with the Chatbot:</b> Engage in conversational queries with the chatbot. Ask about specific students, subjects, or trends to explore the dataset interactively.</li>
+ </ul>
+ <br>
+ </div>
+ """
DV-AGENT/memory.py ADDED
@@ -0,0 +1,5 @@
+ from langchain.memory import ConversationBufferWindowMemory
+ from langchain_community.chat_message_histories import StreamlitChatMessageHistory
+
+ memory_storage = StreamlitChatMessageHistory(key="chat_messages")
+ memory = ConversationBufferWindowMemory(memory_key="chat_history", human_prefix="User", chat_memory=memory_storage, k=3)
DV-AGENT/modules.py ADDED
@@ -0,0 +1,29 @@
+ import streamlit as st
+ import pandas as pd
+ import os
+ import shutil
+
+ def replace_default_dataset():
+     # Restore the default dataset by copying the backup (Student_copy.csv) over Student.csv
+     dataset_backup_path = "Student_copy.csv"
+     dataset_path = "Student.csv"
+
+     if os.path.exists(dataset_backup_path):
+         shutil.copy(dataset_backup_path, dataset_path)
+         st.success("Default dataset applied successfully.")
+     else:
+         st.warning("Default dataset backup not found.")
+
+ def save_uploaded_dataset(uploaded_file):
+     # Save the uploaded dataset file, replacing Student.csv
+     dataset_path = "Student.csv"
+     uploaded_file.seek(0)
+
+     try:
+         df = pd.read_csv(uploaded_file)
+         df.to_csv(dataset_path, index=False)
+         st.success("Dataset uploaded and applied successfully.")
+     except pd.errors.EmptyDataError:
+         st.warning("Uploaded dataset is empty.")
+     except Exception as e:
+         st.error(f"An error occurred: {e}")
DV-AGENT/requirements.txt ADDED
@@ -0,0 +1,11 @@
+ langchain
+ langchain_experimental
+ matplotlib
+ altair
+ streamlit
+ streamlit_option_menu
+ plotly
+ tabulate
+ langchain_community
+ langchain-groq
+ python-dotenv
DV-AGENT/tools.py ADDED
@@ -0,0 +1,111 @@
+ from langchain_experimental.agents.agent_toolkits import create_csv_agent
+ from langchain.agents.agent_types import AgentType
+ from langchain.agents import Tool
+ from langchain.chains import LLMMathChain
+ import streamlit as st
+ import pandas as pd
+ import plotly.express as px
+ import os
+ #from langchain_community.llms import HuggingFaceHub
+ from typing import List
+ from langchain_groq import ChatGroq
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ groq_api_key = os.getenv("GROQ_API_KEY")
+
+ llm1 = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768")
+
+ def csv_agent(string):
+     agent = create_csv_agent(
+         llm1,
+         "dataset.csv",
+         verbose=True,
+         agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+     )
+
+     ans = agent.invoke(string)
+     return ans
+
+ #def csv_agent(string):
+ #    agent = create_csv_agent(
+ #        ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
+ #        "dataset.csv",
+ #        verbose=True,
+ #        agent_type=AgentType.OPENAI_FUNCTIONS,
+ #    )
+
+ #    ans = agent.run(string)
+ #    return ans
+
+ def math_tool(string):
+     #llm = OpenAI(temperature=0)
+     llm = llm1
+     llm_math_chain = LLMMathChain(llm=llm, verbose=True)
+     res = llm_math_chain.run(string)
+     return res
+
+ def load_data():
+     df = pd.read_csv("dataset.csv", encoding="utf-8")
+     return df
+
+ def plot_visualization(selected_option, x_column, y_column):
+     df = load_data()
+
+     if df.empty:
+         return st.warning("The data is empty.")
+
+     if x_column not in df.columns or y_column not in df.columns:
+         return st.warning("Invalid columns selected.")
+
+     if selected_option == "bar":
+         fig = px.bar(df, x=x_column, y=y_column, title=f"{x_column} vs {y_column}")
+     elif selected_option == "scatter":
+         fig = px.scatter(df, x=x_column, y=y_column, title=f"{x_column} vs {y_column}")
+     elif selected_option == "line":
+         fig = px.line(df, x=x_column, y=y_column, title=f"{x_column} vs {y_column}")
+     elif selected_option == "scatter_matrix":
+         fig = px.scatter_matrix(df, dimensions=[x_column, y_column], title=f"Scatter Matrix: {x_column} vs {y_column}")
+     elif selected_option == "box":
+         fig = px.box(df, x=x_column, y=y_column, title=f"Box Plot: {x_column} vs {y_column}")
+     elif selected_option == "heatmap":
+         fig = px.imshow(df.pivot_table(index=x_column, columns=y_column, aggfunc='size').fillna(0),
+                         labels=dict(x=x_column, y=y_column),
+                         title=f"Heatmap: {x_column} vs {y_column}")
+     else:
+         return st.warning("Please select a valid plot type.")
+
+     return st.plotly_chart(fig)
+
+
+ def parsing_input(string):
+     selected_option, x_column, y_column = string.split(",")
+     return plot_visualization(selected_option, x_column, y_column)
+
+
+ zeroshot_tools = [
+     Tool(
+         name="answer_qa",
+         func=csv_agent,
+         description="Use this tool to query the dataset. Input to this tool should be a standalone question. Include the correct column names that are needed. Example input format: How many rows are there in the dataset, which name has the highest calories",
+         #return_direct=True,
+     ),
+     Tool(
+         name="create_simple_plot",
+         func=parsing_input,
+         description="""Use this tool if the user asks to create x vs y plots. Input must be a comma-separated list of: selected_option, x_column, y_column
+ Example inputs:
+ bar,calories,name
+
+ Allowed options are: bar, scatter, line, scatter_matrix, box, heatmap
+ You can decide the plot type, x column, and y column based on the user input.
+ """,
+         #return_direct=True,
+     ),
+     Tool(
+         name="Calculator",
+         func=math_tool,
+         description="Useful when you need to do calculations. Example input: 21^0.43"
+     ),
+ ]
DV-AGENT/v2/.env ADDED
@@ -0,0 +1 @@
+ GROQ_API_KEY="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"
DV-AGENT/v2/app.py ADDED
@@ -0,0 +1,146 @@
+ from langchain_groq import ChatGroq
+ from langchain_experimental.agents import create_pandas_dataframe_agent
+ import pandas as pd
+ import json
+ import streamlit as st
+ from dotenv import load_dotenv
+ import os
+ load_dotenv()
+
+ groq_api_key = os.getenv("GROQ_API_KEY")
+ chat_model = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768")
+
+ def csv_tool(filename: str):
+     df = pd.read_csv(filename)
+     return create_pandas_dataframe_agent(chat_model, df, verbose=True)
+
+ def ask_agent(agent, query):
+     """
+     Query an agent and return the response as a string.
+
+     Args:
+         agent: The agent to query.
+         query: The query to ask the agent.
+
+     Returns:
+         The response from the agent as a string.
+     """
+     # Prepare the prompt with query guidelines and formatting
+     prompt = (
+         """
+ Let's decode the way to respond to the queries. The responses depend on the type of information requested in the query.
+
+ 1. If the query requires a table, format your answer like this:
+ {"table": {"columns": ["column1", "column2", ...], "data": [[value1, value2, ...], [value1, value2, ...], ...]}}
+
+ 2. For a bar chart, respond like this:
+ {"bar": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}
+
+ 3. If a line chart is more appropriate, your reply should look like this:
+ {"line": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}
+
+ Note: We only accommodate two types of charts: "bar" and "line".
+
+ 4. For a plain question that doesn't need a chart or table, your response should be:
+ {"answer": "Your answer goes here"}
+
+ For example:
+ {"answer": "The Product with the highest Orders is '15143Exfo'"}
+
+ 5. If the answer is not known or available, respond with:
+ {"answer": "I do not know."}
+
+ Return all output as a string. Remember to encase all strings in the "columns" list and data list in double quotes.
+ For example: {"columns": ["Products", "Orders"], "data": [["51993Masc", 191], ["49631Foun", 152]]}
+
+ Now, let's tackle the query step by step. Here's the query for you to work on:
+ """
+         + query
+     )
+
+     # Run the prompt through the agent and capture the response.
+     response = agent.run(prompt)
+
+     # Return the response converted to a string.
+     return str(response)
+
+ def decode_response(response: str) -> dict:
+     """This function converts the string response from the model to a dictionary object.
+
+     Args:
+         response (str): response from the model
+
+     Returns:
+         dict: dictionary with response data
+     """
+     return json.loads(response)
+
+ def write_answer(response_dict: dict):
+     """
+     Write a response from an agent to the Streamlit app.
+
+     Args:
+         response_dict: The response from the agent.
+
+     Returns:
+         None.
+     """
+
+     # Check if the response is an answer.
+     if "answer" in response_dict:
+         st.write(response_dict["answer"])
+
+     # Check if the response is a bar chart.
+     if "bar" in response_dict:
+         data = response_dict["bar"]
+         try:
+             df_data = {
+                 col: [x[i] if isinstance(x, list) else x for x in data['data']]
+                 for i, col in enumerate(data['columns'])
+             }
+             df = pd.DataFrame(df_data)
+             df.set_index("Products", inplace=True)
+             st.bar_chart(df)
+         except ValueError:
+             print(f"Couldn't create DataFrame from data: {data}")
+
+     # Check if the response is a line chart.
+     if "line" in response_dict:
+         data = response_dict["line"]
+         try:
+             df_data = {col: [x[i] for x in data['data']] for i, col in enumerate(data['columns'])}
+             df = pd.DataFrame(df_data)
+             df.set_index("Products", inplace=True)
+             st.line_chart(df)
+         except ValueError:
+             print(f"Couldn't create DataFrame from data: {data}")
+
+     # Check if the response is a table.
+     if "table" in response_dict:
+         data = response_dict["table"]
+         df = pd.DataFrame(data["data"], columns=data["columns"])
+         st.table(df)
+
+ st.set_page_config(page_title="👨‍💻 Talk with your CSV")
+ st.title("👨‍💻 Talk with your CSV")
+
+ st.write("Please upload your CSV file below.")
+
+ data = st.file_uploader("Upload a CSV", type="csv")
+
+ query = st.text_area("Send a Message")
+
+ if st.button("Submit Query", type="primary"):
+     # Create an agent from the CSV file.
+     agent = csv_tool(data)
+
+     # Query the agent.
+     response = ask_agent(agent=agent, query=query)
+
+     # Decode the response.
+     decoded_response = decode_response(response)
+
+     # Write the response to the Streamlit app.
+     write_answer(decoded_response)
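
As a rough illustration (not part of the commit), the expected round trip for a chart reply: a JSON string in one of the formats described in the prompt is decoded and rendered. The figures below are made up and mirror the example embedded in the prompt; this assumes decode_response and write_answer from this file and a running Streamlit session.

sample = '{"bar": {"columns": ["Products", "Orders"], "data": [["51993Masc", 191], ["49631Foun", 152]]}}'
write_answer(decode_response(sample))  # renders a bar chart indexed by "Products"
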