NEXAS committed on
Commit
b660019
1 Parent(s): e565936

Upload 3 files

Browse files
Files changed (3) hide show
  1. main.py +74 -0
  2. requirements.txt +10 -0
  3. ui.py +49 -0
main.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_groq import ChatGroq
2
+ from langchain.prompts import PromptTemplate
3
+ from langchain.chains import LLMChain
4
+ from langchain.prompts import PromptTemplate
5
+ from langchain.callbacks.manager import CallbackManager
6
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
7
+ from langchain.agents import Tool
8
+ from langchain_experimental.utilities import PythonREPL # type: ignore
9
+ from langchain_community.chat_models import ChatOllama
10
+ import autopep8 # type: ignore
11
+ import pandas as pd
12
+ import os
13
+
14
+ from dotenv import load_dotenv
15
+ load_dotenv()
16
+
17
+ groq_api_key = os.getenv("GROQ_API_KEY")
18
+
19
class datachat():
    """Turn natural-language requests into pandas code and apply it to a CSV.

    Each call to :meth:`data_ops` loads the current data (the previously
    saved output if it exists, otherwise the original input file), asks the
    LLM for a pandas snippet, executes that snippet against the frame and
    persists the result so the next request builds on it.
    """

    # Where the transformed frame is persisted between requests.
    _OUTPUT_PATH = './data/output.csv'

    def __init__(self, file_path):
        """Create the LLM client and the prompt text.

        Args:
            file_path: Path of the CSV file read when no saved output exists.
        """
        callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

        self.llm = ChatGroq(
            temperature=0,
            model_name="mixtral-8x7b-32768",
            callback_manager=callback_manager,
        )
        # Prompt template; ``{columns}`` and ``{input}`` are filled in by
        # ``data_ops`` on every request.
        self.instruction = """
        As a python coder create a pythonic response for the query with reference to the columns in my pandas dataframe{columns}.
        Instruction:
        Do not write the whole script just give me a pythonic response for this query and do not extend more than asked. Assume a dataframe variable df_temp.
        Enclose the generated code in Markdown code embedding format. Do not generate sample output. Answer the question and provide a one-line explanation and stop.

        example:
        ```python
        output = df['region'].unique()
        ```

        question: {input}

        answer:

        """
        self.file_path = file_path

    def extract_code(self, response):
        """Return the contents of the first ```python fenced block.

        Args:
            response: Full text returned by the LLM.

        Returns:
            The lines between the first line containing ```python and the
            next line that is exactly a closing fence, each prefixed with a
            newline. Returns an empty string when no such block is present.
            If the closing fence is missing, everything after the opening
            fence is returned.
        """
        collected = []
        inside_block = False
        for line in response.splitlines():
            if not inside_block:
                # Still looking for the opening fence; skip everything else.
                if '```python' in line:
                    inside_block = True
                continue
            if line.strip() == '```':
                # Closing fence: only the first block is of interest.
                break
            if '```' not in line:
                collected.append(line)
        return ''.join('\n' + line for line in collected)

    def _run_generated_code(self, gencode, df):
        """Execute generated code against *df* and return the resulting frame.

        The code runs in a dedicated namespace that exposes ``pd``, ``df`` and
        ``df_temp`` (both names refer to the same frame, since the prompt
        mentions both). Using an explicit namespace means a rebinding such as
        ``df_temp = df_temp[...]`` is visible afterwards, and that lambdas and
        comprehensions inside the generated code can see its own names.

        Args:
            gencode: Python source to execute.
            df: DataFrame the code operates on.

        Returns:
            Whatever ``df_temp`` refers to after execution, or *df* if the
            generated code removed that name.
        """
        # SECURITY: this executes LLM-generated code with full interpreter
        # privileges. Only run this against trusted models/prompts, ideally
        # inside a sandboxed process.
        namespace = {"pd": pd, "df": df, "df_temp": df}
        exec(gencode, namespace)
        return namespace.get("df_temp", df)

    def data_ops(self, query):
        """Apply the transformation described by *query* to the current data.

        Args:
            query: Natural-language description of the desired operation.

        Returns:
            The object bound to ``df_temp`` after running the generated code;
            it is also written to ``./data/output.csv``.
        """
        # Continue from the previous result when one has been saved.
        if os.path.isfile(self._OUTPUT_PATH):
            df = pd.read_csv(self._OUTPUT_PATH)
        else:
            df = pd.read_csv(self.file_path)

        columns = df.columns.tolist()
        prompt = PromptTemplate.from_template(self.instruction)
        agent = LLMChain(llm=self.llm, prompt=prompt)
        response = agent.invoke(input={"columns": columns, "input": query})

        gencode = autopep8.fix_code(self.extract_code(response['text']))
        df_temp = self._run_generated_code(gencode, df)

        # The output directory may not exist on a fresh checkout.
        os.makedirs(os.path.dirname(self._OUTPUT_PATH), exist_ok=True)
        df_temp.to_csv(self._OUTPUT_PATH, index=False)
        return df_temp
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ #insert python library
2
+ langchain
3
+ autopep8
4
+ langchain_experimental
5
+ langchain-community
6
+ python-dotenv
7
+ langchain-groq
8
+ unstructured[md]
9
+ pandas
10
+ streamlit
ui.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import streamlit as st
from main import datachat as dc

# Destination for the uploaded CSV. The DATACHAT_DATA_FILE environment
# variable overrides the built-in default, which is specific to one machine.
data_file = os.environ.get(
    "DATACHAT_DATA_FILE",
    r"C:\Users\Naresh Kumar Lahajal\Desktop\DE-LLM\data\input\world_population_data.csv",
)

uploaded_file = st.file_uploader("Choose a file")
# Write the uploaded file to the configured location.
if uploaded_file is not None:
    target_dir = os.path.dirname(data_file)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(data_file, "wb") as f:
        # getvalue() returns the whole buffer regardless of the current read
        # position, so a script rerun cannot write an empty file.
        f.write(uploaded_file.getvalue())

chat_object = dc(file_path=data_file)

st.title("Data Engineering Chatbot")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun.
# User entries hold text; assistant entries hold the returned data frame.
for message in st.session_state.messages:
    if message["role"] == 'user':
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    elif message["role"] == 'assistant':
        with st.chat_message(message["role"]):
            st.dataframe(message["content"], hide_index=True)

# React to user input
if prompt := st.chat_input("What is up?"):
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    try:
        response = chat_object.data_ops(prompt)
    except FileNotFoundError as exc:
        # Typically no CSV has been uploaded yet; report it in the chat
        # instead of crashing the app with a traceback.
        with st.chat_message("assistant"):
            st.error(f"Data file not found: {exc}")
    else:
        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            st.dataframe(response, hide_index=True)
        # Add assistant response to chat history
        st.session_state.messages.append({"role": "assistant", "content": response})

# Example prompts:
# split the salary and define 10% as HRA, 70% as Basic and 20% as Allowance.
# mask the SSN columns as *********1234
# convert the hire date column from string to date time and format it as DD-MON-YYYY
# combine the first name and last name columns