Spaces:

NEXAS
/

Data-Engineers-Helper

Runtime error

App Files Files Community

NEXAS committed on Apr 7

Commit

b660019

•

1 Parent(s): e565936

Upload 3 files

Browse files

Files changed (3) hide show

main.py +74 -0
requirements.txt +10 -0
ui.py +49 -0

main.py ADDED Viewed

	@@ -0,0 +1,74 @@

+from langchain_groq import ChatGroq
+from langchain.prompts import PromptTemplate
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.agents import Tool
+from langchain_experimental.utilities import PythonREPL # type: ignore
+from langchain_community.chat_models import ChatOllama
+import autopep8 # type: ignore
+import pandas as pd
+import os
+from dotenv import load_dotenv
+load_dotenv()
+groq_api_key = os.getenv("GROQ_API_KEY")
class datachat():
    """Turn natural-language requests into pandas code and apply them to a CSV.

    Each call to :meth:`data_ops` loads the current dataset, asks the LLM for
    a pandas snippet, executes the snippet and saves the resulting frame to
    ``OUTPUT_PATH`` so that the next request continues from that result.
    """

    # Working copy of the dataset; once it exists it takes precedence over
    # ``file_path`` so that successive requests build on each other.
    OUTPUT_PATH = './data/output.csv'

    def __init__(self,file_path):
        """Create the LLM client and remember the source CSV.

        Args:
            file_path: Path of the CSV that is read while no working copy
                exists at ``OUTPUT_PATH``.
        """
        callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
        self.llm = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768",callback_manager=callback_manager)
        # Prompt template; ``{columns}`` and ``{input}`` are filled in by data_ops.
        self.instruction = """
        As a python coder create a pythonic response for the query with reference to the columns in my pandas dataframe{columns}.
        Instruction:
        Do not write the whole script just give me a pythonic response for this query and do not extend more than asked. Assume a dataframe variable df_temp.
        Enclose the generated code in Markdown code embedding format. Do not generate sample output. Answer the question and provide a one-line explanation and stop.
        example:
        ```python
        output = df['region'].unique()
        ```
        question: {input}
        answer:
        """
        self.file_path=file_path

    def extract_code(self,response):
        """Return the body of the first fenced code block in ``response``.

        The opening fence may be ```python / ```py in any letter case, or a
        bare ```; the block ends at the first line that is exactly ```.
        An unterminated block runs to the end of the text. Common leading
        indentation is removed so the snippet can be executed directly.

        Args:
            response: Raw text returned by the LLM.

        Returns:
            The extracted code, or an empty string when no fence is found.
        """
        import textwrap  # local import keeps this block self-contained

        inside_fence = False
        code_lines = []
        for line in response.splitlines():
            marker = line.strip()
            if not inside_fence:
                if marker == '```' or '```py' in marker.lower():
                    inside_fence = True
                continue
            if marker == '```':
                break
            # Lines that merely contain a fence marker are not code.
            if '```' not in line:
                code_lines.append(line)
        return textwrap.dedent('\n'.join(code_lines)).strip('\n')

    def _run_generated_code(self,code,df):
        """Execute ``code`` against ``df`` and return the resulting frame.

        The snippet runs in its own namespace where both ``df_temp`` and
        ``df`` refer to the frame and ``pd`` is pandas. Reading ``df_temp``
        back from that namespace is what makes a rebinding such as
        ``df_temp = df_temp[...]`` take effect: ``exec`` cannot rebind the
        local variables of the function that calls it.

        Args:
            code: Python source to execute; blank code is a no-op.
            df: DataFrame the code operates on (may be mutated in place).

        Returns:
            The DataFrame bound to ``df_temp`` after execution, or ``df``
            when the snippet left ``df_temp`` bound to something else.
        """
        if not code.strip():
            return df
        namespace = {"pd": pd, "df": df, "df_temp": df}
        # SECURITY: this executes LLM-generated code with the full privileges
        # of the process. Only run it in an environment where that is acceptable.
        exec(code, namespace)
        result = namespace.get("df_temp")
        return result if isinstance(result, pd.DataFrame) else df

    def data_ops(self,query):
        """Apply a natural-language request to the dataset and persist the result.

        Args:
            query: The user's request in plain language.

        Returns:
            The DataFrame after the generated code has run; the same data is
            written to ``OUTPUT_PATH``.

        Raises:
            Exception: Whatever the generated code raises is propagated.
        """
        if os.path.isfile(self.OUTPUT_PATH):
            source = self.OUTPUT_PATH
        else:
            source = self.file_path
        df = pd.read_csv(source)
        columns = df.columns.tolist()
        prompt = PromptTemplate.from_template(self.instruction)
        agent = LLMChain(llm=self.llm,prompt=prompt)
        response = agent.invoke(input={"columns":columns,"input":query})
        gencode = autopep8.fix_code(self.extract_code(response['text']))
        df_temp = self._run_generated_code(gencode, df)
        # The output directory may not exist on a fresh checkout or deployment.
        os.makedirs(os.path.dirname(self.OUTPUT_PATH) or '.', exist_ok=True)
        df_temp.to_csv(self.OUTPUT_PATH,index=False)
        return df_temp

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+#insert python library
+langchain
+autopep8
+langchain_experimental
+langchain-community
+python-dotenv
+langchain-groq
+unstructured[md]
+pandas
+streamlit

ui.py ADDED Viewed

	@@ -0,0 +1,49 @@

"""Streamlit front end for the data engineering chatbot.

The user uploads a CSV, then types requests; each request is passed to
``datachat.data_ops`` and the returned frame is shown in the chat.
"""
import os

import streamlit as st
from main import datachat as dc

# Project-relative location of the input CSV. An absolute path on a
# developer machine does not exist where the app is deployed.
data_file = os.path.join("data", "input", "world_population_data.csv")
# Working copy maintained by datachat.data_ops; the fallback literal is the
# path data_ops writes to.
output_file = getattr(dc, "OUTPUT_PATH", "./data/output.csv")

uploaded_file = st.file_uploader("Choose a file")
# Save the upload once per distinct file. Streamlit re-runs this script on
# every interaction, so an unconditional write would repeat on each rerun.
if uploaded_file is not None:
    upload_signature = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("upload_signature") != upload_signature:
        os.makedirs(os.path.dirname(data_file), exist_ok=True)
        with open(data_file, "wb") as f:
            # getvalue() returns the whole buffer regardless of the current
            # read position, unlike read() on an already-consumed stream.
            f.write(uploaded_file.getvalue())
        # A working copy left over from an earlier dataset would otherwise
        # take precedence over the file that was just uploaded.
        if os.path.isfile(output_file):
            os.remove(output_file)
        st.session_state.upload_signature = upload_signature

chat_object = dc(file_path=data_file)
st.title("Data Engineering Chatbot")

# Nothing to query until some dataset is available on disk.
if not (os.path.isfile(data_file) or os.path.isfile(output_file)):
    st.info("Upload a CSV file to start chatting with your data.")
    st.stop()

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        if message["role"] == 'user':
            st.markdown(message["content"])
        elif message["role"] == 'assistant':
            st.dataframe(message["content"],hide_index=True)

# React to user input
if prompt := st.chat_input("What is up?"):
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    response = chat_object.data_ops(prompt)
    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        st.dataframe(response,hide_index=True)
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": response})

# Example prompts:
# split the salary and define 10% as HRA, 70% as Basic and 20% as Allowance.
# mask the SSN columns as *********1234
# convert the hire date column from string to date time and format it as DD-MON-YYYY
# combine the first name and last name columns