NEXAS's picture
Update main.py
7331cc5 verified
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.agents import Tool
from langchain_experimental.utilities import PythonREPL # type: ignore
from langchain_community.chat_models import ChatOllama
import autopep8 # type: ignore
import pandas as pd
import os
from dotenv import load_dotenv
# Let python-dotenv populate the process environment first, then look up the
# Groq API key; the value is None when the variable is not defined.
load_dotenv()
groq_api_key = os.environ.get("GROQ_API_KEY")
class datachat():
    """Answer natural-language questions about a CSV file with LLM-written pandas code.

    Each call to ``data_ops`` loads the current data, asks the LLM for a short
    pandas snippet, executes that snippet, and saves the resulting frame to
    ``./output.csv`` so that the next call continues from the saved state.
    """

    def __init__(self, file_path):
        """Create the Groq chat model and store the prompt template.

        Args:
            file_path: Path of the CSV file read when ``./output.csv`` does
                not exist yet.
        """
        # Stream generated tokens to stdout while the model is answering.
        callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
        self.llm = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768",callback_manager=callback_manager)
        # Prompt template; ``{columns}`` and ``{input}`` are filled in by
        # ``data_ops``.
        self.instruction = """
As a python coder create a pythonic response for the query with reference to the columns in my pandas dataframe{columns}.
Instruction:
Do not write the whole script just give me a pythonic response for this query and do not extend more than asked. Assume a dataframe variable df_temp.
Enclose the generated code in Markdown code embedding format. Do not generate sample output. Answer the question and provide a one-line explanation and stop.
example:
```python
output = df['region'].unique()
```
question: {input}
answer:
"""
        self.file_path = file_path

    def extract_code(self, response):
        """Return the body of the first ```python fenced block in ``response``.

        Args:
            response: Raw text produced by the LLM.

        Returns:
            The code lines found between the first line containing
            ```python and the next line that is exactly ``` (ignoring
            surrounding whitespace), each prefixed with a newline. An empty
            string is returned when no such block is found.
        """
        inside_block = False
        code_lines = []
        for line in response.splitlines():
            if not inside_block:
                # Skip everything up to and including the opening fence.
                if '```python' in line:
                    inside_block = True
                continue
            if line.strip() == '```':
                # Closing fence: only the first block is extracted.
                break
            if '```' not in line:
                code_lines.append(line)
        # Every line is prefixed (not separated) by a newline, so non-empty
        # results start with '\n'.
        return "".join("\n" + code_line for code_line in code_lines)

    def _run_generated_code(self, gencode, df):
        """Execute generated code against ``df`` and return the resulting frame.

        The snippet runs in a single explicit namespace exposing ``pd``,
        ``df`` and ``df_temp`` (the latter two both bound to ``df``). Using
        one dict for globals and locals lets the snippet rebind ``df_temp``
        and lets lambdas/comprehensions inside it see every name; a bare
        ``exec(code)`` inside a function can do neither.

        Args:
            gencode: Python source to execute.
            df: DataFrame the snippet operates on.

        Returns:
            The object bound to ``df_temp`` after execution when it is a
            DataFrame; otherwise ``df`` (which the snippet may have mutated
            in place).
        """
        namespace = {"pd": pd, "df": df, "df_temp": df}
        # SECURITY: this executes LLM-generated code with full interpreter
        # privileges. Only use with trusted prompts/models, or sandbox it.
        exec(gencode, namespace)
        result = namespace.get("df_temp")
        # If the snippet replaced df_temp with something that is not a
        # DataFrame, keep the loaded frame so that saving still works.
        return result if isinstance(result, pd.DataFrame) else df

    def data_ops(self, query):
        """Apply a natural-language ``query`` to the data and persist the result.

        Reads ``./output.csv`` when it exists (otherwise ``self.file_path``),
        asks the LLM for pandas code answering ``query``, executes that code,
        writes the resulting frame back to ``./output.csv`` and returns it.

        Args:
            query: The question or instruction to turn into pandas code.

        Returns:
            The DataFrame bound to ``df_temp`` after the generated code ran.
        """
        # Continue from the previously saved result when there is one.
        if os.path.isfile('./output.csv'):
            df = pd.read_csv('./output.csv')
        else:
            df = pd.read_csv(self.file_path)
        columns = df.columns.tolist()
        prompt = PromptTemplate.from_template(self.instruction)
        agent = LLMChain(llm=self.llm, prompt=prompt)
        response = agent.invoke(input={"columns": columns, "input": query})
        gencode = autopep8.fix_code(self.extract_code(response['text']))
        df_temp = self._run_generated_code(gencode, df)
        df_temp.to_csv('./output.csv', index=False)
        return df_temp