File size: 2,596 Bytes
b660019
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7331cc5
 
b660019
 
 
 
 
 
 
 
 
 
 
289a8e7
b660019
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.agents import Tool
from langchain_experimental.utilities import PythonREPL # type: ignore
from langchain_community.chat_models import ChatOllama
import autopep8 # type: ignore
import pandas as pd
import os

from dotenv import load_dotenv
# Load variables from a local .env file (if one exists) into the process
# environment before any of them are read below.
load_dotenv()

# Groq API key taken from the environment; os.getenv returns None when
# GROQ_API_KEY is unset. NOTE(review): this name is not referenced anywhere
# else in the visible code - presumably ChatGroq reads the environment
# variable itself; confirm before relying on or removing it.
groq_api_key = os.getenv("GROQ_API_KEY")

class datachat():
    """Answer natural-language queries about a CSV file via LLM-generated pandas code.

    Each call to :meth:`data_ops` asks the LLM for a Python snippet, executes
    it against the current dataframe and persists the result to
    ``OUTPUT_PATH`` so that the next query starts from the modified data.
    """

    # Working copy of the data; read in preference to the original file once
    # it exists, and rewritten after every query.
    OUTPUT_PATH = './output.csv'

    def __init__(self,file_path):
        """Create the LLM client and the prompt template.

        Args:
            file_path: Location of the source CSV, passed to
                ``pandas.read_csv`` when no working copy exists yet.
        """
        callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

        self.llm = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768",callback_manager=callback_manager)
        # Prompt template; ``{columns}`` and ``{input}`` are filled in by
        # data_ops for every query.
        self.instruction = """
        As a python coder create a pythonic response for the query with reference to the columns in my pandas dataframe{columns}.
        Instruction:
        Do not write the whole script just give me a pythonic response for this query and do not extend more than asked. Assume a dataframe variable df_temp.
        Enclose the generated code in Markdown code embedding format. Do not generate sample output. Answer the question and provide a one-line explanation and stop.

        example:
        ```python
        output = df['region'].unique()
        ```
                
        question: {input}

        answer:

        """
        self.file_path=file_path

    def extract_code(self, response: str) -> str:
        """Return the body of the first ```` ```python ```` fenced block in *response*.

        Collection starts after the first line containing ```` ```python ````
        and stops at the first line that is exactly a closing fence. Lines
        that contain a fence marker are never included.

        Args:
            response: Raw text returned by the LLM.

        Returns:
            The code lines joined with newlines and prefixed with a newline,
            with common leading indentation removed; ``""`` when no code
            lines were found.
        """
        # Local import so this method does not depend on a module-level
        # import that the rest of the file does not have.
        import textwrap

        code_lines = []
        inside_block = False
        for line in response.splitlines():
            if not inside_block:
                if '```python' in line:
                    inside_block = True
                continue
            if line.strip() == '```':
                break
            if '```' not in line:
                code_lines.append(line)

        if not code_lines:
            return ""
        # A fenced block nested in a list or quote arrives uniformly indented,
        # which would raise IndentationError when executed; strip that margin.
        return "\n" + textwrap.dedent("\n".join(code_lines))

    @staticmethod
    def _run_generated_code(code: str, df: pd.DataFrame) -> pd.DataFrame:
        """Execute *code* with ``df``/``df_temp`` bound to *df* and return ``df_temp``.

        The snippet runs in one explicit namespace dict. ``exec`` inside a
        function cannot rebind that function's local variables, so without
        this a statement such as ``df_temp = df_temp.dropna()`` would be
        silently discarded. A single dict also lets functions, lambdas and
        comprehensions defined by the snippet see ``df_temp``.

        Returns:
            The dataframe bound to ``df_temp`` after execution, or *df* itself
            when the snippet left ``df_temp`` bound to something that is not
            a dataframe.
        """
        namespace = {"pd": pd, "df": df, "df_temp": df}
        # SECURITY: this executes model-generated code with full interpreter
        # privileges. Only use with trusted prompts/data or inside a sandbox.
        exec(code, namespace)
        result = namespace.get("df_temp")
        return result if isinstance(result, pd.DataFrame) else df

    def data_ops(self, query: str) -> pd.DataFrame:
        """Apply a natural-language *query* to the data and persist the result.

        Reads the working copy at ``OUTPUT_PATH`` if it exists, otherwise the
        original file; asks the LLM for code; executes it; writes the
        resulting dataframe back to ``OUTPUT_PATH``.

        Args:
            query: The user's question or instruction about the data.

        Returns:
            The dataframe after the generated code has run.
        """
        if os.path.isfile(self.OUTPUT_PATH):
            df = pd.read_csv(self.OUTPUT_PATH)
        else:
            df = pd.read_csv(self.file_path)

        prompt = PromptTemplate.from_template(self.instruction)
        agent = LLMChain(llm=self.llm, prompt=prompt)
        response = agent.invoke(input={"columns": df.columns.tolist(), "input": query})

        gencode = autopep8.fix_code(self.extract_code(response['text']))
        df_temp = self._run_generated_code(gencode, df)
        df_temp.to_csv(self.OUTPUT_PATH, index=False)
        return df_temp