Spaces:
Sleeping
Sleeping
import os | |
import pandas as pd | |
from pandasai import Agent, SmartDataframe | |
from typing import Tuple | |
from PIL import Image | |
from pandasai.llm import HuggingFaceTextGen | |
from dotenv import load_dotenv | |
from langchain_groq.chat_models import ChatGroq | |
# --- Module-level configuration -------------------------------------------
# Load environment variables (the Groq API key) from a local env file.
load_dotenv("Groq.txt")
# NOTE(review): this reads "GROQ_API_KEY", but the functions below call
# os.getenv("GROQ_API") when constructing ChatGroq — confirm which env var
# is intended; as written, Groq_Token is loaded but never used in this file.
Groq_Token = os.environ["GROQ_API_KEY"]
# Friendly model name -> Groq model identifier, used by load_agent /
# load_smart_df / ask_question via the `name` / `model_name` arguments.
models = {"mixtral": "mixtral-8x7b-32768", "llama": "llama2-70b-4096", "gemma": "gemma-7b-it"}
# Hugging Face read token — only needed by the commented-out
# HuggingFaceTextGen inference path; may be None if HF_READ is unset.
hf_token = os.getenv("HF_READ")
def preprocess_and_load_df(path: str) -> pd.DataFrame:
    """Read the CSV at *path* and convert its "Timestamp" column to datetimes.

    Raises KeyError if the file has no "Timestamp" column.
    """
    frame = pd.read_csv(path)
    frame["Timestamp"] = pd.to_datetime(frame["Timestamp"])
    return frame
def load_agent(df: pd.DataFrame, context: str, inference_server: str, name="mixtral") -> Agent:
    """Build a pandasai Agent over *df*, driven by a Groq-hosted chat model.

    Parameters
    ----------
    df : dataframe the agent will answer questions about.
    context : message seeded into the agent's conversation history.
    inference_server : unused by the current Groq path (kept for
        compatibility with the earlier HuggingFaceTextGen setup).
    name : key into the module-level `models` mapping.
    """
    # NOTE(review): reads env var "GROQ_API", while module setup loads
    # "GROQ_API_KEY" — confirm which variable is intended.
    model_id = models[name]
    llm = ChatGroq(model=model_id, api_key=os.getenv("GROQ_API"), temperature=0.1)
    agent_config = {"llm": llm, "enable_cache": False, "options": {"wait_for_model": True}}
    agent = Agent(df, config=agent_config)
    agent.add_message(context)
    return agent
def load_smart_df(df: pd.DataFrame, inference_server: str, name="mixtral") -> SmartDataframe:
    """Wrap *df* in a pandasai SmartDataframe backed by a Groq chat model.

    `inference_server` is unused by the current Groq path (kept for
    compatibility with the earlier HuggingFaceTextGen setup).
    """
    # NOTE(review): reads env var "GROQ_API", while module setup loads
    # "GROQ_API_KEY" — confirm which variable is intended.
    groq_llm = ChatGroq(model=models[name], api_key=os.getenv("GROQ_API"), temperature=0.1)
    smart = SmartDataframe(df, config={"llm": groq_llm, "max_retries": 5, "enable_cache": False})
    return smart
def get_from_user(prompt):
    """Package raw user input as a chat-history message dict."""
    return dict(role="user", content=prompt)
def ask_agent(agent: "Agent", prompt: str) -> dict:
    """Send *prompt* to the pandasai agent and package its reply as a
    chat-message dict.

    Fix: the original annotated the return type as Tuple[str, str, str],
    but the function has always returned a dict — the annotation now
    matches the actual behavior (callers already consume a dict, see
    show_response / decorate_with_code).

    Returns a dict with keys: "role", "content" (the agent's reply),
    "gen_code" / "ex_code" (code the agent generated / executed), and
    "last_prompt" (the final prompt sent to the LLM).
    """
    response = agent.chat(prompt)
    return {
        "role": "assistant",
        "content": response,
        "gen_code": agent.last_code_generated,
        "ex_code": agent.last_code_executed,
        "last_prompt": agent.last_prompt,
    }
def decorate_with_code(response: dict) -> str:
    """Format a response dict as markdown/HTML: a collapsible "Generated
    Code" section followed by a collapsible "Prompt" section.

    Note: the prompt <details> block is intentionally left unclosed; the
    caller (show_response) appends the closing tag.
    """
    code_section = (
        "<details>\n"
        "<summary>Generated Code</summary>\n"
        "```python\n"
        f"{response['gen_code']}\n"
        "```\n"
        "</details>\n"
    )
    prompt_section = (
        "<details>\n"
        "<summary>Prompt</summary>\n"
        f"{response['last_prompt']}\n"
    )
    return code_section + prompt_section
def show_response(st, response):
    """Render a chat message in Streamlit.

    If response["content"] is a path PIL can open, show it as an image
    (preceded by the generated-code dropdown when present); otherwise fall
    back to rendering the content as markdown.
    """
    role = response["role"]
    content = response["content"]
    with st.chat_message(role):
        try:
            # Treat content as an image path first; any failure (not a
            # path, not an image, rendering error) drops to the fallback.
            img = Image.open(content)
            if "gen_code" in response:
                st.markdown(decorate_with_code(response), unsafe_allow_html=True)
            st.image(img)
        except Exception:
            if "gen_code" in response:
                body = decorate_with_code(response) + f"""</details>
{content}"""
            else:
                body = content
            st.markdown(body, unsafe_allow_html=True)
def ask_question(model_name: str, question: str) -> dict:
    """Answer *question* about "Data.csv" by asking a Groq LLM to complete a
    pandas code template, then executing the returned code locally.

    Returns a chat-message dict: "content" holds the raw LLM reply text,
    while "gen_code"/"ex_code" hold the stitched script that was executed.
    """
    # NOTE(review): reads env var "GROQ_API", while module setup loads
    # "GROQ_API_KEY" — confirm which variable is intended.
    llm = ChatGroq(model=models[model_name], api_key=os.getenv("GROQ_API"), temperature=0.1)
    # Load a small sample only so the column dtypes can be shown to the LLM.
    df_check = pd.read_csv("Data.csv")
    df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
    df_check = df_check.head(5)
    # f-string expressions cannot contain backslashes (pre-3.12), hence the
    # newline is bound to a variable first.
    new_line = "\n"
    # Code template the LLM must complete; the dataframe's dtypes are
    # embedded as comments so the model knows the schema.
    template = f"""```python
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv("Data.csv")
df["Timestamp"] = pd.to_datetime(df["Timestamp"])
# df.dtypes
{new_line.join(map(lambda x: '# '+x, str(df_check.dtypes).split(new_line)))}
# {question.strip()}
# <your code here>
```
"""
    # Instructions to the LLM: it must store its result in a global `answer`.
    query = f"""I have a pandas dataframe data of PM2.5 and PM10.
* Frequency of data is daily.
* `pollution` generally means `PM2.5`.
* Save result in a variable `answer` and make it global.
* If result is a plot, save it and save path in `answer`. Example: `answer='plot.png'`
* If result is not a plot, save it as a string in `answer`. Example: `answer='The city is Mumbai'`
Complete the following code.
{template}
"""
    answer = llm.invoke(query)
    # Stitch the fixed preamble and the LLM's completion (both extracted from
    # their ```python fenced blocks) into a single runnable script.
    code = f"""
{template.split("```python")[1].split("```")[0]}
{answer.content.split("```python")[1].split("```")[0]}
"""
    # update variable `answer` when code is executed
    # SECURITY: exec() of LLM-generated code runs arbitrary code on this
    # machine — sandbox or validate before use in production.
    # NOTE(review): exec() at function scope cannot rebind the local
    # `answer`; the generated code's `global answer` writes to module
    # globals, and the dict below returns the raw LLM reply text
    # (answer.content), not the executed result — confirm this is intended.
    exec(code)
    return {"role": "assistant", "content": answer.content, "gen_code": code, "ex_code": code, "last_prompt": question}