Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| import requests | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from io import StringIO | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| API_KEY = os.getenv("GEMINI_API_KEY") | |
| API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent" | |
| def get_df_info(df): | |
| return f"""Columns: {list(df.columns)} | |
| Sample data: | |
| {df.head(3).to_string()} | |
| Dtypes: {df.dtypes.to_dict()}""" | |
| def call_gemini(prompt): | |
| headers = {"Content-Type": "application/json", "x-goog-api-key": API_KEY} | |
| data = {"contents": [{"parts": [{"text": prompt}]}]} | |
| response = requests.post(API_URL, headers=headers, json=data) | |
| return response.json()["candidates"][0]["content"]["parts"][0]["text"] | |
| def query_csv(df, user_query): | |
| df_info = get_df_info(df) | |
| prompt = f"""You are a data analyst. Given this dataframe info: | |
| {df_info} | |
| User question: {user_query} | |
| Write Python code using pandas to answer this. The dataframe is called 'df'. | |
| IMPORTANT: Always assign your final answer to a variable called 'result'. | |
| Whenever possible, show both text result AND a plot for better visualization. | |
| Use matplotlib for plots (don't call plt.show()). | |
| Only output Python code, no markdown, no explanation.""" | |
| code = call_gemini(prompt) | |
| code = code.replace("```python", "").replace("```", "").strip() | |
| local_vars = {"df": df.copy(), "pd": pd, "plt": plt} | |
| fig = None | |
| old_stdout = sys.stdout | |
| sys.stdout = StringIO() | |
| try: | |
| plt.close('all') | |
| exec(code, local_vars) | |
| printed = sys.stdout.getvalue() | |
| sys.stdout = old_stdout | |
| if plt.get_fignums(): | |
| plt.gcf().savefig('plot.png', dpi=100, bbox_inches='tight') | |
| fig = 'plot.png' | |
| plt.close('all') | |
| result = local_vars.get("result", None) | |
| if result is None and printed: | |
| result = printed.strip() | |
| if isinstance(result, (pd.DataFrame, pd.Series)): | |
| result = result.to_string() | |
| return str(result) if result else "Query executed.", fig, code | |
| except Exception as e: | |
| sys.stdout = old_stdout | |
| plt.close('all') | |
| return f"Error: {e}", None, code | |