# NOTE: "Spaces: Running / Running" was Hugging Face Spaces UI status-banner
# residue captured during export — not part of the application code.
# ---------------------------------------------------------------------------------
# Main application: load the model, generate prompts, and explain the data
# ---------------------------------------------------------------------------------
import os
import re

import pandas as pd  # type: ignore
import streamlit as st  # type: ignore
from dotenv import load_dotenv  # type: ignore  # For local development
from supabase import create_client, Client  # type: ignore
# from transformers import pipeline
from pandasai import SmartDataframe  # type: ignore
from pandasai.llm.local_llm import LocalLLM
# ---------------------------------------------------------------------------------
# Helper functions
# ---------------------------------------------------------------------------------
def extract_code(llm_output):
    """Extract the first fenced Python code block from an LLM response.

    Args:
        llm_output: Raw text returned by the model.

    Returns:
        The code inside the first ```python ... ``` fence — or, as a
        fallback, the first untagged ``` ... ``` fence — or None when no
        code fence is found.
    """
    # Prefer an explicitly python-tagged fence; models frequently omit the
    # language tag or leave trailing whitespace after it, so tolerate both.
    for pattern in (r"```python\s*\n(.*?)\n```", r"```\s*\n(.*?)\n```"):
        match = re.search(pattern, llm_output, re.DOTALL)
        if match:
            return match.group(1)
    return None
# Build a prompt asking the LLM for a comparative line-graph script.
# Example of a generated prompt:
# generate_graph_prompt("Germany", "France", "fertility rate", 2020, 2030)
def generate_graph_prompt(country1, country2, metric, start_year, end_year):
    """Return an LLM prompt requesting matplotlib code that compares two
    countries on a given metric over a year range, plus a lay explanation."""
    return f"""
    You have access to a database of European countries with data on {metric}, labor force participation, population, and their predictions for future years.
    Generate Python code using matplotlib to create a line graph showing the trend of {metric} for {country1} and {country2} from {start_year} to {end_year}.
    Also, provide a concise explanation of what this graph represents for an end user who might not be familiar with the data.
    """
# ---------------------------------------------------------------------------------
# Supabase connection setup
# ---------------------------------------------------------------------------------
# Load environment variables from a .env file (used for local development).
load_dotenv()

# Supabase credentials (stored under "Secrets" when deployed on Streamlit).
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Create the Supabase client only when both credentials are present.
# Previously create_client() was called unconditionally, so missing env vars
# crashed at import time and the "client not initialized" path in load_data()
# could never be reached.
supabase = create_client(SUPABASE_URL, SUPABASE_KEY) if SUPABASE_URL and SUPABASE_KEY else None
# Load all rows of a Supabase table.
# Known tables: fertility, geo data, labor, population, predictions
def load_data(table):
    """Fetch every row of *table* from Supabase as a pandas DataFrame.

    Args:
        table: Name of the Supabase table to query.

    Returns:
        A DataFrame with the table contents, or an empty DataFrame on any
        error (missing client, request failure, unexpected response shape).
        Never returns None.
    """
    try:
        if not supabase:
            st.error("Supabase client not initialized. Check environment variables.")
            return pd.DataFrame()

        response = supabase.from_(table).select("*").execute()
        print(f"Response object: {response}")  # Inspect the full object
        print(f"Response type: {type(response)}")  # Verify the object type

        # Probe data/error attributes — their names vary across supabase-py versions.
        if hasattr(response, 'data'):
            print(f"Response data: {response.data}")
            return pd.DataFrame(response.data)
        if hasattr(response, 'status_code'):
            # BUGFIX: this branch previously fell through without returning,
            # so callers received None instead of a DataFrame.
            print(f"Response status code: {response.status_code}")
            st.error(f"Error fetching data: status code {response.status_code}")
            return pd.DataFrame()
        if hasattr(response, '_error'):  # Older client versions
            print(f"Older error attribute: {response._error}")
            st.error(f"Error fetching data: {response._error}")
            return pd.DataFrame()

        st.info("Response object does not have 'data' or known error attributes. Check the logs.")
        return pd.DataFrame()
    except Exception as e:
        # Top-level boundary: surface the failure in the UI, return empty data.
        st.error(f"An error occurred during data loading: {e}")
        return pd.DataFrame()
# ---------------------------------------------------------------------------------
# Initial data load
# ---------------------------------------------------------------------------------
# Load data from the "labor" table.
data = load_data("labor")

# TODO: eventually use every table, once the basic flow works.
# Feasible if the model handles the graphs well; otherwise it needs
# improvement, since those would be more complex queries.
# labor_data = load_data("labor")
# fertility_data = load_data("fertility")
# population_data = load_data("population")
# predictions_data = load_data("predictions")

# Example of a multi-table SmartDatalake session (kept for reference):
# import os
# import pandas as pd
# from pandasai import SmartDatalake
# employees_data = {
#     'EmployeeID': [1, 2, 3, 4, 5],
#     'Name': ['John', 'Emma', 'Liam', 'Olivia', 'William'],
#     'Department': ['HR', 'Sales', 'IT', 'Marketing', 'Finance']
# }
# salaries_data = {
#     'EmployeeID': [1, 2, 3, 4, 5],
#     'Salary': [5000, 6000, 4500, 7000, 5500]
# }
# employees_df = pd.DataFrame(employees_data)
# salaries_df = pd.DataFrame(salaries_data)
# # By default, unless you choose a different LLM, it will use BambooLLM.
# # You can get your free API key signing up at https://pandabi.ai (you can also configure it in your .env file)
# os.environ["PANDASAI_API_KEY"] = "YOUR_API_KEY"
# lake = SmartDatalake([employees_df, salaries_df])
# lake.chat("Who gets paid the most?")
# # Output: Olivia gets paid the most
# ---------------------------------------------------------------------------------
# LLM model initialization
# ---------------------------------------------------------------------------------
# # Keys pending change depending on which model we pick
# model_name = "google/flan-t5-small"  # Trying out models
# generator = pipeline("text-generation", model=model_name)

# ---------------------------------------------------------------------------------
# PandasAI initialization (local Ollama model)
# ---------------------------------------------------------------------------------
# # Earlier approach: StarCoder via Hugging Face
# huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
# llm = Starcoder(api_token=huggingface_token)

# Local Ollama endpoint exposing an OpenAI-compatible API.
ollama_llm = LocalLLM(
    api_base="http://localhost:11434/v1",
    model="gemma3:12b",
    temperature=0.1,
    max_tokens=8000,
)

# PandasAI-ready wrapper around the loaded DataFrame.
sdf = SmartDataframe(data, config={"llm": ollama_llm})
# ---------------------------------------------------------------------------------
# Streamlit app configuration
# ---------------------------------------------------------------------------------
# App title
st.title("_Europe GraphGen_ :blue[Graph generator] :flag-eu:")

# User input describing the desired graph
user_input = st.text_input("What graphics do you have in mind")
generate_button = st.button("Generate")

# Feed the user's request to PandasAI and display the answer.
if generate_button and user_input:
    st.dataframe(data.head())
    with st.spinner('Generating answer...'):
        try:
            answer = sdf.chat(user_input)
            st.write(answer)
        except Exception as e:
            st.error(f"Error generating answer: {e}")

# TODO: structured output if we find it necessary.