# ---------------------------------------------------------------------------------
# Main application: load the model, generate prompts, and explain the data
# ---------------------------------------------------------------------------------

import streamlit as st  # type: ignore
import os
import re
import pandas as pd  # type: ignore
from dotenv import load_dotenv  # type: ignore # For local development
from supabase import create_client, Client  # type: ignore
# from transformers import pipeline

from pandasai import SmartDataframe  # type: ignore
from pandasai.llm.local_llm import LocalLLM

# ---------------------------------------------------------------------------------
# Helper functions
# ---------------------------------------------------------------------------------

# Extract a Python code block from the model's output
def extract_code(llm_output):
    code_match = re.search(r"```python\n(.*?)\n```", llm_output, re.DOTALL)
    if code_match:
        return code_match.group(1)
    return None
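
# Illustrative usage (hypothetical model output, for demonstration only):
# _sample = "Here is the code:\n```python\nprint('hello')\n```"
# extract_code(_sample)  # -> "print('hello')"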

# Generate a prompt for a comparative graph between two countries
# Example call:
# generate_graph_prompt("Germany", "France", "fertility rate", 2020, 2030)
def generate_graph_prompt(country1, country2, metric, start_year, end_year):
    prompt = f"""

    You have access to a database of European countries with data on {metric}, labor force participation, population, and their predictions for future years.

    Generate Python code using matplotlib to create a line graph showing the trend of {metric} for {country1} and {country2} from {start_year} to {end_year}.

    Also, provide a concise explanation of what this graph represents for an end user who might not be familiar with the data.

    """
    return prompt
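
# Sketch of how the two helpers could compose (assumption: not wired up yet;
# `sdf` is the SmartDataframe defined further below):
# _prompt = generate_graph_prompt("Germany", "France", "fertility rate", 2020, 2030)
# _llm_output = str(sdf.chat(_prompt))
# _code = extract_code(_llm_output)  # None if no fenced code block was returned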

# ---------------------------------------------------------------------------------
# Supabase connection setup
# ---------------------------------------------------------------------------------

# Load environment variables from the .env file
load_dotenv()

# Supabase credentials (stored under "Secrets" in Streamlit)
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
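
# Defensive check (an added sketch): create_client raises if either value is
# missing, so fail fast with a readable error instead of a stack trace.
if not SUPABASE_URL or not SUPABASE_KEY:
    st.error("SUPABASE_URL or SUPABASE_KEY is missing. Check your .env file or Streamlit Secrets.")
    st.stop()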

# Create the Supabase client
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

# Load data from a Supabase table
# Available tables: fertility, geo data, labor, population, predictions
def load_data(table):
    try:
        if supabase:
            response = supabase.from_(table).select("*").execute()
            print(f"Response object: {response}")  # Inspeccionar objeto completo
            print(f"Response type: {type(response)}")  # Verificar tipo de objeto

            # Check for data or error-related attributes on the response
            if hasattr(response, 'data'):
                print(f"Response data: {response.data}")
                return pd.DataFrame(response.data)
            elif hasattr(response, 'status_code'):
                print(f"Response status code: {response.status_code}")
                st.error(f"Error fetching data (status code: {response.status_code})")
                return pd.DataFrame()
            elif hasattr(response, '_error'):  # Older client versions
                print(f"Older error attribute: {response._error}")
                st.error(f"Error fetching data: {response._error}")
                return pd.DataFrame()
            else:
                st.info("Response object does not have 'data' or known error attributes. Check the logs.")
                return pd.DataFrame()

        else:
            st.error("Supabase client not initialized. Check environment variables.")
            return pd.DataFrame()
    except Exception as e:
        st.error(f"An error occurred during data loading: {e}")
        return pd.DataFrame()

# ---------------------------------------------------------------------------------
# Load initial data
# ---------------------------------------------------------------------------------

# Load data from the "labor" table
data = load_data("labor")

# TODO: The plan is to use all the tables later, once this works.
# That is feasible if the model can handle the graphs; otherwise it will need
# improvement, since those would be more complex queries.
# labor_data = load_data("labor")
# fertility_data = load_data("fertility")
# population_data = load_data("population")
# predictions_data = load_data("predictions")

"""

# Ej:

# import os

# import pandas as pd

# from pandasai import SmartDatalake



# employees_data = {

#     'EmployeeID': [1, 2, 3, 4, 5],

#     'Name': ['John', 'Emma', 'Liam', 'Olivia', 'William'],

#     'Department': ['HR', 'Sales', 'IT', 'Marketing', 'Finance']

# }



# salaries_data = {

#     'EmployeeID': [1, 2, 3, 4, 5],

#     'Salary': [5000, 6000, 4500, 7000, 5500]

# }



# employees_df = pd.DataFrame(employees_data)

# salaries_df = pd.DataFrame(salaries_data)



# # By default, unless you choose a different LLM, it will use BambooLLM.

# # You can get your free API key signing up at https://pandabi.ai (you can also configure it in your .env file)

# os.environ["PANDASAI_API_KEY"] = "YOUR_API_KEY"



# lake = SmartDatalake([employees_df, salaries_df])

# lake.chat("Who gets paid the most?")

# # Output: Olivia gets paid the most



"""

# ---------------------------------------------------------------------------------
# Initialize the LLM
# ---------------------------------------------------------------------------------

# Pending: change the keys depending on which model we choose
# model_name = "google/flan-t5-small"  # Trying out models
# generator = pipeline("text-generation", model=model_name)

# ---------------------------------------------------------------------------------
# Initialize PandasAI with a local LLM (via Ollama)
# ---------------------------------------------------------------------------------

# Previous approach: the StarCoder model from Hugging Face (kept for reference)
# huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
# llm = Starcoder(api_token=huggingface_token)

ollama_llm = LocalLLM(api_base="http://localhost:11434/v1", 
                      model="gemma3:12b",
                      temperature=0.1,  
                      max_tokens=8000)
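
# Note: this assumes an Ollama server is running locally on its default port
# (11434) with the gemma3:12b model already pulled, e.g. `ollama pull gemma3:12b`.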

sdf = SmartDataframe(data, config={"llm": ollama_llm})  # PandasAI-ready DataFrame

# ---------------------------------------------------------------------------------
# Streamlit app setup
# ---------------------------------------------------------------------------------

# App title
st.title("_Europe GraphGen_  :blue[Graph generator] :flag-eu:")

# User input describing the desired graph
user_input = st.text_input("What graph do you have in mind?")
generate_button = st.button("Generate")

# Process the user input with PandasAI
if generate_button and user_input:
    st.dataframe(data.head())
    
    with st.spinner('Generating answer...'):
        try:
            answer = sdf.chat(user_input)
            st.write(answer)
        except Exception as e:
            st.error(f"Error generating answer: {e}")


# TODO: Structured output if we find it necessary.