# ---------------------------------------------------------------------------------
# Main application: load the model, generate prompts, and explain the data
# ---------------------------------------------------------------------------------

import streamlit as st  # type: ignore
import os
import re
import pandas as pd  # type: ignore
from dotenv import load_dotenv  # type: ignore # For local development
from supabase import create_client, Client  # type: ignore
# from transformers import pipeline

from pandasai import SmartDataframe  # type: ignore
from pandasai.llm.local_llm import LocalLLM

# ---------------------------------------------------------------------------------
# Helper functions
# ---------------------------------------------------------------------------------

# Extract a Python code block from the model's output
def extract_code(llm_output):
    code_match = re.search(r"```python\n(.*?)\n```", llm_output, re.DOTALL)
    if code_match:
        return code_match.group(1)
    return None
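
# Illustrative usage (hypothetical model output, for demonstration only):
# _sample = "Here is the code:\n```python\nprint('hello')\n```"
# extract_code(_sample)  # -> "print('hello')"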

# Generate a prompt for a comparative graph between two countries
# Example call:
# generate_graph_prompt("Germany", "France", "fertility rate", 2020, 2030)
def generate_graph_prompt(country1, country2, metric, start_year, end_year):
    prompt = f"""

    You have access to a database of European countries with data on {metric}, labor force participation, population, and their predictions for future years.

    Generate Python code using matplotlib to create a line graph showing the trend of {metric} for {country1} and {country2} from {start_year} to {end_year}.

    Also, provide a concise explanation of what this graph represents for an end user who might not be familiar with the data.

    """
    return prompt
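
# Sketch of how the two helpers could compose (assumption: not wired up yet;
# `sdf` is the SmartDataframe defined further below):
# _prompt = generate_graph_prompt("Germany", "France", "fertility rate", 2020, 2030)
# _llm_output = str(sdf.chat(_prompt))
# _code = extract_code(_llm_output)  # None if no fenced code block was returned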

# ---------------------------------------------------------------------------------
# Supabase connection setup
# ---------------------------------------------------------------------------------

# Load environment variables from the .env file
load_dotenv()

# Supabase credentials (stored under "Secrets" in Streamlit)
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
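
# Defensive check (an added sketch): create_client raises if either value is
# missing, so fail fast with a readable error instead of a stack trace.
if not SUPABASE_URL or not SUPABASE_KEY:
    st.error("SUPABASE_URL or SUPABASE_KEY is missing. Check your .env file or Streamlit Secrets.")
    st.stop()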

# Create the Supabase client
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

# Load data from a Supabase table
# Available tables: fertility, geo data, labor, population, predictions
def load_data(table):
    try:
        if supabase:
            response = supabase.from_(table).select("*").execute()
            print(f"Response object: {response}")  # Inspeccionar objeto completo
            print(f"Response type: {type(response)}")  # Verificar tipo de objeto

            # Check for data or error-related attributes on the response
            if hasattr(response, 'data'):
                print(f"Response data: {response.data}")
                return pd.DataFrame(response.data)
            elif hasattr(response, 'status_code'):
                print(f"Response status code: {response.status_code}")
                st.error(f"Error fetching data (status code: {response.status_code})")
                return pd.DataFrame()
            elif hasattr(response, '_error'):  # Older client versions
                print(f"Older error attribute: {response._error}")
                st.error(f"Error fetching data: {response._error}")
                return pd.DataFrame()
            else:
                st.info("Response object does not have 'data' or known error attributes. Check the logs.")
                return pd.DataFrame()

        else:
            st.error("Supabase client not initialized. Check environment variables.")
            return pd.DataFrame()
    except Exception as e:
        st.error(f"An error occurred during data loading: {e}")
        return pd.DataFrame()

# ---------------------------------------------------------------------------------
# Load initial data
# ---------------------------------------------------------------------------------

# Load data from the "labor" table
data = load_data("labor")

# TODO: The plan is to use all the tables later, once this works.
# That is feasible if the model can handle the graphs; otherwise it will need
# improvement, since those would be more complex queries.
# labor_data = load_data("labor")
# fertility_data = load_data("fertility")
# population_data = load_data("population")
# predictions_data = load_data("predictions")

"""

# Ej:

# import os

# import pandas as pd

# from pandasai import SmartDatalake



# employees_data = {

#     'EmployeeID': [1, 2, 3, 4, 5],

#     'Name': ['John', 'Emma', 'Liam', 'Olivia', 'William'],

#     'Department': ['HR', 'Sales', 'IT', 'Marketing', 'Finance']

# }



# salaries_data = {

#     'EmployeeID': [1, 2, 3, 4, 5],

#     'Salary': [5000, 6000, 4500, 7000, 5500]

# }



# employees_df = pd.DataFrame(employees_data)

# salaries_df = pd.DataFrame(salaries_data)



# # By default, unless you choose a different LLM, it will use BambooLLM.

# # You can get your free API key signing up at https://pandabi.ai (you can also configure it in your .env file)

# os.environ["PANDASAI_API_KEY"] = "YOUR_API_KEY"



# lake = SmartDatalake([employees_df, salaries_df])

# lake.chat("Who gets paid the most?")

# # Output: Olivia gets paid the most



"""

# ---------------------------------------------------------------------------------
# Initialize the LLM
# ---------------------------------------------------------------------------------

# Pending: change the keys depending on which model we choose
# model_name = "google/flan-t5-small"  # Trying out models
# generator = pipeline("text-generation", model=model_name)

# ---------------------------------------------------------------------------------
# Initialize PandasAI with a local LLM (via Ollama)
# ---------------------------------------------------------------------------------

# Previous approach: the StarCoder model from Hugging Face (kept for reference)
# huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
# llm = Starcoder(api_token=huggingface_token)

ollama_llm = LocalLLM(api_base="http://localhost:11434/v1", 
                      model="gemma3:12b",
                      temperature=0.1,  
                      max_tokens=8000)
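
# Note: this assumes an Ollama server is running locally on its default port
# (11434) with the gemma3:12b model already pulled, e.g. `ollama pull gemma3:12b`.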

sdf = SmartDataframe(data, config={"llm": ollama_llm})  # PandasAI-ready DataFrame

# ---------------------------------------------------------------------------------
# Streamlit app setup
# ---------------------------------------------------------------------------------

# App title
st.title("_Europe GraphGen_  :blue[Graph generator] :flag-eu:")

# User input describing the desired graph
user_input = st.text_input("What graph do you have in mind?")
generate_button = st.button("Generate")

# Process the user input with PandasAI
if generate_button and user_input:
    st.dataframe(data.head())
    
    with st.spinner('Generating answer...'):
        try:
            answer = sdf.chat(user_input)
            st.write(answer)
        except Exception as e:
            st.error(f"Error generating answer: {e}")


# TODO: Structured output if we find it necessary.