# NOTE: "Spaces: Running / Running" was Hugging Face Spaces UI status-banner
# residue captured during export — not part of the application code.
# ---------------------------------------------------------------------------------
# Main application: load the model, generate prompts, and explain the data
# ---------------------------------------------------------------------------------
import os
import re

import pandas as pd  # type: ignore
import streamlit as st  # type: ignore
from dotenv import load_dotenv  # type: ignore  # For local development
from supabase import create_client, Client  # type: ignore
# from transformers import pipeline
from pandasai import SmartDataframe  # type: ignore
from pandasai.llm.local_llm import LocalLLM
# ---------------------------------------------------------------------------------
# Helper functions
# ---------------------------------------------------------------------------------
def extract_code(llm_output):
    """Extract the first fenced Python code block from an LLM response.

    Args:
        llm_output: Raw text returned by the model.

    Returns:
        The code inside the first ```python ... ``` fence — or, as a
        fallback, the first untagged ``` ... ``` fence — or None when no
        code fence is found.
    """
    # Prefer an explicitly python-tagged fence; models frequently omit the
    # language tag or leave trailing whitespace after it, so tolerate both.
    for pattern in (r"```python\s*\n(.*?)\n```", r"```\s*\n(.*?)\n```"):
        match = re.search(pattern, llm_output, re.DOTALL)
        if match:
            return match.group(1)
    return None
# Build a prompt asking the LLM for a comparative line-graph script.
# Example of a generated prompt:
# generate_graph_prompt("Germany", "France", "fertility rate", 2020, 2030)
def generate_graph_prompt(country1, country2, metric, start_year, end_year):
    """Return an LLM prompt requesting matplotlib code that compares two
    countries on a given metric over a year range, plus a lay explanation."""
    return f"""
    You have access to a database of European countries with data on {metric}, labor force participation, population, and their predictions for future years.
    Generate Python code using matplotlib to create a line graph showing the trend of {metric} for {country1} and {country2} from {start_year} to {end_year}.
    Also, provide a concise explanation of what this graph represents for an end user who might not be familiar with the data.
    """
# ---------------------------------------------------------------------------------
# Supabase connection setup
# ---------------------------------------------------------------------------------
# Load environment variables from a .env file (used for local development).
load_dotenv()

# Supabase credentials (stored under "Secrets" when deployed on Streamlit).
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Create the Supabase client only when both credentials are present.
# Previously create_client() was called unconditionally, so missing env vars
# crashed at import time and the "client not initialized" path in load_data()
# could never be reached.
supabase = create_client(SUPABASE_URL, SUPABASE_KEY) if SUPABASE_URL and SUPABASE_KEY else None
# Load all rows of a Supabase table.
# Known tables: fertility, geo data, labor, population, predictions
def load_data(table):
    """Fetch every row of *table* from Supabase as a pandas DataFrame.

    Args:
        table: Name of the Supabase table to query.

    Returns:
        A DataFrame with the table contents, or an empty DataFrame on any
        error (missing client, request failure, unexpected response shape).
        Never returns None.
    """
    try:
        if not supabase:
            st.error("Supabase client not initialized. Check environment variables.")
            return pd.DataFrame()

        response = supabase.from_(table).select("*").execute()
        print(f"Response object: {response}")  # Inspect the full object
        print(f"Response type: {type(response)}")  # Verify the object type

        # Probe data/error attributes — their names vary across supabase-py versions.
        if hasattr(response, 'data'):
            print(f"Response data: {response.data}")
            return pd.DataFrame(response.data)
        if hasattr(response, 'status_code'):
            # BUGFIX: this branch previously fell through without returning,
            # so callers received None instead of a DataFrame.
            print(f"Response status code: {response.status_code}")
            st.error(f"Error fetching data: status code {response.status_code}")
            return pd.DataFrame()
        if hasattr(response, '_error'):  # Older client versions
            print(f"Older error attribute: {response._error}")
            st.error(f"Error fetching data: {response._error}")
            return pd.DataFrame()

        st.info("Response object does not have 'data' or known error attributes. Check the logs.")
        return pd.DataFrame()
    except Exception as e:
        # Top-level boundary: surface the failure in the UI, return empty data.
        st.error(f"An error occurred during data loading: {e}")
        return pd.DataFrame()
# ---------------------------------------------------------------------------------
# Initial data load
# ---------------------------------------------------------------------------------
# Load data from the "labor" table.
data = load_data("labor")

# TODO: eventually use every table, once the basic flow works.
# Feasible if the model handles the graphs well; otherwise it needs
# improvement, since those would be more complex queries.
# labor_data = load_data("labor")
# fertility_data = load_data("fertility")
# population_data = load_data("population")
# predictions_data = load_data("predictions")

# Example of a multi-table SmartDatalake session (kept for reference):
# import os
# import pandas as pd
# from pandasai import SmartDatalake
# employees_data = {
#     'EmployeeID': [1, 2, 3, 4, 5],
#     'Name': ['John', 'Emma', 'Liam', 'Olivia', 'William'],
#     'Department': ['HR', 'Sales', 'IT', 'Marketing', 'Finance']
# }
# salaries_data = {
#     'EmployeeID': [1, 2, 3, 4, 5],
#     'Salary': [5000, 6000, 4500, 7000, 5500]
# }
# employees_df = pd.DataFrame(employees_data)
# salaries_df = pd.DataFrame(salaries_data)
# # By default, unless you choose a different LLM, it will use BambooLLM.
# # You can get your free API key signing up at https://pandabi.ai (you can also configure it in your .env file)
# os.environ["PANDASAI_API_KEY"] = "YOUR_API_KEY"
# lake = SmartDatalake([employees_df, salaries_df])
# lake.chat("Who gets paid the most?")
# # Output: Olivia gets paid the most
# ---------------------------------------------------------------------------------
# LLM model initialization
# ---------------------------------------------------------------------------------
# # Keys pending change depending on which model we pick
# model_name = "google/flan-t5-small"  # Trying out models
# generator = pipeline("text-generation", model=model_name)

# ---------------------------------------------------------------------------------
# PandasAI initialization (local Ollama model)
# ---------------------------------------------------------------------------------
# # Earlier approach: StarCoder via Hugging Face
# huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
# llm = Starcoder(api_token=huggingface_token)

# Local Ollama endpoint exposing an OpenAI-compatible API.
ollama_llm = LocalLLM(
    api_base="http://localhost:11434/v1",
    model="gemma3:12b",
    temperature=0.1,
    max_tokens=8000,
)

# PandasAI-ready wrapper around the loaded DataFrame.
sdf = SmartDataframe(data, config={"llm": ollama_llm})
# ---------------------------------------------------------------------------------
# Streamlit app configuration
# ---------------------------------------------------------------------------------
# App title
st.title("_Europe GraphGen_ :blue[Graph generator] :flag-eu:")

# User input describing the desired graph
user_input = st.text_input("What graphics do you have in mind")
generate_button = st.button("Generate")

# Feed the user's request to PandasAI and display the answer.
if generate_button and user_input:
    st.dataframe(data.head())
    with st.spinner('Generating answer...'):
        try:
            answer = sdf.chat(user_input)
            st.write(answer)
        except Exception as e:
            st.error(f"Error generating answer: {e}")

# TODO: structured output if we find it necessary.