# app.py — FastAPI service for the NASA SpaceApp 2025 project.
from contextlib import asynccontextmanager
from datetime import datetime, timezone
import os
from typing import Optional

from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
import pandas as pd

from ExoMACModel import ExoMACModel
from models.requests import PredictRequest
from models.responses import (
PredictResponse,
ExoplanetCumulative,
ExoplanetK2,
ExoplanetTOI,
DatasetListResponse
)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: load the ExoMAC model once at startup."""
    # Configuration comes from the environment, with in-repo defaults.
    repo = os.getenv("EXOMAC_REPO", "ZapatoProgramming/ExoMAC-KKT")
    local_dir = os.getenv("EXOMAC_LOCAL_DIR", "ExoMACModel/ExoMAC-KKT")
    exomac = ExoMACModel(
        repo_id=repo,
        local_dir=local_dir,
        prefer_snapshot=True,
        always_download=False,
        verbose=True,
    )
    # Expose the model to request handlers via the application state.
    app.state.model = exomac
    yield
# FastAPI application; `lifespan` loads the ExoMAC model at startup.
app = FastAPI(
    title="NASA SpaceApp API",
    description="API para el proyecto NASA SpaceApp 2025",
    version="1.0.0",
    lifespan=lifespan
)
# Configure CORS: allow any origin, method, and header.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers per the CORS spec (wildcard origin is not allowed with
# credentials) — confirm whether credentialed requests are actually needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/")
async def root():
    """API root endpoint: welcome message, version, and docs location."""
    payload = {
        "message": "Bienvenido a NASA SpaceApp API",
        "version": "1.0.0",
        "docs": "/docs",
    }
    return payload
@app.get("/health")
async def health():
    """Health-check endpoint.

    Returns the service status with a timezone-aware UTC timestamp.
    (The original used naive ``datetime.now()``, which is ambiguous for
    clients comparing timestamps across hosts in different timezones.)
    """
    return {
        "status": "healthy",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "service": "NASA SpaceApp API"
    }
@app.post("/predict", response_model=PredictResponse)
def predict(
    req: PredictRequest,
):
    """Run the ExoMAC classifier on the supplied feature mapping.

    Returns the predicted label and class probabilities plus diagnostic
    metadata: which input keys were recognized/unknown, which model
    features ended up missing, and the engineered features derived from
    the input.

    Raises:
        HTTPException: 503 if the model is not loaded, 500 on prediction
            failure (with the underlying error message).
    """
    m: Optional[ExoMACModel] = getattr(app.state, "model", None)
    if m is None:
        # Model failed to load, or the lifespan handler has not run yet.
        raise HTTPException(503, "Model not loaded")
    data = dict(req.features)
    try:
        label, probabilities = m.predict(
            data,
            return_proba=True,
            compute_engineered_if_missing=True,
        )
    except Exception as e:
        # Surface the underlying error so clients can diagnose bad input.
        # (The original discarded `e` and returned a constant message via a
        # placeholder-less f-string.)
        raise HTTPException(500, f"Prediction error: {e}") from e
    cols = m.feature_columns
    recognized = [c for c in cols if c in data]
    unknown = [k for k in data.keys() if k not in cols]
    # NOTE(review): relies on the model's private helper to re-derive the
    # engineered features for reporting — consider exposing a public accessor.
    used = m._ensure_engineered_features(dict(data))
    X = pd.DataFrame([used], dtype=float).reindex(columns=cols)
    missing = X.columns[X.iloc[0].isna()].tolist()
    # Engineered features: those added beyond the original input keys
    engineered_only = {k: used.get(k) for k in used.keys() if k not in data}
    # JSON-safe (convert NaN to None and numpy floats to float)
    engineered_json = {
        k: (None if pd.isna(v) else float(v)) if isinstance(v, (int, float)) or hasattr(v, "__float__") else None
        for k, v in engineered_only.items()
    }
    return PredictResponse(
        label=label,
        probabilities=probabilities,
        recognized=recognized,
        unknown=unknown,
        missing=missing,
        feature_order=cols,
        engineered=engineered_json,
    )
# ============================================================================
# HELPER FUNCTIONS PARA CARGA DE DATASETS
# ============================================================================
def load_csv_dataset(filename: str) -> pd.DataFrame:
    """Load a NASA CSV dataset, skipping '#' comment lines.

    Args:
        filename: File name inside the ``NASA_datasets`` directory.

    Returns:
        The parsed DataFrame.

    Raises:
        HTTPException: 500 naming the offending file if loading fails.
    """
    filepath = os.path.join("NASA_datasets", filename)
    try:
        return pd.read_csv(filepath, comment='#')
    except Exception as e:
        # Report the actual file name; the original hard-coded "(unknown)".
        raise HTTPException(500, f"Error loading dataset {filename}: {str(e)}") from e
def filter_dataframe(df: pd.DataFrame, limit: int = 100, offset: int = 0, **filters) -> pd.DataFrame:
    """Apply equality filters to *df*, then return one offset/limit page.

    Filter keys that are not columns of *df*, or whose value is None,
    are silently ignored.
    """
    result = df.copy()
    for column, wanted in filters.items():
        if wanted is None or column not in result.columns:
            continue
        result = result[result[column] == wanted]
    # Pagination is a plain positional slice of the filtered rows.
    return result.iloc[offset:offset + limit]
def df_to_dict_list(df: pd.DataFrame) -> list:
    """Convert a DataFrame to a list of records with NaN mapped to None.

    The cast to ``object`` is required: on numeric columns,
    ``df.where(cond, None)`` coerces None back to NaN to preserve the
    float dtype, so NaN would leak into JSON responses as invalid JSON.
    """
    return df.astype(object).where(pd.notna(df), None).to_dict('records')
# ============================================================================
# ENDPOINTS PARA DATASET CUMULATIVE (KEPLER)
# ============================================================================
@app.get("/kepler", response_model=DatasetListResponse)
async def get_kepler_exoplanets(
    limit: Optional[int] = Query(None, ge=1, le=1000, description="Número máximo de resultados (si no se especifica, devuelve todos)"),
    offset: int = Query(0, ge=0, description="Offset para paginación"),
    koi_disposition: Optional[str] = Query(None, description="Filtrar por disposición (CONFIRMED, FALSE POSITIVE, CANDIDATE)")
):
    """
    Obtiene lista de exoplanetas del dataset Cumulative (Kepler).
    Incluye las 11 características esenciales:
    - koi_period: Periodo orbital
    - koi_duration: Duración del tránsito
    - koi_depth: Profundidad del tránsito
    - koi_impact: Parámetro de impacto
    - koi_prad: Radio del planeta
    - koi_slogg: Gravedad superficial estelar
    - koi_sma: Semi-eje mayor
    - koi_smet: Metalicidad estelar
    - koi_srad: Radio estelar
    - koi_steff: Temperatura efectiva estelar
    - koi_snr: Relación señal-ruido (nota: no disponible en el dataset)
    """
    df = load_csv_dataset("cumulative_2025.10.05_10.28.27.csv")
    # `total` reflects the full dataset, before any filtering.
    total = len(df)
    # Optional disposition filter.
    if koi_disposition:
        df = df[df['koi_disposition'] == koi_disposition]
    # Page the rows; an open-ended slice when no limit was requested.
    stop = offset + limit if limit is not None else None
    page = df.iloc[offset:stop]
    # Relevant columns (only those with <=50% nulls in the source data).
    columns_to_include = [
        # Identifiers
        'kepid', 'kepoi_name', 'kepler_name',
        # Disposition
        'koi_disposition', 'koi_pdisposition', 'koi_score',
        # Orbital characteristics
        'koi_period', 'koi_time0bk', 'koi_impact', 'koi_duration', 'koi_depth',
        'koi_prad', 'koi_sma', 'koi_teq', 'koi_insol', 'koi_dor', 'koi_ror',
        'koi_eccen', 'koi_incl',
        # Stellar characteristics
        'koi_steff', 'koi_slogg', 'koi_srad', 'koi_smass', 'koi_smet', 'koi_kepmag',
        # Coordinates
        'ra', 'dec'
    ]
    # Keep only the columns actually present in this CSV revision.
    subset = page[[c for c in columns_to_include if c in page.columns]]
    return DatasetListResponse(
        total=total,
        count=len(subset),
        data=df_to_dict_list(subset)
    )
@app.get("/kepler/{id}", response_model=ExoplanetCumulative)
async def get_cumulative_exoplanet_by_id(id: str):
    """
    Obtiene un exoplaneta específico del dataset Cumulative por su nombre KOI o nombre Kepler.
    Ejemplo: K00001.01, K00002.01, Kepler-227 b, etc.
    """
    df = load_csv_dataset("cumulative_2025.10.05_10.28.27.csv")
    # Match either the KOI designation or the confirmed Kepler name.
    matches = df[(df['kepoi_name'] == id) | (df['kepler_name'] == id)]
    if matches.empty:
        raise HTTPException(404, f"Exoplanet with kepoi_name or kepler_name '{id}' not found")
    row = matches.iloc[0]
    # Replace NaN with None so the response model validates cleanly.
    return ExoplanetCumulative(**row.where(pd.notna(row), None).to_dict())
@app.get("/keplerSummary")
async def get_kepler_summary():
    """
    Devuelve el conteo de exoplanetas confirmados, candidatos, falsos positivos y el total en el dataset Kepler.
    """
    df = load_csv_dataset("cumulative_2025.10.05_10.28.27.csv")
    counts = df['koi_disposition'].value_counts().to_dict()
    # Report the three known dispositions explicitly, defaulting to zero.
    return {
        "CONFIRMED": counts.get("CONFIRMED", 0),
        "CANDIDATE": counts.get("CANDIDATE", 0),
        "FALSE POSITIVE": counts.get("FALSE POSITIVE", 0),
        "TOTAL": int(df.shape[0])
    }
# ============================================================================
# ENDPOINTS PARA DATASET K2
# ============================================================================
@app.get("/k2", response_model=DatasetListResponse)
async def get_k2_exoplanets(
    limit: Optional[int] = Query(None, ge=1, le=1000, description="Número máximo de resultados (si no se especifica, devuelve todos)"),
    offset: int = Query(0, ge=0, description="Offset para paginación"),
    disposition: Optional[str] = Query(None, description="Filtrar por disposición")
):
    """
    Obtiene lista de exoplanetas del dataset K2.
    Incluye datos de planetas y estrellas para visualización.
    """
    df = load_csv_dataset("k2pandc_2025.10.05_10.29.57.csv")
    # `total` reflects the full dataset, before any filtering.
    total = len(df)
    # Optional disposition filter.
    if disposition:
        df = df[df['disposition'] == disposition]
    # Page the rows; an open-ended slice when no limit was requested.
    stop = offset + limit if limit is not None else None
    page = df.iloc[offset:stop]
    # Relevant columns (only those with <=50% nulls in the source data).
    columns_to_include = [
        # Identifiers
        'pl_name', 'hostname', 'epic_hostname', 'tic_id', 'gaia_id',
        'disposition', 'discoverymethod', 'disc_year',
        # Planet data
        'pl_orbper', 'pl_rade', 'pl_radj', 'pl_trandep', 'pl_trandur',
        'pl_tranmid', 'pl_imppar',
        # Stellar data
        'st_teff', 'st_rad', 'st_mass', 'st_met', 'st_logg',
        # System data
        'sy_dist', 'sy_vmag', 'sy_kmag', 'sy_jmag', 'sy_hmag', 'sy_gaiamag',
        # Coordinates
        'ra', 'dec'
    ]
    # Keep only the columns actually present in this CSV revision.
    subset = page[[c for c in columns_to_include if c in page.columns]]
    return DatasetListResponse(
        total=total,
        count=len(subset),
        data=df_to_dict_list(subset)
    )
@app.get("/k2/{pl_name}", response_model=ExoplanetK2)
async def get_k2_exoplanet_by_name(pl_name: str):
    """
    Obtiene un exoplaneta específico del dataset K2 por su nombre.
    Ejemplo: K2-1 b, K2-2 b, etc.
    """
    df = load_csv_dataset("k2pandc_2025.10.05_10.29.57.csv")
    # Look the planet up by its published name.
    matches = df[df['pl_name'] == pl_name]
    if matches.empty:
        raise HTTPException(404, f"Exoplanet with name '{pl_name}' not found")
    row = matches.iloc[0]
    # Replace NaN with None so the response model validates cleanly.
    return ExoplanetK2(**row.where(pd.notna(row), None).to_dict())
# ============================================================================
# ENDPOINTS PARA DATASET TOI (TESS)
# ============================================================================
@app.get("/tess", response_model=DatasetListResponse)
async def get_tess_exoplanets(
    limit: Optional[int] = Query(None, ge=1, le=1000, description="Número máximo de resultados (si no se especifica, devuelve todos)"),
    offset: int = Query(0, ge=0, description="Offset para paginación"),
    tfopwg_disp: Optional[str] = Query(None, description="Filtrar por disposición del grupo de trabajo")
):
    """
    Obtiene lista de TESS Objects of Interest (TOI).
    Incluye datos de candidatos a exoplanetas del telescopio TESS.
    """
    df = load_csv_dataset("TOI_2025.10.05_10.30.20.csv")
    # `total` reflects the full dataset, before any filtering.
    total = len(df)
    # Filter by working-group disposition when requested.
    if tfopwg_disp:
        df = df[df['tfopwg_disp'] == tfopwg_disp]
    # Apply pagination only when a limit is given.
    if limit is not None:
        df_page = df.iloc[offset:offset + limit]
    else:
        df_page = df.iloc[offset:]
    # Relevant columns (only those with <=50% nulls in the source data).
    columns_to_include = [
        # Identifiers
        'toi', 'tid', 'ctoi_alias', 'tfopwg_disp', 'pl_pnum',
        # Planet data
        'pl_orbper', 'pl_rade', 'pl_eqt', 'pl_insol', 'pl_trandep',
        'pl_trandurh', 'pl_tranmid', 'pl_imppar', 'pl_orbsmax',
        # Stellar data
        'st_teff', 'st_rad', 'st_mass', 'st_met', 'st_logg', 'st_dist', 'st_tmag',
        # Proper motion
        'st_pmra', 'st_pmdec',
        # Coordinates
        'ra', 'dec', 'rastr', 'decstr'
    ]
    # Keep only the columns actually present in this CSV revision.
    available_columns = [col for col in columns_to_include if col in df_page.columns]
    # .copy() so the string coercion below does not write into a slice of the
    # paged frame (avoids pandas' SettingWithCopyWarning / chained assignment).
    df_result = df_page[available_columns].copy()
    # Coerce fields that must be strings in the response model.
    string_fields = ['ctoi_alias', 'rastr', 'decstr']
    for field in string_fields:
        if field in df_result.columns:
            df_result[field] = df_result[field].apply(lambda x: str(x) if pd.notna(x) else None)
    return DatasetListResponse(
        total=total,
        count=len(df_result),
        data=df_to_dict_list(df_result)
    )
@app.get("/tess/{toi_id}", response_model=ExoplanetTOI)
async def get_tess_by_id(toi_id: float):
    """
    Obtiene un TOI específico por su ID.
    Ejemplo: 100.01, 101.01, etc.
    """
    df = load_csv_dataset("TOI_2025.10.05_10.30.20.csv")
    # Look the object up by its TOI number.
    matches = df[df['toi'] == toi_id]
    if matches.empty:
        raise HTTPException(404, f"TOI with id '{toi_id}' not found")
    row = matches.iloc[0]
    # Replace NaN with None so the response model validates cleanly.
    data = row.where(pd.notna(row), None).to_dict()
    # These identifier fields must be strings for the response model.
    for field in ('ctoi_alias', 'rastr', 'decstr'):
        value = data.get(field)
        if value is not None:
            data[field] = str(value)
    return ExoplanetTOI(**data)