from contextlib import asynccontextmanager
from datetime import datetime
from typing import Optional
import os

import pandas as pd
from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware

from ExoMACModel import ExoMACModel
from models.requests import PredictRequest
from models.responses import (
    PredictResponse,
    ExoplanetCumulative,
    ExoplanetK2,
    ExoplanetTOI,
    DatasetListResponse,
)


@asynccontextmanager  # required so FastAPI can use this function as a lifespan context
async def lifespan(app: FastAPI):
    # Load the ExoMAC model once at startup and keep it on app.state
    model = ExoMACModel(
        repo_id=os.getenv("EXOMAC_REPO", "ZapatoProgramming/ExoMAC-KKT"),
        local_dir=os.getenv("EXOMAC_LOCAL_DIR", "ExoMACModel/ExoMAC-KKT"),
        prefer_snapshot=True,
        always_download=False,
        verbose=True,
    )
    app.state.model = model
    yield


app = FastAPI(
    title="NASA SpaceApp API",
    description="API for the NASA SpaceApp 2025 project",
    version="1.0.0",
    lifespan=lifespan
)

# Configure CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
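
# Note: browsers ignore Access-Control-Allow-Credentials when the allowed
# origin is the wildcard "*". If cookie-based auth is ever needed, list the
# frontend origins explicitly instead of "*".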


@app.get("/")
async def root():
    """Root endpoint of the API"""
    return {
        "message": "Welcome to the NASA SpaceApp API",
        "version": "1.0.0",
        "docs": "/docs"
    }


@app.get("/health")
async def health():
    """Health-check endpoint"""
    return {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "service": "NASA SpaceApp API"
    }


@app.post("/predict", response_model=PredictResponse)  # route path assumed
def predict(
    req: PredictRequest,
):
    m: Optional[ExoMACModel] = getattr(app.state, "model", None)
    if m is None:
        raise HTTPException(503, "Model not loaded")
    data = dict(req.features)
    try:
        label, probabilities = m.predict(
            data,
            return_proba=True,
            compute_engineered_if_missing=True,
        )
    except Exception as e:
        raise HTTPException(500, f"Prediction error: {e}") from e
    cols = m.feature_columns
    recognized = [c for c in cols if c in data]
    unknown = [k for k in data.keys() if k not in cols]
    used = m._ensure_engineered_features(dict(data))
    X = pd.DataFrame([used], dtype=float).reindex(columns=cols)
    missing = X.columns[X.iloc[0].isna()].tolist()
    # Engineered features: those added beyond the original input keys
    engineered_only = {k: used.get(k) for k in used.keys() if k not in data}
    # JSON-safe (convert NaN to None and numpy floats to float)
    engineered_json = {
        k: (None if pd.isna(v) else float(v)) if isinstance(v, (int, float)) or hasattr(v, "__float__") else None
        for k, v in engineered_only.items()
    }
    return PredictResponse(
        label=label,
        probabilities=probabilities,
        recognized=recognized,
        unknown=unknown,
        missing=missing,
        feature_order=cols,
        engineered=engineered_json,
    )
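
# Illustrative request against the endpoint above. The path and the feature
# names/values are assumptions; the keys actually accepted come from
# PredictRequest and m.feature_columns:
#
#   curl -X POST http://localhost:8000/predict \
#     -H "Content-Type: application/json" \
#     -d '{"features": {"koi_period": 9.49, "koi_depth": 615.8, "koi_prad": 2.26}}'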


# ============================================================================
# HELPER FUNCTIONS FOR DATASET LOADING
# ============================================================================
def load_csv_dataset(filename: str) -> pd.DataFrame:
    """Load a NASA CSV file, skipping '#' comment lines and handling errors"""
    try:
        filepath = os.path.join("NASA_datasets", filename)
        df = pd.read_csv(filepath, comment='#')
        return df
    except Exception as e:
        raise HTTPException(500, f"Error loading dataset {filename}: {str(e)}") from e


def filter_dataframe(df: pd.DataFrame, limit: int = 100, offset: int = 0, **filters) -> pd.DataFrame:
    """Filter a dataframe by column equality and apply pagination"""
    filtered_df = df.copy()
    # Apply filters when given
    for key, value in filters.items():
        if value is not None and key in filtered_df.columns:
            filtered_df = filtered_df[filtered_df[key] == value]
    # Apply pagination
    return filtered_df.iloc[offset:offset + limit]
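
# Illustrative use (hypothetical values): second page of 50 CONFIRMED rows.
#   page = filter_dataframe(df, limit=50, offset=50, koi_disposition="CONFIRMED")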


def df_to_dict_list(df: pd.DataFrame) -> list:
    """Convert a DataFrame to a list of dicts, mapping NaN to None"""
    # Cast to object first so None survives in numeric columns instead of
    # being coerced back to NaN by pandas
    return df.astype(object).where(pd.notna(df), None).to_dict('records')
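
# For example:
#   df_to_dict_list(pd.DataFrame({"a": [1.0, float("nan")]}))
#   -> [{"a": 1.0}, {"a": None}]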


# ============================================================================
# ENDPOINTS FOR THE CUMULATIVE (KEPLER) DATASET
# ============================================================================
@app.get("/kepler/exoplanets", response_model=DatasetListResponse)  # route path assumed
async def get_kepler_exoplanets(
    limit: Optional[int] = Query(None, ge=1, le=1000, description="Maximum number of results (returns all if unspecified)"),
    offset: int = Query(0, ge=0, description="Pagination offset"),
    koi_disposition: Optional[str] = Query(None, description="Filter by disposition (CONFIRMED, FALSE POSITIVE, CANDIDATE)")
):
    """
    Return the list of exoplanets from the Cumulative (Kepler) dataset.
    Includes the 11 essential features:
    - koi_period: orbital period
    - koi_duration: transit duration
    - koi_depth: transit depth
    - koi_impact: impact parameter
    - koi_prad: planet radius
    - koi_slogg: stellar surface gravity
    - koi_sma: semi-major axis
    - koi_smet: stellar metallicity
    - koi_srad: stellar radius
    - koi_steff: stellar effective temperature
    - koi_snr: signal-to-noise ratio (note: not available in the dataset)
    """
    df = load_csv_dataset("cumulative_2025.10.05_10.28.27.csv")
    total = len(df)  # total rows in the full dataset, before filtering
    # Filter when a disposition is given
    if koi_disposition:
        df = df[df['koi_disposition'] == koi_disposition]
    # Paginate only when a limit is given
    if limit is not None:
        df_page = df.iloc[offset:offset + limit]
    else:
        df_page = df.iloc[offset:]
    # Relevant columns (only those with <=50% nulls)
    columns_to_include = [
        # Identifiers
        'kepid', 'kepoi_name', 'kepler_name',
        # Disposition
        'koi_disposition', 'koi_pdisposition', 'koi_score',
        # Orbital characteristics
        'koi_period', 'koi_time0bk', 'koi_impact', 'koi_duration', 'koi_depth',
        'koi_prad', 'koi_sma', 'koi_teq', 'koi_insol', 'koi_dor', 'koi_ror',
        'koi_eccen', 'koi_incl',
        # Stellar characteristics
        'koi_steff', 'koi_slogg', 'koi_srad', 'koi_smass', 'koi_smet', 'koi_kepmag',
        # Coordinates
        'ra', 'dec'
    ]
    # Keep only the columns that actually exist
    available_columns = [col for col in columns_to_include if col in df_page.columns]
    df_result = df_page[available_columns]
    return DatasetListResponse(
        total=total,
        count=len(df_result),
        data=df_to_dict_list(df_result)
    )


@app.get("/kepler/exoplanets/{id}", response_model=ExoplanetCumulative)  # route path assumed
async def get_cumulative_exoplanet_by_id(id: str):
    """
    Return a specific exoplanet from the Cumulative dataset by its KOI name or Kepler name.
    Example: K00001.01, K00002.01, Kepler-227 b, etc.
    """
    df = load_csv_dataset("cumulative_2025.10.05_10.28.27.csv")
    # Look up by kepoi_name or kepler_name
    exoplanet = df[(df['kepoi_name'] == id) | (df['kepler_name'] == id)]
    if len(exoplanet) == 0:
        raise HTTPException(404, f"Exoplanet with kepoi_name or kepler_name '{id}' not found")
    data = exoplanet.iloc[0].where(pd.notna(exoplanet.iloc[0]), None).to_dict()
    return ExoplanetCumulative(**data)


@app.get("/kepler/summary")  # route path assumed
async def get_kepler_summary():
    """
    Return the counts of confirmed, candidate, and false-positive exoplanets,
    plus the total, for the Kepler dataset.
    """
    df = load_csv_dataset("cumulative_2025.10.05_10.28.27.csv")
    disposition_counts = df['koi_disposition'].value_counts().to_dict()
    # Normalize keys
    summary = {
        "CONFIRMED": disposition_counts.get("CONFIRMED", 0),
        "CANDIDATE": disposition_counts.get("CANDIDATE", 0),
        "FALSE POSITIVE": disposition_counts.get("FALSE POSITIVE", 0),
        "TOTAL": int(df.shape[0])
    }
    return summary
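
# Illustrative response shape (counts are placeholders, not real values):
#   {"CONFIRMED": <int>, "CANDIDATE": <int>, "FALSE POSITIVE": <int>, "TOTAL": <int>}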


# ============================================================================
# ENDPOINTS FOR THE K2 DATASET
# ============================================================================
@app.get("/k2/exoplanets", response_model=DatasetListResponse)  # route path assumed
async def get_k2_exoplanets(
    limit: Optional[int] = Query(None, ge=1, le=1000, description="Maximum number of results (returns all if unspecified)"),
    offset: int = Query(0, ge=0, description="Pagination offset"),
    disposition: Optional[str] = Query(None, description="Filter by disposition")
):
    """
    Return the list of exoplanets from the K2 dataset.
    Includes planet and stellar data for visualization.
    """
    df = load_csv_dataset("k2pandc_2025.10.05_10.29.57.csv")
    total = len(df)  # total rows in the full dataset, before filtering
    # Filter when a disposition is given
    if disposition:
        df = df[df['disposition'] == disposition]
    # Paginate only when a limit is given
    if limit is not None:
        df_page = df.iloc[offset:offset + limit]
    else:
        df_page = df.iloc[offset:]
    # Relevant columns (only those with <=50% nulls)
    columns_to_include = [
        # Identifiers
        'pl_name', 'hostname', 'epic_hostname', 'tic_id', 'gaia_id',
        'disposition', 'discoverymethod', 'disc_year',
        # Planet data
        'pl_orbper', 'pl_rade', 'pl_radj', 'pl_trandep', 'pl_trandur',
        'pl_tranmid', 'pl_imppar',
        # Stellar data
        'st_teff', 'st_rad', 'st_mass', 'st_met', 'st_logg',
        # System data
        'sy_dist', 'sy_vmag', 'sy_kmag', 'sy_jmag', 'sy_hmag', 'sy_gaiamag',
        # Coordinates
        'ra', 'dec'
    ]
    # Keep only the columns that actually exist
    available_columns = [col for col in columns_to_include if col in df_page.columns]
    df_result = df_page[available_columns]
    return DatasetListResponse(
        total=total,
        count=len(df_result),
        data=df_to_dict_list(df_result)
    )


@app.get("/k2/exoplanets/{pl_name}", response_model=ExoplanetK2)  # route path assumed
async def get_k2_exoplanet_by_name(pl_name: str):
    """
    Return a specific exoplanet from the K2 dataset by its name.
    Example: K2-1 b, K2-2 b, etc.
    """
    df = load_csv_dataset("k2pandc_2025.10.05_10.29.57.csv")
    # Look up by pl_name
    exoplanet = df[df['pl_name'] == pl_name]
    if len(exoplanet) == 0:
        raise HTTPException(404, f"Exoplanet with name '{pl_name}' not found")
    # Convert to a dictionary
    data = exoplanet.iloc[0].where(pd.notna(exoplanet.iloc[0]), None).to_dict()
    return ExoplanetK2(**data)


# ============================================================================
# ENDPOINTS FOR THE TOI (TESS) DATASET
# ============================================================================
@app.get("/tess/tois", response_model=DatasetListResponse)  # route path assumed
async def get_tess_exoplanets(
    limit: Optional[int] = Query(None, ge=1, le=1000, description="Maximum number of results (returns all if unspecified)"),
    offset: int = Query(0, ge=0, description="Pagination offset"),
    tfopwg_disp: Optional[str] = Query(None, description="Filter by working-group (TFOPWG) disposition")
):
    """
    Return the list of TESS Objects of Interest (TOI).
    Includes exoplanet-candidate data from the TESS telescope.
    """
    df = load_csv_dataset("TOI_2025.10.05_10.30.20.csv")
    total = len(df)  # total rows in the full dataset, before filtering
    # Filter when a disposition is given
    if tfopwg_disp:
        df = df[df['tfopwg_disp'] == tfopwg_disp]
    # Paginate only when a limit is given
    if limit is not None:
        df_page = df.iloc[offset:offset + limit]
    else:
        df_page = df.iloc[offset:]
    # Relevant columns (only those with <=50% nulls)
    columns_to_include = [
        # Identifiers
        'toi', 'tid', 'ctoi_alias', 'tfopwg_disp', 'pl_pnum',
        # Planet data
        'pl_orbper', 'pl_rade', 'pl_eqt', 'pl_insol', 'pl_trandep',
        'pl_trandurh', 'pl_tranmid', 'pl_imppar', 'pl_orbsmax',
        # Stellar data
        'st_teff', 'st_rad', 'st_mass', 'st_met', 'st_logg', 'st_dist', 'st_tmag',
        # Proper motion
        'st_pmra', 'st_pmdec',
        # Coordinates
        'ra', 'dec', 'rastr', 'decstr'
    ]
    # Keep only the columns that actually exist
    available_columns = [col for col in columns_to_include if col in df_page.columns]
    # Copy so the string conversions below do not trigger SettingWithCopyWarning
    df_result = df_page[available_columns].copy()
    # Convert fields that must be strings
    string_fields = ['ctoi_alias', 'rastr', 'decstr']
    for field in string_fields:
        if field in df_result.columns:
            df_result[field] = df_result[field].apply(lambda x: str(x) if pd.notna(x) else None)
    return DatasetListResponse(
        total=total,
        count=len(df_result),
        data=df_to_dict_list(df_result)
    )


@app.get("/tess/tois/{toi_id}", response_model=ExoplanetTOI)  # route path assumed
async def get_tess_by_id(toi_id: float):
    """
    Return a specific TOI by its ID.
    Example: 100.01, 101.01, etc.
    """
    df = load_csv_dataset("TOI_2025.10.05_10.30.20.csv")
    # Look up by toi
    toi_obj = df[df['toi'] == toi_id]
    if len(toi_obj) == 0:
        raise HTTPException(404, f"TOI with id '{toi_id}' not found")
    # Convert to a dictionary
    data = toi_obj.iloc[0].where(pd.notna(toi_obj.iloc[0]), None).to_dict()
    # Convert fields that must be strings
    string_fields = ['ctoi_alias', 'rastr', 'decstr']
    for field in string_fields:
        if field in data and data[field] is not None:
            data[field] = str(data[field])
    return ExoplanetTOI(**data)
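
# Minimal local run sketch (module name assumed to be main.py):
#   uvicorn main:app --host 0.0.0.0 --port 8000
# Then, for example (paths as assumed above; URL-encode names with spaces):
#   curl "http://localhost:8000/kepler/exoplanets?limit=5&koi_disposition=CONFIRMED"
#   curl "http://localhost:8000/k2/exoplanets/K2-1%20b"
#   curl "http://localhost:8000/tess/tois/100.01"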