# Spaces status: Sleeping
| import pandas as pd | |
| import streamlit as st | |
| import joblib | |
| from pathlib import Path | |
# Browser-tab title, icon and page layout for the app.
st.set_page_config(
    page_title='Star System Classification (LightGBM)',
    page_icon='🪐',
    layout='centered',
)

# All artifacts are looked up next to this script, wherever it is deployed.
BASE_DIR = Path(__file__).resolve().parent
MODEL_PATH = BASE_DIR.joinpath('lightgbm_model.pkl')
FEATURES_PATH = BASE_DIR.joinpath('featurer.pkl')  # the file was saved under this name
PLANET_ENCODER_PATH = BASE_DIR.joinpath('planet_encoder.pkl')
STAR_ENCODER_PATH = BASE_DIR.joinpath('star_encoder.pkl')

# Fixed ordinal mapping that was used in training.
ACTIVITY_MAP = dict(Low=0, Medium=1, High=2)

# Optional display names for predicted class ids
# (edit if your competition uses different names).
LABEL_NAMES = dict(enumerate(('Habitable', 'Young', 'Old', 'Exotic')))
@st.cache_resource
def load_artifacts():
    """Load the model, the feature list and both label encoders from disk.

    Streamlit re-executes the script on every widget interaction, so the
    result is cached with ``st.cache_resource``: the four files are
    deserialized once and the same objects are reused on later reruns.
    Callers must therefore treat the returned objects as read-only.

    Returns:
        A ``(model, features, le_planet, le_star)`` tuple, in that order.

    Raises:
        FileNotFoundError: If any of the four artifact files does not exist;
            the message names every missing file.
    """
    required = (MODEL_PATH, FEATURES_PATH, PLANET_ENCODER_PATH, STAR_ENCODER_PATH)

    # Check everything up front so the user sees the full list of missing
    # files at once instead of fixing them one failure at a time.
    missing = [p.name for p in required if not p.exists()]
    if missing:
        raise FileNotFoundError(
            'Missing files in repo root: ' + ', '.join(missing) +
            '\n\nMake sure these files are in the same folder as app.py:\n'
            '- lightgbm_model.pkl\n- featurer.pkl\n- planet_encoder.pkl\n- star_encoder.pkl'
        )

    # NOTE(security): joblib.load unpickles arbitrary objects and can execute
    # code embedded in the file. Only ship artifacts you produced yourself.
    model = joblib.load(MODEL_PATH)
    features = joblib.load(FEATURES_PATH)
    le_planet = joblib.load(PLANET_ENCODER_PATH)
    le_star = joblib.load(STAR_ENCODER_PATH)
    return model, features, le_planet, le_star
def safe_transform(le, value: str, col_name: str) -> int:
    """Encode a single category value with a saved label encoder.

    Args:
        le: Fitted encoder exposing ``transform`` and, optionally,
            ``classes_`` (presumably a scikit-learn ``LabelEncoder``).
        value: The category to encode.
        col_name: Column name, used only in the error message.

    Returns:
        The integer code the encoder assigns to ``value``.

    If the encoder rejects ``value`` with ``ValueError`` (what scikit-learn's
    ``LabelEncoder`` raises for unseen labels), an error listing the known
    categories is shown in the app and the script run is halted with
    ``st.stop()``. Any other exception propagates unchanged, so a broken
    encoder is not misreported as an unknown category.
    """
    try:
        encoded = le.transform([value])
    except ValueError:
        known = list(getattr(le, 'classes_', []))
        st.error(f'Unknown category for {col_name}: {value}. Known values: {known}')
        st.stop()
    # Convert outside the try so a failure here is not reported as an
    # unknown category.
    return int(encoded[0])
# Unpack the artifacts in the order load_artifacts() returns them.
model, FEATURES, le_planet, le_star = load_artifacts()

# Page heading and a one-line description of the task.
st.title('🪐 Star System Classification (LightGBM)')
st.write('Predict the star system type using 10 astrophysical measurements (multiclass).')

# Collapsed checklist of the files the app expects next to it.
with st.expander('ℹ️ Required files in this folder', expanded=False):
    required_files = (
        'app.py',
        'lightgbm_model.pkl',
        'featurer.pkl',
        'planet_encoder.pkl',
        'star_encoder.pkl',
        'requirements.txt',
    )
    st.code('\n'.join(required_files))
st.subheader('Enter feature values')

# --- Inputs ---
# Continuous measurements. All but metallicity are bounded below by zero.
star_size = st.number_input(
    'star_size', min_value=0.0, value=1.0, step=0.01,
)
star_brightness = st.number_input(
    'star_brightness', min_value=0.0, value=1.2, step=0.01,
)
distance_from_earth = st.number_input(
    'distance_from_earth', min_value=0.0, value=90.0, step=1.0,
)
star_mass = st.number_input(
    'star_mass', min_value=0.0, value=1.3, step=0.01,
)
metallicity = st.number_input(
    'metallicity', value=0.02, step=0.001, format='%.4f',
)

# Integer-coded columns: the user picks the code directly.
galaxy_region = st.selectbox('galaxy_region', [0, 1, 2], index=1)
galaxy_type = st.selectbox('galaxy_type', [0, 1, 2], index=0)

# String categories; the choices come from the classes the saved encoders know.
star_spectral_class = st.selectbox('star_spectral_class', list(le_star.classes_), index=0)
planet_configuration = st.selectbox('planet_configuration', list(le_planet.classes_), index=0)

# Activity level is offered in the same order as the keys of ACTIVITY_MAP.
stellar_activity_class = st.selectbox('stellar_activity_class', list(ACTIVITY_MAP), index=0)
# --- Apply the same preprocessing as training ---
# Ordinal code for activity first, then the two label-encoded columns
# (planet before star, so a failing planet value is reported first).
activity_code = ACTIVITY_MAP[stellar_activity_class]
planet_code = safe_transform(le_planet, planet_configuration, 'planet_configuration')
star_code = safe_transform(le_star, star_spectral_class, 'star_spectral_class')

# --- Single observation, keyed by the original feature names ---
row = {
    'star_size': float(star_size),
    'star_brightness': float(star_brightness),
    'galaxy_region': int(galaxy_region),
    'distance_from_earth': float(distance_from_earth),
    'galaxy_type': int(galaxy_type),
    'star_spectral_class': star_code,
    'planet_configuration': planet_code,
    'stellar_activity_class': activity_code,
    'star_mass': float(star_mass),
    'metallicity': float(metallicity),
}

# One-row frame; columns are reordered to the saved feature list below.
X_input = pd.DataFrame([row])

# Compare the frame's columns against what the model expects.
missing_cols = [name for name in FEATURES if name not in X_input.columns]
extra_cols = [name for name in X_input.columns if name not in FEATURES]

# A column the model needs but the form does not provide is fatal.
if missing_cols:
    st.error(f'Missing columns for model: {missing_cols}')
    st.stop()

# Surplus columns are not an error; they are simply discarded.
if extra_cols:
    X_input = X_input.drop(columns=extra_cols)

# Enforce the saved feature order.
X_input = X_input[FEATURES]
st.divider()

# Two side-by-side actions: a single predicted class, or the full
# per-class probability table.
col1, col2 = st.columns(2)

with col1:
    if st.button('🔮 Predict', use_container_width=True):
        pred = model.predict(X_input)[0]
        pred_int = int(pred)
        # Fall back to the bare class id when no display name is configured.
        label = LABEL_NAMES.get(pred_int, str(pred_int))
        st.success(f'Prediction: **{label}** (class {pred_int})')

with col2:
    if st.button('📊 Predict probabilities', use_container_width=True):
        if hasattr(model, 'predict_proba'):
            proba = model.predict_proba(X_input)[0]

            # Use the model's own class ids so the table agrees with what
            # predict() returns even when training labels were not 0..n-1.
            # Fall back to positional ids when classes_ is missing or does
            # not line up with the probability vector.
            class_ids = list(getattr(model, 'classes_', []))
            if len(class_ids) != len(proba):
                class_ids = list(range(len(proba)))

            proba_df = pd.DataFrame(
                {'class': class_ids, 'probability': proba}
            ).sort_values('probability', ascending=False)
            # Show the display name where one exists, otherwise the id as text.
            proba_df['label'] = proba_df['class'].map(LABEL_NAMES).fillna(proba_df['class'].astype(str))
            st.dataframe(proba_df[['label', 'class', 'probability']], use_container_width=True)
        else:
            st.warning('This model does not support predict_proba().')

st.caption('Tip: If predictions look wrong, ensure the same encoders and feature order are used as during training.')