Spaces:

EnYa32
/

StarSystemClassification

Sleeping

App Files Files Community

StarSystemClassification / src /streamlit_app.py

EnYa32

Update src/streamlit_app.py

e78a19b verified about 2 months ago

raw

history blame contribute delete

5.75 kB

	import pandas as pd
	import streamlit as st
	import joblib
	from pathlib import Path

	# Page-level Streamlit settings: title, icon and layout.
	st.set_page_config(
	    page_title='Star System Classification (LightGBM)',
	    page_icon='🪐',
	    layout='centered',
	)

	# Directory containing this script; every artifact path below is built from it.
	BASE_DIR = Path(__file__).resolve().parent

	# Serialized artifacts that are loaded at startup.
	MODEL_PATH = BASE_DIR.joinpath('lightgbm_model.pkl')
	# NOTE: per the original author's note, the feature list was saved under
	# the name 'featurer.pkl', so this spelling is kept as-is.
	FEATURES_PATH = BASE_DIR.joinpath('featurer.pkl')
	PLANET_ENCODER_PATH = BASE_DIR.joinpath('planet_encoder.pkl')
	STAR_ENCODER_PATH = BASE_DIR.joinpath('star_encoder.pkl')

	# --- Fixed ordinal mapping for stellar activity, as used in training ---
	ACTIVITY_MAP = dict(zip(('Low', 'Medium', 'High'), range(3)))

	# Optional display names per class id (edit if the competition uses different names).
	LABEL_NAMES = dict(enumerate(('Habitable', 'Young', 'Old', 'Exotic')))

	@st.cache_resource
	def load_artifacts():
	    """Load the model, the feature list and the two label encoders from disk.

	    The four artifact files are addressed through the module-level path
	    constants, which all live in ``BASE_DIR``. The function is wrapped in
	    ``st.cache_resource``.

	    Returns:
	        A ``(model, features, le_planet, le_star)`` tuple, in that order.

	    Raises:
	        FileNotFoundError: If any artifact file does not exist. The message
	            names the missing files and the directory that was searched.
	    """
	    required = (MODEL_PATH, FEATURES_PATH, PLANET_ENCODER_PATH, STAR_ENCODER_PATH)
	    missing = [p.name for p in required if not p.exists()]
	    if missing:
	        # Report the directory that was actually searched and derive the
	        # expected names from the path constants, so the message cannot
	        # drift away from what the code really looks for.
	        expected = '\n'.join(f'- {p.name}' for p in required)
	        raise FileNotFoundError(
	            f'Missing files in {BASE_DIR}: ' + ', '.join(missing) +
	            '\n\nMake sure these files are in the same folder as this script:\n' +
	            expected
	        )

	    # NOTE(review): joblib.load unpickles arbitrary objects; only load
	    # artifacts that come from a trusted source.
	    model, features, le_planet, le_star = (joblib.load(p) for p in required)
	    return model, features, le_planet, le_star

	def safe_transform(le, value: str, col_name: str) -> int:
	    """Encode a single category value with a saved LabelEncoder.

	    Args:
	        le: Fitted encoder exposing ``transform`` (and normally ``classes_``).
	        value: Raw category value to encode.
	        col_name: Column name, used only in the error message.

	    Returns:
	        The integer code the encoder assigns to ``value``.

	    If the encoder rejects the value with ``ValueError`` (the error
	    scikit-learn's LabelEncoder raises for unseen labels), a helpful error
	    is shown in the app and the script run is halted with ``st.stop()``.
	    Any other exception propagates, so unrelated failures are not
	    misreported as an unknown category.
	    """
	    try:
	        encoded = le.transform([value])
	    except ValueError:
	        known = list(getattr(le, 'classes_', []))
	        st.error(f'Unknown category for {col_name}: {value}. Known values: {known}')
	        st.stop()  # halts the current script run; nothing below executes
	    return int(encoded[0])

	# Load the model, the expected feature column list and the two encoders.
	model, FEATURES, le_planet, le_star = load_artifacts()

	st.title('🪐 Star System Classification (LightGBM)')
	st.write('Predict the star system type using 10 astrophysical measurements (multiclass).')

	with st.expander('ℹ️ Required files in this folder', expanded=False):
	    # Show the real script name and the artifact names taken from the path
	    # constants instead of a hard-coded list that can go stale.
	    st.code(
	        '\n'.join([
	            Path(__file__).name,
	            MODEL_PATH.name,
	            FEATURES_PATH.name,
	            PLANET_ENCODER_PATH.name,
	            STAR_ENCODER_PATH.name,
	            'requirements.txt',
	        ])
	    )

	st.subheader('Enter feature values')

	# --- Inputs ---
	# Continuous measurements
	star_size = st.number_input('star_size', min_value=0.0, value=1.0, step=0.01)
	star_brightness = st.number_input('star_brightness', min_value=0.0, value=1.2, step=0.01)
	distance_from_earth = st.number_input('distance_from_earth', min_value=0.0, value=90.0, step=1.0)
	star_mass = st.number_input('star_mass', min_value=0.0, value=1.3, step=0.01)
	# No lower bound is set for metallicity, unlike the inputs above.
	metallicity = st.number_input('metallicity', value=0.02, step=0.001, format='%.4f')

	# Discrete numeric / already-encoded inputs
	galaxy_region = st.selectbox('galaxy_region', options=[0, 1, 2], index=1)
	galaxy_type = st.selectbox('galaxy_type', options=[0, 1, 2], index=0)

	# Categorical inputs (original strings); the choices come straight from
	# the fitted encoders so only values the encoders know can be picked.
	star_spectral_class = st.selectbox(
	    'star_spectral_class',
	    options=list(le_star.classes_),
	    index=0,
	)

	planet_configuration = st.selectbox(
	    'planet_configuration',
	    options=list(le_planet.classes_),
	    index=0,
	)

	# Offer exactly the keys of ACTIVITY_MAP (same order: Low, Medium, High)
	# so the ACTIVITY_MAP lookup done during preprocessing can never miss.
	stellar_activity_class = st.selectbox(
	    'stellar_activity_class',
	    options=list(ACTIVITY_MAP),
	    index=0,
	)

	# --- Apply the same preprocessing as in training ---
	# Ordinal mapping for activity; saved LabelEncoders for the other two
	# categorical columns (planet first, then star).
	activity_code = ACTIVITY_MAP[stellar_activity_class]
	planet_code = safe_transform(le_planet, planet_configuration, 'planet_configuration')
	star_code = safe_transform(le_star, star_spectral_class, 'star_spectral_class')

	# --- Single-row record in the original feature space, already encoded ---
	row = {
	    'star_size': float(star_size),
	    'star_brightness': float(star_brightness),
	    'galaxy_region': int(galaxy_region),
	    'distance_from_earth': float(distance_from_earth),
	    'galaxy_type': int(galaxy_type),
	    'star_spectral_class': star_code,
	    'planet_configuration': planet_code,
	    'stellar_activity_class': activity_code,
	    'star_mass': float(star_mass),
	    'metallicity': float(metallicity),
	}

	X_input = pd.DataFrame([row])

	# Every column the model expects must be present; otherwise stop the run.
	missing_cols = [c for c in FEATURES if c not in X_input.columns]
	if missing_cols:
	    st.error(f'Missing columns for model: {missing_cols}')
	    st.stop()

	# Columns the model does not know are not an error; they are dropped.
	extra_cols = [c for c in X_input.columns if c not in FEATURES]
	if extra_cols:
	    X_input = X_input.drop(columns=extra_cols)

	# Enforce the column order stored in FEATURES.
	X_input = X_input[FEATURES]

	st.divider()

	col1, col2 = st.columns(2)

	# Left column: single predicted class.
	with col1:
	    if st.button('🔮 Predict', use_container_width=True):
	        pred = model.predict(X_input)[0]
	        pred_int = int(pred)
	        # Fall back to the raw class id when no display name is configured.
	        label = LABEL_NAMES.get(pred_int, str(pred_int))
	        st.success(f'Prediction: {label} (class {pred_int})')

	# Right column: per-class probabilities, highest first.
	with col2:
	    if st.button('📊 Predict probabilities', use_container_width=True):
	        if hasattr(model, 'predict_proba'):
	            proba = model.predict_proba(X_input)[0]
	            # Label each probability with the model's own class label when
	            # the model exposes `classes_` (scikit-learn convention: the
	            # predict_proba columns follow `classes_`), so this table agrees
	            # with the class id shown by the Predict button. Otherwise fall
	            # back to positional ids 0..n-1.
	            class_ids = getattr(model, 'classes_', None)
	            if class_ids is None or len(class_ids) != len(proba):
	                class_ids = range(len(proba))
	            proba_df = pd.DataFrame(
	                {'class': list(class_ids), 'probability': proba}
	            ).sort_values('probability', ascending=False)
	            # Classes without a configured display name show their id as text.
	            proba_df['label'] = proba_df['class'].map(LABEL_NAMES).fillna(proba_df['class'].astype(str))
	            st.dataframe(proba_df[['label', 'class', 'probability']], use_container_width=True)
	        else:
	            st.warning('This model does not support predict_proba().')

	st.caption('Tip: If predictions look wrong, ensure the same encoders and feature order are used as during training.')