Spaces:

darly9991
/

Water_Quality_Central_Java

Running

App Files Files Community

Water_Quality_Central_Java / water_quality_index.py

darly9991

Update water_quality_index.py

36ec84c verified 2 months ago

raw

history blame contribute delete

6.59 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import joblib
	import plotly.express as px
	import base64
	from sklearn.preprocessing import LabelEncoder

	# === Thresholds for Rule-Based Classification ===
	# Limits consulted by categorize_sample(). DO is applied as a lower bound
	# (a compliant sample must be >= the limit); pH must lie inside
	# [pH_min, pH_max]; every other entry is an upper bound.
	# Units follow the input columns each limit is compared against
	# (mg/L, and MPN/100 mL for fecal coliform).
	thresholds = dict(
	    pH_min=6.0,
	    pH_max=9.0,
	    BOD=3.0,
	    COD=25.0,
	    TSS=50.0,
	    DO=4.0,
	    Nitrate=10.0,
	    Phosphate=0.2,
	    FecalColiform=1000,
	)

	def categorize_sample(row):
	    """Classify one water sample against the rule-based thresholds.

	    Args:
	        row: Mapping-like object (e.g. a pandas Series or a dict) indexed by
	            the eight measurement column names read below.

	    Returns:
	        A ``(violations, label)`` tuple of strings. ``violations`` is the
	        comma-separated list of triggered categories and ``label`` is the
	        highest-priority one (Chemical > Biological > Eutrophication).
	        Both are ``"Safe"`` when no category is triggered.

	    Note:
	        A sample that fails the compliance check without triggering any
	        category is still reported as ``"Safe"``. This happens for COD
	        between 1x and 1.5x its limit with everything else compliant, and
	        for NaN readings, because every comparison with NaN is False.
	    """
	    ph_value = row['pH (Potential Hydrogen)']
	    bod_value = row['BOD (Biological Oxygen Demand) (mg/L)']
	    cod_value = row['COD (Chemical Oxygen Demand) (mg/L)']
	    do_value = row['DO (Dissolved Oxygen) (mg/L)']
	    nitrate_value = row['NO3N (Nitrat) (mg/L)']
	    phosphate_value = row['Total Phosphat (mg/L)']
	    fecal_value = row['Fecal Coliform (MPN/100 mL)']
	    tss_value = row['TSS (Total Suspended Solid) (mg/L)']

	    safe_result = ("Safe", "Safe")

	    # Fast path: every parameter is within its limit.
	    fully_compliant = (
	        thresholds['pH_min'] <= ph_value <= thresholds['pH_max']
	        and bod_value <= thresholds['BOD']
	        and cod_value <= thresholds['COD']
	        and do_value >= thresholds['DO']
	        and nitrate_value <= thresholds['Nitrate']
	        and phosphate_value <= thresholds['Phosphate']
	        and fecal_value <= thresholds['FecalColiform']
	        and tss_value <= thresholds['TSS']
	    )
	    if fully_compliant:
	        return safe_result

	    # Rules are listed in priority order and evaluated lazily, so each
	    # predicate short-circuits on its first true condition. Excess TSS
	    # counts towards all three categories.
	    rule_table = (
	        ("Chemical", lambda: (
	            cod_value > thresholds['COD'] * 1.5
	            or ph_value < thresholds['pH_min']
	            or ph_value > thresholds['pH_max']
	            or tss_value > thresholds['TSS']
	        )),
	        ("Biological", lambda: (
	            bod_value > thresholds['BOD']
	            or do_value < thresholds['DO']
	            or fecal_value > thresholds['FecalColiform']
	            or tss_value > thresholds['TSS']
	        )),
	        ("Eutrophication", lambda: (
	            nitrate_value > thresholds['Nitrate']
	            or phosphate_value > thresholds['Phosphate']
	            or tss_value > thresholds['TSS']
	        )),
	    )
	    triggered = [name for name, is_violated in rule_table if is_violated()]

	    if not triggered:
	        return safe_result

	    # The table is already in priority order, so the first hit is the label.
	    return ", ".join(triggered), triggered[0]

	# === Streamlit App ===
	def _read_input(feature_cols):
	    """Render the input widgets and return the features to classify.

	    Args:
	        feature_cols: Ordered list of the column names the models expect.

	    Returns:
	        A DataFrame holding exactly ``feature_cols`` (one row per sample),
	        or ``None`` when there is nothing to classify yet: no file uploaded,
	        an unreadable or incomplete CSV, or a manual form not submitted.
	    """
	    data_option = st.radio("Choose Input Method", ["Upload CSV", "Manual Entry"])

	    if data_option == "Upload CSV":
	        uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
	        if not uploaded_file:
	            return None
	        try:
	            df = pd.read_csv(uploaded_file)
	        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
	            # An empty or malformed upload is a user error, not an app crash.
	            st.error(f"Could not read the uploaded CSV: {e}")
	            return None
	        missing_cols = [col for col in feature_cols if col not in df.columns]
	        if missing_cols:
	            st.error(f"Missing required columns: {missing_cols}")
	            return None
	        return df[feature_cols]

	    with st.form("manual_form"):
	        ph = st.number_input("pH", min_value=0.0, max_value=14.0, value=7.0)
	        bod = st.number_input("BOD (mg/L)", min_value=0.0, max_value=100.0, value=2.0)
	        cod = st.number_input("COD (mg/L)", min_value=0.0, max_value=500.0, value=10.0)
	        tss = st.number_input("TSS (mg/L)", min_value=0.0, max_value=1000.0, value=20.0)
	        do = st.number_input("DO (mg/L)", min_value=0.0, max_value=20.0, value=5.0)
	        # NOTE(review): max_value equals the rule threshold for Nitrate (10.0)
	        # and the rule only fires on values strictly above it, so the nitrate
	        # rule can never trigger from manual entry. Confirm this is intended.
	        no3 = st.number_input("NO3N (mg/L)", min_value=0.0, max_value=10.0, value=1.0)
	        tp = st.number_input("Total Phosphat (mg/L)", min_value=0.0, max_value=10.0, value=0.1)
	        fecal = st.number_input("Fecal Coliform (MPN/100 mL)", min_value=0.0, max_value=1000000.0, value=500.0)
	        submitted = st.form_submit_button("Predict")

	    if not submitted:
	        return None

	    return pd.DataFrame([{
	        "pH (Potential Hydrogen)": ph,
	        "BOD (Biological Oxygen Demand) (mg/L)": bod,
	        "COD (Chemical Oxygen Demand) (mg/L)": cod,
	        "TSS (Total Suspended Solid) (mg/L)": tss,
	        "DO (Dissolved Oxygen) (mg/L)": do,
	        "NO3N (Nitrat) (mg/L)": no3,
	        "Total Phosphat (mg/L)": tp,
	        "Fecal Coliform (MPN/100 mL)": fecal
	    }])


	def _show_results(input_df, model, imputer, scaler, label_encoder):
	    """Classify every row of ``input_df`` and render the results.

	    The on-page summary and pie chart describe the first row. The
	    downloadable CSV carries the ML class, rule-based class, violations and
	    class probabilities of each row individually.

	    Args:
	        input_df: Feature DataFrame as returned by ``_read_input``.
	        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
	        imputer: Fitted transformer applied to the raw features.
	        scaler: Fitted transformer applied after imputation.
	        label_encoder: Fitted encoder used to map predictions back to names.
	    """
	    if input_df.empty:
	        st.warning("The provided data contains no rows to classify.")
	        return

	    X_imp = imputer.transform(input_df)
	    X_scaled = scaler.transform(X_imp)
	    y_proba = model.predict_proba(X_scaled)
	    pred_classes = label_encoder.inverse_transform(model.predict(X_scaled))

	    # Rule-Based Evaluation, one (violations, label) pair per row.
	    rule_results = [categorize_sample(row) for _, row in input_df.iterrows()]
	    rule_violations = [violations for violations, _ in rule_results]
	    rule_labels = [label for _, label in rule_results]

	    # Display results for the first sample.
	    st.markdown(f"### 🧪 ML Predicted Class: `{pred_classes[0]}`")
	    st.markdown(f"### 📏 Rule-Based Class: `{rule_labels[0]}`")
	    st.markdown(f"Violations Detected: {rule_violations[0]}")
	    if len(input_df) > 1:
	        st.caption(
	            f"Showing the first of {len(input_df)} rows above; "
	            "the download contains results for every row."
	        )

	    fig_pie = px.pie(
	        names=label_encoder.classes_,
	        values=y_proba[0],
	        title="Prediction Probability per Class",
	        color_discrete_sequence=px.colors.qualitative.Set3
	    )
	    st.plotly_chart(fig_pie, use_container_width=True)

	    # Export Results. Work on a copy so the caller's frame (possibly a slice
	    # of the uploaded CSV) is not mutated, and assign per-row values so each
	    # exported row carries its own classification.
	    results = input_df.copy()
	    results["Predicted Class (ML)"] = pred_classes
	    results["Rule-Based Class"] = rule_labels
	    results["Rule-Based Violations"] = rule_violations
	    results[[f"Prob_{cls}" for cls in label_encoder.classes_]] = y_proba
	    csv = results.to_csv(index=False)
	    b64 = base64.b64encode(csv.encode()).decode()
	    href = f'<a href="data:file/csv;base64,{b64}" download="prediction_result.csv">Download CSV File</a>'
	    st.subheader("📤 Download Result")
	    st.markdown(href, unsafe_allow_html=True)


	def run():
	    """Render the water-quality dashboard.

	    Loads the fitted models and preprocessing objects from the working
	    directory, lets the user choose a model and supply samples (CSV upload
	    or manual form), then shows the ML prediction next to the rule-based
	    evaluation and offers the combined results as a CSV download.
	    """
	    # joblib.load unpickles arbitrary objects; these must stay trusted,
	    # app-bundled artifacts and never user-supplied files.
	    svc_model = joblib.load("svc_model.pkl")
	    xgb_model = joblib.load("xgb_model.pkl")
	    imputer = joblib.load("imputer.pkl")
	    scaler = joblib.load("scaler.pkl")
	    label_encoder = joblib.load("label_encoder.pkl")

	    feature_cols = [
	        "pH (Potential Hydrogen)",
	        "BOD (Biological Oxygen Demand) (mg/L)",
	        "COD (Chemical Oxygen Demand) (mg/L)",
	        "TSS (Total Suspended Solid) (mg/L)",
	        "DO (Dissolved Oxygen) (mg/L)",
	        "NO3N (Nitrat) (mg/L)",
	        "Total Phosphat (mg/L)",
	        "Fecal Coliform (MPN/100 mL)"
	    ]

	    st.set_page_config(page_title="Water Quality Classifier Dashboard", layout="wide")
	    st.title("💧 Water Quality Prediction and Rule-Based Evaluation")

	    model_choice = st.selectbox("Select Model", ["SVC + SMOTETomek", "XGBoost + SMOTETomek"])
	    model = svc_model if model_choice == "SVC + SMOTETomek" else xgb_model

	    st.header("📥 Input Data")
	    input_df = _read_input(feature_cols)

	    if input_df is not None:
	        st.header("🔍 Prediction Results")
	        try:
	            _show_results(input_df, model, imputer, scaler, label_encoder)
	        except Exception as e:
	            # UI boundary: report the failure on the page instead of crashing.
	            st.error(f"Prediction failed: {e}")

	    st.markdown("---")
	    st.caption("Developed with ❤️ for integrated ML + expert rule water quality system")