# app.py
"""Gradio app that forecasts monthly rainfall (RR) with a pre-trained model.

At import time the module reads the historical monthly dataset, derives the
calendar/seasonal features, loads the serialized model and launches the
Gradio interface.
"""
import io
import os
from datetime import datetime

import gradio as gr
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from dateutil.relativedelta import relativedelta
from PIL import Image

# Feature columns passed to the model, in the order used by both prediction
# entry points. Kept in one place so the two callers cannot drift apart.
FEATURES = [
    'month_cos',
    'RR_lag12',
    'season_dry',
    'season_rainy',
    'RR_above_monthly_median',
    'RR_monthly_median_loo',
    'RR_monthly_std_loo',
]

# Indonesian month names used for the x-axis labels (index 0 == January).
BULAN_INDONESIA = [
    'Januari', 'Februari', 'Maret', 'April', 'Mei', 'Juni',
    'Juli', 'Agustus', 'September', 'Oktober', 'November', 'Desember',
]

# Load the dataset (path relative to this file).
dataset_path = os.path.join(os.path.dirname(__file__), 'dataset', 'data_bulanan.csv')
df_historis = pd.read_csv(dataset_path)
df_historis['YearMonth'] = pd.to_datetime(df_historis['YearMonth'], format='%Y-%m')
df_historis['month'] = df_historis['YearMonth'].dt.month
df_historis['year'] = df_historis['YearMonth'].dt.year

# Cyclical encoding of the month.
df_historis['month_sin'] = np.sin(2 * np.pi * df_historis['month'] / 12)
df_historis['month_cos'] = np.cos(2 * np.pi * df_historis['month'] / 12)


def season(month):
    """Return 'rainy' for months 11, 12, 1, 2, 3, 4 and 'dry' otherwise."""
    return 'rainy' if month in [11, 12, 1, 2, 3, 4] else 'dry'


df_historis['season'] = df_historis['month'].apply(season)
df_historis = pd.concat(
    [df_historis, pd.get_dummies(df_historis['season'], prefix='season')],
    axis=1,
)

# Historical per-month RR statistics.
monthly_medians = df_historis.groupby('month')['RR'].median()
monthly_stats = df_historis.groupby('month')['RR'].agg(['median', 'std']).rename(
    columns={'median': 'monthly_median', 'std': 'monthly_std'}
)

# Add the 12-row lag of RR to the historical data (rows sorted by date).
df_historis = df_historis.sort_values('YearMonth')
df_historis['RR_lag12'] = df_historis['RR'].shift(12)
df_historis = df_historis.reset_index(drop=True)


def preprocess_single_row_from_date(input_date: str) -> pd.DataFrame:
    """Build the one-row feature frame for the month containing *input_date*.

    Args:
        input_date: Any date string ``pd.to_datetime`` can parse (for example
            ``YYYY-MM`` or ``YYYY-MM-DD``); it is snapped to the first day of
            its month.

    Returns:
        A single-row DataFrame with ``YearMonth``, ``month`` and the model
        feature columns.

    Raises:
        ValueError: If the history contains no row dated 12 or more months
            before the target month.
    """
    target_date = pd.to_datetime(input_date).replace(day=1)  # snap to month start

    # The same date is used for the history guard and for the lag lookup.
    lag_date = target_date - pd.DateOffset(months=12)

    # df_historis is only read here, so no defensive copy is needed.
    if df_historis[df_historis['YearMonth'] <= lag_date].empty:
        raise ValueError(f"Tanggal {input_date} tidak punya data historis 12 bulan ke belakang.")

    month = target_date.month
    season_str = season(month)

    row = {
        'YearMonth': target_date,
        'month': month,
        'month_cos': np.cos(2 * np.pi * month / 12),
        'season_dry': 1 if season_str == 'dry' else 0,
        'season_rainy': 1 if season_str == 'rainy' else 0,
    }

    # RR observed exactly 12 months before the target month.
    # NOTE(review): when that month is absent from the history this becomes
    # NaN; whether the loaded model accepts NaN inputs is not visible here.
    rr_lag12 = df_historis.loc[df_historis['YearMonth'] == lag_date, 'RR']
    row['RR_lag12'] = rr_lag12.values[0] if not rr_lag12.empty else np.nan

    # Statistics over the same calendar month, leaving out the last
    # historical value (treated as not yet available).
    rr_same_month = df_historis[df_historis['month'] == month]['RR'].values
    if len(rr_same_month) < 2:
        row['RR_monthly_median_loo'] = np.nan
        row['RR_monthly_std_loo'] = np.nan
    else:
        rr_loo = rr_same_month[:-1]
        row['RR_monthly_median_loo'] = np.median(rr_loo)
        row['RR_monthly_std_loo'] = np.std(rr_loo, ddof=1)

    # Flag: is the leave-one-out median above the full historical median of
    # that month? (A NaN median compares False and yields 0.)
    row['RR_above_monthly_median'] = (
        1 if row['RR_monthly_median_loo'] > monthly_medians[month] else 0
    )

    return pd.DataFrame([row])


def load_model():
    """Load the serialized model stored under ``model/`` next to this file."""
    model_path = os.path.join(os.path.dirname(__file__), 'model', 'ModelFinalSVR_rainfallpkl.pkl')
    return joblib.load(model_path)


model = load_model()


def prediksi_curah_hujan(bulan_input: str):
    """Predict rainfall for the single month given by *bulan_input*.

    Returns the first element of ``model.predict`` for that month's features.
    """
    df_row = preprocess_single_row_from_date(bulan_input)
    return model.predict(df_row[FEATURES])[0]


def predict_range(start: str, end: str) -> pd.DataFrame:
    """Predict rainfall for every month from *start* to *end*, inclusive.

    Args:
        start: First month, formatted ``YYYY-MM``.
        end: Last month, formatted ``YYYY-MM``.

    Returns:
        DataFrame with columns ``input_month`` (``YYYY-MM`` strings) and
        ``prediksi_rr`` (model output), one row per successfully processed
        month.

    Raises:
        ValueError: If either bound is not ``YYYY-MM``, if *start* is after
            *end*, or if no month in the range could be preprocessed.
    """
    start_date = datetime.strptime(start, "%Y-%m")
    end_date = datetime.strptime(end, "%Y-%m")

    if start_date > end_date:
        raise ValueError("Bulan awal tidak boleh lebih besar dari bulan akhir")

    dates = []
    current = start_date
    while current <= end_date:
        dates.append(current.strftime("%Y-%m"))
        current += relativedelta(months=1)

    processed_rows = []
    for bulan_input in dates:
        # Deliberately best-effort: a month that cannot be preprocessed is
        # reported and skipped so the remaining months are still predicted.
        try:
            processed_rows.append(preprocess_single_row_from_date(bulan_input))
        except Exception as e:
            print(f"[SKIP] {bulan_input} gagal diproses: {e}")

    if not processed_rows:
        raise ValueError("Tidak ada bulan yang berhasil diproses.")

    df_all = pd.concat(processed_rows, ignore_index=True)
    y_pred = model.predict(df_all[FEATURES])

    return pd.DataFrame({
        'input_month': df_all['YearMonth'].dt.strftime('%Y-%m'),
        'prediksi_rr': y_pred,
    })


def _render_forecast_plot(df_pred: pd.DataFrame):
    """Draw the forecast chart for *df_pred* and return it as a PIL image.

    *df_pred* must have the columns ``bulan_label`` and ``prediksi_rr``.
    """
    labels = df_pred['bulan_label']
    values = df_pred['prediksi_rr']

    # Style must be set before the axes exist for it to apply to them.
    sns.set_style("whitegrid")
    # Explicit figure/axes instead of pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(15, 8))
    try:
        # Marker colours scale with the predicted value.
        cmap = plt.cm.Blues
        norm = plt.Normalize(values.min(), values.max())
        colors = cmap(norm(values.values))

        # Main line.
        sns.lineplot(data=df_pred, x='bulan_label', y='prediksi_rr',
                     color='darkcyan', linewidth=2, marker='o', ax=ax)

        # Per-month markers and value labels.
        for label, value, color in zip(labels, values, colors):
            ax.scatter(label, value, color=color, s=120, edgecolor='black', zorder=5)
            ax.text(
                label,
                value + 4,
                f"{value:.1f} mm",
                ha='center', va='bottom', fontsize=9, color='black',
                bbox=dict(boxstyle="round,pad=0.2", fc="white", ec="gray", alpha=0.7)
            )

        # Extremes (highest and lowest prediction).
        max_idx = values.idxmax()
        min_idx = values.idxmin()
        ax.scatter(labels.loc[max_idx], values.loc[max_idx],
                   color='red', s=150, label='Tertinggi', zorder=6)
        ax.scatter(labels.loc[min_idx], values.loc[min_idx],
                   color='blue', s=150, label='Terendah', zorder=6)

        # Mean line and its label.
        mean_val = values.mean()
        ax.axhline(mean_val, color='orange', linestyle='--', linewidth=1.2,
                   label=f'Rata-rata: {mean_val:.1f} mm')
        ax.text(
            # Clamp at 0 so one- and two-month ranges keep the label
            # inside the axes instead of at a negative x position.
            x=max(len(df_pred) - 3, 0),
            y=mean_val + 6,
            s=f'{mean_val:.1f} mm',
            color='orange', fontsize=10, style='italic'
        )

        # Titles, axis labels, legend and grid.
        ax.set_title('Peramalan Curah Hujan Bulanan\nWilayah: Kota Bandung',
                     fontsize=18, weight='bold')
        ax.set_xlabel('Bulan', fontsize=12)
        ax.set_ylabel('Curah Hujan (mm)', fontsize=12)
        ax.tick_params(axis='x', labelrotation=45)
        ax.legend()
        ax.grid(True, linestyle='--', alpha=0.5)
        fig.tight_layout()

        # Render into an in-memory PNG.
        buf = io.BytesIO()
        fig.savefig(buf, format='png', dpi=300)
    finally:
        # Always release the figure, even when plotting fails.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)


def gradio_predict(start_month: str, end_month: str):
    """Gradio callback: predict a month range and render the table and chart.

    Args:
        start_month: First month, formatted ``YYYY-MM``.
        end_month: Last month, formatted ``YYYY-MM``.

    Returns:
        ``(table_text, image)`` on success, or ``(error_message, None)`` when
        anything fails.
    """
    try:
        # Surrounding whitespace from the textbox would make strptime fail.
        df_pred = predict_range(start_month.strip(), end_month.strip())

        # Month labels in Indonesian, e.g. "Januari 2023".
        months = pd.to_datetime(df_pred['input_month'])
        df_pred['bulan_label'] = (
            months.dt.month.apply(lambda m: BULAN_INDONESIA[m - 1])
            + ' ' + months.dt.year.astype(str)
        )

        img = _render_forecast_plot(df_pred)

        # Plain-text result table.
        result_str = df_pred[['input_month', 'prediksi_rr']].rename(
            columns={'input_month': 'Bulan', 'prediksi_rr': 'Prediksi RR (mm)'}
        ).to_string(index=False)

        return result_str, img

    except Exception as e:
        # UI boundary: report the failure in the text output instead of raising.
        return f"Terjadi kesalahan: {str(e)}", None


# Set up and launch the Gradio interface.
demo = gr.Interface(
    fn=gradio_predict,
    inputs=[
        gr.Textbox(label="Bulan Awal (format: YYYY-MM)", placeholder="contoh: 2023-01"),
        gr.Textbox(label="Bulan Akhir (format: YYYY-MM)", placeholder="contoh: 2023-12")
    ],
    outputs=[
        gr.Textbox(label="Tabel Hasil Prediksi"),
        gr.Image(label="Visualisasi Curah Hujan")
    ],
    title="Aplikasi Ramalan Curah Hujan",
    description="Masukkan rentang bulan untuk meramalkan curah hujan bulanan."
)
demo.launch()