File size: 9,157 Bytes
76c013a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3703659
 
 
 
 
 
 
 
76c013a
 
 
 
 
 
 
 
 
 
3703659
76c013a
 
 
 
3703659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76c013a
 
3703659
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import gradio as gr
import pandas as pd
import plotly.express as px
import requests
from bs4 import BeautifulSoup
import csv
from datetime import datetime
import calendar

# Fixed line colour per station name; passed to plotly as
# `color_discrete_map` so each station keeps the same colour in every figure.
color_map = {
    "Shek Pik": "blue",
    "Quarry Bay": "red"
}

def get_end_date_from_month(month_str):
    """Return the year, month and last calendar day for a ``YYYY-MM`` string.

    Args:
        month_str: Month in ``YYYY-MM`` form, e.g. ``"2023-07"``. Leading and
            trailing whitespace is ignored.

    Returns:
        A tuple ``(year, month, end_date)`` where ``year`` and ``month`` are
        ints and ``end_date`` is the last day of that month formatted as
        ``YYYY-MM-DD``.

    Raises:
        ValueError: If ``month_str`` does not match ``YYYY-MM``.
    """
    # Input typically comes from a free-text box, so tolerate stray spaces.
    if isinstance(month_str, str):
        month_str = month_str.strip()
    try:
        dt = datetime.strptime(month_str, "%Y-%m")
    except ValueError as err:
        # Keep the parser's own error attached as the cause for debugging.
        raise ValueError("Invalid format. Please use YYYY-MM (e.g., '2023-07')") from err
    # monthrange returns (weekday_of_first_day, number_of_days_in_month).
    last_day = calendar.monthrange(dt.year, dt.month)[1]
    return dt.year, dt.month, f"{dt.year:04d}-{dt.month:02d}-{last_day:02d}"

def fetch_measured_data(station_name, endtime, period="30"):
    """Download measured sea-level data for one station and save it as CSV.

    Requests the tabular (``output=tab``) view from
    ioc-sealevelmonitoring.org, takes the first HTML ``<table>`` in the
    response and writes its rows, header included, to
    ``<station code>_tide_data.csv`` in the current working directory.

    Args:
        station_name: ``"Quarry Bay"`` or ``"Shek Pik"``.
        endtime: End of the requested window. A 10-character value
            (``YYYY-MM-DD``) gets ``" 23:59:59"`` appended; anything else is
            sent unchanged.
        period: Value sent as the ``period`` query parameter.

    Returns:
        Path of the CSV file that was written.

    Raises:
        ValueError: If the station name is unknown or the response contains
            no table.
        RuntimeError: If the HTTP request fails, times out, or returns an
            error status.
    """
    station_codes = {"Quarry Bay": "quar", "Shek Pik": "shek"}
    code = station_codes.get(station_name)
    if not code:
        raise ValueError(f"Invalid station name: {station_name}")

    endtime_full = f"{endtime} 23:59:59" if len(endtime) == 10 else endtime

    # Let requests build and encode the query string (endtime contains a space).
    query = {"code": code, "output": "tab", "period": period, "endtime": endtime_full}
    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(
            "https://www.ioc-sealevelmonitoring.org/bgraph.php",
            params=query,
            timeout=30,
        )
        response.raise_for_status()
    except requests.RequestException as e:
        raise RuntimeError(f"Error fetching data: {e}") from e

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table')
    if not table:
        raise ValueError(f"No data table found in HTML for station {station_name} at {endtime_full}")

    data = [
        [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
        for row in table.find_all('tr')
    ]

    # NOTE: the file name depends only on the station, so each call for the
    # same station overwrites the previous download.
    output_csv = f"{code}_tide_data.csv"
    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(data)
    return output_csv

def load_measured_csv(file_path, station_name):
    """Read a measured-tide CSV and return it in Hong Kong local time.

    The file must provide a ``Time (UTC)`` column and a ``flt(m)`` column
    (header whitespace is trimmed first). Rows whose timestamp cannot be
    parsed are dropped.

    Args:
        file_path: Path of the CSV file to read.
        station_name: Label stored in the ``Station`` column of every row.

    Returns:
        DataFrame with columns ``Time (UTC+8)`` (tz-aware, Asia/Hong_Kong),
        ``Measured`` (the former ``flt(m)`` column) and ``Station``.
    """
    frame = pd.read_csv(file_path)
    frame.columns = frame.columns.str.strip()

    # Unparseable timestamps become NaT and are filtered out right after.
    frame['Time (UTC)'] = pd.to_datetime(frame['Time (UTC)'], errors='coerce')
    valid = frame.dropna(subset=['Time (UTC)'])

    utc_aware = valid['Time (UTC)'].dt.tz_localize('UTC')
    valid = valid.assign(**{
        'Time (UTC+8)': utc_aware.dt.tz_convert('Asia/Hong_Kong'),
        'Station': station_name,
    })

    wanted = ['Time (UTC+8)', 'flt(m)', 'Station']
    return valid[wanted].rename(columns={'flt(m)': 'Measured'})

def fetch_hko_tide_data(url, station_name, year):
    """Download an hourly tide table page and flatten it to one row per hour.

    Every ``<tr>`` after the first is inspected. Rows with at least 26 cells
    are read as ``month, day, value for hour 0 .. value for hour 23``. Blank
    cells and cells that do not parse as numbers or as a valid date are
    skipped.

    Args:
        url: Address of the HTML page holding the table.
        station_name: Label stored in the ``Station`` column of every row.
        year: Year (int) combined with each row's month and day.

    Returns:
        ``None`` if the HTTP request fails; otherwise a DataFrame with columns
        ``Datetime`` (naive ``datetime``), ``Tide Height (m)`` and
        ``Station``. The columns are present even when no rows were parsed.
    """
    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        # Callers treat None as "download failed".
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    records = []
    for row in soup.find_all('tr')[1:]:
        cols = [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
        if len(cols) < 26:
            continue
        mm, dd = cols[0], cols[1]
        for hour in range(24):
            tide_str = cols[hour + 2]
            if tide_str == '':
                continue
            try:
                tide = float(tide_str)
                dt = datetime(year, int(mm), int(dd), hour)
            except ValueError:
                # Non-numeric cell (e.g. a repeated header) or impossible date.
                continue
            records.append({'Datetime': dt, 'Tide Height (m)': tide, 'Station': station_name})

    # Naming the columns keeps the schema stable when nothing was parsed, so
    # callers can index by column without hitting a KeyError.
    return pd.DataFrame(records, columns=['Datetime', 'Tide Height (m)', 'Station'])

def _tide_line_figure(frame, value_column, title, value_label):
    """Build a per-station line-and-marker figure of ``value_column`` over time."""
    fig = px.line(frame, x='Time (UTC+8)', y=value_column, color='Station',
                  title=title,
                  labels={value_column: value_label, 'Time (UTC+8)': 'Time (UTC+8)'},
                  color_discrete_map=color_map)
    fig.update_traces(mode='lines+markers')
    return fig


def tide_analysis_for_month_gradio(month_str):
    """Run the measured-vs-predicted tide analysis for one month.

    Downloads measured data for Shek Pik and Quarry Bay, downloads the
    predicted tables for the same year, restricts the predictions to the time
    span covered by the measurements, and builds three figures.

    Args:
        month_str: Month in ``YYYY-MM`` form. Empty, whitespace-only or
            ``None`` input returns a prompt message without doing any work.

    Returns:
        A tuple ``(status_text, fig_pred, fig_meas, fig_resid)``. On any
        failure the three figures are ``None`` and ``status_text`` holds the
        progress log ending with the error. Exceptions are never propagated,
        so the UI callback always receives a displayable result.
    """
    logs = []
    failure = (None, None, None)

    # Treat whitespace-only input the same as an empty box.
    if isinstance(month_str, str):
        month_str = month_str.strip()
    if not month_str:
        return "Please enter a month in YYYY-MM format.", None, None, None

    try:
        logs.append(f"Parsing input month: {month_str}")
        year, month, end_date = get_end_date_from_month(month_str)
        logs.append(f"End date calculated: {end_date}")

        # Fetch measured data
        logs.append("Fetching measured data for Shek Pik...")
        file_shek = fetch_measured_data("Shek Pik", end_date)
        logs.append("Fetching measured data for Quarry Bay...")
        file_quar = fetch_measured_data("Quarry Bay", end_date)

        logs.append("Loading and processing measured CSV data...")
        df_shek = load_measured_csv(file_shek, "Shek Pik")
        df_quar = load_measured_csv(file_quar, "Quarry Bay")
        df_measured = pd.concat([df_shek, df_quar], ignore_index=True)
        min_time = df_measured['Time (UTC+8)'].min()
        max_time = df_measured['Time (UTC+8)'].max()
        logs.append(f"Measured data range: {min_time} to {max_time}")

        # Fetch predicted tide data
        logs.append("Fetching predicted tide data from HKO...")
        url_quar = f"https://www.hko.gov.hk/tide/QUBtextPH{year}.htm"
        url_shek = f"https://www.hko.gov.hk/tide/SPWtextPH{year}.htm"
        df_pred_quar = fetch_hko_tide_data(url_quar, "Quarry Bay", year)
        df_pred_shek = fetch_hko_tide_data(url_shek, "Shek Pik", year)

        if df_pred_quar is None or df_pred_shek is None:
            logs.append("Failed to fetch predicted tide data.")
            return ("\n".join(logs), *failure)

        # Pages that downloaded but produced no rows would otherwise fail
        # further down with an unhelpful column-lookup error.
        if df_pred_quar.empty and df_pred_shek.empty:
            logs.append(f"No predicted tide data could be parsed for {year}.")
            return ("\n".join(logs), *failure)

        logs.append("Processing predicted tide data...")
        df_pred = pd.concat([df_pred_quar, df_pred_shek], ignore_index=True)
        df_pred['Time (UTC+8)'] = pd.to_datetime(df_pred['Datetime']).dt.tz_localize('Asia/Hong_Kong')
        df_pred = df_pred.rename(columns={'Tide Height (m)': 'Predicted'})
        # Keep only predictions inside the span covered by the measurements.
        in_range = (df_pred['Time (UTC+8)'] >= min_time) & (df_pred['Time (UTC+8)'] <= max_time)
        df_pred = df_pred[in_range]

        logs.append("Generating plot for predicted tide...")
        fig_pred = _tide_line_figure(df_pred, 'Predicted', 'Predicted Tide', 'Tide Height (m)')

        logs.append("Generating plot for measured tide...")
        fig_meas = _tide_line_figure(df_measured, 'Measured', 'Measured Tide', 'Tide Height (m)')

        logs.append("Calculating and plotting residuals...")
        # Inner join: residuals exist only where a measurement and a
        # prediction share the exact same timestamp and station.
        df_merged = pd.merge(df_measured, df_pred[['Time (UTC+8)', 'Predicted', 'Station']],
                             on=['Time (UTC+8)', 'Station'], how='inner')
        df_merged['Residual'] = df_merged['Measured'] - df_merged['Predicted']
        fig_resid = _tide_line_figure(df_merged, 'Residual',
                                      'Tide Residuals (Measured - Predicted)', 'Residual (m)')

        logs.append("Analysis completed successfully.")
        return "\n".join(logs), fig_pred, fig_meas, fig_resid

    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        logs.append(f"Error during processing: {e}")
        return ("\n".join(logs), *failure)

with gr.Blocks() as demo:
    gr.Markdown("## Tide Time Series Analysis by Month")

    # Top row: free-text month entry next to the button that starts a run.
    with gr.Row():
        month_input = gr.Textbox(label="Enter Month (YYYY-MM)", placeholder="e.g. 2023-07")
        run_btn = gr.Button("Run Analysis")

    # Shortcut buttons, one per sample month, laid out on a single row.
    gr.Markdown("#### Sample Storm Surge Months")
    with gr.Row():
        sample_1 = gr.Button("2025-07 (Wipha)", scale=1)
        sample_2 = gr.Button("2021-10 (Lionrock)", scale=1)
        sample_3 = gr.Button("2022-08 (Ma-on)", scale=1)
        sample_4 = gr.Button("2022-11 (Nalgae)", scale=1)

    # Result area: measured + residuals on top, predicted + status below.
    with gr.Row():
        with gr.Column():
            with gr.Row():
                plot_meas = gr.Plot(label="Measured Tide")
                plot_resid = gr.Plot(label="Residuals")
            with gr.Row():
                plot_pred = gr.Plot(label="Predicted Tide")
                status_output = gr.Textbox(label="Status / Error", interactive=False, lines=1)

    # Order matches the tuple returned by tide_analysis_for_month_gradio.
    analysis_outputs = [status_output, plot_pred, plot_meas, plot_resid]

    # The main button analyses whatever is currently typed in the textbox.
    run_btn.click(fn=tide_analysis_for_month_gradio,
                  inputs=month_input,
                  outputs=analysis_outputs)

    def _month_filler(month):
        """Return a zero-argument callback that yields ``month``."""
        return lambda: month

    # Each sample button first writes its month into the textbox, then runs
    # the same analysis as the main button.
    for sample_button, sample_month in (
        (sample_1, "2025-07"),
        (sample_2, "2021-10"),
        (sample_3, "2022-08"),
        (sample_4, "2022-11"),
    ):
        sample_button.click(
            fn=_month_filler(sample_month), inputs=[], outputs=month_input
        ).then(
            fn=tide_analysis_for_month_gradio,
            inputs=month_input,
            outputs=analysis_outputs,
        )

# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()