File size: 6,984 Bytes
90699dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import streamlit as st
import pandas as pd
import requests
import plotly.express as px
import plotly.graph_objs as go
from folium import DivIcon
import folium
from streamlit_folium import st_folium
from sklearn.linear_model import LinearRegression
from sklearn.cluster import DBSCAN
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import time
import json
import pytz
from datetime import datetime

# Use the full browser width and set the browser tab title
st.set_page_config(
    page_title="Real-Time Smart Lamppost Data Dashboard",
    layout="wide",
)

# Function to fetch JSON data with caching and expiration
@st.cache_data(ttl=600)
def fetch_data(url):
    """Download JSON from *url* and record when the download happened.

    The decorator caches the result with ``ttl=600``, so the returned
    timestamp is the time of the actual download rather than of the current
    script rerun.

    Args:
        url: Address of the JSON endpoint.

    Returns:
        A ``(data, fetch_time)`` tuple: the decoded JSON payload and the
        Hong Kong local time of the download, formatted as
        ``YYYY-MM-DDTHH:MM:SS``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    # A bounded timeout keeps a stalled endpoint from hanging the app.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of parsing (and caching) an error page.
    response.raise_for_status()
    hk_tz = pytz.timezone('Asia/Hong_Kong')
    fetch_time = datetime.now(hk_tz).strftime('%Y-%m-%dT%H:%M:%S')
    # response.json() applies JSON-aware encoding detection, which matters
    # because the payload's property names contain non-ASCII characters.
    return response.json(), fetch_time

# Function to calculate "feels like" temperature
def feels_like_temperature(temp_celsius, humidity_percent):
    """Estimate the apparent ("feels like") temperature.

    Args:
        temp_celsius: Air temperature in degrees Celsius.
        humidity_percent: Relative humidity as a percentage.

    Returns:
        The air temperature reduced by a humidity-dependent share of its
        distance from 14.5 °C.
    """
    humidity_weight = 0.55 - 0.0055 * humidity_percent
    offset_from_base = temp_celsius - 14.5
    return temp_celsius - humidity_weight * offset_from_base

# Function to process the raw data into a DataFrame
def process_data(data):
    """Convert the raw feature payload into a cleaned DataFrame.

    Args:
        data: Mapping with a ``'features'`` list. Each feature provides
            ``geometry.coordinates`` as ``[longitude, latitude]`` and a
            ``properties`` mapping holding the temperature and humidity
            readings.

    Returns:
        A DataFrame with the columns ``latitude``, ``longitude``,
        ``temperature``, ``humidity`` and ``feels_like``. Rows whose
        temperature or humidity is missing or non-numeric are dropped. An
        empty ``'features'`` list yields an empty frame with the same columns.
    """
    columns = ['latitude', 'longitude', 'temperature', 'humidity']
    records = [
        {
            'latitude': feature['geometry']['coordinates'][1],
            'longitude': feature['geometry']['coordinates'][0],
            'temperature': feature['properties'].get('Air temperature (°C) / 氣溫 (°C) / 气温 (°C)'),
            'humidity': feature['properties'].get('Relative humidity (%) / 相對濕度 (%) / 相对湿度 (%)'),
        }
        for feature in data['features']
    ]
    # Naming the columns explicitly keeps them present when there are no
    # features; otherwise the column lookups below raise KeyError.
    df = pd.DataFrame(records, columns=columns)

    # Convert temperature and humidity to numeric, forcing errors to NaN
    df['temperature'] = pd.to_numeric(df['temperature'], errors='coerce')
    df['humidity'] = pd.to_numeric(df['humidity'], errors='coerce')

    # Drop rows with NaN values
    df = df.dropna(subset=['temperature', 'humidity'])

    # The formula is plain arithmetic, so it can be applied to whole columns
    # at once instead of row by row.
    df['feels_like'] = feels_like_temperature(df['temperature'], df['humidity'])

    return df

# Fetch and process data
url = "https://csdi.vercel.app/weather/smls"
data, fetch_time = fetch_data(url)
df = process_data(data)

# DBSCAN cannot be fitted on zero samples, so end this run with a message
# instead of a traceback when no usable readings came back.
if df.empty:
    st.warning("No valid temperature and humidity readings are available right now.")
    st.stop()

# Perform clustering using DBSCAN on the latitude/longitude pairs.
# DBSCAN labels points that fit no cluster as -1; that label is handled below
# like any other cluster value.
coords = df[['latitude', 'longitude']].values
db = DBSCAN(eps=0.01, min_samples=5).fit(coords)
df['cluster'] = db.labels_

# Initialize the 'predicted_humidity' column as missing for every row
df['predicted_humidity'] = pd.NA

cluster_labels = df['cluster'].unique()

# Fit humidity against temperature separately within each cluster
for cluster in cluster_labels:
    in_cluster = df['cluster'] == cluster
    cluster_data = df[in_cluster]
    if len(cluster_data) > 1:  # Only perform regression if there are enough points
        X = cluster_data['temperature'].values.reshape(-1, 1)
        y = cluster_data['humidity'].values
        reg = LinearRegression().fit(X, y)
        df.loc[in_cluster, 'predicted_humidity'] = reg.predict(X)

# Calculate temperature statistics
temp_stats = df['temperature'].describe()
avg_temp = temp_stats['mean']
min_temp = temp_stats['min']
max_temp = temp_stats['max']
std_temp = temp_stats['std']

# Create regression plot using Plotly
fig = px.scatter(df, x='temperature', y='humidity', color='cluster',
                 title='Temperature vs. Relative Humidity with Regression by Cluster')

# Add regression lines to the plot
for cluster in cluster_labels:
    cluster_data = df[df['cluster'] == cluster]
    # Clusters that were too small to fit have no predictions to draw.
    if not cluster_data['predicted_humidity'].isna().all():
        fig.add_trace(go.Scatter(x=cluster_data['temperature'], y=cluster_data['predicted_humidity'], mode='lines',
                                 name=f'Cluster {cluster}'))

# Page layout: chart and statistics | map | data table
col1, col2, col3 = st.columns([1.65, 2, 1.15])

# Column 1: regression chart, fetch timestamp, and temperature statistics
with col1:
    st.plotly_chart(fig, use_container_width=True, height=300)
    st.caption(f"Data fetched at: {fetch_time}")

    # Two side-by-side sub-columns, each holding two metrics
    col_1, col_2 = st.columns([1, 1])
    metric_layout = (
        (col_1, (("Average Temperature (°C)", avg_temp),
                 ("Minimum Temperature (°C)", min_temp))),
        (col_2, (("Maximum Temperature (°C)", max_temp),
                 ("Std. Dev (°C)", std_temp))),
    )
    for metric_column, metrics in metric_layout:
        with metric_column:
            for metric_label, metric_value in metrics:
                st.metric(label=metric_label, value=f"{metric_value:.2f}")

# Column 2: Map
with col2:
    # Initialize the Folium map with a custom basemap tile layer.
    # NOTE(review): the basemap URL embeds an API key in source code; consider
    # loading it from configuration or secrets instead.
    m = folium.Map(
        location=[22.320394086610452, 114.21626912476121],
        zoom_start=14,
        tiles='https://landsd.azure-api.net/dev/osm/xyz/basemap/gs/WGS84/tile/{z}/{x}/{y}.png?key=f4d3e21d4fc14954a1d5930d4dde3809',
        attr="Map information from Lands Department",
    )

    # Overlay the English label tiles on top of the basemap
    folium.TileLayer(
        tiles='https://mapapi.geodata.gov.hk/gs/api/v1.0.0/xyz/label/hk/en/wgs84/{z}/{x}/{y}.png',
        attr="Map information from Lands Department"
    ).add_to(m)

    # Define a color map for clusters, resampling 'tab10' to one entry per
    # cluster label. matplotlib.cm.get_cmap was removed in Matplotlib 3.9, so
    # prefer Colormap.resampled and fall back only on older versions.
    unique_clusters = df['cluster'].unique()
    if hasattr(cm.tab10, 'resampled'):
        colors = cm.tab10.resampled(len(unique_clusters))
    else:
        colors = cm.get_cmap('tab10', len(unique_clusters))
    cluster_colors = {cluster: mcolors.to_hex(colors(i)) for i, cluster in enumerate(unique_clusters)}

    # Plot original data points
    for _, row in df.iterrows():
        point_color = cluster_colors[row['cluster']]
        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=5,
            color=point_color,
            fill=True,
            fill_color=point_color,
            fill_opacity=0.7,
            popup=f"Temp: {row['temperature']} °C<br>Humidity: {row['humidity']} %<br>Feels Like: {row['feels_like']:.2f} °C<br>Cluster: {row['cluster']}"
        ).add_to(m)

    # Calculate the mean position and mean temperature for each cluster
    cluster_centers = df.groupby('cluster').agg({
        'latitude': 'mean',
        'longitude': 'mean',
        'temperature': 'mean'
    }).reset_index()

    # Plot cluster centers
    for _, row in cluster_centers.iterrows():
        # iterrows() yields each row with a single dtype, so the integer label
        # arrives as a float; cast it back so the popup reads "0", not "0.0".
        cluster_id = int(row['cluster'])
        folium.Marker(
            location=[row['latitude'], row['longitude']],
            icon=DivIcon(
                icon_size=(150, 36),
                icon_anchor=(85, 20),  # Adjusted anchor position to move text away from the point
                html=f'<strong><div style="font-size: 15px; color: {cluster_colors[cluster_id]}">{row["temperature"]:.2f} °C</div></strong>'
            ),
            popup=f"Cluster: {cluster_id}<br>Avg Temp: {row['temperature']:.2f} °C"
        ).add_to(m)

    # Display the map in Streamlit
    st_folium(m, width=500, height=600)

# Column 3: scrollable table of the processed readings
with col3:
    # Inject CSS rules for the table container and its cells
    table_css = """
        <style>
        .dataframe-container {
            height: 600px;
            overflow-y: auto;
        }
        .dataframe th, .dataframe td {
            text-align: left;
            padding: 8px;
        }
        </style>
        """
    st.markdown(table_css, unsafe_allow_html=True)

    # Show only the columns relevant to the reader
    table_columns = ['latitude', 'longitude', 'temperature', 'humidity', 'feels_like', 'cluster']
    st.dataframe(df[table_columns], height=600)