Spaces:

OttoYu
/

CSDI-Weather

Runtime error

File size: 6,984 Bytes

90699dd

import streamlit as st
import pandas as pd
import requests
import plotly.express as px
import plotly.graph_objs as go
from folium import DivIcon
import folium
from streamlit_folium import st_folium
from sklearn.linear_model import LinearRegression
from sklearn.cluster import DBSCAN
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import time
import json
import pytz
from datetime import datetime

# Configure the browser tab title and switch the page to the wide layout
st.set_page_config(
    page_title="Real-Time Smart Lamppost Data Dashboard",
    layout="wide",
)

# Function to fetch JSON data with caching and expiration
@st.cache_data(ttl=600)
def fetch_data(url, timeout=10):
    """Download a JSON document and stamp it with the Hong Kong fetch time.

    The result is cached by Streamlit with ``ttl=600``, so the returned
    timestamp describes the last real request rather than the current rerun.

    Args:
        url: Address of the JSON endpoint to query.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(data, fetch_time)`` tuple: the decoded JSON payload and the
        time of the request in the ``Asia/Hong_Kong`` zone, formatted as
        ``YYYY-MM-DDTHH:MM:SS``.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    # Without an explicit timeout a stalled server would hang the whole app.
    response = requests.get(url, timeout=timeout)
    # Reject 4xx/5xx responses so an error page is never parsed and cached.
    response.raise_for_status()
    # response.json() decodes the body itself instead of relying on the
    # charset guess behind response.text.
    data = response.json()

    hk_tz = pytz.timezone('Asia/Hong_Kong')
    fetch_time = datetime.now(hk_tz).strftime('%Y-%m-%dT%H:%M:%S')
    return data, fetch_time

# Function to calculate "feels like" temperature
def feels_like_temperature(temp_celsius, humidity_percent):
    """Return a humidity-adjusted "feels like" temperature.

    The air temperature is pulled towards 14.5 by a factor that shrinks
    linearly from 0.55 at 0 % humidity to 0 at 100 % humidity.
    NOTE(review): the expression has the same form as Thom's discomfort
    index -- confirm that this is the intended source of the constants.

    Args:
        temp_celsius: Air temperature (assumed to be in degrees Celsius, per
            the parameter name).
        humidity_percent: Relative humidity (assumed to be on a 0-100 scale,
            per the parameter name).

    Returns:
        The adjusted temperature, in the same unit as ``temp_celsius``.
    """
    humidity_factor = 0.55 - 0.0055 * humidity_percent
    offset_from_reference = temp_celsius - 14.5
    return temp_celsius - humidity_factor * offset_from_reference

# Function to process the raw data into a DataFrame
def process_data(data):
    """Flatten a GeoJSON-style feature collection into a clean DataFrame.

    Args:
        data: Mapping with a ``'features'`` list. Each feature is read for
            ``geometry.coordinates`` (index 0 = longitude, index 1 =
            latitude) and for the temperature / humidity entries of
            ``properties``.

    Returns:
        A DataFrame with the columns ``latitude``, ``longitude``,
        ``temperature``, ``humidity`` and ``feels_like``. Rows whose
        temperature or humidity is missing or non-numeric are dropped, and
        features without at least two coordinates are skipped. An empty
        feature list yields an empty frame that still has every column.

    Raises:
        KeyError: If ``data`` has no ``'features'`` entry.
    """
    temperature_key = 'Air temperature (°C) / 氣溫 (°C) / 气温 (°C)'
    humidity_key = 'Relative humidity (%) / 相對濕度 (%) / 相对湿度 (%)'

    records = []
    for feature in data['features']:
        # A feature may carry a null geometry or null properties; treat both
        # as empty rather than crashing on subscripting None.
        coordinates = (feature.get('geometry') or {}).get('coordinates') or ()
        if len(coordinates) < 2:
            # Without a position the reading cannot be mapped or clustered.
            continue
        properties = feature.get('properties') or {}
        records.append({
            'latitude': coordinates[1],
            'longitude': coordinates[0],
            'temperature': properties.get(temperature_key),
            'humidity': properties.get(humidity_key),
        })

    # Naming the columns explicitly keeps them present when there are no
    # records; otherwise the column lookups below would raise KeyError.
    df = pd.DataFrame(records, columns=['latitude', 'longitude', 'temperature', 'humidity'])

    # Convert temperature and humidity to numeric, forcing errors to NaN
    df['temperature'] = pd.to_numeric(df['temperature'], errors='coerce')
    df['humidity'] = pd.to_numeric(df['humidity'], errors='coerce')

    # Drop rows with NaN values. The explicit copy makes the result an
    # independent frame, so adding columns to it (here and in callers) cannot
    # trigger pandas' SettingWithCopyWarning.
    df = df.dropna(subset=['temperature', 'humidity']).copy()

    # Calculate "feels like" temperature; the formula is plain arithmetic, so
    # it is applied to whole columns instead of row by row.
    df['feels_like'] = feels_like_temperature(df['temperature'], df['humidity'])

    return df

# Fetch and process data
url = "https://csdi.vercel.app/weather/smls"
try:
    data, fetch_time = fetch_data(url)
except (requests.RequestException, ValueError) as exc:
    # Network failures and undecodable bodies are expected operational
    # errors: show a readable message instead of a traceback and end this run.
    st.error(f"Could not load lamppost data from {url}: {exc}")
    st.stop()
df = process_data(data)

# Everything below (clustering, statistics, map) needs at least one reading;
# fitting DBSCAN on zero samples would raise.
if df.empty:
    st.warning("No lamppost readings with both temperature and humidity are currently available.")
    st.stop()

# Perform clustering using DBSCAN on the raw (latitude, longitude) pairs.
# scikit-learn labels points that belong to no cluster as -1; that group is
# handled like any other label in the regression below.
coords = df[['latitude', 'longitude']].values
db = DBSCAN(eps=0.01, min_samples=5).fit(coords)
df['cluster'] = db.labels_

# Initialize the 'predicted_humidity' column with float NaN so the column is
# float64 from the start (pd.NA would create an object-dtype column).
df['predicted_humidity'] = float('nan')

# Perform linear regression (humidity as a function of temperature) for each cluster
for cluster in df['cluster'].unique():
    in_cluster = df['cluster'] == cluster  # computed once, reused for read and write
    cluster_data = df[in_cluster]
    if len(cluster_data) > 1:  # Only perform regression if there are enough points
        X = cluster_data['temperature'].values.reshape(-1, 1)
        y = cluster_data['humidity'].values
        reg = LinearRegression().fit(X, y)
        df.loc[in_cluster, 'predicted_humidity'] = reg.predict(X)

# Summarise the temperature column and pull out the four figures shown as metrics
temp_stats = df['temperature'].describe()
avg_temp, min_temp, max_temp, std_temp = (
    temp_stats[stat_name] for stat_name in ('mean', 'min', 'max', 'std')
)

# Scatter of humidity against temperature, coloured by cluster label
fig = px.scatter(
    df,
    x='temperature',
    y='humidity',
    color='cluster',
    title='Temperature vs. Relative Humidity with Regression by Cluster',
)

# Overlay a line trace for every cluster that has at least one predicted value
has_predictions = 'predicted_humidity' in df.columns
for cluster in df['cluster'].unique():
    cluster_data = df[df['cluster'] == cluster]
    if not has_predictions or cluster_data['predicted_humidity'].isna().all():
        continue
    regression_line = go.Scatter(
        x=cluster_data['temperature'],
        y=cluster_data['predicted_humidity'],
        mode='lines',
        name=f'Cluster {cluster}',
    )
    fig.add_trace(regression_line)

# Page layout: three columns (chart + statistics, map, data table)
col1, col2, col3 = st.columns([1.65, 2, 1.15])

with col1:
    st.plotly_chart(fig, use_container_width=True, height=300)
    st.caption(f"Data fetched at: {fetch_time}")

    # Temperature statistics, shown as two equal-width stacks of metrics
    col_1, col_2 = st.columns([1, 1])
    metric_layout = (
        (col_1, (("Average Temperature (°C)", avg_temp),
                 ("Minimum Temperature (°C)", min_temp))),
        (col_2, (("Maximum Temperature (°C)", max_temp),
                 ("Std. Dev (°C)", std_temp))),
    )
    for metric_column, entries in metric_layout:
        with metric_column:
            for metric_label, metric_value in entries:
                st.metric(label=metric_label, value=f"{metric_value:.2f}")

# Column 2: Map
with col2:
    # NOTE(review): the basemap URL below embeds an API key in source control;
    # consider loading it from configuration (e.g. st.secrets) instead.
    basemap_tiles = 'https://landsd.azure-api.net/dev/osm/xyz/basemap/gs/WGS84/tile/{z}/{x}/{y}.png?key=f4d3e21d4fc14954a1d5930d4dde3809'
    label_tiles = 'https://mapapi.geodata.gov.hk/gs/api/v1.0.0/xyz/label/hk/en/wgs84/{z}/{x}/{y}.png'
    attribution = "Map information from Lands Department"

    # Initialize the Folium map with the basemap, then add the label overlay
    m = folium.Map(location=[22.320394086610452, 114.21626912476121], zoom_start=14,
                   tiles=basemap_tiles, attr=attribution)
    folium.TileLayer(tiles=label_tiles, attr=attribution).add_to(m)

    # Define a color map for clusters: 'tab10' resampled to one entry per label.
    unique_clusters = df['cluster'].unique()
    try:
        colors = cm.tab10.resampled(len(unique_clusters))
    except AttributeError:
        # Older Matplotlib has no Colormap.resampled; cm.get_cmap still exists
        # there (it was removed in Matplotlib 3.9, hence the primary path above).
        colors = cm.get_cmap('tab10', len(unique_clusters))
    cluster_colors = {cluster: mcolors.to_hex(colors(i)) for i, cluster in enumerate(unique_clusters)}

    # Plot original data points, one circle per reading, colored by cluster
    for row in df.itertuples(index=False):
        cluster_id = int(row.cluster)
        point_color = cluster_colors[cluster_id]
        folium.CircleMarker(
            location=[row.latitude, row.longitude],
            radius=5,
            color=point_color,
            fill=True,
            fill_color=point_color,
            fill_opacity=0.7,
            popup=f"Temp: {row.temperature} °C<br>Humidity: {row.humidity} %<br>Feels Like: {row.feels_like:.2f} °C<br>Cluster: {cluster_id}"
        ).add_to(m)

    # Calculate the mean position and average temperature for each cluster
    cluster_centers = df.groupby('cluster').agg({
        'latitude': 'mean',
        'longitude': 'mean',
        'temperature': 'mean'
    }).reset_index()

    # Plot cluster centers as text labels showing the average temperature
    for row in cluster_centers.itertuples(index=False):
        # Cast so the popup shows "0" rather than a float-formatted "0.0".
        cluster_id = int(row.cluster)
        folium.Marker(
            location=[row.latitude, row.longitude],
            icon=DivIcon(
                icon_size=(150, 36),
                icon_anchor=(85, 20),  # Adjusted anchor position to move text away from the point
                html=f'<strong><div style="font-size: 15px; color: {cluster_colors[cluster_id]}">{row.temperature:.2f} °C</div></strong>'
            ),
            popup=f"Cluster: {cluster_id}<br>Avg Temp: {row.temperature:.2f} °C"
        ).add_to(m)

    # Display the map in Streamlit
    st_folium(m, width=500, height=600)

# Column 3: Data Table
with col3:
    # Inline stylesheet injected ahead of the table
    table_css = """
        <style>
        .dataframe-container {
            height: 600px;
            overflow-y: auto;
        }
        .dataframe th, .dataframe td {
            text-align: left;
            padding: 8px;
        }
        </style>
        """
    st.markdown(table_css, unsafe_allow_html=True)

    # Show the per-reading columns (the regression output is left out)
    table_columns = ['latitude', 'longitude', 'temperature', 'humidity', 'feels_like', 'cluster']
    st.dataframe(df[table_columns], height=600)