Spaces:
Runtime error
Runtime error
from __future__ import print_function, division, generators | |
import sys | |
from past.builtins import xrange | |
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import altair as alt | |
from scipy.stats import pearsonr, iqr | |
# Configure the Streamlit page.
st.set_page_config(page_title="Monsoon Data Analysis", layout="wide")

# Load the data: both CSV files are read with 'Date' parsed as datetimes, and
# each frame is indexed by that column while the column itself is kept.
monsoon = pd.read_csv(
    'https://raw.githubusercontent.com/Koi4595/SI-649/main/Monsoon_data.csv',
    parse_dates=['Date'],
)
monsoon.index = monsoon['Date']

olou = pd.read_csv(
    'https://raw.githubusercontent.com/Koi4595/SI-649/main/Olou_counts.csv',
    parse_dates=['Date'],
)
olou.index = olou['Date']
olou['Date'] = pd.to_datetime(olou['Date'])

# Year lists used for the 'Drought' and 'Flood' selections elsewhere in the file.
drought_years = [
    1965, 1966, 1968, 1972, 1974, 1979,
    1982, 1986, 1987, 2002, 2004, 2009,
]
flood_years = [
    1964, 1970, 1971, 1973, 1975, 1978,
    1983, 1988, 1990, 1994, 2007, 2008,
]
# Visualization 1: time series
def plot_timeseries(event_type):
    """Render precipitation and Olou counts as two stacked time-series panels.

    Args:
        event_type: 'Drought' or 'Flood' restricts both datasets to the years
            in ``drought_years`` / ``flood_years``; any other value (for
            example 'All') plots every record.

    The figure is handed to Streamlit and then closed, so repeated script
    reruns do not accumulate open matplotlib figures.
    """
    if event_type == 'Drought':
        selected_years = drought_years
    elif event_type == 'Flood':
        selected_years = flood_years
    else:
        selected_years = None

    if selected_years is None:
        monsoon_selected_years = monsoon
        olou_selected_years = olou
    else:
        monsoon_selected_years = monsoon[monsoon['Date'].dt.year.isin(selected_years)]
        olou_selected_years = olou[olou['Date'].dt.year.isin(selected_years)]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 5), sharex=True)

    ax1.step(monsoon_selected_years['Date'], monsoon_selected_years['Precip'],
             where='mid', color='blue')
    ax1.set_title('Monthly Precipitation for Selected Years')
    ax1.set_ylabel('Precipitation (mm)')
    ax1.grid(True)

    # Counts are divided by 1000 to match the "x 10^3" unit in the axis label.
    ax2.plot(olou_selected_years['Date'], olou_selected_years['Counts'] / 1000,
             'r.', ms=3.0)
    ax2.set_ylabel('Olou NM Counts for Selected Years (cnt./min. x 10^3)')
    ax2.set_xlabel('Date')
    ax2.grid(True)

    # Lay out this specific figure rather than pyplot's implicit current one.
    fig.tight_layout()
    st.pyplot(fig)
    # pyplot keeps every figure it created alive until it is closed explicitly.
    plt.close(fig)
# Visualization 2: JJAS condition
def return_stderr(data):
    """Calculate uncertainty of an array as the Standard Error of the Mean.

    NaN entries are ignored. The sample size is the number of non-NaN values;
    the previous ``np.count_nonzero(data)`` counted NaNs as samples and
    dropped genuine zeros.

    Args:
        data: array-like of numbers (list, NumPy array or pandas Series).

    Returns:
        ``nanstd(data) / sqrt(n - 1)``, which equals the sample standard
        deviation divided by ``sqrt(n)``. NaN when fewer than two valid
        samples are available.
    """
    values = np.asarray(data, dtype=float)
    n_valid = np.count_nonzero(~np.isnan(values))
    if n_valid < 2:
        # The SEM is undefined for zero or one sample.
        return np.nan
    return np.nanstd(values) / np.sqrt(n_valid - 1)
# Monthly climatology: for each calendar month (list position 0 holds month 1)
# the mean precipitation and the standard error returned by return_stderr.
climo = {
    'means': [
        np.mean(monsoon.Precip[monsoon.index.month == month])
        for month in xrange(1, 13)
    ],
    'error': [
        return_stderr(monsoon.Precip[monsoon.index.month == month].values)
        for month in xrange(1, 13)
    ],
}

# Anomaly of every record relative to the climatological mean of its month.
delta = [
    monsoon.Precip[date] - climo['means'][date.month - 1]
    for date in monsoon.Precip.index
]
dseries = pd.Series(delta, index=monsoon.index)
# Create a dictionary of June, July, August and September (JJAS) data
def lookup_index(yr):
    """Return a boolean mask over ``monsoon.index`` selecting June-September of ``yr``."""
    stamps = monsoon.index
    in_year = stamps.year == yr
    from_june = stamps.month >= 6
    to_september = stamps.month <= 9
    return in_year & from_june & to_september
# Per-year JJAS statistics of the anomaly series for 1964 through 2011:
# the mean, its standard error, and the sum.
jjas = {
    'means': [np.mean(dseries[lookup_index(yr)]) for yr in xrange(1964, 2012)],
    'SEM': [return_stderr(dseries[lookup_index(yr)]) for yr in xrange(1964, 2012)],
    'sum': [np.sum(dseries[lookup_index(yr)]) for yr in xrange(1964, 2012)],
}
def plot_jjas_condition():
    """Render the JJAS anomaly charts in Streamlit.

    Reads the module-level ``jjas`` dict and the ``drought_years`` /
    ``flood_years`` lists. Two Altair charts are shown side by side: an
    interactive per-year error-bar plot with points, coloured by condition,
    and a density estimate of the yearly mean anomalies.
    """
    def classify(year):
        # Drought membership is checked first, then flood; everything else is normal.
        if year in drought_years:
            return 'Drought'
        if year in flood_years:
            return 'Flood'
        return 'Normal'

    jjas_df = pd.DataFrame({
        'Year': range(1964, 2012),
        'Means': jjas['means'],
        'SEM': jjas['SEM'],
        'Sum': jjas['sum'],
    })
    jjas_df['Condition'] = jjas_df['Year'].apply(classify)

    color_scale = alt.Scale(
        domain=['Drought', 'Flood', 'Normal'],
        range=['darkred', 'lightblue', 'orange'],
    )

    # Left panel, layer 1: error bars around each yearly mean.
    decade_ticks = list(range(1960, 2011, 10))
    error_bars = alt.Chart(jjas_df).mark_errorbar(extent='ci')
    error_bars = error_bars.encode(
        x=alt.X('Year:O', axis=alt.Axis(values=decade_ticks)),
        y=alt.Y('Means:Q', scale=alt.Scale(zero=False)),
        yError='SEM:Q',
        color=alt.Color('Condition:N', scale=color_scale),
    )
    error_bars = error_bars.properties(
        width=400,
        height=400,
        title='Mean JJAS precipitation anomaly',
    )

    # Left panel, layer 2: the yearly means as filled points.
    points = alt.Chart(jjas_df).mark_point(filled=True).encode(
        x='Year:O',
        y='Means:Q',
        color=alt.Color('Condition', legend=alt.Legend(title='Condition')),
    )
    error_chart = (error_bars + points).interactive()

    # Right panel: density estimate of the yearly mean anomalies.
    density = alt.Chart(jjas_df).transform_density(
        density='Means',
        as_=['Means', 'Density'],
    )
    histogram = density.mark_area().encode(
        x="Means:Q",
        y='Density:Q',
        tooltip=['Means:Q', 'Density:Q'],
    ).properties(
        width=400,
        height=400,
        title='Distribution of JJAS anomalies',
    )

    chart = alt.hconcat(error_chart, histogram).resolve_legend(color='independent')
    st.altair_chart(chart, use_container_width=True)
def return_stderr(data):
    """Calculate uncertainty of an array as the Standard Error of the Mean.

    This definition shadows an earlier ``return_stderr`` in the same file, so
    it is the one used by every call made after this point.

    NaN entries are ignored. The sample size is the number of non-NaN values;
    the previous ``np.count_nonzero(data)`` counted NaNs as samples and
    dropped genuine zeros.

    Args:
        data: array-like of numbers (list, NumPy array or pandas Series).

    Returns:
        ``nanstd(data) / sqrt(n - 1)``, which equals the sample standard
        deviation divided by ``sqrt(n)``. NaN when fewer than two valid
        samples are available.
    """
    values = np.asarray(data, dtype=float)
    n_valid = np.count_nonzero(~np.isnan(values))
    if n_valid < 2:
        # The SEM is undefined for zero or one sample.
        return np.nan
    return np.nanstd(values) / np.sqrt(n_valid - 1)
def make_cframe(c_years):
    """Build a per-year composite of Olou counts and its column statistics.

    Args:
        c_years: sequence of calendar years to include, one output row each.

    Returns:
        A tuple ``(c_group, c_means, c_errors)``:
        ``c_group`` is a ``(len(c_years), 12)`` float array whose row ``n``
        holds the counts recorded in ``c_years[n]`` in record order, with
        zero or unfilled cells set to NaN; ``c_means`` is a list of the 12
        NaN-aware column means; ``c_errors`` is a list of the 12 column
        standard errors from ``return_stderr``.

    Raises:
        ValueError: if a year has more than 12 records.

    NOTE(review): column ``i`` is the i-th record of the year, which matches
    calendar months only if each year has records from January onward with
    none missing - confirm against the data.
    """
    # One row per requested year instead of a hard-coded 12.
    c_group = np.zeros((len(c_years), 12), dtype=float)
    for n, yr in enumerate(c_years):
        # Use the raw values: integer keys on a datetime-indexed Series rely on
        # a deprecated positional fallback, and one mask per year is enough.
        counts = olou.Counts[olou.index.year == yr].values
        c_group[n, :counts.size] = counts

    # Zero marks a cell that was never filled (or a zero reading); treat as missing.
    c_group[c_group == 0] = np.nan

    c_means = [np.nanmean(c_group[:, i]) for i in range(12)]
    c_errors = [return_stderr(c_group[:, i]) for i in range(12)]
    return c_group, c_means, c_errors
def bootstrap_r(mean_list, error_list, iterations=1000):
    """Resample five consecutive means within their errors and correlate each draw.

    For every iteration, a random value is drawn for each of positions 4-8 of
    ``mean_list`` from the integer grid spanning ``mean - error`` to
    ``mean + error`` (scaled by 100000, upper bound exclusive). The five draws
    are then correlated against ``[0, 1, 2, 3, 4]`` with Pearson's r. Draws
    are divided by 100, so they are 1000 times the input scale; Pearson's r
    does not depend on that scale.

    Args:
        mean_list: indexable of central values; indices 4-8 are used.
        error_list: indexable of half-widths matching ``mean_list``.
        iterations: number of resampling rounds.

    Returns:
        Two NumPy arrays of length ``iterations``: the r-values and the
        two-tailed p-values.
    """
    positions = [0, 1, 2, 3, 4]
    r_samples = []
    p_samples = []
    for _ in range(iterations):
        draws = []
        for offset in positions:
            centre = mean_list[4 + offset]
            half_width = error_list[4 + offset]
            low = int((centre - half_width) * 100000)
            high = int((centre + half_width) * 100000)
            draws.append(np.random.randint(low, high) / 100)
        r_value, p_value = pearsonr(positions, draws)
        r_samples.append(r_value)
        p_samples.append(p_value)
    return np.array(r_samples), np.array(p_samples)
def freedman_diaconis_bins(a):
    """Return a histogram bin count for ``a`` from the Freedman-Diaconis rule.

    The bin width is ``2 * IQR / n ** (1/3)``. When that width is zero, the
    count falls back to the integer part of ``sqrt(n)``; otherwise it is the
    data range divided by the width, rounded up.
    """
    values = np.asarray(a)
    bin_width = 2 * iqr(values) / (len(values) ** (1 / 3))
    if bin_width != 0:
        span = values.max() - values.min()
        return int(np.ceil(span / bin_width))
    # Degenerate spread: no usable width, so use the square-root rule.
    return int(np.sqrt(values.size))
def add_hist(data, col_key, axobj, mkstyle='o', obsval=None, mylabel=None, bin_num=None):
    """Draw a normalised histogram of ``data`` and optionally mark an observed value.

    Args:
        data: array-like of samples to histogram.
        col_key: colour used for the bar faces and edges.
        axobj: axes-like object providing ``bar``, ``vlines`` and ``plot``.
        mkstyle: marker style for the observed-value marker.
        obsval: observed value to mark with a dashed line and a marker on top
            of the bin that contains it; ``None`` or NaN draws no marker.
        mylabel: legend label attached to the bars only.
        bin_num: number of bins; when falsy, ``freedman_diaconis_bins(data)``
            chooses it. Non-integer numbers (e.g. a float) are truncated.
    """
    if not bin_num:
        bin_num = freedman_diaconis_bins(data)
    # Coerce before use: np.histogram rejects a float bin count.
    bin_num = int(bin_num)

    hist, bin_edges = np.histogram(data, bins=bin_num, density=False)
    norm_hist = hist / hist.sum()

    # align='edge' makes each bar span its own bin; the default centres bars on
    # the left edges and shifts the whole histogram by half a bin width.
    axobj.bar(bin_edges[:-1], norm_hist, width=bin_edges[1] - bin_edges[0],
              align='edge', color=col_key, edgecolor=col_key, alpha=0.3,
              label=mylabel)

    # Test against None explicitly so an observed value of exactly 0 is drawn.
    if obsval is None or np.isnan(obsval):
        return

    # Index of the bin containing obsval, clamped to the outermost bins when
    # the observation lies outside the sampled range.
    bin_index = int(np.clip(np.searchsorted(bin_edges, obsval, side='right') - 1,
                            0, len(norm_hist) - 1))
    height = norm_hist[bin_index]
    axobj.vlines(obsval, 0, height, linestyles='dashed', lw=1.0, zorder=6)
    axobj.plot(obsval, height, color='k', marker=mkstyle, ms=5., zorder=7)
# Main body of the Streamlit application
def _plot_sample_composite(ax, months, dataset, derr, col_key, xlabs):
    """Draw one 12-point composite with its error band, emphasising indices 4-8."""
    lthick = 1.0
    # (index range, line style, band opacity); the middle range is drawn solid
    # and with a stronger band than the two outer ranges.
    segments = (
        (slice(0, 5), 'k--', 0.15),
        (slice(4, 9), 'k-', 0.3),
        (slice(8, None), 'k--', 0.15),
    )
    for span, line_style, _ in segments:
        ax.plot(months[span], dataset[span], line_style, lw=lthick)
    for span, _, band_alpha in segments:
        ax.fill_between(months[span], dataset[span] - derr[span],
                        dataset[span] + derr[span],
                        color=col_key, linewidth=0.1, alpha=band_alpha)
    # The labels name every second month, so ticks go on every second position
    # (0, 2, ..., 10) rather than on positions 0..5.
    ax.set_xticks(months[::2])
    ax.set_xticklabels(xlabs)
    ax.set_xlim(0, 11)


def _render_bootstrap_histograms(sample_type, bin_num_r, bin_num_p,
                                 drought_stats, flood_stats):
    """Plot bootstrap r-value and p-value histograms for the chosen sample(s).

    ``drought_stats`` and ``flood_stats`` are tuples of
    ``(r_samples, p_samples, observed_r, observed_p)``.
    """
    show_drought = sample_type in ('Drought', 'Both')
    show_flood = sample_type in ('Flood', 'Both')
    rbs1, pbs1, rval_d, pval_d = drought_stats
    rbs2, pbs2, rval_f, pval_f = flood_stats

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

    # Left panel: r-values.
    if show_drought:
        add_hist(data=rbs1, col_key='r', axobj=ax1, obsval=rval_d,
                 mylabel="Drought sample", mkstyle='o', bin_num=bin_num_r)
    if show_flood:
        add_hist(data=rbs2, col_key='b', axobj=ax1, obsval=rval_f,
                 mylabel="Flood sample", mkstyle='D', bin_num=bin_num_r)
    ax1.legend(loc='upper right')
    ax1.set_ylabel('Density')
    ax1.set_xlabel('$r$-values')
    ax1.set_title('Potential $r$-values from Bootstrap')

    # Right panel: p-values, with cumulative density curves on a twin axis.
    if show_drought:
        add_hist(data=pbs1, col_key='r', axobj=ax2, obsval=pval_d,
                 mkstyle='o', bin_num=bin_num_p)
    if show_flood:
        add_hist(data=pbs2, col_key='b', axobj=ax2, obsval=pval_f,
                 mkstyle='D', bin_num=bin_num_p)
    ax3 = ax2.twinx()
    if show_drought:
        sns.kdeplot(pbs1, cumulative=True, color='r', ax=ax3,
                    lw=1, alpha=0.3, zorder=10)
    if show_flood:
        sns.kdeplot(pbs2, cumulative=True, color='b', ax=ax3,
                    lw=1, alpha=0.3, zorder=11)
    ax3.grid(False)
    ax3.set_ylabel("Cumulative density")
    ax2.set_xlabel(r'$p$-value')
    ax2.set_title(r'Potential $p$-values from Bootstrap')

    st.pyplot(fig)
    # Close the figure so Streamlit reruns do not accumulate open figures.
    plt.close(fig)


def _render_drought_flood_sample():
    """Render the drought/flood composites, Pearson tests and bootstrap plots."""
    st.header("Drought/Flood Sample Analysis")

    _, d_means, d_errors = make_cframe(drought_years)
    _, f_means, f_errors = make_cframe(flood_years)
    # Scale to thousands to match the "x10^3" unit in the axis label.
    d_means = np.array(d_means) * 0.001
    f_means = np.array(f_means) * 0.001
    d_errors = np.array(d_errors) * 0.001
    f_errors = np.array(f_errors) * 0.001

    mrange = np.arange(0, 12)
    xlabs = ['Jan', 'Mar', 'May', 'Jul', 'Sep', 'Nov']

    fig = plt.figure()
    fig.set_size_inches(7.48, 3.54)
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
    _plot_sample_composite(ax1, mrange, d_means, d_errors, 'r', xlabs)
    _plot_sample_composite(ax2, mrange, f_means, f_errors, 'b', xlabs)
    ax1.set_ylabel(r"Neutron counts (cnt./min.$\times10^{3}$)", fontsize=11)
    ax1.set_title('Drought sample')
    ax2.set_title('Flood sample')
    st.pyplot(fig)
    plt.close(fig)

    # Linear trend over positions 4-8 of each composite.
    rval_d, pval_d = pearsonr(range(5), d_means[4:9])
    rval_f, pval_f = pearsonr(range(5), f_means[4:9])
    st.write("A Pearson's r test, gives linear regressions and two-tailed p-values of:")
    st.write(f"Drought sample: r-value = {rval_d:4.3f}, p-value = {pval_d:4.3f}")
    st.write(f"Flood sample: r-value = {rval_f:4.3f}, p-value = {pval_f:4.3f}")

    rbs1, pbs1 = bootstrap_r(mean_list=d_means, error_list=d_errors)
    rbs2, pbs2 = bootstrap_r(mean_list=f_means, error_list=f_errors)

    sample_dropdown = st.sidebar.selectbox('Sample:', ['Both', 'Drought', 'Flood'])
    bin_slider_r = st.sidebar.slider('r-value bins:', min_value=10, max_value=100,
                                     value=30, step=1)
    bin_slider_p = st.sidebar.slider('p-value bins:', min_value=10, max_value=100,
                                     value=25, step=1)
    _render_bootstrap_histograms(
        sample_dropdown, bin_slider_r, bin_slider_p,
        drought_stats=(rbs1, pbs1, rval_d, pval_d),
        flood_stats=(rbs2, pbs2, rval_f, pval_f),
    )


def main():
    """Streamlit entry point: show the title and dispatch on the selected analysis."""
    # Application title
    st.title('Monsoon Data Analysis')

    # Analysis options
    analysis_type = st.sidebar.selectbox(
        "Select Analysis",
        ("Time Series", "JJAS Condition", "Drought/Flood Sample"))

    if analysis_type == "Time Series":
        event_type = st.sidebar.selectbox("Event Type", ('All', 'Drought', 'Flood'))
        st.header("Time Series Analysis")
        plot_timeseries(event_type)
    elif analysis_type == "JJAS Condition":
        st.header("JJAS Condition Analysis")
        # plot_jjas_condition reads the module-level `jjas` dict, so the
        # anomalies are not recomputed here: a local copy would never be read.
        plot_jjas_condition()
    elif analysis_type == "Drought/Flood Sample":
        _render_drought_flood_sample()


if __name__ == "__main__":
    main()