# Koi4595's picture
# Update app.py
# fd75b09 verified
from __future__ import print_function, division, generators
import sys
from past.builtins import xrange
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
from scipy.stats import pearsonr, iqr
# Configure the Streamlit page (browser title and wide layout).
st.set_page_config(layout="wide", page_title="Monsoon Data Analysis")

# Load the two CSV tables. The 'Date' column is parsed as datetimes and
# mirrored into the index, so rows can be selected both through the
# column (``df['Date'].dt``) and through the index (``df.index.month``).
monsoon = pd.read_csv(
    'https://raw.githubusercontent.com/Koi4595/SI-649/main/Monsoon_data.csv',
    parse_dates=['Date'],
)
monsoon.index = monsoon['Date']

olou = pd.read_csv(
    'https://raw.githubusercontent.com/Koi4595/SI-649/main/Olou_counts.csv',
    parse_dates=['Date'],
)
olou.index = olou['Date']
olou['Date'] = pd.to_datetime(olou['Date'])

# Years treated as drought / flood events by the analyses below.
drought_years = [
    1965, 1966, 1968, 1972, 1974, 1979,
    1982, 1986, 1987, 2002, 2004, 2009,
]
flood_years = [
    1964, 1970, 1971, 1973, 1975, 1978,
    1983, 1988, 1990, 1994, 2007, 2008,
]
# Visualization 1 Time series
def plot_timeseries(event_type):
    """Render precipitation and Olou counts as a two-panel time series.

    The upper panel is a step plot of ``monsoon['Precip']``; the lower
    panel is a scatter of ``olou['Counts'] / 1000``. Both share the x axis.

    Args:
        event_type: 'Drought' or 'Flood' restricts both panels to the years
            in ``drought_years`` / ``flood_years``; any other value (for
            example 'All') plots the complete records.
    """
    if event_type == 'Drought':
        event_years = drought_years
    elif event_type == 'Flood':
        event_years = flood_years
    else:
        event_years = None

    if event_years is None:
        monsoon_selected_years = monsoon
        olou_selected_years = olou
    else:
        monsoon_selected_years = monsoon[monsoon['Date'].dt.year.isin(event_years)]
        olou_selected_years = olou[olou['Date'].dt.year.isin(event_years)]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 5), sharex=True)
    try:
        ax1.step(monsoon_selected_years['Date'], monsoon_selected_years['Precip'],
                 where='mid', color='blue')
        ax1.set_title('Monthly Precipitation for Selected Years')
        ax1.set_ylabel('Precipitation (mm)')
        ax1.grid(True)

        ax2.plot(olou_selected_years['Date'], olou_selected_years['Counts'] / 1000,
                 'r.', ms=3.0)
        ax2.set_ylabel('Olou NM Counts for Selected Years (cnt./min. x 10^3)')
        ax2.set_xlabel('Date')
        ax2.grid(True)

        # Lay out this figure explicitly rather than whatever pyplot
        # currently considers the "current" figure.
        fig.tight_layout()
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each Streamlit rerun would leave another figure in memory.
        plt.close(fig)
# Visualization 2 JJAS condition
def return_stderr(data):
    """Calculate uncertainty of a np array as Standard Error of the Mean.

    NaN entries are ignored, both in the standard deviation and in the
    sample count. Zeros are ordinary observations and are counted.

    Args:
        data: Array-like of numbers (a NumPy array or a pandas Series).

    Returns:
        ``nanstd(data) / sqrt(n - 1)`` where ``n`` is the number of non-NaN
        values, or NaN when fewer than two non-NaN values are available.
    """
    values = np.asarray(data, dtype=float)
    # Count the values that actually take part in nanstd. Counting
    # "non-zero" entries instead would include NaNs and exclude real zeros.
    n_valid = np.count_nonzero(~np.isnan(values))
    if n_valid < 2:
        return np.nan
    # Population std divided by sqrt(n - 1) equals sample std / sqrt(n).
    return np.nanstd(values) / np.sqrt(n_valid - 1)
# Monthly climatology: for each calendar month 1..12, the mean of
# ``monsoon.Precip`` over every record in that month, plus the standard
# error of that mean.
climo = {
    'means': [
        np.mean(monsoon.Precip[monsoon.index.month == month])
        for month in range(1, 13)
    ],
    'error': [
        return_stderr(monsoon.Precip[monsoon.index.month == month].values)
        for month in range(1, 13)
    ],
}

# Anomaly series: every observation minus the climatological mean of the
# calendar month it falls in, indexed like the original data.
delta = [
    monsoon.Precip[date] - climo['means'][date.month - 1]
    for date in monsoon.Precip.index
]
dseries = pd.Series(delta, index=monsoon.index)
# Create a dictionary of June July August September data
def lookup_index(yr):
    """Return a boolean mask over ``monsoon.index`` for June-September of ``yr``."""
    months = monsoon.index.month
    in_season = (months >= 6) & (months <= 9)
    in_year = monsoon.index.year == yr
    return in_year & in_season
# Per-year (1964..2011) summaries of the June-September anomaly values:
# their mean, the standard error of that mean, and their sum.
jjas = {
    'means': [np.mean(dseries[lookup_index(year)]) for year in range(1964, 2012)],
    'SEM': [return_stderr(dseries[lookup_index(year)]) for year in range(1964, 2012)],
    'sum': [np.sum(dseries[lookup_index(year)]) for year in range(1964, 2012)],
}
def plot_jjas_condition():
    """Render the JJAS anomaly charts side by side in Streamlit.

    Builds a per-year table from the module-level ``jjas`` summary, tags
    each year as 'Drought', 'Flood' or 'Normal' using ``drought_years`` and
    ``flood_years``, and shows two Altair charts: the yearly mean anomaly
    with error bars, and a density estimate of those means.
    """
    def classify(year):
        # Drought membership is checked first, then flood.
        if year in drought_years:
            return 'Drought'
        if year in flood_years:
            return 'Flood'
        return 'Normal'

    jjas_df = pd.DataFrame({
        'Year': range(1964, 2012),
        'Means': jjas['means'],
        'SEM': jjas['SEM'],
        'Sum': jjas['sum'],
    })
    jjas_df['Condition'] = jjas_df['Year'].apply(classify)

    color_scale = alt.Scale(
        domain=['Drought', 'Flood', 'Normal'],
        range=['darkred', 'lightblue', 'orange'],
    )

    # Left panel, layer 1: error bars around each yearly mean.
    decade_ticks = list(range(1960, 2011, 10))
    error_bars = (
        alt.Chart(jjas_df)
        .mark_errorbar(extent='ci')
        .encode(
            x=alt.X('Year:O', axis=alt.Axis(values=decade_ticks)),
            y=alt.Y('Means:Q', scale=alt.Scale(zero=False)),
            yError='SEM:Q',
            color=alt.Color('Condition:N', scale=color_scale),
        )
        .properties(
            width=400,
            height=400,
            title='Mean JJAS precipitation anomaly',
        )
    )

    # Left panel, layer 2: the yearly means themselves.
    points = (
        alt.Chart(jjas_df)
        .mark_point(filled=True)
        .encode(
            x='Year:O',
            y='Means:Q',
            color=alt.Color('Condition', legend=alt.Legend(title='Condition')),
        )
    )
    error_chart = (error_bars + points).interactive()

    # Right panel: density estimate of the yearly means.
    histogram = (
        alt.Chart(jjas_df)
        .transform_density(density='Means', as_=['Means', 'Density'])
        .mark_area()
        .encode(
            x="Means:Q",
            y='Density:Q',
            tooltip=['Means:Q', 'Density:Q'],
        )
        .properties(
            width=400,
            height=400,
            title='Distribution of JJAS anomalies',
        )
    )

    chart = alt.hconcat(error_chart, histogram).resolve_legend(color='independent')
    st.altair_chart(chart, use_container_width=True)
def return_stderr(data):
    """Return the standard error of the mean of ``data``, ignoring NaNs.

    This re-definition replaces the identically named function defined
    earlier in the file for every call made after this point.

    Args:
        data: Array-like of numbers (a NumPy array or a pandas Series).

    Returns:
        ``nanstd(data) / sqrt(n - 1)`` where ``n`` is the number of non-NaN
        values, or NaN when fewer than two non-NaN values are available.
    """
    values = np.asarray(data, dtype=float)
    # Count the values that actually take part in nanstd. Counting
    # "non-zero" entries instead would include NaNs and exclude real zeros.
    n_valid = np.count_nonzero(~np.isnan(values))
    if n_valid < 2:
        return np.nan
    # Population std divided by sqrt(n - 1) equals sample std / sqrt(n).
    return np.nanstd(values) / np.sqrt(n_valid - 1)
def make_cframe(c_years):
    """Build a (year x 12) composite of Olou counts for the given years.

    For each year in ``c_years`` the records of ``olou.Counts`` whose index
    falls in that year are written, in file order, into one row of the
    composite. The i-th record of a year lands in column i.
    NOTE(review): this treats record position as month number, i.e. it
    assumes every selected year has its months in order without gaps.

    Zero counts are then replaced by NaN so they are excluded from the
    column statistics.

    Args:
        c_years: Sequence of integer years to composite.

    Returns:
        Tuple ``(c_group, c_means, c_errors)``: the ``(len(c_years), 12)``
        float array, and two 12-element lists holding each column's
        NaN-ignoring mean and its ``return_stderr`` value.

    Raises:
        ValueError: If a selected year has more than 12 records.
    """
    # Extract once; positional access on the raw array avoids integer
    # look-ups on a datetime-indexed Series.
    record_years = olou.index.year
    counts = olou.Counts.to_numpy(dtype=float)

    c_group = np.zeros((len(c_years), 12), dtype=float)
    for row, yr in enumerate(c_years):
        yearly = counts[record_years == yr]
        if len(yearly) > 12:
            raise ValueError(
                "year %s has %d records; at most 12 are supported"
                % (yr, len(yearly))
            )
        c_group[row, :len(yearly)] = yearly

    # Unfilled slots and zero readings both count as missing.
    c_group[c_group == 0] = np.nan

    c_means = [np.nanmean(c_group[:, col]) for col in range(12)]
    c_errors = [return_stderr(c_group[:, col]) for col in range(12)]
    return c_group, c_means, c_errors
def bootstrap_r(mean_list, error_list, iterations=1000):
    """Bootstrap Pearson r and p-values for entries 4..8 of a monthly series.

    On each iteration one value per position is drawn uniformly from the
    integer grid spanning ``mean - error`` to ``mean + error`` (scaled by
    1e5), and the five draws are correlated against ``[0, 1, 2, 3, 4]``.

    Args:
        mean_list: Indexable of at least 9 numbers; entries 4..8 are used.
        error_list: Indexable of matching uncertainties; entries 4..8 are
            used. A zero error pins the draw to the mean instead of raising.
        iterations: Number of bootstrap resamples.

    Returns:
        Tuple ``(bs_rvals, bs_pvals)`` of float arrays of length
        ``iterations``.

    Raises:
        ValueError: If a used mean or error is NaN (it cannot be converted
            to an integer bound).
    """
    positions = range(4, 9)
    # The sampling bounds do not change between iterations; compute them once.
    lows = [int((mean_list[p] - error_list[p]) * 100000) for p in positions]
    highs = [int((mean_list[p] + error_list[p]) * 100000) for p in positions]
    x_vals = [0, 1, 2, 3, 4]

    bs_rvals = np.empty(iterations, dtype=float)
    bs_pvals = np.empty(iterations, dtype=float)
    for itr in range(iterations):
        # randint needs low < high; an empty interval collapses to `low`.
        poss_vals = [
            (np.random.randint(lo, hi) if hi > lo else lo) / 100
            for lo, hi in zip(lows, highs)
        ]
        bs_rvals[itr], bs_pvals[itr] = pearsonr(x_vals, poss_vals)
    return bs_rvals, bs_pvals
def freedman_diaconis_bins(a):
    """Estimate a histogram bin count from the data's interquartile range.

    The bin width is ``2 * IQR / n ** (1/3)``; the bin count is the data
    range divided by that width, rounded up. When the width is zero the
    count falls back to the integer part of ``sqrt(n)``.

    Args:
        a: Array-like of numbers.

    Returns:
        The number of bins as an ``int``.
    """
    values = np.asarray(a)
    bin_width = 2 * iqr(values) / (len(values) ** (1 / 3))
    if bin_width != 0:
        span = values.max() - values.min()
        return int(np.ceil(span / bin_width))
    # Zero IQR: the rule would divide by zero, so use the square-root rule.
    return int(np.sqrt(values.size))
def add_hist(data, col_key, axobj, mkstyle='o', obsval=None, mylabel=None, bin_num=None):
    """Draw a normalised histogram of ``data`` and optionally mark a value.

    Bar heights are bin counts divided by the total count, so they sum to 1.
    When ``obsval`` is given, a dashed vertical line and a marker are drawn
    at ``obsval`` up to the height of the bin that contains it (or the
    nearest end bin if it lies outside the histogram range).

    Args:
        data: Array-like of samples to histogram.
        col_key: Matplotlib colour used for bar faces and edges.
        axobj: Axes-like object providing ``bar``, ``vlines`` and ``plot``.
        mkstyle: Marker style for the observed-value marker.
        obsval: Observed value to highlight, or None for no marker.
            A value of 0 is a valid observation and is drawn.
        mylabel: Legend label for the bars; the marker artists are unlabeled.
        bin_num: Number of bins; when None or 0 it is chosen by
            ``freedman_diaconis_bins``.
    """
    # Normalise the bin count *before* it is used by np.histogram, which
    # rejects non-integer counts.
    if bin_num:
        bin_num = int(bin_num)
    else:
        bin_num = freedman_diaconis_bins(data)

    hist, bin_edges = np.histogram(data, bins=bin_num, density=False)
    norm_hist = hist / hist.sum()

    # x holds the LEFT bin edges, so bars must be edge-aligned; the default
    # centre alignment would shift the histogram left by half a bin.
    axobj.bar(bin_edges[:-1], norm_hist, width=bin_edges[1] - bin_edges[0],
              align='edge', color=col_key, edgecolor=col_key, alpha=0.3,
              label=mylabel)

    if obsval is not None:
        # Index of the bin containing obsval, clamped to the first/last bin.
        containing = np.searchsorted(bin_edges, obsval, side='right') - 1
        lookup = int(np.clip(containing, 0, len(norm_hist) - 1))
        height = norm_hist[lookup]
        axobj.vlines(obsval, 0, height, linestyles='dashed',
                     lw=1.0, zorder=6, label=None)
        axobj.plot(obsval, height, color='k', marker=mkstyle,
                   ms=5., zorder=7, label=None)
# Streamlit application body
def _draw_sample_panel(ax, dataset, derr, col_key, mrange, xlabs):
    """Plot one composite: mean line plus a shaded +/- error band on ``ax``."""
    lthick = 1.0
    # Positions 4..8 are emphasised (solid line, darker band); the segments
    # before and after are dashed with a lighter band. Segments overlap by
    # one point so the line is continuous.
    lead, core, tail = slice(0, 5), slice(4, 9), slice(8, None)
    ax.plot(mrange[lead], dataset[lead], 'k--', lw=lthick)
    ax.plot(mrange[core], dataset[core], 'k-', lw=lthick)
    ax.plot(mrange[tail], dataset[tail], 'k--', lw=lthick)
    for span, shade in ((lead, 0.15), (core, 0.3), (tail, 0.15)):
        ax.fill_between(mrange[span],
                        dataset[span] - derr[span],
                        dataset[span] + derr[span],
                        color=col_key, linewidth=0.1, alpha=shade)
    # The labels name every second position, so the ticks must sit at
    # 0, 2, ..., 10 rather than at 0..5.
    ax.set_xticks(mrange[::2])
    ax.set_xticklabels(xlabs)
    ax.set_xlim(0, 11)


def _draw_bootstrap_plots(sample_type, bin_num_r, bin_num_p, drought, flood):
    """Show bootstrap r-value and p-value histograms for the chosen samples.

    ``drought`` and ``flood`` are tuples
    ``(bootstrap_r_values, bootstrap_p_values, observed_r, observed_p)``.
    ``sample_type`` is 'Drought', 'Flood' or 'Both'.
    """
    rbs1, pbs1, rval_d, pval_d = drought
    rbs2, pbs2, rval_f, pval_f = flood
    show_drought = sample_type in ('Drought', 'Both')
    show_flood = sample_type in ('Flood', 'Both')

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
    try:
        # Left panel: r-values.
        if show_drought:
            add_hist(data=rbs1, col_key='r', axobj=ax1, obsval=rval_d,
                     mylabel="Drought sample", mkstyle='o', bin_num=bin_num_r)
        if show_flood:
            add_hist(data=rbs2, col_key='b', axobj=ax1, obsval=rval_f,
                     mylabel="Flood sample", mkstyle='D', bin_num=bin_num_r)
        ax1.legend(loc='upper right')
        ax1.set_ylabel('Density')
        ax1.set_xlabel('$r$-values')
        ax1.set_title('Potential $r$-values from Bootstrap')

        # Right panel: p-values, with a cumulative KDE on a twin y axis.
        if show_drought:
            add_hist(data=pbs1, col_key='r', axobj=ax2, obsval=pval_d,
                     mkstyle='o', bin_num=bin_num_p)
        if show_flood:
            add_hist(data=pbs2, col_key='b', axobj=ax2, obsval=pval_f,
                     mkstyle='D', bin_num=bin_num_p)
        ax3 = ax2.twinx()
        if show_drought:
            sns.kdeplot(pbs1, cumulative=True, color='r', ax=ax3,
                        lw=1, alpha=0.3, zorder=10)
        if show_flood:
            sns.kdeplot(pbs2, cumulative=True, color='b', ax=ax3,
                        lw=1, alpha=0.3, zorder=11)
        ax3.grid(False)
        ax3.set_ylabel("Cumulative density")
        ax2.set_xlabel(r'$p$-value')
        ax2.set_title(r'Potential $p$-values from Bootstrap')
        st.pyplot(fig)
    finally:
        # Release the figure so reruns do not accumulate open figures.
        plt.close(fig)


def _show_drought_flood_sample():
    """Render the drought/flood composite plots, Pearson tests and bootstrap."""
    st.header("Drought/Flood Sample Analysis")
    _, d_means, d_errors = make_cframe(drought_years)
    _, f_means, f_errors = make_cframe(flood_years)
    # Scale to thousands of counts to match the axis label.
    d_means = np.array(d_means) * 0.001
    f_means = np.array(f_means) * 0.001
    d_errors = np.array(d_errors) * 0.001
    f_errors = np.array(f_errors) * 0.001

    mrange = np.arange(0, 12)
    xlabs = ['Jan', 'Mar', 'May', 'Jul', 'Sep', 'Nov']
    fig = plt.figure()
    try:
        fig.set_size_inches(7.48, 3.54)
        ax1 = fig.add_subplot(121)
        ax2 = fig.add_subplot(122)
        _draw_sample_panel(ax1, d_means, d_errors, 'r', mrange, xlabs)
        _draw_sample_panel(ax2, f_means, f_errors, 'b', mrange, xlabs)
        ax1.set_ylabel(r"Neutron counts (cnt./min.$\times10^{3}$)", fontsize=11)
        ax1.set_title('Drought sample')
        ax2.set_title('Flood sample')
        st.pyplot(fig)
    finally:
        plt.close(fig)

    # Linear trend over positions 4..8 of each composite.
    rval_d, pval_d = pearsonr(range(5), d_means[4:9])
    rval_f, pval_f = pearsonr(range(5), f_means[4:9])
    st.write("A Pearson's r test, gives linear regressions and two-tailed p-values of:")
    st.write(f"Drought sample: r-value = {rval_d:4.3f}, p-value = {pval_d:4.3f}")
    st.write(f"Flood sample: r-value = {rval_f:4.3f}, p-value = {pval_f:4.3f}")

    rbs1, pbs1 = bootstrap_r(mean_list=d_means, error_list=d_errors)
    rbs2, pbs2 = bootstrap_r(mean_list=f_means, error_list=f_errors)

    sample_dropdown = st.sidebar.selectbox('Sample:', ['Both', 'Drought', 'Flood'])
    bin_slider_r = st.sidebar.slider('r-value bins:', min_value=10, max_value=100, value=30, step=1)
    bin_slider_p = st.sidebar.slider('p-value bins:', min_value=10, max_value=100, value=25, step=1)
    _draw_bootstrap_plots(sample_dropdown, bin_slider_r, bin_slider_p,
                          drought=(rbs1, pbs1, rval_d, pval_d),
                          flood=(rbs2, pbs2, rval_f, pval_f))


def main():
    """Run the Streamlit app: pick an analysis in the sidebar and render it."""
    # Application title
    st.title('Monsoon Data Analysis')
    # Analysis selector
    analysis_type = st.sidebar.selectbox(
        "Select Analysis",
        ("Time Series", "JJAS Condition", "Drought/Flood Sample"))

    if analysis_type == "Time Series":
        event_type = st.sidebar.selectbox("Event Type", ('All', 'Drought', 'Flood'))
        st.header("Time Series Analysis")
        plot_timeseries(event_type)
    elif analysis_type == "JJAS Condition":
        st.header("JJAS Condition Analysis")
        # plot_jjas_condition reads the module-level `jjas` summary, so
        # nothing has to be computed here.
        plot_jjas_condition()
    elif analysis_type == "Drought/Flood Sample":
        _show_drought_flood_sample()
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()