from __future__ import print_function, division, generators import sys from past.builtins import xrange import streamlit as st import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns import altair as alt from scipy.stats import pearsonr, iqr st.set_page_config(page_title="Monsoon Data Analysis", layout="wide") # 读取 monsoon = pd.read_csv('https://raw.githubusercontent.com/Koi4595/SI-649/main/Monsoon_data.csv', parse_dates=['Date']) monsoon.index = monsoon.Date olou = pd.read_csv('https://raw.githubusercontent.com/Koi4595/SI-649/main/Olou_counts.csv',parse_dates=['Date']) olou.index = olou.Date olou['Date'] = pd.to_datetime(olou['Date']) drought_years = [1965, 1966, 1968, 1972, 1974, 1979, 1982, 1986, 1987, 2002, 2004, 2009] flood_years = [1964, 1970, 1971, 1973, 1975, 1978, 1983, 1988, 1990, 1994, 2007, 2008] # Visualization 1 Time series def plot_timeseries(event_type): fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 5), sharex=True) if event_type == 'Drought': monsoon_selected_years = monsoon[monsoon['Date'].dt.year.isin(drought_years)] olou_selected_years = olou[olou['Date'].dt.year.isin(drought_years)] elif event_type == 'Flood': monsoon_selected_years = monsoon[monsoon['Date'].dt.year.isin(flood_years)] olou_selected_years = olou[olou['Date'].dt.year.isin(flood_years)] else: monsoon_selected_years = monsoon olou_selected_years = olou ax1.step(monsoon_selected_years['Date'], monsoon_selected_years['Precip'], where='mid', color='blue') ax1.set_title('Monthly Precipitation for Selected Years') ax1.set_ylabel('Precipitation (mm)') ax1.grid(True) ax2.plot(olou_selected_years['Date'], olou_selected_years['Counts']/1000, 'r.', ms=3.0) ax2.set_ylabel('Olou NM Counts for Selected Years (cnt./min. x 10^3)') ax2.set_xlabel('Date') ax2.grid(True) plt.tight_layout() st.pyplot(fig) # Visualization 2 JJAS condition def return_stderr(data): """Calculate uncertainty of a np array as Standard Error of the Mean""" return np.nanstd(data)/np.sqrt(np.count_nonzero(data) - 1) climo = {} # Produce a dic of monthly climatology using list comprehension climo['means'] = [np.mean(monsoon.Precip[monsoon.index.month == (mnth+1)]) for mnth in xrange(12)] climo['error'] = [return_stderr(monsoon.Precip[monsoon.index.month == (mnth+1)].values) for mnth in xrange(12)] delta = [] for date in monsoon.Precip.index: delta.append(monsoon.Precip[date] - climo['means'][date.month-1]) dseries = pd.Series(delta, index=monsoon.index) # Create a dictionary of June July August September data def lookup_index(yr): return ((monsoon.index.year == yr) & (monsoon.index.month >= 6) &(monsoon.index.month <= 9)) jjas = {} jjas['means']=[np.mean(dseries[lookup_index(yr)]) for yr in xrange(1964,2012,1)] jjas['SEM']=[return_stderr(dseries[lookup_index(yr)])for yr in xrange(1964,2012,1)] jjas['sum']=[np.sum(dseries[lookup_index(yr)]) for yr in xrange(1964,2012,1)] def plot_jjas_condition(): color_scale = alt.Scale( domain=['Drought', 'Flood', 'Normal'], range=['darkred', 'lightblue', 'orange'] ) jjas_df = pd.DataFrame({ 'Year': range(1964, 2012), 'Means': jjas['means'], 'SEM': jjas['SEM'], 'Sum': jjas['sum'] }) jjas_df['Condition'] = jjas_df['Year'].apply(lambda x: 'Drought' if x in drought_years else 'Flood' if x in flood_years else 'Normal') error_bars = alt.Chart(jjas_df).mark_errorbar(extent='ci').encode( x=alt.X('Year:O', axis=alt.Axis(values=list(range(1960, 2011, 10)))), y=alt.Y('Means:Q', scale=alt.Scale(zero=False)), yError='SEM:Q', color=alt.Color('Condition:N', scale=color_scale) ).properties( width=400, height=400, title='Mean JJAS precipitation anomaly' ) points = alt.Chart(jjas_df).mark_point(filled=True).encode( x='Year:O', y='Means:Q', color=alt.Color('Condition', legend=alt.Legend(title='Condition')) ) error_chart = (error_bars + points).interactive() histogram = alt.Chart(jjas_df).transform_density( density='Means', as_=['Means', 'Density'] ).mark_area().encode( x="Means:Q", y='Density:Q', tooltip=['Means:Q', 'Density:Q'] ).properties( width=400, height=400, title='Distribution of JJAS anomalies' ) chart = alt.hconcat(error_chart, histogram).resolve_legend(color='independent') st.altair_chart(chart, use_container_width=True) def return_stderr(data): return np.nanstd(data)/np.sqrt(np.count_nonzero(data) - 1) def make_cframe(c_years): c_group = np.zeros((12,12),dtype=float) for n, yr in enumerate(c_years): tmp = olou.index.year == yr for i in range(len(olou.Counts[tmp])): c_group[n,i] = olou.Counts[tmp][i] aaa = np.where(c_group == 0) c_group[aaa] = np.nan c_means = [] c_errors = [] for i in range(12): c_means.append(np.nanmean(c_group[:,i])) c_errors.append(return_stderr(c_group[:,i])) return c_group,c_means,c_errors def bootstrap_r(mean_list, error_list, iterations=1000): bs_rvals = [] bs_pvals = [] for itr in range(iterations): poss_vals = [] for n in range(5): poss_min = int((mean_list[4 + n] - error_list[4 + n]) * 100000) poss_max = int((mean_list[4 + n] + error_list[4 + n]) * 100000) poss_vals.append(np.random.randint(poss_min,poss_max)/100) rv, pv = pearsonr([0,1,2,3,4],poss_vals) bs_rvals.append(rv) bs_pvals.append(pv) bs_rvals = np.array(bs_rvals) bs_pvals = np.array(bs_pvals) return bs_rvals, bs_pvals def freedman_diaconis_bins(a): a = np.asarray(a) h = 2 * iqr(a) / (len(a) ** (1 / 3)) if h == 0: return int(np.sqrt(a.size)) else: return int(np.ceil((a.max() - a.min()) / h)) def add_hist(data, col_key, axobj, mkstyle='o', obsval=None, mylabel=None, bin_num=None): if not bin_num: bin_num = freedman_diaconis_bins(data) hist, bin_edges = np.histogram(data, bins=bin_num, density=False) norm_hist = hist / sum(hist) axobj.bar(bin_edges[0:-1], norm_hist, width = bin_edges[1] - bin_edges[0], color = col_key, edgecolor = col_key, alpha = 0.3, label=mylabel) mylabel = None if obsval: lookup = np.where(abs(obsval - bin_edges[0:-1]) == min(abs(obsval - bin_edges[0:-1]))) axobj.vlines(obsval,0,norm_hist[lookup], linestyles='dashed', lw=1.0, zorder=6, label=mylabel) axobj.plot(obsval, norm_hist[lookup], color='k', marker=mkstyle, ms=5., zorder=7, label=mylabel) if bin_num is not None: bin_num = int(bin_num) # Streamlit应用主体 def main(): # 应用标题 st.title('Monsoon Data Analysis') # 分析选项 analysis_type = st.sidebar.selectbox("Select Analysis", ("Time Series", "JJAS Condition", "Drought/Flood Sample")) if analysis_type == "Time Series": event_type = st.sidebar.selectbox("Event Type", ('All', 'Drought', 'Flood')) st.header("Time Series Analysis") plot_timeseries(event_type) elif analysis_type == "JJAS Condition": st.header("JJAS Condition Analysis") # 计算每年JJAS季节降水异常 climo = {} climo['means'] = [np.mean(monsoon.Precip[monsoon.index.month == (mnth+1)]) for mnth in range(12)] climo['error'] = [return_stderr(monsoon.Precip[monsoon.index.month == (mnth+1)].values) for mnth in range(12)] delta = [] for date in monsoon.Precip.index: delta.append(monsoon.Precip[date] - climo['means'][date.month-1]) dseries = pd.Series(delta, index=monsoon.index) def lookup_index(yr): return ((monsoon.index.year == yr) & (monsoon.index.month >= 6) &(monsoon.index.month <= 9)) jjas = {} jjas['means']=[np.mean(dseries[lookup_index(yr)]) for yr in range(1964,2012,1)] jjas['SEM']=[return_stderr(dseries[lookup_index(yr)])for yr in range(1964,2012,1)] jjas['sum']=[np.sum(dseries[lookup_index(yr)]) for yr in range(1964,2012,1)] plot_jjas_condition() elif analysis_type == "Drought/Flood Sample": st.header("Drought/Flood Sample Analysis") d_group,d_means,d_errors = make_cframe(drought_years) f_group,f_means,f_errors = make_cframe(flood_years) d_means = np.array(d_means) * 0.001 f_means = np.array(f_means) * 0.001 d_errors = np.array(d_errors) * 0.001 f_errors = np.array(f_errors) * 0.001 mrange = np.arange(0,12) xlabs =['Jan','Mar','May','Jul','Sep','Nov'] fig = plt.figure() fig.set_size_inches(7.48,3.54) ax1 = fig.add_subplot(121) ax2 = fig.add_subplot(122) def simBA15plot(ax, dataset, derr, col_key): lthick=1.0 ax.plot(mrange[0:5], dataset[0:5], 'k--',lw=lthick) ax.plot(mrange[4:9], dataset[4:9], 'k-',lw=lthick) ax.plot(mrange[8:], dataset[8:], 'k--',lw=lthick) ax.fill_between(mrange[0:5],(dataset[0:5] - derr[0:5]), (dataset[0:5] + derr[0:5]), color=col_key, linewidth=0.1,alpha=0.15) ax.fill_between(mrange[4:9], (dataset[4:9] - derr[4:9]), (dataset[4:9] + derr[4:9]), color=col_key, linewidth=0.1, alpha=0.3) ax.fill_between(mrange[8:],(dataset[8:] - derr[8:]), (dataset[8:] + derr[8:]), color=col_key, linewidth=0.1, alpha=0.15) ax.set_xticks(np.arange(len(xlabs))) ax.set_xticklabels(xlabs) ax.set_xlim(0,11) return simBA15plot(ax=ax1, dataset=d_means, derr=d_errors, col_key='r') simBA15plot(ax=ax2, dataset=f_means, derr=f_errors, col_key='b') ax1.set_ylabel(r"Neutron counts (cnt./min.$\times10^{3}$)", fontsize=11) ax1.set_title('Drought sample') ax2.set_title('Flood sample') st.pyplot(fig) rval_d,pval_d = pearsonr(range(5),d_means[4:9]) rval_f,pval_f = pearsonr(range(5),f_means[4:9]) st.write("A Pearson's r test, gives linear regressions and two-tailed p-values of:") st.write(f"Drought sample: r-value = {rval_d:4.3f}, p-value = {pval_d:4.3f}") st.write(f"Flood sample: r-value = {rval_f:4.3f}, p-value = {pval_f:4.3f}") rbs1, pbs1 = bootstrap_r(mean_list = d_means, error_list = d_errors) rbs2, pbs2 = bootstrap_r(mean_list = f_means, error_list = f_errors) def update_plots(sample_type, bin_num_r, bin_num_p): fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5)) if sample_type == 'Drought' or sample_type == 'Both': add_hist(data=rbs1, col_key='r', axobj=ax1, obsval=rval_d, mylabel="Drought sample", mkstyle='o', bin_num=bin_num_r) if sample_type == 'Flood' or sample_type == 'Both': add_hist(data=rbs2, col_key='b', axobj=ax1, obsval=rval_f, mylabel="Flood sample", mkstyle='D', bin_num=bin_num_r) ax1.legend(loc='upper right') ax1.set_ylabel('Density') ax1.set_xlabel('$r$-values') ax1.set_title('Potential $r$-values from Bootstrap') if sample_type == 'Drought' or sample_type == 'Both': add_hist(data=pbs1, col_key='r', axobj=ax2, obsval=pval_d, mkstyle='o', bin_num=bin_num_p) if sample_type == 'Flood' or sample_type == 'Both': add_hist(data=pbs2, col_key='b', axobj=ax2, obsval=pval_f, mkstyle='D', bin_num=bin_num_p) ax3 = ax2.twinx() if sample_type == 'Drought' or sample_type == 'Both': sns.kdeplot(pbs1, cumulative=True, color='r', ax=ax3, lw=1, alpha=0.3, zorder=10) if sample_type == 'Flood' or sample_type == 'Both': sns.kdeplot(pbs2, cumulative=True, color='b', ax=ax3, lw=1, alpha=0.3, zorder=11) ax3.grid(False) ax3.set_ylabel("Cumulative density") ax2.set_xlabel(r'$p$-value') ax2.set_title(r'Potential $p$-values from Bootstrap') st.pyplot(fig) sample_dropdown = st.sidebar.selectbox('Sample:', ['Both', 'Drought', 'Flood']) bin_slider_r = st.sidebar.slider('r-value bins:', min_value=10, max_value=100, value=30, step=1) bin_slider_p = st.sidebar.slider('p-value bins:', min_value=10, max_value=100, value=25, step=1) update_plots(sample_dropdown, bin_slider_r, bin_slider_p) if __name__ == "__main__": main()