Spaces:
Runtime error
Runtime error
| from __future__ import print_function, division, generators | |
| import sys | |
| from past.builtins import xrange | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import altair as alt | |
| from scipy.stats import pearsonr, iqr | |
# Configure the Streamlit page (browser-tab title and wide layout).
st.set_page_config(page_title="Monsoon Data Analysis", layout="wide")


def _read_dated_csv(url):
    """Read a CSV, parse its 'Date' column as dates and reuse it as the index."""
    frame = pd.read_csv(url, parse_dates=['Date'])
    frame.index = frame.Date
    return frame


# Load data
monsoon = _read_dated_csv('https://raw.githubusercontent.com/Koi4595/SI-649/main/Monsoon_data.csv')
olou = _read_dated_csv('https://raw.githubusercontent.com/Koi4595/SI-649/main/Olou_counts.csv')
olou['Date'] = pd.to_datetime(olou['Date'])

# Years that make up the drought sample and the flood sample.
drought_years = [1965, 1966, 1968, 1972, 1974, 1979, 1982, 1986, 1987, 2002, 2004, 2009]
flood_years = [1964, 1970, 1971, 1973, 1975, 1978, 1983, 1988, 1990, 1994, 2007, 2008]
# Visualization 1 Time series
def plot_timeseries(event_type):
    """Draw precipitation and Olou neutron-monitor counts as a two-panel figure.

    Args:
        event_type: 'Drought' or 'Flood' restricts both series to the years in
            ``drought_years`` / ``flood_years``; any other value plots the
            complete records.

    The figure is handed to ``st.pyplot`` and then closed. pyplot keeps a
    reference to every figure it creates, so without the explicit close each
    call would leave another figure alive in the process.
    """
    years = {'Drought': drought_years, 'Flood': flood_years}.get(event_type)
    if years is None:
        monsoon_selected_years = monsoon
        olou_selected_years = olou
    else:
        monsoon_selected_years = monsoon[monsoon['Date'].dt.year.isin(years)]
        olou_selected_years = olou[olou['Date'].dt.year.isin(years)]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 5), sharex=True)
    try:
        # Upper panel: precipitation drawn as a step curve.
        ax1.step(monsoon_selected_years['Date'], monsoon_selected_years['Precip'],
                 where='mid', color='blue')
        ax1.set_title('Monthly Precipitation for Selected Years')
        ax1.set_ylabel('Precipitation (mm)')
        ax1.grid(True)

        # Lower panel: neutron counts scaled to thousands, drawn as red dots.
        ax2.plot(olou_selected_years['Date'], olou_selected_years['Counts']/1000,
                 'r.', ms=3.0)
        ax2.set_ylabel('Olou NM Counts for Selected Years (cnt./min. x 10^3)')
        ax2.set_xlabel('Date')
        ax2.grid(True)

        # Lay out this figure explicitly rather than pyplot's "current" figure.
        fig.tight_layout()
        st.pyplot(fig)
    finally:
        plt.close(fig)
# Visualization 2 JJAS condition
def return_stderr(data):
    """Calculate uncertainty of a np array as Standard Error of the Mean.

    NaN entries are ignored both in the standard deviation and in the sample
    count. Zero is treated as an ordinary value (counting with
    ``np.count_nonzero(data)`` would drop zeros and count NaNs as samples).

    Args:
        data: array-like of numbers; may contain NaN.

    Returns:
        ``nanstd(data) / sqrt(n - 1)`` with ``n`` the number of non-NaN
        values (population std over sqrt(n - 1) equals sample std over
        sqrt(n)). NaN when fewer than two valid values are present.
    """
    values = np.asarray(data, dtype=float)
    n_valid = np.count_nonzero(~np.isnan(values))
    if n_valid < 2:
        # The SEM is undefined for zero or one sample.
        return np.nan
    return np.nanstd(values) / np.sqrt(n_valid - 1)
# Monthly climatology: mean and standard error of precipitation for each
# calendar month (list position 0 is January).
climo = {}
climo['means'] = [np.mean(monsoon.Precip[monsoon.index.month == month])
                  for month in range(1, 13)]
climo['error'] = [return_stderr(monsoon.Precip[monsoon.index.month == month].values)
                  for month in range(1, 13)]

# Anomaly of every record relative to the mean of its calendar month. The
# subtraction is positional, so it does not rely on the date labels being
# unique the way a per-label scalar lookup does.
_month_positions = np.asarray(monsoon.index.month) - 1
delta = (monsoon.Precip.values - np.asarray(climo['means'])[_month_positions]).tolist()
dseries = pd.Series(delta, index=monsoon.index)
# Create a dictionary of June July August September data
def lookup_index(yr):
    """Return a boolean mask over ``monsoon.index`` for June-September of ``yr``."""
    stamps = monsoon.index
    in_year = stamps.year == yr
    in_season = (stamps.month >= 6) & (stamps.month <= 9)
    return in_year & in_season
# Per-year June-September slices of the anomaly series for 1964 through 2011,
# reduced to their mean, standard error and sum.
_jjas_seasons = [dseries[lookup_index(yr)] for yr in range(1964, 2012)]
jjas = {
    'means': [np.mean(season) for season in _jjas_seasons],
    'SEM': [return_stderr(season) for season in _jjas_seasons],
    'sum': [np.sum(season) for season in _jjas_seasons],
}
def plot_jjas_condition():
    """Render the JJAS anomaly charts from the module-level ``jjas`` statistics.

    Left: yearly mean anomaly as points with error bars taken from the 'SEM'
    column, coloured by whether the year is in ``drought_years``,
    ``flood_years`` or neither. Right: a density estimate of the yearly means.
    The two charts are concatenated horizontally and sent to Streamlit.
    """
    def _condition(year):
        # Drought membership is tested first, as in the year lists' original use.
        if year in drought_years:
            return 'Drought'
        if year in flood_years:
            return 'Flood'
        return 'Normal'

    jjas_df = pd.DataFrame({
        'Year': range(1964, 2012),
        'Means': jjas['means'],
        'SEM': jjas['SEM'],
        'Sum': jjas['sum'],
    })
    jjas_df['Condition'] = jjas_df['Year'].map(_condition)

    color_scale = alt.Scale(
        domain=['Drought', 'Flood', 'Normal'],
        range=['darkred', 'lightblue', 'orange'],
    )
    decade_ticks = list(range(1960, 2011, 10))

    # Layer 1: error bars around each yearly mean.
    error_bars = alt.Chart(jjas_df).mark_errorbar(extent='ci')
    error_bars = error_bars.encode(
        x=alt.X('Year:O', axis=alt.Axis(values=decade_ticks)),
        y=alt.Y('Means:Q', scale=alt.Scale(zero=False)),
        yError='SEM:Q',
        color=alt.Color('Condition:N', scale=color_scale),
    )
    error_bars = error_bars.properties(
        width=400,
        height=400,
        title='Mean JJAS precipitation anomaly',
    )

    # Layer 2: the yearly means themselves.
    points = alt.Chart(jjas_df).mark_point(filled=True).encode(
        x='Year:O',
        y='Means:Q',
        color=alt.Color('Condition', legend=alt.Legend(title='Condition')),
    )
    error_chart = (error_bars + points).interactive()

    # Right-hand panel: density of the yearly mean anomalies.
    density = alt.Chart(jjas_df).transform_density(
        density='Means',
        as_=['Means', 'Density'],
    )
    histogram = density.mark_area().encode(
        x="Means:Q",
        y='Density:Q',
        tooltip=['Means:Q', 'Density:Q'],
    ).properties(
        width=400,
        height=400,
        title='Distribution of JJAS anomalies',
    )

    chart = alt.hconcat(error_chart, histogram).resolve_legend(color='independent')
    st.altair_chart(chart, use_container_width=True)
def return_stderr(data):
    """Return the standard error of the mean of ``data``, ignoring NaN entries.

    The sample count is the number of non-NaN values, so NaN placeholders are
    not counted as samples and genuine zeros are not dropped (both happen when
    counting with ``np.count_nonzero(data)``).

    Args:
        data: array-like of numbers; may contain NaN.

    Returns:
        ``nanstd(data) / sqrt(n - 1)`` with ``n`` the number of non-NaN
        values, or NaN when fewer than two valid values are present.
    """
    values = np.asarray(data, dtype=float)
    n_valid = np.count_nonzero(~np.isnan(values))
    if n_valid < 2:
        # The SEM is undefined for zero or one sample.
        return np.nan
    return np.nanstd(values) / np.sqrt(n_valid - 1)
def make_cframe(c_years):
    """Collect Olou counts for the given years into a year-by-position table.

    Each row holds the ``olou.Counts`` records of one year in their stored
    order; callers plot the 12 columns as the months of the year. Cells with
    no record, and cells whose count is exactly zero, are set to NaN
    (presumably zero marks missing data - confirm against the data source).

    Args:
        c_years: sequence of calendar years to extract.

    Returns:
        Tuple ``(c_group, c_means, c_errors)``: the ``(len(c_years), 12)``
        float table, and per-column lists of the NaN-aware mean and of
        ``return_stderr``.

    Raises:
        IndexError: if a year has more than 12 records.
    """
    n_cols = 12
    c_group = np.full((len(c_years), n_cols), np.nan, dtype=float)
    record_years = olou.index.year
    for n, yr in enumerate(c_years):
        # Pull the year's values once, positionally; integer `[]` lookups on a
        # date-indexed Series depend on a deprecated positional fallback.
        year_counts = olou.Counts[record_years == yr].values
        if len(year_counts) > n_cols:
            raise IndexError(
                "year %s has %d records; at most %d fit in a row"
                % (yr, len(year_counts), n_cols))
        c_group[n, :len(year_counts)] = year_counts
    c_group[c_group == 0] = np.nan
    c_means = [np.nanmean(c_group[:, i]) for i in range(n_cols)]
    c_errors = [return_stderr(c_group[:, i]) for i in range(n_cols)]
    return c_group, c_means, c_errors
def bootstrap_r(mean_list, error_list, iterations=1000, start=4, length=5):
    """Bootstrap Pearson r and p-values for a window of means with errors.

    For every iteration one value per window position is drawn uniformly from
    ``[mean - error, mean + error)`` on an integer grid (bounds scaled by
    100000, draws divided by 100, as before), and the draws are correlated
    against ``0, 1, ..., length - 1``.

    Args:
        mean_list: sequence of means; positions ``start`` to
            ``start + length - 1`` are used.
        error_list: sequence of uncertainties aligned with ``mean_list``.
        iterations: number of bootstrap replicates.
        start: first position of the window (default 4).
        length: number of positions in the window (default 5).

    Returns:
        Tuple ``(bs_rvals, bs_pvals)`` of float arrays of length ``iterations``.

    Raises:
        IndexError: if the inputs are too short for the requested window.
        ValueError: if a mean or error inside the window is NaN or infinite.
    """
    means = np.asarray(mean_list, dtype=float)[start:start + length]
    errors = np.asarray(error_list, dtype=float)[start:start + length]
    if len(means) < length or len(errors) < length:
        raise IndexError("mean_list/error_list are shorter than the requested window")
    if not (np.isfinite(means).all() and np.isfinite(errors).all()):
        raise ValueError("means and errors inside the window must be finite")

    # The sampling bounds do not change between iterations, so build them once.
    # astype truncates toward zero, matching int().
    lows = ((means - errors) * 100000).astype(np.int64)
    highs = ((means + errors) * 100000).astype(np.int64)
    # randint needs low < high; an empty interval (e.g. zero error) collapses
    # to its lower bound instead of raising.
    highs = np.maximum(highs, lows + 1)

    positions = np.arange(length)
    draws = np.random.randint(lows, highs, size=(iterations, length)) / 100
    bs_rvals = np.empty(iterations)
    bs_pvals = np.empty(iterations)
    for i, sample in enumerate(draws):
        bs_rvals[i], bs_pvals[i] = pearsonr(positions, sample)
    return bs_rvals, bs_pvals
def freedman_diaconis_bins(a):
    """Return a histogram bin count from the Freedman-Diaconis rule.

    The bin width is ``2 * IQR / n ** (1/3)``. When the IQR is zero the
    count falls back to ``int(sqrt(n))``.

    Args:
        a: one-dimensional array-like of numbers.

    Returns:
        Number of bins as an int; 1 when fewer than two values are given,
        where neither a spread nor a range can be computed.
    """
    a = np.asarray(a)
    if len(a) < 2:
        return 1
    h = 2 * iqr(a) / (len(a) ** (1 / 3))
    if h == 0:
        return int(np.sqrt(a.size))
    return int(np.ceil((a.max() - a.min()) / h))
def add_hist(data, col_key, axobj, mkstyle='o', obsval=None, mylabel=None, bin_num=None):
    """Draw a normalised histogram of ``data`` and optionally mark a value.

    Args:
        data: values to bin.
        col_key: colour used for the bar faces and edges.
        axobj: axes-like object providing ``bar``, ``vlines`` and ``plot``.
        mkstyle: marker style for the observed value.
        obsval: observed value to mark, or None for no marker. A value of
            0 is marked too; only None disables the marker.
        mylabel: legend label for the bars. The marker artists get no label,
            so the legend shows one entry per sample.
        bin_num: number of bins; when None or 0 it is chosen by
            ``freedman_diaconis_bins``.
    """
    if not bin_num:
        bin_num = freedman_diaconis_bins(data)
    # np.histogram needs an integer count, so coerce before it is used.
    bin_num = int(bin_num)

    hist, bin_edges = np.histogram(data, bins=bin_num, density=False)
    total = hist.sum()
    # Bar heights are fractions of all values; all zero when there is no data.
    norm_hist = hist / total if total else np.zeros(len(hist))

    # align='edge' makes each bar span its own bin instead of being centred on
    # the bin's left edge.
    axobj.bar(bin_edges[:-1], norm_hist, width=bin_edges[1] - bin_edges[0],
              align='edge', color=col_key, edgecolor=col_key, alpha=0.3,
              label=mylabel)
    if obsval is None:
        return

    # Bin that contains obsval, clamped to the first/last bin when it lies
    # outside the histogram range.
    containing = np.searchsorted(bin_edges, obsval, side='right') - 1
    containing = int(np.clip(containing, 0, len(norm_hist) - 1))
    height = norm_hist[containing]
    axobj.vlines(obsval, 0, height, linestyles='dashed',
                 lw=1.0, zorder=6, label=None)
    axobj.plot(obsval, height, color='k', marker=mkstyle,
               ms=5., zorder=7, label=None)
def _plot_sample_composite(ax, dataset, derr, col_key, mrange, xlabs):
    """Draw one 12-point composite as a line with a shaded +/- error band."""
    lthick = 1.0
    # (positions, line style, band alpha): positions 4-8 are drawn solid with a
    # stronger band, the positions before and after dashed with a fainter one.
    segments = (
        (slice(0, 5), 'k--', 0.15),
        (slice(4, 9), 'k-', 0.3),
        (slice(8, None), 'k--', 0.15),
    )
    for part, line_style, _ in segments:
        ax.plot(mrange[part], dataset[part], line_style, lw=lthick)
    for part, _, band_alpha in segments:
        ax.fill_between(mrange[part], dataset[part] - derr[part],
                        dataset[part] + derr[part],
                        color=col_key, linewidth=0.1, alpha=band_alpha)
    # xlabs names every second month, so the ticks sit on every second
    # position (0, 2, ..., 10) rather than on 0..5.
    ax.set_xticks(mrange[::2])
    ax.set_xticklabels(xlabs)
    ax.set_xlim(0, 11)


def _show_bootstrap_histograms(sample_type, bin_num_r, bin_num_p, samples):
    """Plot bootstrap r and p distributions for the samples selected in the UI."""
    chosen = [s for s in samples if sample_type in (s['name'], 'Both')]
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
    try:
        # Left panel: r-value histograms with the observed r marked.
        for s in chosen:
            add_hist(data=s['rvals'], col_key=s['color'], axobj=ax1, obsval=s['r_obs'],
                     mylabel=s['label'], mkstyle=s['marker'], bin_num=bin_num_r)
        ax1.legend(loc='upper right')
        ax1.set_ylabel('Density')
        ax1.set_xlabel('$r$-values')
        ax1.set_title('Potential $r$-values from Bootstrap')

        # Right panel: p-value histograms plus cumulative densities on a twin axis.
        for s in chosen:
            add_hist(data=s['pvals'], col_key=s['color'], axobj=ax2, obsval=s['p_obs'],
                     mkstyle=s['marker'], bin_num=bin_num_p)
        ax3 = ax2.twinx()
        for s in chosen:
            sns.kdeplot(s['pvals'], cumulative=True, color=s['color'], ax=ax3,
                        lw=1, alpha=0.3, zorder=s['zorder'])
        ax3.grid(False)
        ax3.set_ylabel("Cumulative density")
        ax2.set_xlabel(r'$p$-value')
        ax2.set_title(r'Potential $p$-values from Bootstrap')
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed.
        plt.close(fig)


def _show_sample_analysis():
    """Render the drought/flood composites, their Pearson tests and bootstraps."""
    _, d_means, d_errors = make_cframe(drought_years)
    _, f_means, f_errors = make_cframe(flood_years)
    # Scale counts to thousands, matching the y-axis label.
    d_means = np.array(d_means) * 0.001
    f_means = np.array(f_means) * 0.001
    d_errors = np.array(d_errors) * 0.001
    f_errors = np.array(f_errors) * 0.001

    mrange = np.arange(0, 12)
    xlabs = ['Jan', 'Mar', 'May', 'Jul', 'Sep', 'Nov']
    fig = plt.figure()
    try:
        fig.set_size_inches(7.48, 3.54)
        ax1 = fig.add_subplot(121)
        ax2 = fig.add_subplot(122)
        _plot_sample_composite(ax1, d_means, d_errors, 'r', mrange, xlabs)
        _plot_sample_composite(ax2, f_means, f_errors, 'b', mrange, xlabs)
        ax1.set_ylabel(r"Neutron counts (cnt./min.$\times10^{3}$)", fontsize=11)
        ax1.set_title('Drought sample')
        ax2.set_title('Flood sample')
        st.pyplot(fig)
    finally:
        plt.close(fig)

    # Linear trend over positions 4-8 of each composite.
    rval_d, pval_d = pearsonr(range(5), d_means[4:9])
    rval_f, pval_f = pearsonr(range(5), f_means[4:9])
    st.write("A Pearson's r test, gives linear regressions and two-tailed p-values of:")
    st.write(f"Drought sample: r-value = {rval_d:4.3f}, p-value = {pval_d:4.3f}")
    st.write(f"Flood sample: r-value = {rval_f:4.3f}, p-value = {pval_f:4.3f}")

    rbs1, pbs1 = bootstrap_r(mean_list=d_means, error_list=d_errors)
    rbs2, pbs2 = bootstrap_r(mean_list=f_means, error_list=f_errors)
    samples = [
        {'name': 'Drought', 'label': "Drought sample", 'color': 'r', 'marker': 'o',
         'zorder': 10, 'rvals': rbs1, 'pvals': pbs1, 'r_obs': rval_d, 'p_obs': pval_d},
        {'name': 'Flood', 'label': "Flood sample", 'color': 'b', 'marker': 'D',
         'zorder': 11, 'rvals': rbs2, 'pvals': pbs2, 'r_obs': rval_f, 'p_obs': pval_f},
    ]

    sample_dropdown = st.sidebar.selectbox('Sample:', ['Both', 'Drought', 'Flood'])
    bin_slider_r = st.sidebar.slider('r-value bins:', min_value=10, max_value=100, value=30, step=1)
    bin_slider_p = st.sidebar.slider('p-value bins:', min_value=10, max_value=100, value=25, step=1)
    _show_bootstrap_histograms(sample_dropdown, bin_slider_r, bin_slider_p, samples)


# Streamlit application body
def main():
    """Streamlit entry point: show the analysis picked in the sidebar.

    The sidebar offers "Time Series", "JJAS Condition" and
    "Drought/Flood Sample"; each choice renders its header and plots.
    """
    # Application title
    st.title('Monsoon Data Analysis')
    # Analysis options
    analysis_type = st.sidebar.selectbox(
        "Select Analysis",
        ("Time Series", "JJAS Condition", "Drought/Flood Sample"))

    if analysis_type == "Time Series":
        event_type = st.sidebar.selectbox("Event Type", ('All', 'Drought', 'Flood'))
        st.header("Time Series Analysis")
        plot_timeseries(event_type)
    elif analysis_type == "JJAS Condition":
        st.header("JJAS Condition Analysis")
        # plot_jjas_condition reads the module-level `jjas` statistics, so
        # nothing is recomputed here.
        plot_jjas_condition()
    elif analysis_type == "Drought/Flood Sample":
        st.header("Drought/Flood Sample Analysis")
        _show_sample_analysis()


if __name__ == "__main__":
    main()