Spaces:

Koi4595
/

SI649_project_ningzr

Runtime error

App Files Files Community

SI649_project_ningzr / app.py

Koi4595

Update app.py

fd75b09 verified about 1 year ago

raw

history blame contribute delete

13 kB

	from __future__ import print_function, division, generators
	import sys
	from past.builtins import xrange
	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import altair as alt
	from scipy.stats import pearsonr, iqr


	st.set_page_config(page_title="Monsoon Data Analysis", layout="wide")

	# Load the monsoon table and index it by its parsed 'Date' column. The
	# column is kept as well, because later code reads both the index and
	# the column.
	monsoon = pd.read_csv(
	    'https://raw.githubusercontent.com/Koi4595/SI-649/main/Monsoon_data.csv',
	    parse_dates=['Date'],
	)
	monsoon.index = monsoon['Date']

	# Load the Olou counts table the same way.
	olou = pd.read_csv(
	    'https://raw.githubusercontent.com/Koi4595/SI-649/main/Olou_counts.csv',
	    parse_dates=['Date'],
	)
	olou.index = olou['Date']
	# Coerce the column to datetime once more after the index has been set.
	olou['Date'] = pd.to_datetime(olou['Date'])

	# Years labelled 'Drought' by the analysis, grouped by decade.
	drought_years = [
	    1965, 1966, 1968,
	    1972, 1974, 1979,
	    1982, 1986, 1987,
	    2002, 2004, 2009,
	]
	# Years labelled 'Flood' by the analysis, grouped by decade.
	flood_years = [
	    1964,
	    1970, 1971, 1973, 1975, 1978,
	    1983, 1988,
	    1990, 1994,
	    2007, 2008,
	]

	# Visualization 1 Time series
	def plot_timeseries(event_type):
	    """Render the precipitation and Olou neutron-count series in Streamlit.

	    Two stacked panels sharing the date axis are drawn: precipitation as a
	    step line on top, neutron counts (divided by 1000) as red dots below.

	    Args:
	        event_type: 'Drought' or 'Flood' restricts both panels to the
	            records whose year is in ``drought_years`` / ``flood_years``;
	            any other value plots the complete records.
	    """
	    if event_type == 'Drought':
	        selected_years = drought_years
	    elif event_type == 'Flood':
	        selected_years = flood_years
	    else:
	        selected_years = None

	    if selected_years is None:
	        monsoon_selected_years = monsoon
	        olou_selected_years = olou
	    else:
	        monsoon_selected_years = monsoon[monsoon['Date'].dt.year.isin(selected_years)]
	        olou_selected_years = olou[olou['Date'].dt.year.isin(selected_years)]

	    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 5), sharex=True)
	    try:
	        ax1.step(monsoon_selected_years['Date'], monsoon_selected_years['Precip'],
	                 where='mid', color='blue')
	        ax1.set_title('Monthly Precipitation for Selected Years')
	        ax1.set_ylabel('Precipitation (mm)')
	        ax1.grid(True)

	        ax2.plot(olou_selected_years['Date'], olou_selected_years['Counts'] / 1000,
	                 'r.', ms=3.0)
	        ax2.set_ylabel('Olou NM Counts for Selected Years (cnt./min. x 10^3)')
	        ax2.set_xlabel('Date')
	        ax2.grid(True)

	        # Lay out this figure explicitly instead of pyplot's implicit
	        # "current" figure.
	        fig.tight_layout()
	        st.pyplot(fig)
	    finally:
	        # Figures created through pyplot stay registered until closed;
	        # without this every call would keep one more figure alive.
	        plt.close(fig)

	# Visualization 2 JJAS condition

	def return_stderr(data):
	    """Calculate uncertainty of a np array as Standard Error of the Mean.

	    NaN entries are treated as missing: they are ignored by the standard
	    deviation and are not counted as samples. Genuine zeros are counted
	    (a zero is a valid observation).

	    Args:
	        data: array-like of numbers (list, NumPy array or pandas Series).

	    Returns:
	        ``nanstd(data) / sqrt(N - 1)`` where N is the number of non-NaN
	        values, or NaN when fewer than two values are available.
	    """
	    values = np.asarray(data, dtype=float)
	    # Sample size = entries that actually hold a value. Counting non-zero
	    # entries instead would drop real zeros and count NaN as a sample.
	    n_valid = np.count_nonzero(~np.isnan(values))
	    if n_valid < 2:
	        return np.nan
	    return np.nanstd(values) / np.sqrt(n_valid - 1)

	# Monthly climatology: for each calendar month (January first) the mean
	# precipitation and its standard error, stored as two 12-element lists.
	climo = {
	    'means': [np.mean(monsoon.Precip[monsoon.index.month == month_no])
	              for month_no in range(1, 13)],
	    'error': [return_stderr(monsoon.Precip[monsoon.index.month == month_no].values)
	              for month_no in range(1, 13)],
	}
	# Anomaly of every record: observed precipitation minus the
	# climatological mean of the record's calendar month.
	delta = [monsoon.Precip[stamp] - climo['means'][stamp.month - 1]
	         for stamp in monsoon.Precip.index]
	dseries = pd.Series(delta, index=monsoon.index)
	# Create a dictionary of June July August September data
	def lookup_index(yr):
	    """Return a boolean mask over ``monsoon.index`` for June-September of ``yr``."""
	    stamps = monsoon.index
	    is_requested_year = stamps.year == yr
	    is_jjas_month = (stamps.month >= 6) & (stamps.month <= 9)
	    return is_requested_year & is_jjas_month
	# June-September slice of the anomaly series for each year 1964-2011,
	# followed by its per-year mean, standard error and sum.
	_jjas_anomalies = [dseries[lookup_index(yr)] for yr in range(1964, 2012)]
	jjas = {
	    'means': [np.mean(season) for season in _jjas_anomalies],
	    'SEM': [return_stderr(season) for season in _jjas_anomalies],
	    'sum': [np.sum(season) for season in _jjas_anomalies],
	}
	def plot_jjas_condition():
	    """Render the per-year JJAS anomaly chart and its density plot in Streamlit.

	    Reads the module-level ``jjas`` dictionary. The left panel shows the
	    yearly mean anomaly with error bars, coloured by whether the year is in
	    ``drought_years``, ``flood_years`` or neither; the right panel shows a
	    density estimate of the yearly means.
	    """
	    def classify(year):
	        # Drought membership is checked first, then flood.
	        if year in drought_years:
	            return 'Drought'
	        if year in flood_years:
	            return 'Flood'
	        return 'Normal'

	    condition_colors = alt.Scale(
	        domain=['Drought', 'Flood', 'Normal'],
	        range=['darkred', 'lightblue', 'orange'],
	    )

	    jjas_df = pd.DataFrame({
	        'Year': range(1964, 2012),
	        'Means': jjas['means'],
	        'SEM': jjas['SEM'],
	        'Sum': jjas['sum'],
	    })
	    jjas_df['Condition'] = jjas_df['Year'].apply(classify)

	    # Left panel, layer 1: error bars around the yearly means.
	    decade_ticks = alt.Axis(values=list(range(1960, 2011, 10)))
	    error_bars = (
	        alt.Chart(jjas_df)
	        .mark_errorbar(extent='ci')
	        .encode(
	            x=alt.X('Year:O', axis=decade_ticks),
	            y=alt.Y('Means:Q', scale=alt.Scale(zero=False)),
	            yError='SEM:Q',
	            color=alt.Color('Condition:N', scale=condition_colors),
	        )
	        .properties(width=400, height=400, title='Mean JJAS precipitation anomaly')
	    )

	    # Left panel, layer 2: one filled point per year.
	    condition_legend = alt.Legend(title='Condition')
	    points = (
	        alt.Chart(jjas_df)
	        .mark_point(filled=True)
	        .encode(
	            x='Year:O',
	            y='Means:Q',
	            color=alt.Color('Condition', legend=condition_legend),
	        )
	    )

	    error_chart = alt.layer(error_bars, points).interactive()

	    # Right panel: density estimate of the yearly mean anomalies.
	    histogram = (
	        alt.Chart(jjas_df)
	        .transform_density(density='Means', as_=['Means', 'Density'])
	        .mark_area()
	        .encode(
	            x="Means:Q",
	            y='Density:Q',
	            tooltip=['Means:Q', 'Density:Q'],
	        )
	        .properties(width=400, height=400, title='Distribution of JJAS anomalies')
	    )

	    chart = (error_chart | histogram).resolve_legend(color='independent')
	    st.altair_chart(chart, use_container_width=True)

	def return_stderr(data):
	    """Calculate uncertainty of a np array as Standard Error of the Mean.

	    NaN entries are treated as missing: they are ignored by the standard
	    deviation and are not counted as samples. Genuine zeros are counted
	    (a zero is a valid observation).

	    Args:
	        data: array-like of numbers (list, NumPy array or pandas Series).

	    Returns:
	        ``nanstd(data) / sqrt(N - 1)`` where N is the number of non-NaN
	        values, or NaN when fewer than two values are available.
	    """
	    values = np.asarray(data, dtype=float)
	    # Sample size = entries that actually hold a value. Counting non-zero
	    # entries instead would drop real zeros and count NaN as a sample.
	    n_valid = np.count_nonzero(~np.isnan(values))
	    if n_valid < 2:
	        return np.nan
	    return np.nanstd(values) / np.sqrt(n_valid - 1)

	def make_cframe(c_years):
	    """Build a year-by-position matrix of Olou counts with column statistics.

	    For each year in ``c_years`` the ``olou.Counts`` records of that year
	    are written, in order, into one row of the matrix. Zero entries
	    (unfilled slots or zero counts) are then replaced by NaN.

	    NOTE(review): records are placed by their position within the year,
	    not by calendar month, so a year with a missing record would shift
	    later values left -- confirm the data has one record per month.

	    Args:
	        c_years: sequence of years to collect.

	    Returns:
	        Tuple ``(c_group, c_means, c_errors)``: the ``len(c_years) x 12``
	        float array, the list of 12 column NaN-means and the list of 12
	        column standard errors from ``return_stderr``.

	    Raises:
	        IndexError: if a year holds more than 12 records.
	    """
	    n_months = 12
	    year_of_record = olou.index.year  # hoisted: identical for every year
	    c_group = np.zeros((len(c_years), n_months), dtype=float)
	    for row, yr in enumerate(c_years):
	        # Use the underlying array: an integer key on a datetime-indexed
	        # Series is a label lookup in recent pandas, not a position.
	        yearly_counts = olou.Counts[year_of_record == yr].values
	        if len(yearly_counts) > n_months:
	            raise IndexError(
	                f"year {yr} has {len(yearly_counts)} records; at most {n_months} fit"
	            )
	        c_group[row, :len(yearly_counts)] = yearly_counts
	    c_group[c_group == 0] = np.nan
	    c_means = [np.nanmean(c_group[:, month]) for month in range(n_months)]
	    c_errors = [return_stderr(c_group[:, month]) for month in range(n_months)]
	    return c_group, c_means, c_errors

	def bootstrap_r(mean_list, error_list, iterations=1000):
	    """Bootstrap Pearson r and p-values for positions 4-8 of a mean profile.

	    Each iteration draws, for every position 4..8, a random integer between
	    ``(mean - error) * 100000`` and ``(mean + error) * 100000`` (upper bound
	    exclusive), divides it by 100, and correlates the five draws with
	    ``[0, 1, 2, 3, 4]``.

	    Args:
	        mean_list: indexable of means (at least 9 entries).
	        error_list: indexable of uncertainties matching ``mean_list``.
	        iterations: number of bootstrap rounds.

	    Returns:
	        Tuple ``(r_values, p_values)`` of NumPy arrays, one entry per round.
	    """
	    time_axis = [0, 1, 2, 3, 4]
	    r_samples = []
	    p_samples = []
	    for _ in range(iterations):
	        draws = []
	        for position in range(4, 9):
	            centre = mean_list[position]
	            spread = error_list[position]
	            lower = int((centre - spread) * 100000)
	            upper = int((centre + spread) * 100000)
	            draws.append(np.random.randint(lower, upper) / 100)
	        r_value, p_value = pearsonr(time_axis, draws)
	        r_samples.append(r_value)
	        p_samples.append(p_value)
	    return np.array(r_samples), np.array(p_samples)

	def freedman_diaconis_bins(a):
	    """Return a histogram bin count from the Freedman-Diaconis rule.

	    The bin width is ``2 * IQR / n ** (1/3)``; the count is the data range
	    divided by that width, rounded up. When the width is zero the square
	    root of the sample size (truncated) is returned instead.
	    """
	    values = np.asarray(a)
	    bin_width = 2 * iqr(values) / (len(values) ** (1 / 3))
	    if bin_width != 0:
	        span = values.max() - values.min()
	        return int(np.ceil(span / bin_width))
	    # Zero IQR: fall back to the square-root rule.
	    return int(np.sqrt(values.size))

	def add_hist(data, col_key, axobj, mkstyle='o', obsval=None, mylabel=None, bin_num=None):
	    """Draw a normalised histogram of ``data`` on ``axobj``, optionally marking a value.

	    Args:
	        data: values to histogram.
	        col_key: colour used for the bar faces and edges.
	        axobj: axes-like object providing ``bar``, ``vlines`` and ``plot``.
	        mkstyle: marker style for the observed-value marker.
	        obsval: observed value to mark with a dashed line and a marker;
	            ``None`` (the default) draws no marker. A value of 0 is marked.
	        mylabel: legend label attached to the bars only.
	        bin_num: number of bins; when missing or 0 it is chosen by
	            ``freedman_diaconis_bins``. Float values are truncated to int.
	    """
	    if not bin_num:
	        bin_num = freedman_diaconis_bins(data)
	    # Cast before use: np.histogram rejects a float bin count.
	    bin_num = int(bin_num)

	    hist, bin_edges = np.histogram(data, bins=bin_num, density=False)
	    norm_hist = hist / hist.sum()
	    left_edges = bin_edges[:-1]
	    # NOTE(review): bars are positioned at the left bin edges and the marker
	    # below uses the bar whose left edge is nearest the observed value --
	    # confirm this pairing is the intended placement.
	    axobj.bar(left_edges, norm_hist, width=bin_edges[1] - bin_edges[0],
	              color=col_key, edgecolor=col_key, alpha=0.3, label=mylabel)

	    # Compare against None explicitly so an observed value of exactly 0
	    # still gets its marker.
	    if obsval is None:
	        return

	    distance = abs(obsval - left_edges)
	    lookup = np.where(distance == distance.min())
	    # The line and the marker carry no legend label; only the bars do.
	    axobj.vlines(obsval, 0, norm_hist[lookup], linestyles='dashed',
	                 lw=1.0, zorder=6, label=None)
	    axobj.plot(obsval, norm_hist[lookup], color='k', marker=mkstyle,
	               ms=5., zorder=7, label=None)

	# Streamlit application body
	def _draw_sample_profile(ax, months, dataset, derr, col_key, tick_labels):
	    """Plot a 12-position mean profile with a shaded error band on ``ax``.

	    Positions 4-8 are drawn solid with a stronger band; the positions
	    before and after are dashed with a lighter band.
	    """
	    lthick = 1.0
	    # (positions, line style, band opacity) for the three stretches.
	    segments = (
	        (slice(0, 5), 'k--', 0.15),
	        (slice(4, 9), 'k-', 0.3),
	        (slice(8, None), 'k--', 0.15),
	    )
	    for span, line_style, _ in segments:
	        ax.plot(months[span], dataset[span], line_style, lw=lthick)
	    for span, _, band_alpha in segments:
	        ax.fill_between(months[span], dataset[span] - derr[span],
	                        dataset[span] + derr[span],
	                        color=col_key, linewidth=0.1, alpha=band_alpha)
	    # The labels name every second month, so the ticks must sit at
	    # positions 0, 2, ..., 10; ticks at 0..5 would mislabel the axis.
	    ax.set_xticks(months[::2])
	    ax.set_xticklabels(tick_labels)
	    ax.set_xlim(0, 11)


	def _show_time_series():
	    """Sidebar event filter plus the two-panel time-series figure."""
	    event_type = st.sidebar.selectbox("Event Type", ('All', 'Drought', 'Flood'))

	    st.header("Time Series Analysis")
	    plot_timeseries(event_type)


	def _show_jjas_condition():
	    """JJAS anomaly view."""
	    st.header("JJAS Condition Analysis")
	    # plot_jjas_condition() takes no arguments and reads the module-level
	    # `jjas`, so nothing needs to be recomputed here before calling it.
	    plot_jjas_condition()


	def _show_drought_flood_sample():
	    """Composite profiles, Pearson test and bootstrap histograms."""
	    st.header("Drought/Flood Sample Analysis")

	    _, d_means, d_errors = make_cframe(drought_years)
	    _, f_means, f_errors = make_cframe(flood_years)

	    # Scale to thousands of counts, matching the axis label below.
	    d_means = np.array(d_means) * 0.001
	    f_means = np.array(f_means) * 0.001
	    d_errors = np.array(d_errors) * 0.001
	    f_errors = np.array(f_errors) * 0.001

	    mrange = np.arange(0, 12)
	    xlabs = ['Jan', 'Mar', 'May', 'Jul', 'Sep', 'Nov']

	    fig = plt.figure()
	    try:
	        fig.set_size_inches(7.48, 3.54)
	        ax1 = fig.add_subplot(121)
	        ax2 = fig.add_subplot(122)
	        _draw_sample_profile(ax1, mrange, d_means, d_errors, 'r', xlabs)
	        _draw_sample_profile(ax2, mrange, f_means, f_errors, 'b', xlabs)
	        ax1.set_ylabel(r"Neutron counts (cnt./min.$\times10^{3}$)", fontsize=11)
	        ax1.set_title('Drought sample')
	        ax2.set_title('Flood sample')
	        st.pyplot(fig)
	    finally:
	        # Release the figure; pyplot keeps it registered otherwise.
	        plt.close(fig)

	    rval_d, pval_d = pearsonr(range(5), d_means[4:9])
	    rval_f, pval_f = pearsonr(range(5), f_means[4:9])
	    st.write("A Pearson's r test, gives linear regressions and two-tailed p-values of:")
	    st.write(f"Drought sample: r-value = {rval_d:4.3f}, p-value = {pval_d:4.3f}")
	    st.write(f"Flood sample: r-value = {rval_f:4.3f}, p-value = {pval_f:4.3f}")

	    # NOTE(review): the bootstrap is redrawn on every call, so the
	    # histograms differ between runs -- consider caching the draws.
	    rbs1, pbs1 = bootstrap_r(mean_list=d_means, error_list=d_errors)
	    rbs2, pbs2 = bootstrap_r(mean_list=f_means, error_list=f_errors)

	    def update_plots(sample_type, bin_num_r, bin_num_p):
	        """Draw the bootstrap r-value and p-value histograms for the chosen sample(s)."""
	        show_drought = sample_type in ('Drought', 'Both')
	        show_flood = sample_type in ('Flood', 'Both')

	        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
	        try:
	            # Left panel: bootstrap r-values with the observed r marked.
	            if show_drought:
	                add_hist(data=rbs1, col_key='r', axobj=ax1, obsval=rval_d,
	                         mylabel="Drought sample", mkstyle='o', bin_num=bin_num_r)
	            if show_flood:
	                add_hist(data=rbs2, col_key='b', axobj=ax1, obsval=rval_f,
	                         mylabel="Flood sample", mkstyle='D', bin_num=bin_num_r)
	            ax1.legend(loc='upper right')
	            ax1.set_ylabel('Density')
	            ax1.set_xlabel('$r$-values')
	            ax1.set_title('Potential $r$-values from Bootstrap')

	            # Right panel: bootstrap p-values with the observed p marked.
	            if show_drought:
	                add_hist(data=pbs1, col_key='r', axobj=ax2, obsval=pval_d,
	                         mkstyle='o', bin_num=bin_num_p)
	            if show_flood:
	                add_hist(data=pbs2, col_key='b', axobj=ax2, obsval=pval_f,
	                         mkstyle='D', bin_num=bin_num_p)

	            # Cumulative density of the p-values on a secondary y-axis.
	            ax3 = ax2.twinx()
	            if show_drought:
	                sns.kdeplot(pbs1, cumulative=True, color='r', ax=ax3,
	                            lw=1, alpha=0.3, zorder=10)
	            if show_flood:
	                sns.kdeplot(pbs2, cumulative=True, color='b', ax=ax3,
	                            lw=1, alpha=0.3, zorder=11)
	            ax3.grid(False)
	            ax3.set_ylabel("Cumulative density")
	            ax2.set_xlabel(r'$p$-value')
	            ax2.set_title(r'Potential $p$-values from Bootstrap')

	            st.pyplot(fig)
	        finally:
	            plt.close(fig)

	    sample_dropdown = st.sidebar.selectbox('Sample:', ['Both', 'Drought', 'Flood'])
	    bin_slider_r = st.sidebar.slider('r-value bins:', min_value=10, max_value=100, value=30, step=1)
	    bin_slider_p = st.sidebar.slider('p-value bins:', min_value=10, max_value=100, value=25, step=1)

	    update_plots(sample_dropdown, bin_slider_r, bin_slider_p)


	def main():
	    """Build the page: title, analysis selector and the selected view."""
	    # Application title
	    st.title('Monsoon Data Analysis')

	    # Analysis options
	    analysis_type = st.sidebar.selectbox(
	        "Select Analysis",
	        ("Time Series", "JJAS Condition", "Drought/Flood Sample"))

	    if analysis_type == "Time Series":
	        _show_time_series()
	    elif analysis_type == "JJAS Condition":
	        _show_jjas_condition()
	    elif analysis_type == "Drought/Flood Sample":
	        _show_drought_flood_sample()

	# Build the page only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == "__main__":
	    main()