Spaces: huggingface112 (Running)
Commit a77544c • Parent(s): 2b059b0 • committed on
enhanced historyReturnCard

Changed files:
- appComponents.py +172 -122
- db_operation.py +48 -2
- index_page.py +33 -30
- instance/local.db +2 -2
- instance/log.json +1 -1
- pipeline.py +65 -23
- script/processing.py → processing.py +365 -15
- testing_pipeline.ipynb +0 -0
appComponents.py
CHANGED
@@ -1,4 +1,4 @@
-
+import processing
 from datetime import datetime, timedelta
 import panel as pn
 import pandas as pd
@@ -14,7 +14,7 @@ import plotly.graph_objs as go
 # import warnings
 pn.extension('mathjax')
 pn.extension('plotly')
-
+
 # warnings.filterwarnings("ignore", category=pd.core.common.SettingWithCopyWarning)
 # overall performance defaults to 30 days

@@ -38,7 +38,6 @@ def create_portfolio_overview(df_list):
     ip_eval_df = p_eval_df.interactive()
     isector_eval_df = sector_eval_df.interactive()

-
     ranged_ip_eval_df = ip_eval_df[ip_eval_df.date.between(
         range_slider.param.value_start, range_slider.param.value_end)]
     ranged_isector_eval_df = isector_eval_df[isector_eval_df.date.between(
@@ -233,7 +232,7 @@ class TotalReturnCard(Viewer):

     def create_report(self):
         # Calculate the total return and risk
-        result = processing.
+        result = processing.calculate_norm_return(
             self.eval_df, self.start_date, self.end_date)
         most_recent_row = result.tail(1)
         active_return = most_recent_row.active_return.values[0]
@@ -258,7 +257,7 @@ class TotalReturnCard(Viewer):
         allocation = total_attributes.allocation
         selection = total_attributes.selection

-        # Create a function for text report
+        # Create a function for text report
         report = f"""
         <style>
         .compact-container {{
@@ -347,7 +346,7 @@ class TotalReturnCard(Viewer):
         return report

     def create_plot(self):
-        result = processing.
+        result = processing.calculate_norm_return(
             self.eval_df, self.start_date, self.end_date)
         fig = px.line(result, x="date", y=['return_p', 'return_b'])
         fig.update_traces(mode="lines+markers",
@@ -463,37 +462,52 @@ class DrawDownCard(Viewer):


 class HistReturnCard(Viewer):
-
+
     return_barplot = param.Parameterized()
+    calculated_b_stock = param.Parameterized()
+    calculated_p_stock = param.Parameterized()
     select_resolution = param.ObjectSelector(
         default='每月回报', objects=['每日回报', '每周回报', '每月回报', '每年回报'])

+    def _calculate_return(self, df, freq):
+        # 'W-MON' periods start on Tuesday and end on Monday
+        grouped = df.groupby(pd.Grouper(key='time', freq=freq))
+        agg_df = grouped.agg({'weighted_log_return': 'sum'})
+        # time indicates the end of each period
+        agg_df['time'] = agg_df.index
+        # convert the summed log return to a percentage return
+        agg_df['return'] = np.exp(agg_df['weighted_log_return']) - 1
+
+        return agg_df.reset_index(drop=True)
+
     def update_aggregate_df(self):
         freq = None
         if self.select_resolution == "每日回报":
-
+            freq = "D"
         elif self.select_resolution == "每月回报":
             freq = 'M'
         elif self.select_resolution == "每年回报":
             freq = 'Y'
         elif self.select_resolution == "每周回报":
-            freq = 'W'
-
-
-
-
-
-
+            freq = 'W-MON'
+
+        p_return = self._calculate_return(self.calculated_p_stock, freq)
+        b_return = self._calculate_return(self.calculated_b_stock, freq)
+
+        merge_df = pd.merge(p_return, b_return, on='time',
+                            how='outer', suffixes=('_p', '_b'))
+        return merge_df

     def create_attributes_barplot(self):
-        self.attribute_df = self.
-        fig = px.bar(self.attribute_df, x='
-            'allocation', 'selection', 'interaction', '
+        self.attribute_df = self._update_attributes_df()
+        fig = px.bar(self.attribute_df, x='period_str', y=[
+            'allocation', 'selection', 'interaction', 'notional_active_return', 'active_return'])
         colname_to_name = {
             'allocation': '分配',
             'selection': '选择',
             'interaction': '交互',
-            '
+            'notional_active_return': '名义主动回报',
             'active_return': '实际主动回报'
         }
         fig.for_each_trace(lambda t: t.update(name=colname_to_name.get(t.name, t.name),
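The new `_calculate_return` helper aggregates weighted log returns into calendar buckets and only then converts back to simple returns. A minimal standalone sketch of the same technique, using toy, hypothetical values:

    import numpy as np
    import pandas as pd

    # toy daily weighted log returns (hypothetical values)
    df = pd.DataFrame({
        'time': pd.date_range('2023-08-01', periods=10, freq='D'),
        'weighted_log_return': np.log1p([0.01, -0.02, 0.005, 0.0, 0.01,
                                         -0.01, 0.02, 0.0, 0.003, -0.004]),
    })

    # 'W-MON' buckets run Tuesday..Monday and are labelled by their Monday
    weekly = df.groupby(pd.Grouper(key='time', freq='W-MON')).agg(
        {'weighted_log_return': 'sum'})
    # a summed log return converts back to a simple (percentage) return
    weekly['return'] = np.exp(weekly['weighted_log_return']) - 1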
@@ -502,6 +516,7 @@ class HistReturnCard(Viewer):
                                               hovertemplate=t.hovertemplate.replace(
                                                   t.name, colname_to_name.get(t.name, t.name))
                                               ))
+
         fig.update_layout(barmode='group', title='主动回报归因',
                           bargap=0.0, bargroupgap=0.0)
         fig.update_layout(**styling.plot_layout)
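The `for_each_trace` pattern above renames the wide-form traces for both the legend and the hover text in one pass. A standalone sketch of the same trick with toy data:

    import pandas as pd
    import plotly.express as px

    df = pd.DataFrame({'period': ['P1', 'P2'],
                       'allocation': [0.01, -0.02],
                       'selection': [0.03, 0.01]})
    fig = px.bar(df, x='period', y=['allocation', 'selection'], barmode='group')

    rename = {'allocation': '分配', 'selection': '选择'}
    # each wide-form column becomes one trace; patch its legend name
    # and the trace name embedded in the hover template together
    fig.for_each_trace(lambda t: t.update(
        name=rename.get(t.name, t.name),
        hovertemplate=t.hovertemplate.replace(t.name, rename.get(t.name, t.name))))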
@@ -510,8 +525,8 @@ class HistReturnCard(Viewer):

     def create_return_barplot(self):
         self.agg_df = self.update_aggregate_df()
-        fig = px.bar(self.agg_df, x='
-            '
+        fig = px.bar(self.agg_df, x='time', y=[
+                     'return_p', 'return_b'],
                      barmode='overlay',
                      title='周期回报',
                      )
@@ -533,14 +548,14 @@ class HistReturnCard(Viewer):

         return fig.to_dict()

-    @param.depends('
+    @param.depends('calculated_p_stock', 'calculated_b_stock', 'select_resolution', watch=True)
     def update(self):
         return_barplot = self.create_return_barplot()
         self.return_barplot.object = return_barplot
         attributes_barplot = self.create_attributes_barplot()
         self.attribute_barplot.object = attributes_barplot

-    def
+    def _update_attributes_df(self):
         freq = None
         if self.select_resolution == "每日回报":
             freq = 'D'
@@ -549,19 +564,23 @@ class HistReturnCard(Viewer):
         elif self.select_resolution == "每年回报":
             freq = 'Y'
         elif self.select_resolution == "每周回报":
-            freq = 'W'
-
-
-
-
-
-
+            freq = 'W-MON'
+        agg_p = processing.aggregate_analytic_df_by_period(self.calculated_p_stock, freq)
+        agg_b = processing.aggregate_analytic_df_by_period(self.calculated_b_stock, freq)
+        bhb_df = processing.calculate_periodic_BHB(agg_p, agg_b)
+        agg_bhb = processing.aggregate_bhb_df(bhb_df)
+        agg_bhb['period_str'] = agg_bhb.index.map(lambda x: str(x))
+        return agg_bhb
+
+    def __init__(self, calculated_p_stock, calculated_b_stock, **params):
         self.calculated_p_stock = calculated_p_stock
         self.calculated_b_stock = calculated_b_stock
+
         self._range_slider = pn.widgets.DateRangeSlider(
             name='Date Range Slider',
-            start=self.
-            value=(self.
+            start=self.calculated_p_stock.time.min(), end=self.calculated_p_stock.time.max(),
+            value=(self.calculated_p_stock.time.min(),
+                   self.calculated_p_stock.time.max()),

         )
         self.return_barplot = pn.pane.Plotly(self.create_return_barplot())
@@ -578,20 +597,21 @@ class HistReturnCard(Viewer):

 class PortfolioComposationCard(Viewer):
     p_stock_df = param.Parameterized()
+    selected_date = param.Parameterized()

     def create_cash_position_df(self):
-        aggregate_df = self.p_stock_df.groupby('
-            '
+        aggregate_df = self.p_stock_df.groupby('time', as_index=False).agg({
+            'cash': 'sum'
         })
         aggregate_df['type'] = 'portfolio'
         not_in_portfolio_df = aggregate_df.copy()
         not_in_portfolio_df['type'] = 'not_in_portfolio'
-        not_in_portfolio_df['
+        not_in_portfolio_df['cash'] = 1000
         # append df
         aggregate_df = pd.concat([aggregate_df, not_in_portfolio_df])
         # sort
-        aggregate_df.sort_values(by=['
-        return aggregate_df[aggregate_df.
+        aggregate_df.sort_values(by=['time'], inplace=True)
+        return aggregate_df[aggregate_df.time.between(self.date_range.value[0], self.date_range.value[1])]

     @param.depends('p_stock_df', 'date_range.value', watch=True)
     def update_trend_plot(self):
@@ -599,7 +619,7 @@ class PortfolioComposationCard(Viewer):

     def create_trend_plot(self):
         aggregate_df = self.create_cash_position_df()
-        fig = px.bar(aggregate_df, x='
+        fig = px.bar(aggregate_df, x='time', y='cash', color='type')
         fig.update_layout(legend=dict(
             orientation="h",
             yanchor="bottom",
@@ -619,57 +639,69 @@ class PortfolioComposationCard(Viewer):
         return fig.to_dict()

     def create_treemap(self):
-        self.selected_df
+        self.selected_df = self.p_stock_df[self.p_stock_df.time ==
+                                           self.datetime_picker.value]
+        self.selected_df['position'] = '股票'
         not_in_portfolio_row = pd.DataFrame({
-            'display_name': ['
-            'position': ['
-            'aggregate_sector': ['
-            '
-            '
-            'portfolio_pct': [0]
+            'display_name': ['闲置'],
+            'position': ['闲置'],
+            'aggregate_sector': ['闲置'],
+            'cash': [100],
+            'weighted_return': [0]
         })
         df = pd.concat([self.selected_df, not_in_portfolio_row],
                        ignore_index=True)
-
-
+
+        fig = px.treemap(df,
+                         # path=[px.Constant('cash_position'), 'position',
+                         #       'aggregate_sector', 'display_name'],
+                         path=['position', 'aggregate_sector', 'display_name'],
+                         values='cash',
+                         color='weighted_return',
+                         hover_data=['weighted_return', 'cash'],
                          color_continuous_scale='RdBu',
                          color_continuous_midpoint=np.average(
-                             df['
+                             df['weighted_return'])
                          )
+
         fig.update_layout(styling.plot_layout)
         fig.update_layout(coloraxis_colorbar=dict(
-            title="
-        colname_to_name = {
-
-
-
-
-        }
-        fig.for_each_trace(lambda t: t.update(name=colname_to_name.get(t.name, t.name),
-
-
-        ))
+            title="累计加权回报率"))
+        # colname_to_name = {
+        #     'cash_position': '现金分布',
+        #     'portfolio_return': '加权回报',
+        #     'not_in_portfolio': '不在portfolio中',
+        #     'current_weight': '现金',
+        # }
+        # fig.for_each_trace(lambda t: t.update(name=colname_to_name.get(t.name, t.name),
+        #                                       hovertemplate=t.hovertemplate.replace(
+        #                                           t.name, colname_to_name.get(t.name, t.name))
+        #                                       ))
         return fig.to_dict()

-    def __init__(self,
-        self.p_stock_df =
-        self.
-
-
-
-
-
-
-
+    def __init__(self, analytic_df, **params):
+        self.p_stock_df = analytic_df
+        self.p_stock_df = processing.calculate_weighted_return(self.p_stock_df,
+                                                               start=self.p_stock_df.time.min(),
+                                                               end=self.p_stock_df.time.max())
+
+        # convert datetime to date
+        enabled_dates = [time.date() for time in self.p_stock_df.time.unique()]
+        self.datetime_picker = pn.widgets.DatetimePicker(name='选择某日资金分布',
+                                                         start=self.p_stock_df.time.min(),
+                                                         end=self.p_stock_df.time.max(),
+                                                         value=self.p_stock_df.time.max(),
+                                                         enabled_dates=enabled_dates,
+                                                         )
         self.date_range = pn.widgets.DateRangeSlider(name='选择资金分布走势区间',
-                                                     start=self.p_stock_df.
-                                                     end=self.p_stock_df.
-                                                     value=(self.p_stock_df.
-                                                     ), self.p_stock_df.
+                                                     start=self.p_stock_df.time.min(),
+                                                     end=self.p_stock_df.time.max(),
+                                                     value=(self.p_stock_df.time.min(
+                                                     ), self.p_stock_df.time.max()),
                                                      )
-
-        self.date_picker.value]
+
         self.tree_plot = pn.pane.Plotly(self.create_treemap())
         self.trend_plot = pn.pane.Plotly(self.create_trend_plot())
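The treemap call above sizes tiles by cash and colors them by weighted return on a diverging scale. A minimal, self-contained sketch of the same `px.treemap` usage with hypothetical holdings:

    import numpy as np
    import pandas as pd
    import plotly.express as px

    # hypothetical holdings snapshot
    df = pd.DataFrame({
        'position': ['股票', '股票', '闲置'],
        'aggregate_sector': ['科技', '金融', '闲置'],
        'display_name': ['A', 'B', '闲置'],
        'cash': [500, 300, 100],
        'weighted_return': [0.02, -0.01, 0.0],
    })
    fig = px.treemap(df,
                     path=['position', 'aggregate_sector', 'display_name'],
                     values='cash',
                     color='weighted_return',
                     color_continuous_scale='RdBu',
                     color_continuous_midpoint=np.average(df['weighted_return']))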
@@ -677,83 +709,96 @@ class PortfolioComposationCard(Viewer):
         super().__init__(**params)

     def __panel__(self):
-        self._layout = pn.Card(self.
+        self._layout = pn.Card(self.datetime_picker, self.tree_plot, self.date_range, self.trend_plot,
                                width=500, header=pn.pane.Str('资金分布'))
         return self._layout

-    @param.depends('
+    @param.depends('datetime_picker.value', 'p_stock_df', watch=True)
     def update(self):
-        self.selected_df = self.p_stock_df[self.p_stock_df.date ==
-                                           self.date_picker.value]
         tree_plot = self.create_treemap()
         self.tree_plot.object = tree_plot


 class BestAndWorstStocks(Viewer):
-    p_stock_df = param.Parameter()
-    b_stock_df = param.Parameter()
     start_date = param.Parameter()
     end_date = param.Parameter()
-
-
-
-
-
-
-
-
+    hidden_col = [
+        'index',
+        'open',
+        'high',
+        'low',
+        'close',
+        'volume',
+        'money',
+        'pct',
+        'sector',
+        'aggregate_sector',
+        'ave_price',
+        'weight',
+        'ini_w',
+        'name',
+        'pnl'
+    ]
+    forzen_columns = ['display_name', 'return', 'cum_pnl', 'shares']
+    description = "股票表现排名"
+    tooltip = "在一个时间窗口中累计盈利最高和最低的股票,包括已经卖出的股票,如果表格的日期小于窗口的结束时间代表已经卖出"

     def create_tabulator(self, df):
         col_title_map = {
-            '
+            'display_name': '股票名称',
             'ticker': '股票代码',
-            '
-            '
-            '
-            '
-            '
-            'return': '未加权回报率',
-            'active_return': '加权主动回报率',
+            'time': '日期',
+            'return': '回报率',
+            'sector': '行业',
+            'shares': '持仓',
+            'cash': '现金',
+            'cum_pnl': '累计盈利',
         }
         return pn.widgets.Tabulator(df, sizing_mode='stretch_width',
-                                    hidden_columns=
-
-
-                                    titles=col_title_map)
+                                    hidden_columns=self.hidden_col,
+                                    frozen_columns=self.forzen_columns,
+                                    titles=col_title_map
+                                    )

     @param.depends('start_date', 'end_date', watch=True)
     def update(self):
         result_df = self.get_processed_df()
-        self.best_5_tabulator.value = result_df.
-        self.worst_5_tabulator.value = result_df.
+        self.best_5_tabulator.value = result_df.head(5)
+        self.worst_5_tabulator.value = result_df.tail(5)
+
+    def _get_cum_return(self, df):
+        '''return a df containing the cumulative return at the end date'''
+        result_df = processing.calcualte_return(df=df,
+                                                start=self.start_date,
+                                                end=self.end_date)
+        grouped = result_df.groupby('ticker')
+        last_row = result_df.loc[grouped.time.idxmax()]
+        return last_row

     def get_processed_df(self):
         '''
        calculate attributes and return a sorted dataframe on weighted return
         '''
-
-
-
-        return
-
-    def __init__(self,
-        self.
-        self.b_stock_df = b_stock_df
+        df = processing.calculate_cum_pnl(self.analytic_df,
+                                          start=self.start_date,
+                                          end=self.end_date)
+        df = self._get_cum_return(df)
+        return df.sort_values(by='cum_pnl', ascending=False)
+
+    def __init__(self, analytic_df, **params):
+        self.analytic_df = analytic_df
         self._date_range = pn.widgets.DateRangeSlider(
             name='选择计算回报的时间区间',
-            start=
-            end=
-            value=(
-                timedelta(days=7),
+            start=self.analytic_df.time.min(),
+            end=self.analytic_df.time.max(),
+            value=(self.analytic_df.time.max() -
+                   timedelta(days=7), self.analytic_df.time.max())
         )
         self.start_date = self._date_range.value_start
         self.end_date = self._date_range.value_end
         result_df = self.get_processed_df()
-        self.best_5_tabulator = self.create_tabulator(result_df.
-        self.worst_5_tabulator = self.create_tabulator(result_df.
+        self.best_5_tabulator = self.create_tabulator(result_df.head(5))
+        self.worst_5_tabulator = self.create_tabulator(result_df.tail(5))
         super().__init__(**params)

     @param.depends('_date_range.value', watch=True)
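`_get_cum_return` keeps only each ticker's most recent row via a grouped `idxmax` on the timestamp. The pattern in isolation, with toy data:

    import pandas as pd

    df = pd.DataFrame({
        'ticker': ['AAA', 'AAA', 'BBB'],
        'time': pd.to_datetime(['2023-08-01', '2023-08-02', '2023-08-01']),
        'cum_pnl': [1.0, 2.5, -0.5],
    })
    # idxmax over 'time' within each ticker group picks the latest row,
    # which carries the window-end cumulative figures
    last_rows = df.loc[df.groupby('ticker')['time'].idxmax()]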
@@ -768,7 +813,12 @@ class BestAndWorstStocks(Viewer):
             self.best_5_tabulator,
             pn.pane.Str('加权回报率最低回报5只股票'),
             self.worst_5_tabulator,
-            max_width=500,
+            max_width=500,
+            header=pn.Row(
+                pn.pane.Str(self.description),
+                pn.widgets.TooltipIcon(value=self.tooltip)
+            )
+        )
         return self._layout
db_operation.py
CHANGED
@@ -121,10 +121,49 @@ def append_to_stocks_price_table(df):
     _append_df_to_db(df, ts.STOCKS_PRICE_TABLE, ts.STOCKS_PRICE_TABLE_SCHEMA)

 def get_all_stocks():
+    '''
+    get all stocks information
+
+    Returns
+    -------
+    pd.DataFrame
+        all stocks information
+    '''
     with create_engine(db_url).connect() as conn:
         all_stocks = pd.read_sql(ts.STOCKS_DETAILS_TABLE, con=conn)
     return all_stocks
+def save_portfolio_analytic_df(df):
+    table_name = 'analytic_p'
+    with create_engine(db_url).connect() as conn:
+        df.to_sql(table_name, con=conn, if_exists='replace', index=False)
+
+def get_portfolio_analytic_df():
+    table_name = 'analytic_p'
+    with create_engine(db_url).connect() as conn:
+        df = pd.read_sql(table_name, con=conn)
+        return df
+

+def save_benchmark_analytic_df(df):
+    table_name = 'analytic_b'
+    with create_engine(db_url).connect() as conn:
+        df.to_sql(table_name, con=conn, if_exists='replace', index=False)
+
+def get_benchmark_analytic_df():
+    table_name = 'analytic_b'
+    with create_engine(db_url).connect() as conn:
+        df = pd.read_sql(table_name, con=conn)
+        return df
+
+def save_analytic_df(df):
+    table_name = 'analytic'
+    with create_engine(db_url).connect() as conn:
+        df.to_sql(table_name, con=conn, if_exists='replace', index=False)
+def get_analytic_df():
+    table_name = 'analytic'
+    with create_engine(db_url).connect() as conn:
+        df = pd.read_sql(table_name, con=conn)
+        return df
 def _get_all_row(table_name, ts_column='date'):
     with create_engine(db_url).connect() as conn:
         df = pd.read_sql(table_name, con=conn)
@@ -141,8 +180,15 @@ def get_stocks_price(tickers: list[str]):
     '''
     return a df of stock prices for the given tickers from the stocks price table
     '''
-
+    if len(tickers) == 0:
+        # so the returned df has the same schema as the table
+        query = f"SELECT * FROM {ts.STOCKS_PRICE_TABLE} WHERE 1=0"
+    elif len(tickers) == 1:
+        query = f"SELECT * FROM {ts.STOCKS_PRICE_TABLE} WHERE ticker = '{tickers[0]}'"
+    else:
+        query = f"SELECT * FROM {ts.STOCKS_PRICE_TABLE} WHERE ticker IN {tuple(tickers)}"
     with create_engine(db_url).connect() as conn:
         df = pd.read_sql(query, con=conn)
     df.time = pd.to_datetime(df.time)
-
+    # drop duplicates
+    return df.drop_duplicates(subset=['ticker', 'time'])
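A side note on the query construction: the one-ticker branch exists because a single-element Python tuple renders as ('A',), which is invalid SQL, and the string-formatted queries are injection-prone. A hedged alternative sketch using SQLAlchemy bound parameters (the literal table name stands in for ts.STOCKS_PRICE_TABLE):

    import pandas as pd
    from sqlalchemy import bindparam, create_engine, text

    db_url = 'sqlite:///instance/local.db'

    def get_stocks_price_safe(tickers: list[str]) -> pd.DataFrame:
        if not tickers:
            # an empty IN-list is invalid SQL, so return an empty frame
            # that still carries the table's schema
            query = text("SELECT * FROM stocks_price WHERE 1=0")
            params = {}
        else:
            # an expanding bindparam renders one placeholder per ticker,
            # so the single-element case needs no special branch
            query = text("SELECT * FROM stocks_price WHERE ticker IN :tickers")
            query = query.bindparams(bindparam('tickers', expanding=True))
            params = {'tickers': list(tickers)}
        with create_engine(db_url).connect() as conn:
            return pd.read_sql(query, con=conn, params=params)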
index_page.py
CHANGED
@@ -8,14 +8,15 @@ import random
 import scipy.stats as stats
 import hvplot.pandas  # noqa
 from sqlalchemy import create_engine
-
+import api
 # from backgroundTask import stocks_stream
 from functools import partial
 import plotly.graph_objects as go
 from panel.viewable import Viewer
-
+import processing
 import appComponents
 import param
+import db_operation as db
 # import warnings
 pn.extension('mathjax')
 # warnings.filterwarnings("ignore", category=pd.core.common.SettingWithCopyWarning)
@@ -25,45 +26,47 @@ db_url = 'sqlite:///instance/local.db'
 engine = create_engine(db_url)


-
-
-
+
+analytic_p = db.get_portfolio_analytic_df()
+analytic_b = db.get_benchmark_analytic_df()
 # load benchmark stock
-with engine.connect() as connection:
-
-
-
+# with engine.connect() as connection:
+#     analytics_df = pd.read
+#     calculated_b_stock = pd.read_sql('calculated_b_stock', con=connection)
+#     calculated_p_stock = pd.read_sql('calculated_p_stock', con=connection)
+#     p_eval_df = pd.read_sql('p_eval_result', con=connection)

 stock_overview = appComponents.BestAndWorstStocks(
-
+    analytic_df=analytic_p)
 composation_card = appComponents.PortfolioComposationCard(
-
+    analytic_p)
 monthly_return_card = appComponents.HistReturnCard(
-
-total_return_card = appComponents.TotalReturnCard(name='Range', eval_df=p_eval_df,
-
-
-
-drawdown_card = appComponents.DrawDownCard(
-
+    calculated_p_stock=analytic_p, calculated_b_stock=analytic_b)
+# total_return_card = appComponents.TotalReturnCard(name='Range', eval_df=p_eval_df,
+#                                                   b_stock_df=calculated_b_stock,
+#                                                   p_stock_df=calculated_p_stock,
+#                                                   value=(0, 20))
+# drawdown_card = appComponents.DrawDownCard(
+#     eval_df=p_eval_df, calculated_p_stock=calculated_p_stock, calculated_b_stock=calculated_b_stock)

-top_header = appComponents.TopHeader(
-
-)
+# top_header = appComponents.TopHeader(
+#     eval_df=p_eval_df
+# )

 template = pn.template.FastListTemplate(
     title="Portfolio一览",
     # sidebar=[freq, phase],
 )
-template.main.extend(
-
-    pn.Row(
-
-
-
-
-    )
+template.main.extend([stock_overview, composation_card, monthly_return_card])
+# template.main.extend(
+#     [pn.Row(top_header),
+#      pn.Row(
+#          pn.Column(monthly_return_card, stock_overview,
+#                    width=500, margin=(10, 10, 10, 10)),
+#          pn.Column(total_return_card, drawdown_card, margin=(10, 10, 10, 10)),
+#          pn.Column(composation_card, margin=(10, 10, 10, 10)),
+#      )]
+# )
 template.servable()
 # pn.Row(
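The serving pattern at the bottom of this file, reduced to a runnable skeleton (the two cards are hypothetical stand-ins for the dashboard components):

    import panel as pn

    pn.extension()

    # hypothetical stand-ins for the dashboard cards
    card_a = pn.pane.Markdown('card A')
    card_b = pn.pane.Markdown('card B')

    template = pn.template.FastListTemplate(title="Portfolio一览")
    # main.extend takes a list of panels; each becomes a row in the body
    template.main.extend([card_a, card_b])
    template.servable()  # run with: panel serve index_page.py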
instance/local.db
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b918cd8420f4f314aaff25fb9348f5fcca206b01a0403d9a96b0039d00b55047
+size 17780736
instance/log.json
CHANGED
@@ -1,3 +1,3 @@
 {
-    "daily_update": "2023-08-
+    "daily_update": "2023-08-30 09:02:54"
 }
pipeline.py
CHANGED
@@ -8,6 +8,7 @@ import pytz
 import table_schema as ts
 import db_operation as db
 from log import Log
+import processing
 # import settings
 # fetch new stock price
 stock_price_stream = Stream()
@@ -138,11 +139,6 @@ def need_to_update_stocks_price(delta_time):
     return False


-def processing():
-    '''
-    run the whole processing pipeline here
-    '''
-    pass


 def add_details_to_stock_df(stock_df):
@@ -214,27 +210,27 @@ def right_fill_stock_price():
     '''
     update all stocks price until today.

-    if no
-    default start date is the most recent date in
+    if no benchmark profile, terminate without warning
+    default start date is the most recent date in benchmark profile
     '''
-
+    most_recent_benchmark = db.get_most_recent_benchmark_profile()
     most_recent_stocks_price = db.get_most_recent_stocks_price()

     # fetch all stocks price until today
     stocks_dates = most_recent_stocks_price.time
-
-    if len(
+    b_dates = most_recent_benchmark.date
+    if len(b_dates) == 0:
         return
-    start = stocks_dates[0] if len(stocks_dates) > 0 else
+    start = stocks_dates[0] if len(stocks_dates) > 0 else b_dates[0]
     end = utils.time_in_beijing()

     # frequency is set to daily
     if end - start > dt.timedelta(days=1):
-        new_stocks_price =
+        new_stocks_price = _fetch_all_stocks_price_between(start, end)
         db.append_to_stocks_price_table(new_stocks_price)


-def
+def _fetch_all_stocks_price_between(start, end):
     '''
     patch stock price db with all daily stock price within window
     inclusive on both start and end date
@@ -260,8 +256,7 @@ def fetch_all_stocks_price_between(start, end):
         security=tickers,
         start_date=start,
         end_date=end,
-        frequency='daily'
-        skip_paused=True,)
+        frequency='daily')
     # drop where closing price is null
     stock_price.dropna(subset=['close'], inplace=True)
     return stock_price
@@ -339,15 +334,17 @@ def left_fill_benchmark_profile():
 def left_fill_stocks_price():
     '''
     left fill stock price
-    fill missing entries between the oldest date in
+    fill missing entries between the oldest date in benchmark
+    profile and the oldest date in stock price table

-    if no
-    if no stock price table, the span would be from
+    if no benchmark profile, terminate without warning
+    if no stock price table, the span would be from
+    the oldest date in benchmark profile to the most recent date in benchmark profile


     '''
-    #
-    p_start = db.
+    # use benchmark because the benchmark profile only updates once a month
+    p_start = db.get_oldest_benchmark_profile().date
     # get oldest time in stock price table
     stock_start = db.get_oldest_stocks_price().time
     # if no portfolio profile, terminate
@@ -356,14 +353,14 @@ def left_fill_stocks_price():
     # no stock price, span the entire portfolio profile
     elif len(stock_start) == 0:
         start = p_start[0]
-        end = db.
+        end = db.get_most_recent_benchmark_profile().date[0]
     else:
         start = p_start[0]
         end = stock_start[0]
+
     if start < end:
         # fetch and update
-        new_entry =
+        new_entry = _fetch_all_stocks_price_between(start, end)
         db.append_to_stocks_price_table(new_entry)


@@ -374,6 +371,49 @@ def updaet_benchmark_to_db():
     pass


+def get_stocks_in_profile(profile_df):
+    ticker_list = profile_df.ticker.unique().tolist()
+    stocks_df = db.get_stocks_price(ticker_list)
+    return stocks_df
+
+
+def batch_processing():
+    '''perform when portfolio or benchmark is updated'''
+    portfolio_p = db.get_all_portfolio_profile()
+    benchmark_p = db.get_all_benchmark_profile()
+    p_stocks_df = get_stocks_in_profile(portfolio_p)
+    b_stocks_df = get_stocks_in_profile(benchmark_p)
+
+    # temporarily handle renaming date to time
+    portfolio_p.rename(
+        columns={'date': 'time', 'weight': 'ini_w'}, inplace=True)
+    benchmark_p.rename(columns={'date': 'time'}, inplace=True)
+    # normalize weight in benchmark
+    grouped = benchmark_p.groupby('time')
+    benchmark_p['ini_w'] = grouped['weight'].transform(lambda x: x / x.sum())
+    # add profile information into stock price
+    analytic_b = processing.create_analytic_df(b_stocks_df, benchmark_p)
+    analytic_p = processing.create_analytic_df(p_stocks_df, portfolio_p)
+    # p stock weight
+    processing.calculate_cash(analytic_p)
+    processing.calculate_weight_using_cash(analytic_p)
+    processing.calculate_pct(analytic_p)
+    processing.calculate_norm_pct(analytic_p)
+    # b stock weight
+    analytic_b.sort_values(by=['time'], inplace=True)
+    grouped = analytic_b.groupby('ticker')
+    analytic_b['pct'] = grouped['close'].pct_change()
+    processing.calculate_weight_using_pct(analytic_b)
+    # pnl
+    processing.calculate_pnl(analytic_p)
+    # log return
+    # need to crop on the left side first
+    analytic_b = analytic_b[analytic_b['time'] >= analytic_p.time.min()].copy()
+    processing.calculate_log_return(analytic_p)
+    processing.calculate_log_return(analytic_b)
+    db.save_portfolio_analytic_df(analytic_p)
+    db.save_benchmark_analytic_df(analytic_b)
+
 async def daily_update():
     last_update = log.get_time('daily_update')
     if last_update is None or utils.time_in_beijing() - last_update >= dt.timedelta(days=1):
@@ -393,6 +433,8 @@ async def daily_update():
         log.update_log('daily_update')
     else:
         print("no update needed")
+    batch_processing()
+    print("updated analytic")


 def update():
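The benchmark-weight normalization in `batch_processing` relies on a grouped `transform`, which returns a like-indexed series. A minimal sketch of the idea with made-up numbers:

    import pandas as pd

    df = pd.DataFrame({'time': ['2023-08-01'] * 2 + ['2023-08-02'] * 2,
                       'ticker': ['AAA', 'BBB'] * 2,
                       'weight': [2.0, 6.0, 3.0, 9.0]})
    # rescale raw weights to sum to 1 within each date in one assignment
    df['ini_w'] = df.groupby('time')['weight'].transform(lambda x: x / x.sum())
    # 2023-08-01 -> 0.25 / 0.75, 2023-08-02 -> 0.25 / 0.75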
script/processing.py → processing.py
RENAMED
@@ -281,21 +281,165 @@ def calculate_total_attribution(calculated_p_stock, calculated_b_stock):
     # return df


-def
+def calcualte_return(df: pd.DataFrame, start, end):
     '''
-    return a
-
+    calculate return within a window for each ticker entry,
+    inclusive on both ends
+
+    this is an intermediate step for attribute calculation,
+    using the weighted_log_return
     '''
-
-
-
-
-
-
-
-
-
-
+    df = df[(df.time >= start) & (df.time <= end)].copy()
+    inter_df = df.sort_values(by=['time'])
+    inter_df['cum_log_return'] = inter_df.groupby(
+        'ticker')['log_return'].cumsum()
+    inter_df['percentage_return'] = np.exp(
+        inter_df['cum_log_return']) - 1
+    # patch
+    df['return'] = inter_df['percentage_return']
+    return df
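This function (and the helpers that follow) converts a cumulative sum of log returns back to a simple return via exp(·) − 1. A quick sanity check of that identity with made-up daily returns:

    import numpy as np

    daily = np.array([0.01, -0.02, 0.015])          # hypothetical simple returns
    compounded = np.prod(1 + daily) - 1             # direct compounding
    via_logs = np.exp(np.log1p(daily).sum()) - 1    # sum of log returns, then exp
    assert np.isclose(compounded, via_logs)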
+
+
+def calculate_weighted_return(df: pd.DataFrame, start, end):
+    '''
+    calculate weighted return within a window for each ticker entry,
+    inclusive on both ends
+    calculation uses the weighted_log_return
+    '''
+    df = df[(df.time >= start) & (df.time <= end)].copy()
+    inter_df = df.sort_values(by=['time'])
+    inter_df['cum_weighted_log_return'] = inter_df.groupby(
+        'ticker')['weighted_log_return'].cumsum()
+    inter_df['percentage_return'] = np.exp(
+        inter_df['cum_weighted_log_return']) - 1
+    # patch
+    df['weighted_return'] = inter_df['percentage_return']
+    return df
+
+
+def calculate_log_return(df: pd.DataFrame):
+    '''
+    patch df with the weighted log return and unweighted log return
+    calculated using the close price
+
+    an intermediate step to calculate the weighted return;
+    the benefit of using this is that it can be aggregated over any
+    time window and works for both portfolio and benchmark
+    '''
+    inter_df = df.sort_values(by=['time'])
+    grouped = inter_df.groupby('ticker')
+    inter_df['prev_w'] = grouped['weight'].shift(1)
+    inter_df['prev_close'] = grouped['close'].shift(1)
+    inter_df['log_return'] = np.log(inter_df['close'] / inter_df['prev_close'])
+    inter_df['weighted_log_return'] = inter_df['log_return'] * \
+        inter_df['prev_w']
+    # patch
+    df['log_return'] = inter_df['log_return']
+    df['weighted_log_return'] = inter_df['weighted_log_return']
+
+# TODO: change to log return instead
+# def calculate_return(df, start, end):
+#     df = df[(df.time >= start) & (df.time <= end)].copy()
+#     df.sort_values(by=['time'], inplace=True)
+#     grouped = df.groupby('ticker')
+#     df['return'] = (1 + grouped.pct.cumprod()) - 1
+#     return df
+
+# def calculate_norm_return(df, start, end):
+#     '''
+#     calculate accumulative normalized return within a window
+#     for each ticker entry using norm_pct
+#
+#     normalized return is the weighted return with respect to
+#     the whole portfolio
+#
+#     Return
+#     ------
+#     dataframe
+#         dataframe with return for each ticker
+#     '''
+#     df = df[(df.time >= start) & (df.time <= end)].copy()
+#     df.sort_values(by=['time'], inplace=True)
+#     grouped = df.groupby('ticker')
+#     df['norm_return'] = (1 + grouped.norm_pct.cumprod()) - 1
+#     return df
+
+
+def _uniformize_time_series(profile_df):
+    '''
+    a helper function to create analytic_df
+
+    make each entry in the time series have the same dimension
+    by giving a stock that was held in the previous period but is no
+    longer held 0 shares and 0 ini_w
+
+    Parameters
+    ----------
+    profile_df : dataframe
+        portfolio profile dataframe or benchmark profile dataframe
+
+    Returns
+    -------
+    dataframe
+        dataframe with uniformized time series
+    '''
+    # Get unique time periods
+    time_periods = profile_df['time'].unique()
+    time_periods = sorted(time_periods)
+
+    # Iterate through time periods
+    for i in range(len(time_periods) - 1):
+        current_period = time_periods[i]
+        next_period = time_periods[i + 1]
+
+        current_df = profile_df[profile_df['time'] == current_period]
+        next_df = profile_df[profile_df['time'] == next_period]
+
+        tickers_current = current_df['ticker']
+        tickers_next = next_df['ticker']
+
+        # rows whose ticker is not in tickers_next
+        missing_tickers = current_df[~tickers_current.isin(
+            tickers_next)].copy()
+
+        if len(missing_tickers) != 0:
+            missing_tickers.time = next_period
+            missing_tickers.shares = 0
+            missing_tickers.ini_w = 0
+            profile_df = pd.concat(
+                [profile_df, missing_tickers], ignore_index=True)
+    # reset index
+    return profile_df.reset_index(drop=True)
+
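To see what `_uniformize_time_series` does, consider a hypothetical two-period profile where BBB is sold between periods; the helper re-inserts it in the second period with zero shares and zero initial weight:

    import pandas as pd

    profile = pd.DataFrame({
        'time': ['2023-07', '2023-07', '2023-08'],
        'ticker': ['AAA', 'BBB', 'AAA'],
        'shares': [10, 5, 12],
        'ini_w': [0.6, 0.4, 1.0],
    })
    # after uniformizing, 2023-08 also carries ('BBB', shares=0, ini_w=0),
    # so every period has a row for every previously held ticker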
+def create_analytic_df(price_df, profile_df):
+    '''
+    create a df for analysis processing
+
+    fills information from the profile df into the stock price df
+    '''
+    uni_profile_df = _uniformize_time_series(profile_df)
+    # TODO handle rename column here
+    df = price_df.merge(uni_profile_df, on=['ticker', 'time'], how='outer')
+    df.sort_values(by=['ticker', 'time'], inplace=True)
+    # add sector, aggregate_sector, display_name and name to missing rows
+    grouped = df.groupby('ticker')
+    df['sector'] = grouped['sector'].fillna(method='ffill')
+    df['aggregate_sector'] = grouped['aggregate_sector'].fillna(method='ffill')
+    df['display_name'] = grouped['display_name'].fillna(method='ffill')
+    df['name'] = grouped['name'].fillna(method='ffill')
+
+    # assign missing ini_w
+    df['ini_w'] = grouped['ini_w'].fillna(method='ffill')
+    # assign missing shares; benchmark doesn't have shares
+    if ('shares' in df.columns):
+        df['shares'] = grouped['shares'].fillna(method='ffill')
+    # remove profile and price entries before the first profile entry
+    df.dropna(subset=['ini_w'], inplace=True)
+    df.dropna(subset=['close'], inplace=True)
+    # remove rows where weight is 0
+    df = df[df['ini_w'] != 0].copy()
+    return df


 def calculate_attributes_between_dates(start, end, calculated_p_stock, calculated_b_stock):
@@ -361,6 +505,15 @@ def calculate_attributes_between_dates(start, end, calculated_p_stock, calculated_b_stock):
     return df


+def calculate_cum_pnl(df, start, end):
+    '''return df with cumulative pnl within a window'''
+    df = df[df.time.between(start, end, inclusive='both')].copy()
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    df['cum_pnl'] = grouped['pnl'].cumsum()
+    return df
+
+
 def change_resolution(df, freq='W'):
     '''
     aggregate by keeping the first entry of the freq period,
@@ -370,6 +523,33 @@ def change_resolution(df, freq='W'):
     return df.groupby('freq').first().reset_index()


+def calculate_pnl(df):
+    '''
+    patch df with pnl column
+
+    pnl is calculated using cash
+    '''
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    df['pnl'] = grouped['cash'].diff()
+
+
+def calculate_pct(df):
+    '''
+    calculate pct using the close price
+    '''
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    df['pct'] = grouped['close'].pct_change()
+
+
+def calculate_norm_pct(df):
+    '''
+    use weight to calculate the norm pct
+    '''
+    df['norm_pct'] = df.weight * df.pct
+
+
 def calculate_weight_using_cash(df):
     '''
     patch df with current weight for each entry
@@ -381,9 +561,9 @@ def calculate_weight_using_cash(df):
         dataframe with processed cash column

     '''
-    df['
+    df['weight'] = float('nan')
     grouped = df.groupby('time')
-    df.
+    df.weight = grouped.cash.transform(lambda x: x / x.sum())


 def calculate_cash(df):
@@ -397,3 +577,173 @@ def calculate_cash(df):
         dataframe with processed shares and close column
     '''
     df['cash'] = df['shares'] * df['close']
+
+
+def calculate_weight_using_pct(df):
+    '''
+    calculate weight by propagating it with the pct column
+
+    used for benchmark stocks, since the benchmark
+    doesn't have share information
+
+    Parameters
+    ----------
+    df: dataframe
+        dataframe with weight, pct on closing and ini_w columns
+    '''
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    for _, group in grouped:
+        prev_row = None
+        for index, row in group.iterrows():
+            if prev_row is None:
+                prev_row = df.loc[index]
+                continue
+            df.loc[index, 'weight'] = prev_row['weight'] * (1 + row['pct'])
+            prev_row = df.loc[index]
+    # normalize weight
+    grouped = df.groupby('time')
+    normed_weight = grouped['weight'].transform(lambda x: x / x.sum())
+    df['weight'] = normed_weight
+
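The row-by-row propagation above is Python-level iteration; since w_t = w_0 · Π(1 + pct), the same weights can be computed with a grouped cumprod. A hedged sketch of a vectorized equivalent, assuming each ticker's first row already carries its initial weight:

    import pandas as pd

    def calculate_weight_using_pct_vectorized(df: pd.DataFrame) -> None:
        df.sort_values(by=['time'], inplace=True)
        # growth factor relative to each ticker's first observation
        # (first pct per ticker is NaN, so fillna(0) gives a factor of 1)
        growth = (1 + df['pct'].fillna(0)).groupby(df['ticker']).cumprod()
        first_w = df.groupby('ticker')['weight'].transform('first')
        df['weight'] = first_w * growth
        # renormalize so weights sum to 1 within each date
        df['weight'] = df.groupby('time')['weight'].transform(lambda x: x / x.sum())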
+
+def calculate_periodic_BHB(agg_b, agg_p):
+    '''
+    calculate periodic BHB for each ticker entry
+
+    the accumulated return of a period is used;
+    the weight is the weight at the beginning of the period
+
+    Note:
+    ----
+    if there is only one entry in a period, the return will be nan
+
+    Parameters
+    ----------
+    agg_b : pd.DataFrame
+        aggregated benchmark analytic_df
+    agg_p : pd.DataFrame
+        aggregated portfolio analytic_df
+
+    Returns
+    -------
+    pd.DataFrame
+        periodic BHB result containing allocation, interaction, selection,
+        notional_active_return and active_return
+    '''
+    # merge both
+    agg_b['in_benchmark'] = True
+    agg_p['in_portfolio'] = True
+    selected_column = ['ticker', 'aggregate_sector',
+                       'prev_weight', 'return', 'period', 'display_name']
+    columns_to_fill = ['return_b', 'return_p',
+                       'prev_weight_p', 'prev_weight_b']
+    merged_df = pd.merge(agg_b[['in_benchmark'] + selected_column],
+                         agg_p,
+                         how='outer',
+                         on=['period', 'ticker'],
+                         suffixes=('_b', '_p'))
+    merged_df['in_portfolio'].fillna(False, inplace=True)
+    merged_df['in_benchmark'].fillna(False, inplace=True)
+    merged_df[columns_to_fill] = merged_df[columns_to_fill].fillna(0)
+
+    # complement-fill aggregate_sector and display_name
+    merged_df['aggregate_sector_b'].fillna(
+        merged_df['aggregate_sector_p'], inplace=True)
+    merged_df["display_name_b"].fillna(merged_df.display_name_p, inplace=True)
+    merged_df.rename(columns={'aggregate_sector_b': 'aggregate_sector',
+                              'display_name_b': 'display_name',
+                              }, inplace=True)
+    merged_df.drop(columns=['aggregate_sector_p',
+                            'display_name_p'], inplace=True)
+
+    # calculate active return
+    merged_df['weighted_return_p'] = merged_df['return_p'] * \
+        merged_df['prev_weight_p']
+    merged_df['weighted_return_b'] = merged_df['return_b'] * \
+        merged_df['prev_weight_b']
+    merged_df['active_return'] = merged_df['weighted_return_p'] - \
+        merged_df['weighted_return_b']
+
+    # allocation, interaction, selection and notional active return
+    merged_df['allocation'] = (
+        merged_df.prev_weight_p - merged_df.prev_weight_b) * merged_df.return_b
+    merged_df['interaction'] = (merged_df.return_p - merged_df.return_b) \
+        * (merged_df.prev_weight_p - merged_df.prev_weight_b)
+    merged_df['selection'] = (
+        merged_df.return_p - merged_df.return_b) * merged_df.prev_weight_b
+    merged_df['notional_active_return'] = merged_df['allocation'] + \
+        merged_df['interaction'] + merged_df['selection']
+    return merged_df
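The Brinson-Hood-Beebower decomposition used here splits a position's active return into allocation, selection and interaction, and the three terms sum to the notional active return w_p·r_p − w_b·r_b. A quick numeric check with made-up weights and returns:

    w_p, w_b = 0.30, 0.20   # hypothetical start-of-period weights
    r_p, r_b = 0.05, 0.02   # hypothetical period returns

    allocation = (w_p - w_b) * r_b
    selection = (r_p - r_b) * w_b
    interaction = (r_p - r_b) * (w_p - w_b)

    # the three effects add up to the notional active return
    assert abs((allocation + selection + interaction)
               - (w_p * r_p - w_b * r_b)) < 1e-12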
+
+
+def _merge_anlaytic_df(portfolio_df, benchmark_df):
+    pass
+
+
+def aggregate_analytic_df_by_period(df, freq):
+    '''
+    return an aggregated analytic_df with weekly, monthly, yearly or daily frequency
+
+    each ticker has one row per period:
+    cash is the value at the end of the period,
+    shares is the number of shares at the end of the period,
+    prev_weight is the weight of that ticker entry at the end of the previous period,
+    log_return is the sum of log_return within the period,
+    weight is the weight of that ticker entry at the end of the period,
+    return is from the last of the previous period to the last of the current period.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        analytic_df, a dataframe of stock prices with weight and log_return information
+    freq : str
+        weekly: 'W-MON' (start on Tuesday, end on Monday),
+        monthly: 'M',
+        yearly: 'Y',
+        daily: "D"
+
+    Returns
+    -------
+    pd.DataFrame
+        aggregated analytic_df with weekly, monthly, yearly or daily frequency
+    '''
+    # create prev_weight
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    df['prev_weight'] = grouped['weight'].shift(1)
+
+    # aggregate by summing log return and keeping the first prev_weight
+    df['period'] = df.time.dt.to_period(freq)
+    grouped = df.groupby(['period', 'ticker'])
+    agg_rules = {'display_name': 'first',
+                 'aggregate_sector': 'first',
+                 'prev_weight': 'first',
+                 'log_return': 'sum',
+                 'weight': 'last'
+                 }
+
+    # handle aggregation on benchmark
+    if 'cash' in df.columns and 'shares' in df.columns:
+        agg_rules['cash'] = 'last'
+        agg_rules['shares'] = 'last'
+
+    # aggregation
+    agg_df = grouped.agg(agg_rules)
+
+    # calculate return by converting the summed log return to a percentage return
+    agg_df['return'] = np.exp(agg_df.log_return) - 1
+
+    # make it a one-dimensional dataframe
+    agg_df.reset_index(inplace=True)
+    return agg_df
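`Series.dt.to_period` is what buckets each row into its reporting period before the grouped aggregation. A small illustration with the weekly anchor used above:

    import pandas as pd

    times = pd.Series(pd.to_datetime(['2023-08-28',    # Monday
                                      '2023-08-29',    # Tuesday
                                      '2023-09-04']))  # next Monday
    # 'W-MON' weeks run Tuesday..Monday, so the Tuesday row falls
    # into the following week's period
    print(times.dt.to_period('W-MON'))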
+
+
+def aggregate_bhb_df(df, by="total"):
+    keys = ['period', 'aggregate_sector'] if by == 'sector' else ['period']
+    agg_df = df.groupby(keys)[['active_return',
+                               'allocation',
+                               'interaction',
+                               'selection',
+                               'notional_active_return']].sum()
+    return agg_df
testing_pipeline.ipynb
CHANGED
The diff for this file is too large to render. See raw diff.