huggingface112 committed on
Commit a77544c
1 Parent(s): 2b059b0

enhanced historyReturnCard
appComponents.py CHANGED
@@ -1,4 +1,4 @@
-from script import processing
+import processing
 from datetime import datetime, timedelta
 import panel as pn
 import pandas as pd
@@ -14,7 +14,7 @@ import plotly.graph_objs as go
 # import warnings
 pn.extension('mathjax')
 pn.extension('plotly')
-pn.extension('plotly')
+
 # warnings.filterwarnings("ignore", category=pd.core.common.SettingWithCopyWarning)
 # overal performance default to 30 days
 
@@ -38,7 +38,6 @@ def create_portfolio_overview(df_list):
     ip_eval_df = p_eval_df.interactive()
     isector_eval_df = sector_eval_df.interactive()
 
-
     ranged_ip_eval_df = ip_eval_df[ip_eval_df.date.between(
         range_slider.param.value_start, range_slider.param.value_end)]
     ranged_isector_eval_df = isector_eval_df[isector_eval_df.date.between(
@@ -233,7 +232,7 @@ class TotalReturnCard(Viewer):
 
     def create_report(self):
         # Calculate the total return and risk
-        result = processing.calculate_return(
+        result = processing.calculate_norm_return(
             self.eval_df, self.start_date, self.end_date)
         most_recent_row = result.tail(1)
         active_return = most_recent_row.active_return.values[0]
@@ -258,7 +257,7 @@ class TotalReturnCard(Viewer):
         allocation = total_attributes.allocation
         selection = total_attributes.selection
 
-        # Create a function for text report
+        # Create a function for text report
         report = f"""
         <style>
         .compact-container {{
@@ -347,7 +346,7 @@ class TotalReturnCard(Viewer):
         return report
 
     def create_plot(self):
-        result = processing.calculate_return(
+        result = processing.calculate_norm_return(
             self.eval_df, self.start_date, self.end_date)
         fig = px.line(result, x="date", y=['return_p', 'return_b'])
         fig.update_traces(mode="lines+markers",
@@ -463,37 +462,52 @@ class DrawDownCard(Viewer):
 
 
 class HistReturnCard(Viewer):
-    eval_df = param.Parameter()
+
     return_barplot = param.Parameterized()
+    calculated_b_stock = param.Parameterized()
+    calculated_p_stock = param.Parameterized()
     select_resolution = param.ObjectSelector(
         default='每月回报', objects=['每日回报', '每周回报', '每月回报', '每年回报'])
 
+    def _calculate_return(self, df, freq):
+        # weekly periods start on Tuesday and end on Monday
+        grouped = df.groupby(pd.Grouper(key='time', freq=freq))
+        agg_df = grouped.agg({'weighted_log_return': 'sum'})
+        # time indicates the end of each period
+        agg_df['time'] = agg_df.index
+        # convert the summed log return to a percentage return
+        agg_df['return'] = np.exp(agg_df['weighted_log_return']) - 1
+
+        # return agg_df
+        return agg_df.reset_index(drop=True)
+
     def update_aggregate_df(self):
         freq = None
         if self.select_resolution == "每日回报":
-            return self.eval_df
+            freq = "D"
         elif self.select_resolution == "每月回报":
             freq = 'M'
         elif self.select_resolution == "每年回报":
             freq = 'Y'
         elif self.select_resolution == "每周回报":
-            freq = 'W'
-        # I don't think this formula is correct, check this later
-        agg_df = self.eval_df.groupby([pd.Grouper(key='date', freq=freq)])\
-            .aggregate({'portfolio_pct_p': 'sum', 'portfolio_pct_b': 'sum'})
-        agg_df['portfolio_return_p'] = np.exp(agg_df.portfolio_pct_p) - 1
-        agg_df['portfolio_return_b'] = np.exp(agg_df.portfolio_pct_b) - 1
-        return agg_df.reset_index()
+            freq = 'W-MON'
+
+        p_return = self._calculate_return(self.calculated_p_stock, freq)
+        b_return = self._calculate_return(self.calculated_b_stock, freq)
+
+        merge_df = pd.merge(p_return, b_return, on='time',
+                            how='outer', suffixes=('_p', '_b'))
+        return merge_df
 
     def create_attributes_barplot(self):
-        self.attribute_df = self.update_attributes_df()
-        fig = px.bar(self.attribute_df, x='date', y=[
-            'allocation', 'selection', 'interaction', 'notional_return', 'active_return'])
+        self.attribute_df = self._update_attributes_df()
+        fig = px.bar(self.attribute_df, x='period_str', y=[
+            'allocation', 'selection', 'interaction', 'notional_active_return', 'active_return'])
         colname_to_name = {
             'allocation': '分配',
             'selection': '选择',
             'interaction': '交互',
-            'notional_return': '名义主动回报',
+            'notional_active_return': '名义主动回报',
             'active_return': '实际主动回报'
         }
         fig.for_each_trace(lambda t: t.update(name=colname_to_name.get(t.name, t.name),
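A note on the aggregation in the hunk above: the new _calculate_return is sound where the deleted code (flagged "I don't think this formula is correct") was not, because log returns are additive across time. Summing weighted_log_return within a period and then taking exp(sum) - 1 recovers the compounded simple return for that period. A minimal standalone sketch of the identity, with made-up daily returns (not data from this repo):

import numpy as np
import pandas as pd

# two hypothetical daily simple returns: +10% then -5%
simple = pd.Series([0.10, -0.05])

# compounding directly: (1.10 * 0.95) - 1 = 0.045
compounded = (1 + simple).prod() - 1

# the approach used in the diff: sum log returns, then exponentiate
via_logs = np.exp(np.log(1 + simple).sum()) - 1

assert np.isclose(compounded, via_logs)  # both equal 0.045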
@@ -502,6 +516,7 @@ class HistReturnCard(Viewer):
                                               hovertemplate=t.hovertemplate.replace(
                                                   t.name, colname_to_name.get(t.name, t.name))
                                               ))
+
         fig.update_layout(barmode='group', title='主动回报归因',
                           bargap=0.0, bargroupgap=0.0)
         fig.update_layout(**styling.plot_layout)
@@ -510,8 +525,8 @@ class HistReturnCard(Viewer):
 
     def create_return_barplot(self):
         self.agg_df = self.update_aggregate_df()
-        fig = px.bar(self.agg_df, x='date', y=[
-            'portfolio_return_p', 'portfolio_return_b'],
+        fig = px.bar(self.agg_df, x='time', y=[
+            'return_p', 'return_b'],
                      barmode='overlay',
                      title='周期回报',
                      )
@@ -533,14 +548,14 @@ class HistReturnCard(Viewer):
 
         return fig.to_dict()
 
-    @param.depends('eval_df', 'select_resolution', watch=True)
+    @param.depends('calculated_p_stock', 'calculated_b_stock', 'select_resolution', watch=True)
     def update(self):
         return_barplot = self.create_return_barplot()
         self.return_barplot.object = return_barplot
         attributes_barplot = self.create_attributes_barplot()
         self.attribute_barplot.object = attributes_barplot
 
-    def update_attributes_df(self):
+    def _update_attributes_df(self):
         freq = None
         if self.select_resolution == "每日回报":
             freq = 'D'
@@ -549,19 +564,23 @@ class HistReturnCard(Viewer):
         elif self.select_resolution == "每年回报":
             freq = 'Y'
         elif self.select_resolution == "每周回报":
-            freq = 'W'
-        p_stock = processing.change_resolution(self.calculated_p_stock, freq)
-        b_stock = processing.change_resolution(self.calculated_b_stock, freq)
-        return processing.calculate_total_attribution(p_stock, b_stock)
-
-    def __init__(self, eval_df, calculated_p_stock, calculated_b_stock, **params):
-        self.eval_df = eval_df
+            freq = 'W-MON'
+        agg_p = processing.aggregate_analytic_df_by_period(self.calculated_p_stock, freq)
+        agg_b = processing.aggregate_analytic_df_by_period(self.calculated_b_stock, freq)
+        bhb_df = processing.calculate_periodic_BHB(agg_p, agg_b)
+        agg_bhb = processing.aggregate_bhb_df(bhb_df)
+        agg_bhb['period_str'] = agg_bhb.index.map(lambda x: str(x))
+        return agg_bhb
+
+    def __init__(self, calculated_p_stock, calculated_b_stock, **params):
         self.calculated_p_stock = calculated_p_stock
         self.calculated_b_stock = calculated_b_stock
+
         self._range_slider = pn.widgets.DateRangeSlider(
             name='Date Range Slider',
-            start=self.eval_df.date.min(), end=self.eval_df.date.max(),
-            value=(self.eval_df.date.min(), self.eval_df.date.max()),
+            start=self.calculated_p_stock.time.min(), end=self.calculated_p_stock.time.max(),
+            value=(self.calculated_p_stock.time.min(),
+                   self.calculated_p_stock.time.max()),
 
         )
         self.return_barplot = pn.pane.Plotly(self.create_return_barplot())
@@ -578,20 +597,21 @@ class HistReturnCard(Viewer):
578
 
579
  class PortfolioComposationCard(Viewer):
580
  p_stock_df = param.Parameterized()
 
581
 
582
  def create_cash_position_df(self):
583
- aggregate_df = self.p_stock_df.groupby('date', as_index=False).agg({
584
- 'current_weight': 'sum'
585
  })
586
  aggregate_df['type'] = 'portfolio'
587
  not_in_portfolio_df = aggregate_df.copy()
588
  not_in_portfolio_df['type'] = 'not_in_portfolio'
589
- not_in_portfolio_df['current_weight'] = 1000
590
  # append df
591
  aggregate_df = pd.concat([aggregate_df, not_in_portfolio_df])
592
  # sort
593
- aggregate_df.sort_values(by=['date'], inplace=True)
594
- return aggregate_df[aggregate_df.date.between(self.date_range.value[0], self.date_range.value[1])]
595
 
596
  @param.depends('p_stock_df', 'date_range.value', watch=True)
597
  def update_trend_plot(self):
@@ -599,7 +619,7 @@ class PortfolioComposationCard(Viewer):
 
     def create_trend_plot(self):
         aggregate_df = self.create_cash_position_df()
-        fig = px.bar(aggregate_df, x='date', y='current_weight', color='type')
+        fig = px.bar(aggregate_df, x='time', y='cash', color='type')
         fig.update_layout(legend=dict(
             orientation="h",
             yanchor="bottom",
@@ -619,57 +639,69 @@ class PortfolioComposationCard(Viewer):
         return fig.to_dict()
 
     def create_treemap(self):
-        self.selected_df['position'] = 'portfolio'
+        self.selected_df = self.p_stock_df[self.p_stock_df.time ==
+                                           self.datetime_picker.value]
+        self.selected_df['position'] = '股票'
         not_in_portfolio_row = pd.DataFrame({
-            'display_name': ['不在portfolio中'],
-            'position': ['not_in_portfolio'],
-            'aggregate_sector': ['不在portfolio中'],
-            'current_weight': [1000],
-            'portfolio_return': [0],
-            'portfolio_pct': [0]
+            'display_name': ['闲置'],
+            'position': ['闲置'],
+            'aggregate_sector': ['闲置'],
+            'cash': [100],
+            'weighted_return': [0]
         })
         df = pd.concat([self.selected_df, not_in_portfolio_row],
                        ignore_index=True)
-        fig = px.treemap(df, path=[px.Constant('cash_position'), 'position', 'aggregate_sector', 'display_name'], values='current_weight',
-                         color='portfolio_return', hover_data=['portfolio_return', 'portfolio_pct'],
+
+        fig = px.treemap(df,
+                         # path=[px.Constant('cash_position'), 'position',
+                         #       'aggregate_sector', 'display_name'],
+                         path=['position', 'aggregate_sector', 'display_name'],
+                         values='cash',
+                         color='weighted_return',
+                         hover_data=['weighted_return', 'cash'],
                          color_continuous_scale='RdBu',
                          color_continuous_midpoint=np.average(
-                             df['portfolio_return'])
+                             df['weighted_return'])
                          )
+
         fig.update_layout(styling.plot_layout)
         fig.update_layout(coloraxis_colorbar=dict(
-            title="weighted return"))
-        colname_to_name = {
-            'cash_position': '现金分布',
-            'portfolio_return': '加权回报',
-            'not_in_portfolio': '不在portfolio中',
-            'current_weight': '现金',
-
-        }
-        fig.for_each_trace(lambda t: t.update(name=colname_to_name.get(t.name, t.name),
-                                              hovertemplate=t.hovertemplate.replace(
-                                                  t.name, colname_to_name.get(t.name, t.name))
-                                              ))
+            title="累计加权回报率"))
+        # colname_to_name = {
+        #     'cash_position': '现金分布',
+        #     'portfolio_return': '加权回报',
+        #     'not_in_portfolio': '不在portfolio中',
+        #     'current_weight': '现金',
+
+        # }
+        # fig.for_each_trace(lambda t: t.update(name=colname_to_name.get(t.name, t.name),
+        #                                       hovertemplate=t.hovertemplate.replace(
+        #                                           t.name, colname_to_name.get(t.name, t.name))
+        #                                       ))
         return fig.to_dict()
 
-    def __init__(self, p_stock_df, **params):
-        self.p_stock_df = p_stock_df
-        self.date_picker = pn.widgets.DatetimePicker(name='选择某日资金分布',
-                                                     start=self.p_stock_df.date.min(),
-                                                     end=self.p_stock_df.date.max(),
-                                                     value=self.p_stock_df.date.max(),
-                                                     enabled_dates=[datetime_object.date(
-                                                     ) for datetime_object in self.p_stock_df.date.unique()],
-                                                     enable_time=False,
-                                                     )
+    def __init__(self, analytic_df, **params):
+        self.p_stock_df = analytic_df
+        self.p_stock_df = processing.calculate_weighted_return(self.p_stock_df,
+                                                               start=self.p_stock_df.time.min(),
+                                                               end=self.p_stock_df.time.max())
+
+        # convert datetime to date
+        enabled_dates = [time.date() for time in self.p_stock_df.time.unique()]
+        self.datetime_picker = pn.widgets.DatetimePicker(name='选择某日资金分布',
+                                                         start=self.p_stock_df.time.min(),
+                                                         end=self.p_stock_df.time.max(),
+                                                         value=self.p_stock_df.time.max(),
+                                                         enabled_dates=enabled_dates,
+                                                         )
         self.date_range = pn.widgets.DateRangeSlider(name='选择资金分布走势区间',
-                                                     start=self.p_stock_df.date.min(),
-                                                     end=self.p_stock_df.date.max(),
-                                                     value=(self.p_stock_df.date.min(
-                                                     ), self.p_stock_df.date.max()),
+                                                     start=self.p_stock_df.time.min(),
+                                                     end=self.p_stock_df.time.max(),
+                                                     value=(self.p_stock_df.time.min(
+                                                     ), self.p_stock_df.time.max()),
                                                      )
-        self.selected_df = self.p_stock_df[self.p_stock_df.date ==
-                                           self.date_picker.value]
+
         self.tree_plot = pn.pane.Plotly(self.create_treemap())
         self.trend_plot = pn.pane.Plotly(self.create_trend_plot())
 
@@ -677,83 +709,96 @@ class PortfolioComposationCard(Viewer):
         super().__init__(**params)
 
     def __panel__(self):
-        self._layout = pn.Card(self.date_picker, self.tree_plot, self.date_range, self.trend_plot,
+        self._layout = pn.Card(self.datetime_picker, self.tree_plot, self.date_range, self.trend_plot,
                                width=500, header=pn.pane.Str('资金分布'))
         return self._layout
 
-    @param.depends('date_picker.value', 'p_stock_df', watch=True)
+    @param.depends('datetime_picker.value', 'p_stock_df', watch=True)
     def update(self):
-        self.selected_df = self.p_stock_df[self.p_stock_df.date ==
-                                           self.date_picker.value]
         tree_plot = self.create_treemap()
         self.tree_plot.object = tree_plot
 
 
 class BestAndWorstStocks(Viewer):
-    p_stock_df = param.Parameter()
-    b_stock_df = param.Parameter()
     start_date = param.Parameter()
     end_date = param.Parameter()
-
-    def calculate_attributes(self):
-        result_df = processing.calculate_attributes_between_dates(self.start_date,
-                                                                  self.end_date,
-                                                                  self.p_stock_df,
-                                                                  self.b_stock_df)
-
-        return result_df
+    hidden_col = [
+        'index',
+        'open',
+        'high',
+        'low',
+        'close',
+        'volume',
+        'money',
+        'pct',
+        'sector',
+        'aggregate_sector',
+        'ave_price',
+        'weight',
+        'ini_w',
+        'name',
+        'pnl'
+    ]
+    forzen_columns = ['display_name', 'return', 'cum_pnl', 'shares']
+    description = "股票表现排名"
+    tooltip = "在一个时间窗口中累计盈利最高和最低的股票,包括已经卖出的股票,如果表格的日期小于窗口的结束时间代表已经卖出"
 
     def create_tabulator(self, df):
         col_title_map = {
-            'display_name_p': '股票名称',
+            'display_name': '股票名称',
             'ticker': '股票代码',
-            'pct_p': '加权回报率',
-            'prev_w_in_p_b': '在benchmark中的权重',
-            'prev_w_in_p_p': '在portfolio中的权重',
-            'allocation': '分配分数',
-            'selection': '选择分数',
-            'interaction': '交互分数',
-            'return': '未加权回报率',
-            'active_return': '加权主动回报率',
+            'time': '日期',
+            'return': '回报率',
+            'sector': '行业',
+            'shares': '持仓',
+            'cash': '现金',
+            'cum_pnl': '累计盈利',
         }
         return pn.widgets.Tabulator(df, sizing_mode='stretch_width',
-                                    hidden_columns=['index', 'display_name_b',
-                                                    'pct_b', 'in_portfolio',
-                                                    ],
-                                    frozen_columns=['display_name_p'],
-                                    titles=col_title_map)
+                                    hidden_columns=self.hidden_col,
+                                    frozen_columns=self.forzen_columns,
+                                    titles=col_title_map
+                                    )
 
     @param.depends('start_date', 'end_date', watch=True)
     def update(self):
         result_df = self.get_processed_df()
-        self.best_5_tabulator.value = result_df.tail(5)
-        self.worst_5_tabulator.value = result_df.head(5)
+        self.best_5_tabulator.value = result_df.head(5)
+        self.worst_5_tabulator.value = result_df.tail(5)
+
+    def _get_cum_return(self, df):
+        '''return a df containing the cumulative return at the end date'''
+        result_df = processing.calcualte_return(df=df,
+                                                start=self.start_date,
+                                                end=self.end_date)
+        grouped = result_df.groupby('ticker')
+        last_row = result_df.loc[grouped.time.idxmax()]
+        return last_row
 
     def get_processed_df(self):
         '''
         calculate attributes and return a sorted dataframe on weighted return
         '''
-
-        result_df = self.calculate_attributes()
-        result_df = result_df[result_df.in_portfolio]
-        result_df.sort_values(by='return', inplace=True)
-        return result_df
-
-    def __init__(self, p_stock_df, b_stock_df, **params):
-        self.p_stock_df = p_stock_df
-        self.b_stock_df = b_stock_df
+        df = processing.calculate_cum_pnl(self.analytic_df,
+                                          start=self.start_date,
+                                          end=self.end_date)
+        df = self._get_cum_return(df)
+        return df.sort_values(by='cum_pnl', ascending=False)
+
+    def __init__(self, analytic_df, **params):
+        self.analytic_df = analytic_df
         self._date_range = pn.widgets.DateRangeSlider(
             name='选择计算回报的时间区间',
-            start=p_stock_df.date.min(),
-            end=p_stock_df.date.max(),
-            value=(p_stock_df.date.max() -
-                   timedelta(days=7), p_stock_df.date.max())
+            start=self.analytic_df.time.min(),
+            end=self.analytic_df.time.max(),
+            value=(self.analytic_df.time.max() -
+                   timedelta(days=7), self.analytic_df.time.max())
         )
         self.start_date = self._date_range.value_start
         self.end_date = self._date_range.value_end
         result_df = self.get_processed_df()
-        self.best_5_tabulator = self.create_tabulator(result_df.tail(5))
-        self.worst_5_tabulator = self.create_tabulator(result_df.head(5))
+        self.best_5_tabulator = self.create_tabulator(result_df.head(5))
+        self.worst_5_tabulator = self.create_tabulator(result_df.tail(5))
         super().__init__(**params)
 
     @param.depends('_date_range.value', watch=True)
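The one-row-per-ticker selection in _get_cum_return above is a pandas idiom worth noting: groupby('ticker').time.idxmax() returns, for each ticker, the index label of its latest timestamp, so a single .loc call fetches each ticker's final row within the window. A small sketch with invented values:

import pandas as pd

df = pd.DataFrame({
    'ticker': ['A', 'A', 'B', 'B'],
    'time': pd.to_datetime(['2023-08-01', '2023-08-02',
                            '2023-08-01', '2023-08-03']),
    'cum_pnl': [1.0, 2.5, -0.5, 0.75],
})
last_idx = df.groupby('ticker').time.idxmax()  # index of latest time per ticker
last_rows = df.loc[last_idx]                   # one row per ticker
# ticker A -> its 2023-08-02 row, ticker B -> its 2023-08-03 row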
@@ -768,7 +813,12 @@ class BestAndWorstStocks(Viewer):
             self.best_5_tabulator,
             pn.pane.Str('加权回报率最低回报5只股票'),
             self.worst_5_tabulator,
-            max_width=500, header=pn.pane.Str('Portfolio中最高回报和最低加权回报率股票'))
+            max_width=500,
+            header=pn.Row(
+                pn.pane.Str(self.description),
+                pn.widgets.TooltipIcon(value=self.tooltip)
+            )
+        )
         return self._layout
 
 
db_operation.py CHANGED
@@ -121,10 +121,49 @@ def append_to_stocks_price_table(df):
     _append_df_to_db(df, ts.STOCKS_PRICE_TABLE, ts.STOCKS_PRICE_TABLE_SCHEMA)
 
 def get_all_stocks():
+    '''
+    get all stocks information
+
+    Returns
+    -------
+    pd.DataFrame
+        all stocks information
+    '''
     with create_engine(db_url).connect() as conn:
         all_stocks = pd.read_sql(ts.STOCKS_DETAILS_TABLE, con=conn)
     return all_stocks
+
+def save_portfolio_analytic_df(df):
+    table_name = 'analytic_p'
+    with create_engine(db_url).connect() as conn:
+        df.to_sql(table_name, con=conn, if_exists='replace', index=False)
+
+def get_portfolio_analytic_df():
+    table_name = 'analytic_p'
+    with create_engine(db_url).connect() as conn:
+        df = pd.read_sql(table_name, con=conn)
+    return df
+
+
+def save_benchmark_analytic_df(df):
+    table_name = 'analytic_b'
+    with create_engine(db_url).connect() as conn:
+        df.to_sql(table_name, con=conn, if_exists='replace', index=False)
+
+def get_benchmark_analytic_df():
+    table_name = 'analytic_b'
+    with create_engine(db_url).connect() as conn:
+        df = pd.read_sql(table_name, con=conn)
+    return df
+
+def save_analytic_df(df):
+    table_name = 'analytic'
+    with create_engine(db_url).connect() as conn:
+        df.to_sql(table_name, con=conn, if_exists='replace', index=False)
+
+def get_analytic_df():
+    table_name = 'analytic'
+    with create_engine(db_url).connect() as conn:
+        df = pd.read_sql(table_name, con=conn)
+    return df
 
 def _get_all_row(table_name, ts_column='date'):
     with create_engine(db_url).connect() as conn:
         df = pd.read_sql(table_name, con=conn)
@@ -141,8 +180,15 @@ def get_stocks_price(tickers: list[str]):
     '''
     return df of stock price within ticker in stocks price table
     '''
-    query = f"SELECT * FROM {ts.STOCKS_PRICE_TABLE} WHERE ticker IN {tuple(tickers)}"
+    if len(tickers) == 0:
+        # so the returned df has the same schema as the table
+        query = f"SELECT * FROM {ts.STOCKS_PRICE_TABLE} WHERE 1=0"
+    elif len(tickers) == 1:
+        query = f"SELECT * FROM {ts.STOCKS_PRICE_TABLE} WHERE ticker = '{tickers[0]}'"
+    else:
+        query = f"SELECT * FROM {ts.STOCKS_PRICE_TABLE} WHERE ticker IN {tuple(tickers)}"
     with create_engine(db_url).connect() as conn:
         df = pd.read_sql(query, con=conn)
     df.time = pd.to_datetime(df.time)
-    return df
+    # drop duplicates
+    return df.drop_duplicates(subset=['ticker', 'time'])
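The new branching in get_stocks_price works around a real quirk of interpolating Python tuples into SQL: tuple(['A']) renders as ('A',) with a trailing comma and tuple([]) renders as (), both invalid in an IN clause, hence the dedicated equality and WHERE 1=0 branches. A sketch of the quirk, plus a parameterized alternative that would sidestep both the syntax issue and injection risk (the table name here is illustrative, not from the repo):

from sqlalchemy import bindparam, text

print(f"... WHERE ticker IN {tuple(['a', 'b'])}")  # IN ('a', 'b')  -- valid
print(f"... WHERE ticker IN {tuple(['a'])}")       # IN ('a',)     -- invalid SQL
print(f"... WHERE ticker IN {tuple([])}")          # IN ()         -- invalid SQL

# expanding bind parameter: sqlalchemy renders one placeholder per list element
query = text("SELECT * FROM stocks_price WHERE ticker IN :tickers")
query = query.bindparams(bindparam('tickers', expanding=True))
# conn.execute(query, {'tickers': ['a', 'b']})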
index_page.py CHANGED
@@ -8,14 +8,15 @@ import random
 import scipy.stats as stats
 import hvplot.pandas  # noqa
 from sqlalchemy import create_engine
-from . import api
+import api
 # from backgroundTask import stocks_stream
 from functools import partial
 import plotly.graph_objects as go
 from panel.viewable import Viewer
-from script import processing
+import processing
 import appComponents
 import param
+import db_operation as db
 # import warnings
 pn.extension('mathjax')
 # warnings.filterwarnings("ignore", category=pd.core.common.SettingWithCopyWarning)
@@ -25,45 +26,47 @@ db_url = 'sqlite:///instance/local.db'
 engine = create_engine(db_url)
 
 
-p_eval_df = None
-calculated_b_stock = None
-calculated_p_stock = None
+
+analytic_p = db.get_portfolio_analytic_df()
+analytic_b = db.get_benchmark_analytic_df()
 # load benchmark stock
-with engine.connect() as connection:
-    calculated_b_stock = pd.read_sql('calculated_b_stock', con=connection)
-    calculated_p_stock = pd.read_sql('calculated_p_stock', con=connection)
-    p_eval_df = pd.read_sql('p_eval_result', con=connection)
+# with engine.connect() as connection:
+#     analytics_df = pd.read
+#     calculated_b_stock = pd.read_sql('calculated_b_stock', con=connection)
+#     calculated_p_stock = pd.read_sql('calculated_p_stock', con=connection)
+#     p_eval_df = pd.read_sql('p_eval_result', con=connection)
 
 stock_overview = appComponents.BestAndWorstStocks(
-    p_stock_df=calculated_p_stock, b_stock_df=calculated_b_stock)
+    analytic_df=analytic_p)
 composation_card = appComponents.PortfolioComposationCard(
-    p_stock_df=calculated_p_stock)
+    analytic_p)
 monthly_return_card = appComponents.HistReturnCard(
-    eval_df=p_eval_df, calculated_p_stock=calculated_p_stock, calculated_b_stock=calculated_b_stock)
-total_return_card = appComponents.TotalReturnCard(name='Range', eval_df=p_eval_df,
-                                                  b_stock_df=calculated_b_stock,
-                                                  p_stock_df=calculated_p_stock,
-                                                  value=(0, 20))
-drawdown_card = appComponents.DrawDownCard(
-    eval_df=p_eval_df, calculated_p_stock=calculated_p_stock, calculated_b_stock=calculated_b_stock)
+    calculated_p_stock=analytic_p, calculated_b_stock=analytic_b)
+# total_return_card = appComponents.TotalReturnCard(name='Range', eval_df=p_eval_df,
+#                                                   b_stock_df=calculated_b_stock,
+#                                                   p_stock_df=calculated_p_stock,
+#                                                   value=(0, 20))
+# drawdown_card = appComponents.DrawDownCard(
+#     eval_df=p_eval_df, calculated_p_stock=calculated_p_stock, calculated_b_stock=calculated_b_stock)
 
-top_header = appComponents.TopHeader(
-    eval_df=p_eval_df
-)
+# top_header = appComponents.TopHeader(
+#     eval_df=p_eval_df
+# )
 
 template = pn.template.FastListTemplate(
     title="Portfolio一览",
     # sidebar=[freq, phase],
 )
-template.main.extend(
-    [pn.Row(top_header),
-     pn.Row(
-        pn.Column(monthly_return_card, stock_overview,
-                  width=500, margin=(10, 10, 10, 10)),
-        pn.Column(total_return_card, drawdown_card, margin=(10, 10, 10, 10)),
-        pn.Column(composation_card, margin=(10, 10, 10, 10)),
-    )]
-)
+template.main.extend([stock_overview, composation_card, monthly_return_card])
+# template.main.extend(
+#     [pn.Row(top_header),
+#      pn.Row(
+#         pn.Column(monthly_return_card, stock_overview,
+#                   width=500, margin=(10, 10, 10, 10)),
+#         pn.Column(total_return_card, drawdown_card, margin=(10, 10, 10, 10)),
+#         pn.Column(composation_card, margin=(10, 10, 10, 10)),
+#     )]
+# )
 template.servable()
 # pn.Row(
instance/local.db CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0651e0e8a48a69d61d008bf5ae3262ca38a8968b020e700604841d55f8784a3
-size 8728576
+oid sha256:b918cd8420f4f314aaff25fb9348f5fcca206b01a0403d9a96b0039d00b55047
+size 17780736
instance/log.json CHANGED
@@ -1,3 +1,3 @@
 {
-    "daily_update": "2023-08-29 03:02:51"
+    "daily_update": "2023-08-30 09:02:54"
 }
pipeline.py CHANGED
@@ -8,6 +8,7 @@ import pytz
 import table_schema as ts
 import db_operation as db
 from log import Log
+import processing
 # import settings
 # fetch new stock price
 stock_price_stream = Stream()
@@ -138,11 +139,6 @@ def need_to_update_stocks_price(delta_time):
     return False
 
 
-def processing():
-    '''
-    run the whole processing pipeline here
-    '''
-    pass
 
 
 def add_details_to_stock_df(stock_df):
@@ -214,27 +210,27 @@ def right_fill_stock_price():
     '''
     update all stocks price until today.
 
-    if no portfolio, terminate without warning
-    default start date is the most recent date in portfolio
+    if no benchmark profile, terminate without warning
+    default start date is the most recent date in benchmark profile
     '''
-    most_recent_portfolio = db.get_most_recent_portfolio_profile()
+    most_recent_benchmark = db.get_most_recent_benchmark_profile()
     most_recent_stocks_price = db.get_most_recent_stocks_price()
 
     # fetch all stocks price until today
     stocks_dates = most_recent_stocks_price.time
-    portfolio_dates = most_recent_portfolio.date
-    if len(portfolio_dates) == 0:
+    b_dates = most_recent_benchmark.date
+    if len(b_dates) == 0:
         return
-    start = stocks_dates[0] if len(stocks_dates) > 0 else portfolio_dates[0]
+    start = stocks_dates[0] if len(stocks_dates) > 0 else b_dates[0]
     end = utils.time_in_beijing()
 
     # frequency is set to daily
     if end - start > dt.timedelta(days=1):
-        new_stocks_price = fetch_all_stocks_price_between(start, end)
+        new_stocks_price = _fetch_all_stocks_price_between(start, end)
         db.append_to_stocks_price_table(new_stocks_price)
 
 
-def fetch_all_stocks_price_between(start, end):
+def _fetch_all_stocks_price_between(start, end):
     '''
     patch stock price db with all daily stock price within window
     inclusive on both start and end date
@@ -260,8 +256,7 @@ def fetch_all_stocks_price_between(start, end):
         security=tickers,
         start_date=start,
         end_date=end,
-        frequency='daily',
-        skip_paused=True,)
+        frequency='daily')
     # drop where closing price is null
     stock_price.dropna(subset=['close'], inplace=True)
     return stock_price
@@ -339,15 +334,17 @@ def left_fill_benchmark_profile():
 def left_fill_stocks_price():
     '''
     left fill stock price
-    fill missing entries between the oldest date in portfolio profile and the oldest date in stock price table
+    fill missing entries between the oldest date in benchmark
+    profile and the oldest date in stock price table
 
-    if no portfolio profile, terminate without warning
-    if no stock price table, the span would be from the oldest date in portfolio profile to the most recent date in portfolio profile
+    if no benchmark profile, terminate without warning
+    if no stock price table, the span would be from
+    the oldest date in benchmark profile to the most recent date in benchmark profile
 
 
     '''
-    # get oldest time in portfolio profile
-    p_start = db.get_oldest_portfolio_profile().date
+    # use benchmark because the benchmark profile only updates once a month
+    p_start = db.get_oldest_benchmark_profile().date
     # get oldest time in stock price table
     stock_start = db.get_oldest_stocks_price().time
    # if no portfolio profile, terminate
@@ -356,14 +353,14 @@ def left_fill_stocks_price():
     # no stock price, span the entire portfolio profile
     elif len(stock_start) == 0:
         start = p_start[0]
-        end = db.get_most_recent_portfolio_profile().date[0]
+        end = db.get_most_recent_benchmark_profile().date[0]
     else:
         start = p_start[0]
         end = stock_start[0]
-
+
     if start < end:
         # fetch and update
-        new_entry = fetch_all_stocks_price_between(start, end)
+        new_entry = _fetch_all_stocks_price_between(start, end)
         db.append_to_stocks_price_table(new_entry)
@@ -374,6 +371,49 @@ def updaet_benchmark_to_db():
     pass
 
 
+def get_stocks_in_profile(profile_df):
+    ticker_list = profile_df.ticker.unique().tolist()
+    stocks_df = db.get_stocks_price(ticker_list)
+    return stocks_df
+
+
+def batch_processing():
+    '''perform when portfolio or benchmark is updated'''
+    portfolio_p = db.get_all_portfolio_profile()
+    benchmark_p = db.get_all_benchmark_profile()
+    p_stocks_df = get_stocks_in_profile(portfolio_p)
+    b_stocks_df = get_stocks_in_profile(benchmark_p)
+
+    # temporarily handle renaming date to time
+    portfolio_p.rename(
+        columns={'date': 'time', 'weight': 'ini_w'}, inplace=True)
+    benchmark_p.rename(columns={'date': 'time'}, inplace=True)
+    # normalize weight in benchmark
+    grouped = benchmark_p.groupby('time')
+    benchmark_p['ini_w'] = grouped['weight'].transform(lambda x: x / x.sum())
+    # add profile information into stock price
+    analytic_b = processing.create_analytic_df(b_stocks_df, benchmark_p)
+    analytic_p = processing.create_analytic_df(p_stocks_df, portfolio_p)
+    # p stock weight
+    processing.calculate_cash(analytic_p)
+    processing.calculate_weight_using_cash(analytic_p)
+    processing.calculate_pct(analytic_p)
+    processing.calculate_norm_pct(analytic_p)
+    # b stock weight
+    analytic_b.sort_values(by=['time'], inplace=True)
+    grouped = analytic_b.groupby('ticker')
+    analytic_b['pct'] = grouped['close'].pct_change()
+    processing.calculate_weight_using_pct(analytic_b)
+    # pnl
+    processing.calculate_pnl(analytic_p)
+    # log return
+    # need to crop on the left side first
+    analytic_b = analytic_b[analytic_b['time'] >= analytic_p.time.min()].copy()
+    processing.calculate_log_return(analytic_p)
+    processing.calculate_log_return(analytic_b)
+    db.save_portfolio_analytic_df(analytic_p)
+    db.save_benchmark_analytic_df(analytic_b)
+
 async def daily_update():
     last_update = log.get_time('daily_update')
     if last_update is None or utils.time_in_beijing() - last_update >= dt.timedelta(days=1):
@@ -393,6 +433,8 @@ async def daily_update():
         log.update_log('daily_update')
     else:
         print("no update needed")
+    batch_processing()
+    print("updated analytic")
 
 
 def update():
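One step in batch_processing deserves a gloss: benchmark profiles arrive with raw index weights on an arbitrary scale, so they are renormalized to sum to 1 within each date via a grouped transform before any return math. A tiny sketch of that step with made-up weights:

import pandas as pd

benchmark_p = pd.DataFrame({
    'time': ['2023-08-01'] * 3,
    'ticker': ['A', 'B', 'C'],
    'weight': [20.0, 30.0, 50.0],   # raw weights, arbitrary scale
})
grouped = benchmark_p.groupby('time')
benchmark_p['ini_w'] = grouped['weight'].transform(lambda x: x / x.sum())
# ini_w -> [0.2, 0.3, 0.5], summing to 1 within each date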
script/processing.py → processing.py RENAMED
@@ -281,21 +281,165 @@ def calculate_total_attribution(calculated_p_stock, calculated_b_stock):
     # return df
 
 
-def calculate_return(df, start, end):
+def calcualte_return(df: pd.DataFrame, start, end):
     '''
-    return a df consist of total return for each day,
-    the return at start date would be 0
+    calculate return within a window for each entry of ticker,
+    inclusive on both ends
+
+    this is an intermediate step for attribute calculation,
+    using the cumulative log_return
     '''
-    selected_df = df[df.date.between(start, end)].copy()
-    # set the pct of first row to null
-    selected_df.iloc[0, selected_df.columns.get_indexer(
-        ['portfolio_pct_p', 'portfolio_pct_b'])] = 0
-    selected_df['return_p'] = (
-        1 + selected_df['portfolio_pct_p']).cumprod() - 1
-    selected_df['return_b'] = (
-        1 + selected_df['portfolio_pct_b']).cumprod() - 1
-    selected_df['active_return'] = selected_df.return_p - selected_df.return_b
-    return selected_df
+    df = df[(df.time >= start) & (df.time <= end)].copy()
+    inter_df = df.sort_values(by=['time'])
+    inter_df['cum_log_return'] = inter_df.groupby(
+        'ticker')['log_return'].cumsum()
+    inter_df['percentage_return'] = np.exp(
+        inter_df['cum_log_return']) - 1
+    # patch
+    df['return'] = inter_df['percentage_return']
+    return df
+
+
+def calculate_weighted_return(df: pd.DataFrame, start, end):
+    '''
+    calculate weighted return within a window for each entry of ticker,
+    inclusive, using the weighted_log_return
+    '''
+    df = df[(df.time >= start) & (df.time <= end)].copy()
+    inter_df = df.sort_values(by=['time'])
+    inter_df['cum_weighted_log_return'] = inter_df.groupby(
+        'ticker')['weighted_log_return'].cumsum()
+    inter_df['percentage_return'] = np.exp(
+        inter_df['cum_weighted_log_return']) - 1
+    # patch
+    df['weighted_return'] = inter_df['percentage_return']
+    return df
+
+
+def calculate_log_return(df: pd.DataFrame):
+    '''
+    patch df with the weighted log return and unweighted log return
+    calculated using close price
+
+    an intermediate step to calculate the weighted return;
+    the benefit of using log returns is that they can be aggregated
+    over any time window and work for both portfolio and benchmark
+    '''
+    inter_df = df.sort_values(by=['time'])
+    grouped = inter_df.groupby('ticker')
+    inter_df['prev_w'] = grouped['weight'].shift(1)
+    inter_df['prev_close'] = grouped['close'].shift(1)
+    inter_df['log_return'] = np.log(inter_df['close'] / inter_df['prev_close'])
+    inter_df['weighted_log_return'] = inter_df['log_return'] * \
+        inter_df['prev_w']
+    # patch
+    df['log_return'] = inter_df['log_return']
+    df['weighted_log_return'] = inter_df['weighted_log_return']
+
+# TODO: change to log return instead
+# def calculate_return(df, start, end):
+#     df = df[(df.time >= start) & (df.time <= end)].copy()
+#     df.sort_values(by=['time'], inplace=True)
+#     grouped = df.groupby('ticker')
+#     df['return'] = (1 + grouped.pct.cumprod()) - 1
+#     return df
+
+# def calculate_norm_return(df, start, end):
+#     '''
+#     calculate accumulative normalized return within a window
+#     for each entry of ticker using norm_pct
+
+#     normalized return is the weighted return with respect to
+#     the whole portfolio
+
+#     Return
+#     ------
+#     dataframe
+#         dataframe with return for each ticker
+#     '''
+#     df = df[(df.time >= start) & (df.time <= end)].copy()
+#     df.sort_values(by=['time'], inplace=True)
+#     grouped = df.groupby('ticker')
+#     df['norm_return'] = (1 + grouped.norm_pct.cumprod()) - 1
+#     return df
+
+
+def _uniformize_time_series(profile_df):
+    '''
+    a helper function to create analytic_df
+
+    make each entry in the time series have the same dimension
+    by giving a stock that was held in a previous period but not the
+    next one an entry with 0 shares and 0 ini_w
+
+    Parameters
+    ----------
+    profile_df : dataframe
+        portfolio profile dataframe or benchmark profile dataframe
+
+    Returns
+    -------
+    dataframe
+        dataframe with uniformized time series
+    '''
+    # Get unique time periods
+    time_periods = profile_df['time'].unique()
+    time_periods = sorted(time_periods)
+
+    # Iterate through time periods
+    for i in range(len(time_periods) - 1):
+        current_period = time_periods[i]
+        next_period = time_periods[i + 1]
+
+        current_df = profile_df[profile_df['time'] == current_period]
+        next_df = profile_df[profile_df['time'] == next_period]
+
+        tickers_current = current_df['ticker']
+        tickers_next = next_df['ticker']
+
+        # rows whose ticker is not in tickers_next
+        missing_tickers = current_df[~tickers_current.isin(
+            tickers_next)].copy()
+
+        if len(missing_tickers) != 0:
+            missing_tickers.time = next_period
+            missing_tickers.shares = 0
+            missing_tickers.ini_w = 0
+            profile_df = pd.concat(
+                [profile_df, missing_tickers], ignore_index=True)
+    # reset index
+    return profile_df.reset_index(drop=True)
+
+
+def create_analytic_df(price_df, profile_df):
+    '''
+    create a df for analysis processing
+
+    filling information from the profile df into the stock price df
+    '''
+    uni_profile_df = _uniformize_time_series(profile_df)
+    # TODO handle rename column here
+    df = price_df.merge(uni_profile_df, on=['ticker', 'time'], how='outer')
+    df.sort_values(by=['ticker', 'time'], inplace=True)
+    # add sector, aggregate_sector, display_name and name to missing rows
+    grouped = df.groupby('ticker')
+    df['sector'] = grouped['sector'].fillna(method='ffill')
+    df['aggregate_sector'] = grouped['aggregate_sector'].fillna(method='ffill')
+    df['display_name'] = grouped['display_name'].fillna(method='ffill')
+    df['name'] = grouped['name'].fillna(method='ffill')
+
+    # assign missing ini_w
+    df['ini_w'] = grouped['ini_w'].fillna(method='ffill')
+    # assign missing shares, benchmark doesn't have shares
+    if ('shares' in df.columns):
+        df['shares'] = grouped['shares'].fillna(method='ffill')
+    # remove profile and price entries before the first profile entry from df
+    df.dropna(subset=['ini_w'], inplace=True)
+    df.dropna(subset=['close'], inplace=True)
+    # remove where weight is 0
+    df = df[df['ini_w'] != 0].copy()
+    return df
 
 
 def calculate_attributes_between_dates(start, end, calculated_p_stock, calculated_b_stock):
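A note on create_analytic_df above: the outer merge leaves price-only rows with NaN in the profile columns, and the per-ticker forward fill is what propagates static attributes (sector, display_name, ini_w, shares) from each rebalancing row down to the daily price rows that follow it. A minimal sketch of that fill pattern with hypothetical rows:

import pandas as pd

df = pd.DataFrame({
    'ticker': ['A', 'A', 'A'],
    'time': pd.to_datetime(['2023-08-01', '2023-08-02', '2023-08-03']),
    'sector': ['Tech', None, None],   # profile info only on the rebalance day
})
df['sector'] = df.groupby('ticker')['sector'].ffill()
# sector -> ['Tech', 'Tech', 'Tech']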
@@ -361,6 +505,15 @@ def calculate_attributes_between_dates(start, end, calculated_p_stock, calculated_b_stock):
     return df
 
 
+def calculate_cum_pnl(df, start, end):
+    '''return df with cumulative pnl within a window'''
+    df = df[df.time.between(start, end, inclusive='both')].copy()
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    df['cum_pnl'] = grouped['pnl'].cumsum()
+    return df
+
+
 def change_resolution(df, freq='W'):
     '''
     aggregate by keeping the first entry of the freq period,
@@ -370,6 +523,33 @@ def change_resolution(df, freq='W'):
     return df.groupby('freq').first().reset_index()
 
 
+def calculate_pnl(df):
+    '''
+    patch df with a pnl column
+
+    pnl is calculated using cash
+    '''
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    df['pnl'] = grouped['cash'].diff()
+
+
+def calculate_pct(df):
+    '''
+    calculate pct using close price
+    '''
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    df['pct'] = grouped['close'].pct_change()
+
+
+def calculate_norm_pct(df):
+    '''
+    use weight to calculate the norm pct
+    '''
+    df['norm_pct'] = df.weight * df.pct
+
+
 def calculate_weight_using_cash(df):
     '''
     patch df with current weight for each entry
@@ -381,9 +561,9 @@ def calculate_weight_using_cash(df):
         dataframe with processed cash column
 
     '''
-    df['cur_w'] = float('nan')
+    df['weight'] = float('nan')
     grouped = df.groupby('time')
-    df.cur_w = grouped.cash.transform(lambda x: x / x.sum())
+    df.weight = grouped.cash.transform(lambda x: x / x.sum())
 
 
 def calculate_cash(df):
@@ -397,3 +577,173 @@ def calculate_cash(df):
         dataframe with processed shares and close column
     '''
     df['cash'] = df['shares'] * df['close']
+
+
+def calculate_weight_using_pct(df):
+    '''
+    calculate weight using the weight column
+
+    calculate benchmark stock using this, since benchmark stock
+    doesn't have share information
+
+    Parameters
+    ----------
+    df : dataframe
+        dataframe with weight, pct on closing and ini_w columns
+    '''
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    for _, group in grouped:
+        prev_row = None
+        for index, row in group.iterrows():
+            if prev_row is None:
+                prev_row = df.loc[index]
+                continue
+            df.loc[index, 'weight'] = prev_row['weight'] * (1 + row['pct'])
+            prev_row = df.loc[index]
+    # normalize weight
+    grouped = df.groupby('time')
+    normed_weight = grouped['weight'].transform(lambda x: x / x.sum())
+    df['weight'] = normed_weight
+
+
+def calculate_periodic_BHB(agg_b, agg_p):
+    '''
+    calculate periodic BHB for each ticker entry
+
+    the accumulated return of a period is used;
+    the weight is the weight at the beginning of the period
+
+    Note:
+    ----
+    if there is only one entry in a period, the return will be nan
+
+    Parameters
+    ----------
+    agg_b : pd.DataFrame
+        aggregated benchmark analytic_df
+    agg_p : pd.DataFrame
+        aggregated portfolio analytic_df
+
+    Returns
+    -------
+    pd.DataFrame
+        periodic BHB result containing allocation, interaction, selection, notional_active_return and active_return
+    '''
+    # merge both
+    agg_b['in_benchmark'] = True
+    agg_p['in_portfolio'] = True
+    selected_column = ['ticker', 'aggregate_sector',
+                       'prev_weight', 'return', 'period', 'display_name']
+    columns_to_fill = ['return_b', 'return_p',
+                       'prev_weight_p', 'prev_weight_b']
+    merged_df = pd.merge(agg_b[['in_benchmark'] + selected_column],
+                         agg_p,
+                         how='outer',
+                         on=['period', 'ticker'],
+                         suffixes=('_b', '_p'))
+    merged_df['in_portfolio'].fillna(False, inplace=True)
+    merged_df['in_benchmark'].fillna(False, inplace=True)
+    merged_df[columns_to_fill] = merged_df[columns_to_fill].fillna(0)
+
+    # complement fill aggregate_sector and display_name
+    merged_df['aggregate_sector_b'].fillna(
+        merged_df['aggregate_sector_p'], inplace=True)
+    merged_df["display_name_b"].fillna(merged_df.display_name_p, inplace=True)
+    merged_df.rename(columns={'aggregate_sector_b': 'aggregate_sector',
+                              'display_name_b': 'display_name',
+                              }, inplace=True)
+    merged_df.drop(columns=['aggregate_sector_p',
+                            'display_name_p'], inplace=True)
+
+    # calculate active return
+    merged_df['weighted_return_p'] = merged_df['return_p'] * \
+        merged_df['prev_weight_p']
+    merged_df['weighted_return_b'] = merged_df['return_b'] * \
+        merged_df['prev_weight_b']
+    merged_df['active_return'] = merged_df['weighted_return_p'] - \
+        merged_df['weighted_return_b']
+
+    # allocation, interaction, selection and notional active return
+    merged_df['allocation'] = (
+        merged_df.prev_weight_p - merged_df.prev_weight_b) * merged_df.return_b
+    merged_df['interaction'] = (merged_df.return_p - merged_df.return_b) \
+        * (merged_df.prev_weight_p - merged_df.prev_weight_b)
+    merged_df['selection'] = (
+        merged_df.return_p - merged_df.return_b) * merged_df.prev_weight_b
+    merged_df['notional_active_return'] = merged_df['allocation'] + \
+        merged_df['interaction'] + merged_df['selection']
+    return merged_df
+
+
+def _merge_anlaytic_df(portfolio_df, benchmark_df):
+    pass
+
+
+def aggregate_analytic_df_by_period(df, freq):
+    '''
+    return an aggregated analytic_df with weekly, monthly, yearly or daily frequency
+
+    each ticker will have 1 row for each period,
+    cash is the value at the end of the period.
+    shares is the # of shares at the end of the period.
+    prev_weight is the weight of that ticker entry at the end of the previous period.
+    log_return is the sum of log_return within the period.
+    weight is the weight of that ticker entry at the end of the period.
+    return is from the last of the previous period to the last of the current period.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        analytic_df, dataframe of stock price that has weight and log_return information
+    freq : str
+        weekly: 'W-MON' start on Tuesday, end on Monday,
+        monthly: 'M',
+        yearly: 'Y',
+        daily: "D"
+
+    Returns
+    -------
+    pd.DataFrame
+        aggregated analytic_df with weekly, monthly, yearly or daily frequency
+    '''
+    # create prev_weight
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    df['prev_weight'] = grouped['weight'].shift(1)
+
+    # aggregate by summing log return and keeping the first prev_weight
+    df['period'] = df.time.dt.to_period(freq)
+    grouped = df.groupby(['period', 'ticker'])
+    agg_rules = {'display_name': 'first',
+                 'aggregate_sector': 'first',
+                 'prev_weight': 'first',
+                 'log_return': 'sum',
+                 'weight': 'last'
+                 }
+
+    # handle aggregation on benchmark
+    if 'cash' in df.columns and 'shares' in df.columns:
+        agg_rules['cash'] = 'last'
+        agg_rules['shares'] = 'last'
+
+    # aggregation
+    agg_df = grouped.agg(agg_rules)
+
+    # calculate return by converting the summed log return to percentage return
+    agg_df['return'] = np.exp(agg_df.log_return) - 1
+
+    # make it a one dimensional dataframe
+    agg_df.reset_index(inplace=True)
+    return agg_df
+
+
+def aggregate_bhb_df(df, by="total"):
+    keys = ['period', 'aggregate_sector'] if by == 'sector' else ['period']
+    agg_df = df.groupby(keys)[['active_return',
+                               'allocation',
+                               'interaction',
+                               'selection',
+                               'notional_active_return']].sum()
+    return agg_df
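For reference, calculate_periodic_BHB above is the classic Brinson-Hood-Beebower decomposition applied per ticker and period: allocation = (w_p - w_b) * r_b, selection = w_b * (r_p - r_b), interaction = (w_p - w_b) * (r_p - r_b), and the three terms sum exactly to the notional active return w_p * r_p - w_b * r_b. A self-contained numeric check of that identity with hypothetical weights and returns:

# hypothetical single-ticker, single-period BHB check
w_p, w_b = 0.30, 0.20   # start-of-period weights in portfolio / benchmark
r_p, r_b = 0.05, 0.02   # period returns in portfolio / benchmark

allocation = (w_p - w_b) * r_b            # 0.002
selection = (r_p - r_b) * w_b             # 0.006
interaction = (r_p - r_b) * (w_p - w_b)   # 0.003
notional_active_return = allocation + selection + interaction

# identity: the effects sum to the weighted return difference
assert abs(notional_active_return - (w_p * r_p - w_b * r_b)) < 1e-12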
testing_pipeline.ipynb CHANGED
The diff for this file is too large to render. See raw diff