huggingface112 committed on
Commit a77544c
1 Parent(s): 2b059b0

enhanced historyReturnCard
appComponents.py CHANGED
@@ -1,4 +1,4 @@
-from script import processing
+import processing
 from datetime import datetime, timedelta
 import panel as pn
 import pandas as pd
@@ -14,7 +14,7 @@ import plotly.graph_objs as go
 # import warnings
 pn.extension('mathjax')
 pn.extension('plotly')
-pn.extension('plotly')
+
 # warnings.filterwarnings("ignore", category=pd.core.common.SettingWithCopyWarning)
 # overal performance default to 30 days
 
@@ -38,7 +38,6 @@ def create_portfolio_overview(df_list):
     ip_eval_df = p_eval_df.interactive()
     isector_eval_df = sector_eval_df.interactive()
 
-
     ranged_ip_eval_df = ip_eval_df[ip_eval_df.date.between(
         range_slider.param.value_start, range_slider.param.value_end)]
     ranged_isector_eval_df = isector_eval_df[isector_eval_df.date.between(
@@ -233,7 +232,7 @@ class TotalReturnCard(Viewer):
 
     def create_report(self):
         # Calculate the total return and risk
-        result = processing.calculate_return(
+        result = processing.calculate_norm_return(
             self.eval_df, self.start_date, self.end_date)
         most_recent_row = result.tail(1)
         active_return = most_recent_row.active_return.values[0]
@@ -258,7 +257,7 @@ class TotalReturnCard(Viewer):
         allocation = total_attributes.allocation
         selection = total_attributes.selection
 
-        # Create a function for text report
+        # Create a function for text report
         report = f"""
         <style>
         .compact-container {{
@@ -347,7 +346,7 @@ class TotalReturnCard(Viewer):
         return report
 
     def create_plot(self):
-        result = processing.calculate_return(
+        result = processing.calculate_norm_return(
             self.eval_df, self.start_date, self.end_date)
         fig = px.line(result, x="date", y=['return_p', 'return_b'])
         fig.update_traces(mode="lines+markers",
@@ -463,37 +462,52 @@ class DrawDownCard(Viewer):
 
 
 class HistReturnCard(Viewer):
-    eval_df = param.Parameter()
+
     return_barplot = param.Parameterized()
+    calculated_b_stock = param.Parameterized()
+    calculated_p_stock = param.Parameterized()
     select_resolution = param.ObjectSelector(
         default='每月回报', objects=['每日回报', '每周回报', '每月回报', '每年回报'])
 
+    def _calculate_return(self, df, freq):
+        # weekly periods start on Tuesday and end on Monday
+        grouped = df.groupby(pd.Grouper(key='time', freq=freq))
+        agg_df = grouped.agg({'weighted_log_return': 'sum'})
+        # time indicates the end of each period
+        agg_df['time'] = agg_df.index
+        # convert the summed log return to a percentage return
+        agg_df['return'] = np.exp(agg_df['weighted_log_return']) - 1
+
+        # return agg_df
+        return agg_df.reset_index(drop=True)
+
     def update_aggregate_df(self):
         freq = None
         if self.select_resolution == "每日回报":
-            return self.eval_df
+            freq = "D"
         elif self.select_resolution == "每月回报":
             freq = 'M'
         elif self.select_resolution == "每年回报":
             freq = 'Y'
         elif self.select_resolution == "每周回报":
-            freq = 'W'
-        # I don't think this formula is correct, check this later
-        agg_df = self.eval_df.groupby([pd.Grouper(key='date', freq=freq)])\
-            .aggregate({'portfolio_pct_p': 'sum', 'portfolio_pct_b': 'sum'})
-        agg_df['portfolio_return_p'] = np.exp(agg_df.portfolio_pct_p) - 1
-        agg_df['portfolio_return_b'] = np.exp(agg_df.portfolio_pct_b) - 1
-        return agg_df.reset_index()
+            freq = 'W-MON'
+
+        p_return = self._calculate_return(self.calculated_p_stock, freq)
+        b_return = self._calculate_return(self.calculated_b_stock, freq)
+
+        merge_df = pd.merge(p_return, b_return, on='time',
+                            how='outer', suffixes=('_p', '_b'))
+        return merge_df
 
     def create_attributes_barplot(self):
-        self.attribute_df = self.update_attributes_df()
-        fig = px.bar(self.attribute_df, x='date', y=[
-            'allocation', 'selection', 'interaction', 'notional_return', 'active_return'])
+        self.attribute_df = self._update_attributes_df()
+        fig = px.bar(self.attribute_df, x='period_str', y=[
+            'allocation', 'selection', 'interaction', 'notional_active_return', 'active_return'])
         colname_to_name = {
             'allocation': '分配',
             'selection': '选择',
             'interaction': '交互',
-            'notional_return': '名义主动回报',
+            'notional_active_return': '名义主动回报',
             'active_return': '实际主动回报'
         }
         fig.for_each_trace(lambda t: t.update(name=colname_to_name.get(t.name, t.name),
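A note on the aggregation in the hunk above: the new _calculate_return is sound where the deleted code (flagged "I don't think this formula is correct") was not, because log returns are additive across time. Summing weighted_log_return within a period and then taking exp(sum) - 1 recovers the compounded simple return for that period. A minimal standalone sketch of the identity, with made-up daily returns (not data from this repo):

import numpy as np
import pandas as pd

# two hypothetical daily simple returns: +10% then -5%
simple = pd.Series([0.10, -0.05])

# compounding directly: (1.10 * 0.95) - 1 = 0.045
compounded = (1 + simple).prod() - 1

# the approach used in the diff: sum log returns, then exponentiate
via_logs = np.exp(np.log(1 + simple).sum()) - 1

assert np.isclose(compounded, via_logs)  # both equal 0.045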
@@ -502,6 +516,7 @@ class HistReturnCard(Viewer):
                                               hovertemplate=t.hovertemplate.replace(
                                                   t.name, colname_to_name.get(t.name, t.name))
                                               ))
+
         fig.update_layout(barmode='group', title='主动回报归因',
                           bargap=0.0, bargroupgap=0.0)
         fig.update_layout(**styling.plot_layout)
@@ -510,8 +525,8 @@ class HistReturnCard(Viewer):
 
     def create_return_barplot(self):
         self.agg_df = self.update_aggregate_df()
-        fig = px.bar(self.agg_df, x='date', y=[
-            'portfolio_return_p', 'portfolio_return_b'],
+        fig = px.bar(self.agg_df, x='time', y=[
+            'return_p', 'return_b'],
                      barmode='overlay',
                      title='周期回报',
                      )
@@ -533,14 +548,14 @@ class HistReturnCard(Viewer):
 
         return fig.to_dict()
 
-    @param.depends('eval_df', 'select_resolution', watch=True)
+    @param.depends('calculated_p_stock', 'calculated_b_stock', 'select_resolution', watch=True)
     def update(self):
         return_barplot = self.create_return_barplot()
         self.return_barplot.object = return_barplot
         attributes_barplot = self.create_attributes_barplot()
         self.attribute_barplot.object = attributes_barplot
 
-    def update_attributes_df(self):
+    def _update_attributes_df(self):
         freq = None
         if self.select_resolution == "每日回报":
             freq = 'D'
@@ -549,19 +564,23 @@ class HistReturnCard(Viewer):
         elif self.select_resolution == "每年回报":
             freq = 'Y'
         elif self.select_resolution == "每周回报":
-            freq = 'W'
-        p_stock = processing.change_resolution(self.calculated_p_stock, freq)
-        b_stock = processing.change_resolution(self.calculated_b_stock, freq)
-        return processing.calculate_total_attribution(p_stock, b_stock)
-
-    def __init__(self, eval_df, calculated_p_stock, calculated_b_stock, **params):
-        self.eval_df = eval_df
+            freq = 'W-MON'
+        agg_p = processing.aggregate_analytic_df_by_period(self.calculated_p_stock, freq)
+        agg_b = processing.aggregate_analytic_df_by_period(self.calculated_b_stock, freq)
+        bhb_df = processing.calculate_periodic_BHB(agg_p, agg_b)
+        agg_bhb = processing.aggregate_bhb_df(bhb_df)
+        agg_bhb['period_str'] = agg_bhb.index.map(lambda x: str(x))
+        return agg_bhb
+
+    def __init__(self, calculated_p_stock, calculated_b_stock, **params):
         self.calculated_p_stock = calculated_p_stock
         self.calculated_b_stock = calculated_b_stock
+
         self._range_slider = pn.widgets.DateRangeSlider(
             name='Date Range Slider',
-            start=self.eval_df.date.min(), end=self.eval_df.date.max(),
-            value=(self.eval_df.date.min(), self.eval_df.date.max()),
+            start=self.calculated_p_stock.time.min(), end=self.calculated_p_stock.time.max(),
+            value=(self.calculated_p_stock.time.min(),
+                   self.calculated_p_stock.time.max()),
 
         )
         self.return_barplot = pn.pane.Plotly(self.create_return_barplot())
@@ -578,20 +597,21 @@ class HistReturnCard(Viewer):
578
 
579
  class PortfolioComposationCard(Viewer):
580
  p_stock_df = param.Parameterized()
 
581
 
582
  def create_cash_position_df(self):
583
- aggregate_df = self.p_stock_df.groupby('date', as_index=False).agg({
584
- 'current_weight': 'sum'
585
  })
586
  aggregate_df['type'] = 'portfolio'
587
  not_in_portfolio_df = aggregate_df.copy()
588
  not_in_portfolio_df['type'] = 'not_in_portfolio'
589
- not_in_portfolio_df['current_weight'] = 1000
590
  # append df
591
  aggregate_df = pd.concat([aggregate_df, not_in_portfolio_df])
592
  # sort
593
- aggregate_df.sort_values(by=['date'], inplace=True)
594
- return aggregate_df[aggregate_df.date.between(self.date_range.value[0], self.date_range.value[1])]
595
 
596
  @param.depends('p_stock_df', 'date_range.value', watch=True)
597
  def update_trend_plot(self):
@@ -599,7 +619,7 @@ class PortfolioComposationCard(Viewer):
 
     def create_trend_plot(self):
         aggregate_df = self.create_cash_position_df()
-        fig = px.bar(aggregate_df, x='date', y='current_weight', color='type')
+        fig = px.bar(aggregate_df, x='time', y='cash', color='type')
         fig.update_layout(legend=dict(
             orientation="h",
             yanchor="bottom",
@@ -619,57 +639,69 @@ class PortfolioComposationCard(Viewer):
         return fig.to_dict()
 
     def create_treemap(self):
-        self.selected_df['position'] = 'portfolio'
+        self.selected_df = self.p_stock_df[self.p_stock_df.time ==
+                                           self.datetime_picker.value]
+        self.selected_df['position'] = '股票'
         not_in_portfolio_row = pd.DataFrame({
-            'display_name': ['不在portfolio中'],
-            'position': ['not_in_portfolio'],
-            'aggregate_sector': ['不在portfolio中'],
-            'current_weight': [1000],
-            'portfolio_return': [0],
-            'portfolio_pct': [0]
+            'display_name': ['闲置'],
+            'position': ['闲置'],
+            'aggregate_sector': ['闲置'],
+            'cash': [100],
+            'weighted_return': [0]
         })
         df = pd.concat([self.selected_df, not_in_portfolio_row],
                        ignore_index=True)
-        fig = px.treemap(df, path=[px.Constant('cash_position'), 'position', 'aggregate_sector', 'display_name'], values='current_weight',
-                         color='portfolio_return', hover_data=['portfolio_return', 'portfolio_pct'],
+
+        fig = px.treemap(df,
+                         # path=[px.Constant('cash_position'), 'position',
+                         #       'aggregate_sector', 'display_name'],
+                         path=['position', 'aggregate_sector', 'display_name'],
+                         values='cash',
+                         color='weighted_return',
+                         hover_data=['weighted_return', 'cash'],
                          color_continuous_scale='RdBu',
                          color_continuous_midpoint=np.average(
-                             df['portfolio_return'])
+                             df['weighted_return'])
                          )
+
         fig.update_layout(styling.plot_layout)
         fig.update_layout(coloraxis_colorbar=dict(
-            title="weighted return"))
-        colname_to_name = {
-            'cash_position': '现金分布',
-            'portfolio_return': '加权回报',
-            'not_in_portfolio': '不在portfolio中',
-            'current_weight': '现金',
-
-        }
-        fig.for_each_trace(lambda t: t.update(name=colname_to_name.get(t.name, t.name),
-                                              hovertemplate=t.hovertemplate.replace(
-                                                  t.name, colname_to_name.get(t.name, t.name))
-                                              ))
+            title="累计加权回报率"))
+        # colname_to_name = {
+        #     'cash_position': '现金分布',
+        #     'portfolio_return': '加权回报',
+        #     'not_in_portfolio': '不在portfolio中',
+        #     'current_weight': '现金',
+
+        # }
+        # fig.for_each_trace(lambda t: t.update(name=colname_to_name.get(t.name, t.name),
+        #                                       hovertemplate=t.hovertemplate.replace(
+        #                                           t.name, colname_to_name.get(t.name, t.name))
+        #                                       ))
         return fig.to_dict()
 
-    def __init__(self, p_stock_df, **params):
-        self.p_stock_df = p_stock_df
-        self.date_picker = pn.widgets.DatetimePicker(name='选择某日资金分布',
-                                                     start=self.p_stock_df.date.min(),
-                                                     end=self.p_stock_df.date.max(),
-                                                     value=self.p_stock_df.date.max(),
-                                                     enabled_dates=[datetime_object.date(
-                                                     ) for datetime_object in self.p_stock_df.date.unique()],
-                                                     enable_time=False,
-                                                     )
+    def __init__(self, analytic_df, **params):
+        self.p_stock_df = analytic_df
+        self.p_stock_df = processing.calculate_weighted_return(self.p_stock_df,
+                                                               start=self.p_stock_df.time.min(),
+                                                               end=self.p_stock_df.time.max())
+
+        # convert datetime to date
+        enabled_dates = [time.date() for time in self.p_stock_df.time.unique()]
+        self.datetime_picker = pn.widgets.DatetimePicker(name='选择某日资金分布',
+                                                         start=self.p_stock_df.time.min(),
+                                                         end=self.p_stock_df.time.max(),
+                                                         value=self.p_stock_df.time.max(),
+                                                         enabled_dates=enabled_dates,
+                                                         )
         self.date_range = pn.widgets.DateRangeSlider(name='选择资金分布走势区间',
-                                                     start=self.p_stock_df.date.min(),
-                                                     end=self.p_stock_df.date.max(),
-                                                     value=(self.p_stock_df.date.min(
-                                                     ), self.p_stock_df.date.max()),
+                                                     start=self.p_stock_df.time.min(),
+                                                     end=self.p_stock_df.time.max(),
+                                                     value=(self.p_stock_df.time.min(
+                                                     ), self.p_stock_df.time.max()),
                                                      )
-        self.selected_df = self.p_stock_df[self.p_stock_df.date ==
-                                           self.date_picker.value]
+
         self.tree_plot = pn.pane.Plotly(self.create_treemap())
         self.trend_plot = pn.pane.Plotly(self.create_trend_plot())
 
@@ -677,83 +709,96 @@ class PortfolioComposationCard(Viewer):
         super().__init__(**params)
 
     def __panel__(self):
-        self._layout = pn.Card(self.date_picker, self.tree_plot, self.date_range, self.trend_plot,
+        self._layout = pn.Card(self.datetime_picker, self.tree_plot, self.date_range, self.trend_plot,
                                width=500, header=pn.pane.Str('资金分布'))
         return self._layout
 
-    @param.depends('date_picker.value', 'p_stock_df', watch=True)
+    @param.depends('datetime_picker.value', 'p_stock_df', watch=True)
     def update(self):
-        self.selected_df = self.p_stock_df[self.p_stock_df.date ==
-                                           self.date_picker.value]
         tree_plot = self.create_treemap()
         self.tree_plot.object = tree_plot
 
 
 class BestAndWorstStocks(Viewer):
-    p_stock_df = param.Parameter()
-    b_stock_df = param.Parameter()
     start_date = param.Parameter()
     end_date = param.Parameter()
-
-    def calculate_attributes(self):
-        result_df = processing.calculate_attributes_between_dates(self.start_date,
-                                                                  self.end_date,
-                                                                  self.p_stock_df,
-                                                                  self.b_stock_df)
-
-        return result_df
+    hidden_col = [
+        'index',
+        'open',
+        'high',
+        'low',
+        'close',
+        'volume',
+        'money',
+        'pct',
+        'sector',
+        'aggregate_sector',
+        'ave_price',
+        'weight',
+        'ini_w',
+        'name',
+        'pnl'
+    ]
+    forzen_columns = ['display_name', 'return', 'cum_pnl', 'shares']
+    description = "股票表现排名"
+    tooltip = "在一个时间窗口中累计盈利最高和最低的股票,包括已经卖出的股票,如果表格的日期小于窗口的结束时间代表已经卖出"
 
     def create_tabulator(self, df):
         col_title_map = {
-            'display_name_p': '股票名称',
+            'display_name': '股票名称',
             'ticker': '股票代码',
-            'pct_p': '加权回报率',
-            'prev_w_in_p_b': '在benchmark中的权重',
-            'prev_w_in_p_p': '在portfolio中的权重',
-            'allocation': '分配分数',
-            'selection': '选择分数',
-            'interaction': '交互分数',
-            'return': '未加权回报率',
-            'active_return': '加权主动回报率',
+            'time': '日期',
+            'return': '回报率',
+            'sector': '行业',
+            'shares': '持仓',
+            'cash': '现金',
+            'cum_pnl': '累计盈利',
         }
         return pn.widgets.Tabulator(df, sizing_mode='stretch_width',
-                                    hidden_columns=['index', 'display_name_b',
-                                                    'pct_b', 'in_portfolio',
-                                                    ],
-                                    frozen_columns=['display_name_p'],
-                                    titles=col_title_map)
+                                    hidden_columns=self.hidden_col,
+                                    frozen_columns=self.forzen_columns,
+                                    titles=col_title_map
+                                    )
 
     @param.depends('start_date', 'end_date', watch=True)
     def update(self):
         result_df = self.get_processed_df()
-        self.best_5_tabulator.value = result_df.tail(5)
-        self.worst_5_tabulator.value = result_df.head(5)
+        self.best_5_tabulator.value = result_df.head(5)
+        self.worst_5_tabulator.value = result_df.tail(5)
+
+    def _get_cum_return(self, df):
+        '''return a df containing the cumulative return at the end date'''
+        result_df = processing.calcualte_return(df=df,
+                                                start=self.start_date,
+                                                end=self.end_date)
+        grouped = result_df.groupby('ticker')
+        last_row = result_df.loc[grouped.time.idxmax()]
+        return last_row
 
     def get_processed_df(self):
         '''
         calculate attributes and return a sorted dataframe on weighted return
         '''
-
-        result_df = self.calculate_attributes()
-        result_df = result_df[result_df.in_portfolio]
-        result_df.sort_values(by='return', inplace=True)
-        return result_df
-
-    def __init__(self, p_stock_df, b_stock_df, **params):
-        self.p_stock_df = p_stock_df
-        self.b_stock_df = b_stock_df
+        df = processing.calculate_cum_pnl(self.analytic_df,
+                                          start=self.start_date,
+                                          end=self.end_date)
+        df = self._get_cum_return(df)
+        return df.sort_values(by='cum_pnl', ascending=False)
+
+    def __init__(self, analytic_df, **params):
+        self.analytic_df = analytic_df
         self._date_range = pn.widgets.DateRangeSlider(
             name='选择计算回报的时间区间',
-            start=p_stock_df.date.min(),
-            end=p_stock_df.date.max(),
-            value=(p_stock_df.date.max() -
-                   timedelta(days=7), p_stock_df.date.max())
+            start=self.analytic_df.time.min(),
+            end=self.analytic_df.time.max(),
+            value=(self.analytic_df.time.max() -
+                   timedelta(days=7), self.analytic_df.time.max())
         )
         self.start_date = self._date_range.value_start
         self.end_date = self._date_range.value_end
         result_df = self.get_processed_df()
-        self.best_5_tabulator = self.create_tabulator(result_df.tail(5))
-        self.worst_5_tabulator = self.create_tabulator(result_df.head(5))
+        self.best_5_tabulator = self.create_tabulator(result_df.head(5))
+        self.worst_5_tabulator = self.create_tabulator(result_df.tail(5))
         super().__init__(**params)
 
     @param.depends('_date_range.value', watch=True)
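The one-row-per-ticker selection in _get_cum_return above is a pandas idiom worth noting: groupby('ticker').time.idxmax() returns, for each ticker, the index label of its latest timestamp, so a single .loc call fetches each ticker's final row within the window. A small sketch with invented values:

import pandas as pd

df = pd.DataFrame({
    'ticker': ['A', 'A', 'B', 'B'],
    'time': pd.to_datetime(['2023-08-01', '2023-08-02',
                            '2023-08-01', '2023-08-03']),
    'cum_pnl': [1.0, 2.5, -0.5, 0.75],
})
last_idx = df.groupby('ticker').time.idxmax()  # index of latest time per ticker
last_rows = df.loc[last_idx]                   # one row per ticker
# ticker A -> its 2023-08-02 row, ticker B -> its 2023-08-03 row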
@@ -768,7 +813,12 @@ class BestAndWorstStocks(Viewer):
             self.best_5_tabulator,
             pn.pane.Str('加权回报率最低回报5只股票'),
             self.worst_5_tabulator,
-            max_width=500, header=pn.pane.Str('Portfolio中最高回报和最低加权回报率股票'))
+            max_width=500,
+            header=pn.Row(
+                pn.pane.Str(self.description),
+                pn.widgets.TooltipIcon(value=self.tooltip)
+            )
+        )
         return self._layout
 
 
db_operation.py CHANGED
@@ -121,10 +121,49 @@ def append_to_stocks_price_table(df):
     _append_df_to_db(df, ts.STOCKS_PRICE_TABLE, ts.STOCKS_PRICE_TABLE_SCHEMA)
 
 def get_all_stocks():
+    '''
+    get all stocks information
+
+    Returns
+    -------
+    pd.DataFrame
+        all stocks information
+    '''
     with create_engine(db_url).connect() as conn:
         all_stocks = pd.read_sql(ts.STOCKS_DETAILS_TABLE, con=conn)
     return all_stocks
+
+def save_portfolio_analytic_df(df):
+    table_name = 'analytic_p'
+    with create_engine(db_url).connect() as conn:
+        df.to_sql(table_name, con=conn, if_exists='replace', index=False)
+
+def get_portfolio_analytic_df():
+    table_name = 'analytic_p'
+    with create_engine(db_url).connect() as conn:
+        df = pd.read_sql(table_name, con=conn)
+    return df
+
+
+def save_benchmark_analytic_df(df):
+    table_name = 'analytic_b'
+    with create_engine(db_url).connect() as conn:
+        df.to_sql(table_name, con=conn, if_exists='replace', index=False)
+
+def get_benchmark_analytic_df():
+    table_name = 'analytic_b'
+    with create_engine(db_url).connect() as conn:
+        df = pd.read_sql(table_name, con=conn)
+    return df
+
+def save_analytic_df(df):
+    table_name = 'analytic'
+    with create_engine(db_url).connect() as conn:
+        df.to_sql(table_name, con=conn, if_exists='replace', index=False)
+
+def get_analytic_df():
+    table_name = 'analytic'
+    with create_engine(db_url).connect() as conn:
+        df = pd.read_sql(table_name, con=conn)
+    return df
 
 def _get_all_row(table_name, ts_column='date'):
     with create_engine(db_url).connect() as conn:
         df = pd.read_sql(table_name, con=conn)
@@ -141,8 +180,15 @@ def get_stocks_price(tickers: list[str]):
     '''
     return df of stock price within ticker in stocks price table
     '''
-    query = f"SELECT * FROM {ts.STOCKS_PRICE_TABLE} WHERE ticker IN {tuple(tickers)}"
+    if len(tickers) == 0:
+        # so the returned df has the same schema as the table
+        query = f"SELECT * FROM {ts.STOCKS_PRICE_TABLE} WHERE 1=0"
+    elif len(tickers) == 1:
+        query = f"SELECT * FROM {ts.STOCKS_PRICE_TABLE} WHERE ticker = '{tickers[0]}'"
+    else:
+        query = f"SELECT * FROM {ts.STOCKS_PRICE_TABLE} WHERE ticker IN {tuple(tickers)}"
     with create_engine(db_url).connect() as conn:
         df = pd.read_sql(query, con=conn)
     df.time = pd.to_datetime(df.time)
-    return df
+    # drop duplicates
+    return df.drop_duplicates(subset=['ticker', 'time'])
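The new branching in get_stocks_price works around a real quirk of interpolating Python tuples into SQL: tuple(['A']) renders as ('A',) with a trailing comma and tuple([]) renders as (), both invalid in an IN clause, hence the dedicated equality and WHERE 1=0 branches. A sketch of the quirk, plus a parameterized alternative that would sidestep both the syntax issue and injection risk (the table name here is illustrative, not from the repo):

from sqlalchemy import bindparam, text

print(f"... WHERE ticker IN {tuple(['a', 'b'])}")  # IN ('a', 'b')  -- valid
print(f"... WHERE ticker IN {tuple(['a'])}")       # IN ('a',)     -- invalid SQL
print(f"... WHERE ticker IN {tuple([])}")          # IN ()         -- invalid SQL

# expanding bind parameter: sqlalchemy renders one placeholder per list element
query = text("SELECT * FROM stocks_price WHERE ticker IN :tickers")
query = query.bindparams(bindparam('tickers', expanding=True))
# conn.execute(query, {'tickers': ['a', 'b']})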
index_page.py CHANGED
@@ -8,14 +8,15 @@ import random
 import scipy.stats as stats
 import hvplot.pandas  # noqa
 from sqlalchemy import create_engine
-from . import api
+import api
 # from backgroundTask import stocks_stream
 from functools import partial
 import plotly.graph_objects as go
 from panel.viewable import Viewer
-from script import processing
+import processing
 import appComponents
 import param
+import db_operation as db
 # import warnings
 pn.extension('mathjax')
 # warnings.filterwarnings("ignore", category=pd.core.common.SettingWithCopyWarning)
@@ -25,45 +26,47 @@ db_url = 'sqlite:///instance/local.db'
 engine = create_engine(db_url)
 
 
-p_eval_df = None
-calculated_b_stock = None
-calculated_p_stock = None
+
+analytic_p = db.get_portfolio_analytic_df()
+analytic_b = db.get_benchmark_analytic_df()
 # load benchmark stock
-with engine.connect() as connection:
-    calculated_b_stock = pd.read_sql('calculated_b_stock', con=connection)
-    calculated_p_stock = pd.read_sql('calculated_p_stock', con=connection)
-    p_eval_df = pd.read_sql('p_eval_result', con=connection)
+# with engine.connect() as connection:
+#     analytics_df = pd.read
+#     calculated_b_stock = pd.read_sql('calculated_b_stock', con=connection)
+#     calculated_p_stock = pd.read_sql('calculated_p_stock', con=connection)
+#     p_eval_df = pd.read_sql('p_eval_result', con=connection)
 
 stock_overview = appComponents.BestAndWorstStocks(
-    p_stock_df=calculated_p_stock, b_stock_df=calculated_b_stock)
+    analytic_df=analytic_p)
 composation_card = appComponents.PortfolioComposationCard(
-    p_stock_df=calculated_p_stock)
+    analytic_p)
 monthly_return_card = appComponents.HistReturnCard(
-    eval_df=p_eval_df, calculated_p_stock=calculated_p_stock, calculated_b_stock=calculated_b_stock)
-total_return_card = appComponents.TotalReturnCard(name='Range', eval_df=p_eval_df,
-                                                  b_stock_df=calculated_b_stock,
-                                                  p_stock_df=calculated_p_stock,
-                                                  value=(0, 20))
-drawdown_card = appComponents.DrawDownCard(
-    eval_df=p_eval_df, calculated_p_stock=calculated_p_stock, calculated_b_stock=calculated_b_stock)
+    calculated_p_stock=analytic_p, calculated_b_stock=analytic_b)
+# total_return_card = appComponents.TotalReturnCard(name='Range', eval_df=p_eval_df,
+#                                                   b_stock_df=calculated_b_stock,
+#                                                   p_stock_df=calculated_p_stock,
+#                                                   value=(0, 20))
+# drawdown_card = appComponents.DrawDownCard(
+#     eval_df=p_eval_df, calculated_p_stock=calculated_p_stock, calculated_b_stock=calculated_b_stock)
 
-top_header = appComponents.TopHeader(
-    eval_df=p_eval_df
-)
+# top_header = appComponents.TopHeader(
+#     eval_df=p_eval_df
+# )
 
 template = pn.template.FastListTemplate(
     title="Portfolio一览",
     # sidebar=[freq, phase],
 )
-template.main.extend(
-    [pn.Row(top_header),
-     pn.Row(
-        pn.Column(monthly_return_card, stock_overview,
-                  width=500, margin=(10, 10, 10, 10)),
-        pn.Column(total_return_card, drawdown_card, margin=(10, 10, 10, 10)),
-        pn.Column(composation_card, margin=(10, 10, 10, 10)),
-    )]
-)
+template.main.extend([stock_overview, composation_card, monthly_return_card])
+# template.main.extend(
+#     [pn.Row(top_header),
+#      pn.Row(
+#         pn.Column(monthly_return_card, stock_overview,
+#                   width=500, margin=(10, 10, 10, 10)),
+#         pn.Column(total_return_card, drawdown_card, margin=(10, 10, 10, 10)),
+#         pn.Column(composation_card, margin=(10, 10, 10, 10)),
+#     )]
+# )
 template.servable()
 # pn.Row(
instance/local.db CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0651e0e8a48a69d61d008bf5ae3262ca38a8968b020e700604841d55f8784a3
-size 8728576
+oid sha256:b918cd8420f4f314aaff25fb9348f5fcca206b01a0403d9a96b0039d00b55047
+size 17780736
instance/log.json CHANGED
@@ -1,3 +1,3 @@
 {
-    "daily_update": "2023-08-29 03:02:51"
+    "daily_update": "2023-08-30 09:02:54"
 }
pipeline.py CHANGED
@@ -8,6 +8,7 @@ import pytz
 import table_schema as ts
 import db_operation as db
 from log import Log
+import processing
 # import settings
 # fetch new stock price
 stock_price_stream = Stream()
@@ -138,11 +139,6 @@ def need_to_update_stocks_price(delta_time):
     return False
 
 
-def processing():
-    '''
-    run the whole processing pipeline here
-    '''
-    pass
 
 
 def add_details_to_stock_df(stock_df):
@@ -214,27 +210,27 @@ def right_fill_stock_price():
     '''
     update all stocks price until today.
 
-    if no portfolio, terminate without warning
-    default start date is the most recent date in portfolio
+    if no benchmark profile, terminate without warning
+    default start date is the most recent date in benchmark profile
     '''
-    most_recent_portfolio = db.get_most_recent_portfolio_profile()
+    most_recent_benchmark = db.get_most_recent_benchmark_profile()
     most_recent_stocks_price = db.get_most_recent_stocks_price()
 
     # fetch all stocks price until today
     stocks_dates = most_recent_stocks_price.time
-    portfolio_dates = most_recent_portfolio.date
-    if len(portfolio_dates) == 0:
+    b_dates = most_recent_benchmark.date
+    if len(b_dates) == 0:
         return
-    start = stocks_dates[0] if len(stocks_dates) > 0 else portfolio_dates[0]
+    start = stocks_dates[0] if len(stocks_dates) > 0 else b_dates[0]
     end = utils.time_in_beijing()
 
     # frequency is set to daily
     if end - start > dt.timedelta(days=1):
-        new_stocks_price = fetch_all_stocks_price_between(start, end)
+        new_stocks_price = _fetch_all_stocks_price_between(start, end)
         db.append_to_stocks_price_table(new_stocks_price)
 
 
-def fetch_all_stocks_price_between(start, end):
+def _fetch_all_stocks_price_between(start, end):
     '''
     patch stock price db with all daily stock price within window
     inclusive on both start and end date
@@ -260,8 +256,7 @@ def fetch_all_stocks_price_between(start, end):
         security=tickers,
         start_date=start,
         end_date=end,
-        frequency='daily',
-        skip_paused=True,)
+        frequency='daily')
     # drop where closing price is null
     stock_price.dropna(subset=['close'], inplace=True)
     return stock_price
@@ -339,15 +334,17 @@ def left_fill_benchmark_profile():
 def left_fill_stocks_price():
     '''
     left fill stock price
-    fill missing entries between the oldest date in portfolio profile and the oldest date in stock price table
+    fill missing entries between the oldest date in benchmark
+    profile and the oldest date in stock price table
 
-    if no portfolio profile, terminate without warning
-    if no stock price table, the span would be from the oldest date in portfolio profile to the most recent date in portfolio profile
+    if no benchmark profile, terminate without warning
+    if no stock price table, the span would be from
+    the oldest date in benchmark profile to the most recent date in benchmark profile
 
 
     '''
-    # get oldest time in portfolio profile
-    p_start = db.get_oldest_portfolio_profile().date
+    # use benchmark because the benchmark profile only updates once a month
+    p_start = db.get_oldest_benchmark_profile().date
     # get oldest time in stock price table
     stock_start = db.get_oldest_stocks_price().time
    # if no portfolio profile, terminate
@@ -356,14 +353,14 @@ def left_fill_stocks_price():
     # no stock price, span the entire portfolio profile
     elif len(stock_start) == 0:
         start = p_start[0]
-        end = db.get_most_recent_portfolio_profile().date[0]
+        end = db.get_most_recent_benchmark_profile().date[0]
     else:
         start = p_start[0]
         end = stock_start[0]
-
+
     if start < end:
         # fetch and update
-        new_entry = fetch_all_stocks_price_between(start, end)
+        new_entry = _fetch_all_stocks_price_between(start, end)
         db.append_to_stocks_price_table(new_entry)
@@ -374,6 +371,49 @@ def updaet_benchmark_to_db():
     pass
 
 
+def get_stocks_in_profile(profile_df):
+    ticker_list = profile_df.ticker.unique().tolist()
+    stocks_df = db.get_stocks_price(ticker_list)
+    return stocks_df
+
+
+def batch_processing():
+    '''perform when portfolio or benchmark is updated'''
+    portfolio_p = db.get_all_portfolio_profile()
+    benchmark_p = db.get_all_benchmark_profile()
+    p_stocks_df = get_stocks_in_profile(portfolio_p)
+    b_stocks_df = get_stocks_in_profile(benchmark_p)
+
+    # temporarily handle renaming date to time
+    portfolio_p.rename(
+        columns={'date': 'time', 'weight': 'ini_w'}, inplace=True)
+    benchmark_p.rename(columns={'date': 'time'}, inplace=True)
+    # normalize weight in benchmark
+    grouped = benchmark_p.groupby('time')
+    benchmark_p['ini_w'] = grouped['weight'].transform(lambda x: x / x.sum())
+    # add profile information into stock price
+    analytic_b = processing.create_analytic_df(b_stocks_df, benchmark_p)
+    analytic_p = processing.create_analytic_df(p_stocks_df, portfolio_p)
+    # p stock weight
+    processing.calculate_cash(analytic_p)
+    processing.calculate_weight_using_cash(analytic_p)
+    processing.calculate_pct(analytic_p)
+    processing.calculate_norm_pct(analytic_p)
+    # b stock weight
+    analytic_b.sort_values(by=['time'], inplace=True)
+    grouped = analytic_b.groupby('ticker')
+    analytic_b['pct'] = grouped['close'].pct_change()
+    processing.calculate_weight_using_pct(analytic_b)
+    # pnl
+    processing.calculate_pnl(analytic_p)
+    # log return
+    # need to crop on the left side first
+    analytic_b = analytic_b[analytic_b['time'] >= analytic_p.time.min()].copy()
+    processing.calculate_log_return(analytic_p)
+    processing.calculate_log_return(analytic_b)
+    db.save_portfolio_analytic_df(analytic_p)
+    db.save_benchmark_analytic_df(analytic_b)
+
 async def daily_update():
     last_update = log.get_time('daily_update')
     if last_update is None or utils.time_in_beijing() - last_update >= dt.timedelta(days=1):
@@ -393,6 +433,8 @@ async def daily_update():
         log.update_log('daily_update')
     else:
         print("no update needed")
+    batch_processing()
+    print("updated analytic")
 
 
 def update():
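One step in batch_processing deserves a gloss: benchmark profiles arrive with raw index weights on an arbitrary scale, so they are renormalized to sum to 1 within each date via a grouped transform before any return math. A tiny sketch of that step with made-up weights:

import pandas as pd

benchmark_p = pd.DataFrame({
    'time': ['2023-08-01'] * 3,
    'ticker': ['A', 'B', 'C'],
    'weight': [20.0, 30.0, 50.0],   # raw weights, arbitrary scale
})
grouped = benchmark_p.groupby('time')
benchmark_p['ini_w'] = grouped['weight'].transform(lambda x: x / x.sum())
# ini_w -> [0.2, 0.3, 0.5], summing to 1 within each date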
script/processing.py → processing.py RENAMED
@@ -281,21 +281,165 @@ def calculate_total_attribution(calculated_p_stock, calculated_b_stock):
     # return df
 
 
-def calculate_return(df, start, end):
+def calcualte_return(df: pd.DataFrame, start, end):
     '''
-    return a df consist of total return for each day,
-    the return at start date would be 0
+    calculate return within a window for each entry of ticker,
+    inclusive on both ends
+
+    this is an intermediate step for attribute calculation,
+    using the cumulative log_return
     '''
-    selected_df = df[df.date.between(start, end)].copy()
-    # set the pct of first row to null
-    selected_df.iloc[0, selected_df.columns.get_indexer(
-        ['portfolio_pct_p', 'portfolio_pct_b'])] = 0
-    selected_df['return_p'] = (
-        1 + selected_df['portfolio_pct_p']).cumprod() - 1
-    selected_df['return_b'] = (
-        1 + selected_df['portfolio_pct_b']).cumprod() - 1
-    selected_df['active_return'] = selected_df.return_p - selected_df.return_b
-    return selected_df
+    df = df[(df.time >= start) & (df.time <= end)].copy()
+    inter_df = df.sort_values(by=['time'])
+    inter_df['cum_log_return'] = inter_df.groupby(
+        'ticker')['log_return'].cumsum()
+    inter_df['percentage_return'] = np.exp(
+        inter_df['cum_log_return']) - 1
+    # patch
+    df['return'] = inter_df['percentage_return']
+    return df
+
+
+def calculate_weighted_return(df: pd.DataFrame, start, end):
+    '''
+    calculate weighted return within a window for each entry of ticker,
+    inclusive, using the weighted_log_return
+    '''
+    df = df[(df.time >= start) & (df.time <= end)].copy()
+    inter_df = df.sort_values(by=['time'])
+    inter_df['cum_weighted_log_return'] = inter_df.groupby(
+        'ticker')['weighted_log_return'].cumsum()
+    inter_df['percentage_return'] = np.exp(
+        inter_df['cum_weighted_log_return']) - 1
+    # patch
+    df['weighted_return'] = inter_df['percentage_return']
+    return df
+
+
+def calculate_log_return(df: pd.DataFrame):
+    '''
+    patch df with the weighted log return and unweighted log return
+    calculated using close price
+
+    an intermediate step to calculate the weighted return;
+    the benefit of using log returns is that they can be aggregated
+    over any time window and work for both portfolio and benchmark
+    '''
+    inter_df = df.sort_values(by=['time'])
+    grouped = inter_df.groupby('ticker')
+    inter_df['prev_w'] = grouped['weight'].shift(1)
+    inter_df['prev_close'] = grouped['close'].shift(1)
+    inter_df['log_return'] = np.log(inter_df['close'] / inter_df['prev_close'])
+    inter_df['weighted_log_return'] = inter_df['log_return'] * \
+        inter_df['prev_w']
+    # patch
+    df['log_return'] = inter_df['log_return']
+    df['weighted_log_return'] = inter_df['weighted_log_return']
+
+# TODO: change to log return instead
+# def calculate_return(df, start, end):
+#     df = df[(df.time >= start) & (df.time <= end)].copy()
+#     df.sort_values(by=['time'], inplace=True)
+#     grouped = df.groupby('ticker')
+#     df['return'] = (1 + grouped.pct.cumprod()) - 1
+#     return df
+
+# def calculate_norm_return(df, start, end):
+#     '''
+#     calculate accumulative normalized return within a window
+#     for each entry of ticker using norm_pct
+
+#     normalized return is the weighted return with respect to
+#     the whole portfolio
+
+#     Return
+#     ------
+#     dataframe
+#         dataframe with return for each ticker
+#     '''
+#     df = df[(df.time >= start) & (df.time <= end)].copy()
+#     df.sort_values(by=['time'], inplace=True)
+#     grouped = df.groupby('ticker')
+#     df['norm_return'] = (1 + grouped.norm_pct.cumprod()) - 1
+#     return df
+
+
+def _uniformize_time_series(profile_df):
+    '''
+    a helper function to create analytic_df
+
+    make each entry in the time series have the same dimension
+    by giving a stock that was held in a previous period but not the
+    next one an entry with 0 shares and 0 ini_w
+
+    Parameters
+    ----------
+    profile_df : dataframe
+        portfolio profile dataframe or benchmark profile dataframe
+
+    Returns
+    -------
+    dataframe
+        dataframe with uniformized time series
+    '''
+    # Get unique time periods
+    time_periods = profile_df['time'].unique()
+    time_periods = sorted(time_periods)
+
+    # Iterate through time periods
+    for i in range(len(time_periods) - 1):
+        current_period = time_periods[i]
+        next_period = time_periods[i + 1]
+
+        current_df = profile_df[profile_df['time'] == current_period]
+        next_df = profile_df[profile_df['time'] == next_period]
+
+        tickers_current = current_df['ticker']
+        tickers_next = next_df['ticker']
+
+        # rows whose ticker is not in tickers_next
+        missing_tickers = current_df[~tickers_current.isin(
+            tickers_next)].copy()
+
+        if len(missing_tickers) != 0:
+            missing_tickers.time = next_period
+            missing_tickers.shares = 0
+            missing_tickers.ini_w = 0
+            profile_df = pd.concat(
+                [profile_df, missing_tickers], ignore_index=True)
+    # reset index
+    return profile_df.reset_index(drop=True)
+
+
+def create_analytic_df(price_df, profile_df):
+    '''
+    create a df for analysis processing
+
+    filling information from the profile df into the stock price df
+    '''
+    uni_profile_df = _uniformize_time_series(profile_df)
+    # TODO handle rename column here
+    df = price_df.merge(uni_profile_df, on=['ticker', 'time'], how='outer')
+    df.sort_values(by=['ticker', 'time'], inplace=True)
+    # add sector, aggregate_sector, display_name and name to missing rows
+    grouped = df.groupby('ticker')
+    df['sector'] = grouped['sector'].fillna(method='ffill')
+    df['aggregate_sector'] = grouped['aggregate_sector'].fillna(method='ffill')
+    df['display_name'] = grouped['display_name'].fillna(method='ffill')
+    df['name'] = grouped['name'].fillna(method='ffill')
+
+    # assign missing ini_w
+    df['ini_w'] = grouped['ini_w'].fillna(method='ffill')
+    # assign missing shares, benchmark doesn't have shares
+    if ('shares' in df.columns):
+        df['shares'] = grouped['shares'].fillna(method='ffill')
+    # remove profile and price entries before the first profile entry from df
+    df.dropna(subset=['ini_w'], inplace=True)
+    df.dropna(subset=['close'], inplace=True)
+    # remove where weight is 0
+    df = df[df['ini_w'] != 0].copy()
+    return df
 
 
 def calculate_attributes_between_dates(start, end, calculated_p_stock, calculated_b_stock):
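A note on create_analytic_df above: the outer merge leaves price-only rows with NaN in the profile columns, and the per-ticker forward fill is what propagates static attributes (sector, display_name, ini_w, shares) from each rebalancing row down to the daily price rows that follow it. A minimal sketch of that fill pattern with hypothetical rows:

import pandas as pd

df = pd.DataFrame({
    'ticker': ['A', 'A', 'A'],
    'time': pd.to_datetime(['2023-08-01', '2023-08-02', '2023-08-03']),
    'sector': ['Tech', None, None],   # profile info only on the rebalance day
})
df['sector'] = df.groupby('ticker')['sector'].ffill()
# sector -> ['Tech', 'Tech', 'Tech']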
@@ -361,6 +505,15 @@ def calculate_attributes_between_dates(start, end, calculated_p_stock, calculated_b_stock):
     return df
 
 
+def calculate_cum_pnl(df, start, end):
+    '''return df with cumulative pnl within a window'''
+    df = df[df.time.between(start, end, inclusive='both')].copy()
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    df['cum_pnl'] = grouped['pnl'].cumsum()
+    return df
+
+
 def change_resolution(df, freq='W'):
     '''
     aggregate by keeping the first entry of the freq period,
@@ -370,6 +523,33 @@ def change_resolution(df, freq='W'):
     return df.groupby('freq').first().reset_index()
 
 
+def calculate_pnl(df):
+    '''
+    patch df with a pnl column
+
+    pnl is calculated using cash
+    '''
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    df['pnl'] = grouped['cash'].diff()
+
+
+def calculate_pct(df):
+    '''
+    calculate pct using close price
+    '''
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    df['pct'] = grouped['close'].pct_change()
+
+
+def calculate_norm_pct(df):
+    '''
+    use weight to calculate the norm pct
+    '''
+    df['norm_pct'] = df.weight * df.pct
+
+
 def calculate_weight_using_cash(df):
     '''
     patch df with current weight for each entry
@@ -381,9 +561,9 @@ def calculate_weight_using_cash(df):
         dataframe with processed cash column
 
     '''
-    df['cur_w'] = float('nan')
+    df['weight'] = float('nan')
     grouped = df.groupby('time')
-    df.cur_w = grouped.cash.transform(lambda x: x / x.sum())
+    df.weight = grouped.cash.transform(lambda x: x / x.sum())
 
 
 def calculate_cash(df):
@@ -397,3 +577,173 @@ def calculate_cash(df):
         dataframe with processed shares and close column
     '''
     df['cash'] = df['shares'] * df['close']
+
+
+def calculate_weight_using_pct(df):
+    '''
+    calculate weight using the weight column
+
+    calculate benchmark stock using this, since benchmark stock
+    doesn't have share information
+
+    Parameters
+    ----------
+    df : dataframe
+        dataframe with weight, pct on closing and ini_w columns
+    '''
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    for _, group in grouped:
+        prev_row = None
+        for index, row in group.iterrows():
+            if prev_row is None:
+                prev_row = df.loc[index]
+                continue
+            df.loc[index, 'weight'] = prev_row['weight'] * (1 + row['pct'])
+            prev_row = df.loc[index]
+    # normalize weight
+    grouped = df.groupby('time')
+    normed_weight = grouped['weight'].transform(lambda x: x / x.sum())
+    df['weight'] = normed_weight
+
+
+def calculate_periodic_BHB(agg_b, agg_p):
+    '''
+    calculate periodic BHB for each ticker entry
+
+    the accumulated return of a period is used;
+    the weight is the weight at the beginning of the period
+
+    Note:
+    ----
+    if there is only one entry in a period, the return will be nan
+
+    Parameters
+    ----------
+    agg_b : pd.DataFrame
+        aggregated benchmark analytic_df
+    agg_p : pd.DataFrame
+        aggregated portfolio analytic_df
+
+    Returns
+    -------
+    pd.DataFrame
+        periodic BHB result containing allocation, interaction, selection, notional_active_return and active_return
+    '''
+    # merge both
+    agg_b['in_benchmark'] = True
+    agg_p['in_portfolio'] = True
+    selected_column = ['ticker', 'aggregate_sector',
+                       'prev_weight', 'return', 'period', 'display_name']
+    columns_to_fill = ['return_b', 'return_p',
+                       'prev_weight_p', 'prev_weight_b']
+    merged_df = pd.merge(agg_b[['in_benchmark'] + selected_column],
+                         agg_p,
+                         how='outer',
+                         on=['period', 'ticker'],
+                         suffixes=('_b', '_p'))
+    merged_df['in_portfolio'].fillna(False, inplace=True)
+    merged_df['in_benchmark'].fillna(False, inplace=True)
+    merged_df[columns_to_fill] = merged_df[columns_to_fill].fillna(0)
+
+    # complement fill aggregate_sector and display_name
+    merged_df['aggregate_sector_b'].fillna(
+        merged_df['aggregate_sector_p'], inplace=True)
+    merged_df["display_name_b"].fillna(merged_df.display_name_p, inplace=True)
+    merged_df.rename(columns={'aggregate_sector_b': 'aggregate_sector',
+                              'display_name_b': 'display_name',
+                              }, inplace=True)
+    merged_df.drop(columns=['aggregate_sector_p',
+                            'display_name_p'], inplace=True)
+
+    # calculate active return
+    merged_df['weighted_return_p'] = merged_df['return_p'] * \
+        merged_df['prev_weight_p']
+    merged_df['weighted_return_b'] = merged_df['return_b'] * \
+        merged_df['prev_weight_b']
+    merged_df['active_return'] = merged_df['weighted_return_p'] - \
+        merged_df['weighted_return_b']
+
+    # allocation, interaction, selection and notional active return
+    merged_df['allocation'] = (
+        merged_df.prev_weight_p - merged_df.prev_weight_b) * merged_df.return_b
+    merged_df['interaction'] = (merged_df.return_p - merged_df.return_b) \
+        * (merged_df.prev_weight_p - merged_df.prev_weight_b)
+    merged_df['selection'] = (
+        merged_df.return_p - merged_df.return_b) * merged_df.prev_weight_b
+    merged_df['notional_active_return'] = merged_df['allocation'] + \
+        merged_df['interaction'] + merged_df['selection']
+    return merged_df
+
+
+def _merge_anlaytic_df(portfolio_df, benchmark_df):
+    pass
+
+
+def aggregate_analytic_df_by_period(df, freq):
+    '''
+    return an aggregated analytic_df with weekly, monthly, yearly or daily frequency
+
+    each ticker will have 1 row for each period,
+    cash is the value at the end of the period.
+    shares is the # of shares at the end of the period.
+    prev_weight is the weight of that ticker entry at the end of the previous period.
+    log_return is the sum of log_return within the period.
+    weight is the weight of that ticker entry at the end of the period.
+    return is from the last of the previous period to the last of the current period.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        analytic_df, dataframe of stock price that has weight and log_return information
+    freq : str
+        weekly: 'W-MON' start on Tuesday, end on Monday,
+        monthly: 'M',
+        yearly: 'Y',
+        daily: "D"
+
+    Returns
+    -------
+    pd.DataFrame
+        aggregated analytic_df with weekly, monthly, yearly or daily frequency
+    '''
+    # create prev_weight
+    df.sort_values(by=['time'], inplace=True)
+    grouped = df.groupby('ticker')
+    df['prev_weight'] = grouped['weight'].shift(1)
+
+    # aggregate by summing log return and keeping the first prev_weight
+    df['period'] = df.time.dt.to_period(freq)
+    grouped = df.groupby(['period', 'ticker'])
+    agg_rules = {'display_name': 'first',
+                 'aggregate_sector': 'first',
+                 'prev_weight': 'first',
+                 'log_return': 'sum',
+                 'weight': 'last'
+                 }
+
+    # handle aggregation on benchmark
+    if 'cash' in df.columns and 'shares' in df.columns:
+        agg_rules['cash'] = 'last'
+        agg_rules['shares'] = 'last'
+
+    # aggregation
+    agg_df = grouped.agg(agg_rules)
+
+    # calculate return by converting the summed log return to percentage return
+    agg_df['return'] = np.exp(agg_df.log_return) - 1
+
+    # make it a one dimensional dataframe
+    agg_df.reset_index(inplace=True)
+    return agg_df
+
+
+def aggregate_bhb_df(df, by="total"):
+    keys = ['period', 'aggregate_sector'] if by == 'sector' else ['period']
+    agg_df = df.groupby(keys)[['active_return',
+                               'allocation',
+                               'interaction',
+                               'selection',
+                               'notional_active_return']].sum()
+    return agg_df
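For reference, calculate_periodic_BHB above is the classic Brinson-Hood-Beebower decomposition applied per ticker and period: allocation = (w_p - w_b) * r_b, selection = w_b * (r_p - r_b), interaction = (w_p - w_b) * (r_p - r_b), and the three terms sum exactly to the notional active return w_p * r_p - w_b * r_b. A self-contained numeric check of that identity with hypothetical weights and returns:

# hypothetical single-ticker, single-period BHB check
w_p, w_b = 0.30, 0.20   # start-of-period weights in portfolio / benchmark
r_p, r_b = 0.05, 0.02   # period returns in portfolio / benchmark

allocation = (w_p - w_b) * r_b            # 0.002
selection = (r_p - r_b) * w_b             # 0.006
interaction = (r_p - r_b) * (w_p - w_b)   # 0.003
notional_active_return = allocation + selection + interaction

# identity: the effects sum to the weighted return difference
assert abs(notional_active_return - (w_p * r_p - w_b * r_b)) < 1e-12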
testing_pipeline.ipynb CHANGED
The diff for this file is too large to render. See raw diff