{ "cells": [ { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "from datetime import datetime\n", "import hvplot.pandas\n", "import math\n", "import numpy as np\n", "# load data\n", "profile_df = pd.read_pickle('../data/portfolio_portfile.pkl')\n", "benchmark_df = pd.read_pickle('../data/benchmark_portfolio.pkl')\n", "portfolio_df = pd.read_pickle('../data/portfolio_data.pkl')" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## This section transfrom benchmark_df and creat an benchmark profile to accomadate current pipeline" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "# drop weight\n", "# benchmark_df = benchmark_df.drop(columns=['weight'])\n", "\n", "## simulate update potfolio weigth at 2021-01-10\n", "update_profile_df = profile_df.copy()\n", "update_profile_df['date'] = datetime(2021,1,10)\n", "update_profile_df['weight'] = [50,100,200,300,400,500]\n", "profile_df = pd.concat([profile_df, update_profile_df])\n" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "## create a profile for benchmark\n", "b_profile = benchmark_df.drop_duplicates(subset=['ticker', 'actual_data'])\n", "# df_unique[df_unique.ticker == \"000008.XSHE\"]\n", "# only keep ticker\tdate\tweight\tdisplay_name\tname\tsector\taggregate_sector column\n", "b_profile = b_profile[['ticker','date','weight','name','sector','aggregate_sector','display_name_x']]\n", "b_profile.rename(columns={'display_name_x': 'display_name'}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datetickeropenclosehighlowvolumemoney
02021-01-05000008.XSHE2.522.572.672.4933215803.085358605.99
\n", "
" ], "text/plain": [ " date ticker open close high low volume money\n", "0 2021-01-05 000008.XSHE 2.52 2.57 2.67 2.49 33215803.0 85358605.99" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# drop weight in benchmark\n", "benchmark_df = benchmark_df.drop(columns=['weight'])\n", "benchmark_df = benchmark_df.drop(columns=['display_name_x'])\n", "# drop display_name_y\n", "benchmark_df = benchmark_df.drop(columns=['display_name_y'])\n", "# drop actual_data\n", "benchmark_df = benchmark_df.drop(columns=['actual_data'])\n", "# drop name\n", "benchmark_df = benchmark_df.drop(columns=['name'])\n", "# drop aggregate_sector\n", "benchmark_df = benchmark_df.drop(columns=['aggregate_sector'])\n", "# drop sector\n", "benchmark_df = benchmark_df.drop(columns=['sector'])\n", "benchmark_df.head(1)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "for col_name in benchmark_df.columns:\n", " if (col_name not in portfolio_df.columns):\n", " print(f'portfolio does not have {col_name}')\n", " \n", "\n", "for col_name in portfolio_df.columns:\n", " if (col_name not in benchmark_df.columns):\n", " print(f'benchmark does not have {col_name}')" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "True\n", "True\n", "True\n", "True\n", "True\n", "True\n", "True\n", "True\n", "True\n", "True\n", "True\n", "True\n", "True\n", "True\n" ] } ], "source": [ "for col_name in b_profile.columns:\n", " print(col_name in profile_df.columns)\n", "\n", "for col_name in profile_df.columns:\n", " print(col_name in b_profile.columns)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## calculate result for each individual stock this part should return a table" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "def get_processing_result_of_stocks_df(stock_df, profile_df):\n", " ## add sector_name display_name name\n", " ticker_sector_map = dict(zip(profile_df['ticker'], profile_df['aggregate_sector']))\n", " ticker_display_name_map = dict(zip(profile_df['ticker'], profile_df['display_name']))\n", " ticker_name_map = dict(zip(profile_df['ticker'], profile_df['name']))\n", "\n", " stock_df['display_name'] = stock_df['ticker'].map(ticker_display_name_map)\n", " stock_df['name'] = stock_df['ticker'].map(ticker_name_map)\n", " stock_df['aggregate_sector'] = stock_df['ticker'].map(ticker_sector_map)\n", "\n", " ## calculate pct using closing price\n", " stock_df.sort_values(by=['date'], inplace=True)\n", " stock_df['pct'] = stock_df.groupby('ticker')['close'].pct_change()\n", "\n", " ## calculate weight TODO: think about how to optimize this\n", " stock_df = stock_df.merge(profile_df[['weight', 'date', 'ticker']], on=['ticker', 'date'], how='outer')\n", " stock_df.rename(columns={'weight': 'initial_weight'}, inplace=True)\n", " stock_df['current_weight'] = float('nan')\n", " stock_df['previous_weight'] = float('nan')\n", " df_grouped = stock_df.groupby('ticker')\n", " for _, group in df_grouped:\n", " pre_w = float('nan')\n", " ini_w = float('nan')\n", " for index, row in group.iterrows():\n", " cur_w = float('nan')\n", "\n", " # if has initial weight, the following row all use this initial weight\n", " if not pd.isna(row['initial_weight']):\n", " ini_w = row['initial_weight']\n", " cur_w = ini_w\n", "\n", " # just calculate current weight based on previous weight\n", " else:\n", " cur_w = pre_w * (1 + row['pct'])\n", "\n", " stock_df.loc[index, 'current_weight'] = cur_w \n", " stock_df.loc[index, 'previous_weight'] = pre_w\n", " stock_df.loc[index, 'initial_weight'] = ini_w\n", " pre_w = cur_w\n", "\n", " stock_df.rename(columns={'weight': 'initial_weight'}, inplace=True)\n", " stock_df.dropna(subset=['close'], inplace=True)\n", "\n", " ## normalize weight\n", " stock_df['prev_w_in_p'] = stock_df['previous_weight'] / \\\n", " stock_df.groupby('date')['previous_weight'].transform('sum')\n", "\n", " stock_df['ini_w_in_p'] = stock_df['initial_weight'] / \\\n", " stock_df.groupby('date')['initial_weight'].transform('sum')\n", "\n", " ## calculate weighted pct in portfolio\n", " stock_df['portfolio_pct'] = stock_df['pct'] * stock_df['prev_w_in_p']\n", "\n", " ## calculate weight in sector TODO: remove\n", " stock_df['prev_w_in_sectore'] = stock_df['previous_weight'] / \\\n", " stock_df.groupby(['date', 'aggregate_sector'])['previous_weight'].transform('sum')\n", " stock_df['ini_w_in_sector'] = stock_df['initial_weight'] / \\\n", " stock_df.groupby(['date', 'aggregate_sector'])['initial_weight'].transform('sum')\n", " ## weighted pct in sector TODO: remove\n", " stock_df['sector_pct'] = stock_df['pct'] * stock_df['prev_w_in_sectore']\n", "\n", " ## portfolio return\n", " stock_df['cum_p_pct'] = stock_df.groupby('ticker')['portfolio_pct'].cumsum()\n", " stock_df['portfolio_return'] = np.exp(stock_df['cum_p_pct']) -1 \n", " # drop intermediate columns\n", " stock_df = stock_df.drop(columns=['cum_p_pct'])\n", "\n", "\n", " ## sector return TODO:remove \n", " # stock_df['sector_return'] = stock_df['ini_w_in_sector'] * stock_df['return']\n", "\n", " return stock_df\n" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "# portfolio_stock = get_processing_result_of_stocks_df(portfolio_df, profile_df)\n", "# portfolio_stock[portfolio_stock.ticker == '002709.XSHE'][['date','portfolio_pct','prev_w_in_p','portfolio_return']]\n", "# # benchmark_stock = get_processing_result_of_stocks_df(benchmark_df, b_profile)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "# profile_df.groupby('date')['weight'].sum()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "## total return by date\n", "def get_portfolio_evaluation(portfolio_stock, benchmark_stock, profile_df):\n", " # add pct of benchmark \n", " merged_df = portfolio_stock.merge(benchmark_stock[['ticker','date','portfolio_pct','portfolio_return']], \n", " on=['ticker','date'],how='left',suffixes=('_p','_b'))\n", "\n", " # sum up pct and return from portfolio and benchmark\n", " merged_df = merged_df.groupby('date',as_index=False).agg({'portfolio_return_p':'sum',\n", " 'portfolio_return_b':'sum',\n", " 'portfolio_pct_p':'sum',\n", " 'portfolio_pct_b':'sum'})\n", "\n", " # portfolio mkt cap\n", " mkt_adjustment = pd.DataFrame(profile_df.groupby('date')['weight'].sum())\n", " mkt_adjustment.rename(columns={'weight':'mkt_cap'}, inplace=True)\n", " merged_df = merged_df.merge(mkt_adjustment, on=['date'], how='outer')\n", "\n", " for i in range(len(merged_df)):\n", " if pd.isna(merged_df.loc[i, 'mkt_cap']):\n", " merged_df.loc[i, 'mkt_cap'] = merged_df.loc[i-1, 'mkt_cap'] * (1 + merged_df.loc[i, 'portfolio_pct_p'])\n", " # drop where portfolio_return_p is nan\n", " merged_df.dropna(subset=['portfolio_return_p'], inplace=True)\n", " # portfolio pnl TODO seem I can just use current wegith to do this\n", " merged_df['prev_mkt_cap'] = merged_df['mkt_cap'].shift(1)\n", " merged_df['pnl'] = merged_df['prev_mkt_cap'] * merged_df['portfolio_pct_p']\n", "\n", " # risk std(pct)\n", " merged_df['risk'] = merged_df['portfolio_pct_p'].rolling(len(merged_df), min_periods=1).std() * math.sqrt(252)\n", "\n", " # active return\n", " merged_df['active_return'] = merged_df['portfolio_pct_p'] - merged_df['portfolio_pct_b']\n", "\n", " # tracking errro std(active return)\n", " merged_df['tracking_error'] = merged_df['active_return'].rolling(len(merged_df), min_periods=1).std() * math.sqrt(252)\n", "\n", " # cum pnl\n", " merged_df['cum_pnl'] = merged_df['pnl'].cumsum()\n", "\n", " return merged_df\n" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "# portfolio_eval_df = get_portfolio_evaluation(portfolio_stock, benchmark_stock, profile_df)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "# portfolio_stock.columns" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "## TODO convert below to funciton\n", "\n", "def get_portfolio_sector_evaluation(portfolio_stock,benchmark_df):\n", "# aggregate on sector and day\n", " p_sector_df = portfolio_stock.groupby(['date','aggregate_sector'], as_index=False)\\\n", " .agg({'prev_w_in_p': 'sum','ini_w_in_p':\"sum\",\"current_weight\":'sum',\\\n", " \"portfolio_pct\":\"sum\", \"portfolio_return\":\"sum\"})\n", " # TODO shrink it down before aggregate\n", " b_sector_df = benchmark_df.groupby(['date','aggregate_sector'], as_index=False)\\\n", " .agg({'prev_w_in_p': 'sum','ini_w_in_p':\"sum\",\"current_weight\":'sum',\\\n", " \"portfolio_pct\":\"sum\", \"portfolio_return\":\"sum\"})\n", " \n", " # merge portfolio and benchmark\n", " merge_df = p_sector_df.merge(b_sector_df, on=['date','aggregate_sector'], how='left', suffixes=('_p','_b'))\n", "\n", " return merge_df" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "# sector_eval_df = get_portfolio_sector_evaluation(portfolio_stock, benchmark_stock)\n", "# sector_eval_df[sector_eval_df.date == datetime(2021, 10,13)].hvplot.bar(x='aggregate_sector', y=['portfolio_pct_p','portfolio_pct_b'], stacked=True, rot=90, title='sector pct')" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "def merge_on_date(portfolio_stock, benchmark_df):\n", " p_selected = portfolio_stock.reset_index()[['ini_w_in_p', 'portfolio_return', 'date', 'ticker', 'display_name']]\n", " b_selected = benchmark_df.reset_index()[['ini_w_in_p', 'portfolio_return', 'date', 'ticker']]\n", " merged_stock_df = pd.merge(p_selected, b_selected, on=['date', 'ticker'], how='outer', suffixes=('_p', '_b'))\n", " return merged_stock_df" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "# merged_df = merge_on_date(portfolio_stock, benchmark_stock)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "def get_bhb_result(merged_stock_df):\n", " # merged_stock_df['ini_w_in_p_p'].fillna(0, inplace=True)\n", " # merged_stock_df['ini_w_in_p_b'].fillna(0, inplace=True)\n", " # merged_stock_df['portfolio_return_b'].fillna(0, inplace=True)\n", " # merged_stock_df['portfolio_return_p'].fillna(0, inplace=True)\n", " # allocation\n", " merged_stock_df['allocation'] = (merged_stock_df['ini_w_in_p_p'] - merged_stock_df['ini_w_in_p_b']) \\\n", " * merged_stock_df['portfolio_return_b']\n", "\n", " # selection\n", " merged_stock_df['selection'] = merged_stock_df['ini_w_in_p_b'] * \\\n", " (merged_stock_df['portfolio_return_p'] - merged_stock_df['portfolio_return_b'])\n", "\n", " # interaction\n", " merged_stock_df['interaction'] = (merged_stock_df['ini_w_in_p_p'] - merged_stock_df['ini_w_in_p_b']) * \\\n", " (merged_stock_df['portfolio_return_p'] - merged_stock_df['portfolio_return_b'])\n", "\n", " # excess\n", " merged_stock_df['excess'] = merged_stock_df['portfolio_return_p'] - merged_stock_df['portfolio_return_b']\n", "\n", " # replace inf with nan\n", " merged_stock_df.replace([np.inf, -np.inf], np.nan, inplace=True)\n", " return merged_stock_df" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "# test ing pipeline here " ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "portfolio_stock = get_processing_result_of_stocks_df(portfolio_df, profile_df)\n", "benchmark_stock = get_processing_result_of_stocks_df(benchmark_df, b_profile)\n", "\n", "portfolio_eval_df = get_portfolio_evaluation(portfolio_stock, benchmark_stock, profile_df)\n", "sector_eval_df = get_portfolio_sector_evaluation(portfolio_stock, benchmark_stock)\n", "merged_df = merge_on_date(portfolio_stock, benchmark_stock)\n", "bnb_sector_result = get_bhb_result(sector_eval_df)\n", "bnb_stock_result = get_bhb_result(merged_df)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# save result \n", "portfolio_eval_df.to_pickle('../data/portfolio_eval_df.pkl')\n", "sector_eval_df.to_pickle('../data/sector_eval_df.pkl')\n", "# merged_df.to_csv('merged_df.csv')\n", "bnb_sector_result.to_pickle('../data/bnb_sector_result.pkl')\n", "bnb_stock_result.to_pickle('../data/bnb_stock_result.pkl')\n", "profile_df.to_pickle('../data/protfolio_profile.pkl')\n", "b_profile.to_pickle('../data/benchmark_profile.pkl')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# bnb_sector_result[bnb_sector_result.date == datetime(2021, 10,13)].hvplot.bar(x='aggregate_sector', y=['allocation','selection','interaction'], stacked=True, rot=90, title='sector allocation')" ] } ], "metadata": { "kernelspec": { "display_name": "portfolio_risk_assesment", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }