{ "cells": [ { "cell_type": "code", "execution_count": 142, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import math\n", "from datetime import datetime\n", "import hvplot.pandas\n", "import math\n", "# load data\n", "profile_df = pd.read_pickle('../data/portfolio_portfile.pkl')\n", "benchmark_df = pd.read_pickle('../data/benchmark_portfolio.pkl')\n", "portfolio_df = pd.read_pickle('../data/portfolio_data.pkl')" ] }, { "cell_type": "code", "execution_count": 143, "metadata": {}, "outputs": [], "source": [ "# to acoomodate the current pipe line\n", "min_dates = benchmark_df.groupby('ticker')['date'].min()\n", "\n", "for ticker, min_date in min_dates.items():\n", " benchmark_df.loc[(benchmark_df['ticker'] == ticker) & (benchmark_df['date'] != min_date), 'weight'] = float('nan')\n", "\n", "benchmark_df['initial_weight'] = benchmark_df['weight']\n", "# drop weight\n", "benchmark_df = benchmark_df.drop(columns=['weight'])" ] }, { "cell_type": "code", "execution_count": 144, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Series([], Name: initial_weight, dtype: int64)" ] }, "execution_count": 144, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# check if all unique ticker has an weight\n", "count_list = benchmark_df.groupby('ticker')['initial_weight'].count().sort_values(ascending=False)\n", "count_list[count_list != 1]\n" ] }, { "cell_type": "code", "execution_count": 145, "metadata": {}, "outputs": [], "source": [ "update_profile_df = profile_df.copy()\n", "update_profile_df['date'] = datetime(2021,1,10)\n", "update_profile_df['weight'] = [50,100,200,300,400,500]\n", "profile_df = pd.concat([profile_df, update_profile_df])\n" ] }, { "cell_type": "code", "execution_count": 146, "metadata": {}, "outputs": [], "source": [ "def calculate_pct(stock_df):\n", " stock_df['pct'] = stock_df.groupby(['ticker'])['close'].pct_change()\n" ] }, { "cell_type": "code", "execution_count": 147, "metadata": {}, "outputs": [], "source": [ "# step 1 pct\n", "calculate_pct(portfolio_df)\n", "calculate_pct(benchmark_df)" ] }, { "cell_type": "code", "execution_count": 148, "metadata": {}, "outputs": [], "source": [ "def return_weighted_stock_df(stock_price_df, profile_df=None):\n", " # TODO change later this a temporary solution\n", " # initialize weight if profile_df is not none\n", " merged_df = pd.DataFrame()\n", " if profile_df is not None:\n", " merged_df = stock_price_df.merge(profile_df[['weight', 'date', 'ticker']], on=['ticker', 'date'], how='outer')\n", " merged_df.sort_values(by=['date'], inplace=True)\n", " merged_df.rename(columns={'weight': 'initial_weight'}, inplace=True)\n", " else:\n", " merged_df = stock_price_df.copy()\n", " merged_df['current_weight'] = float('nan')\n", " merged_df['previous_weight'] = float('nan')\n", " df_grouped = merged_df.groupby('ticker')\n", " for _, group in df_grouped:\n", " pre_w = float('nan')\n", " ini_w = float('nan')\n", " for index, row in group.iterrows():\n", " cur_w = float('nan')\n", " # if has initial weight, the following row all use this initial weight\n", " if not pd.isna(row['initial_weight']):\n", " ini_w = row['initial_weight']\n", " cur_w = ini_w\n", " # just calculate current weight based on previous weight\n", " else:\n", " cur_w = pre_w * (1 + row['pct'])\n", "\n", " merged_df.loc[index, 'current_weight'] = cur_w \n", " merged_df.loc[index, 'previous_weight'] = pre_w\n", " merged_df.loc[index, 'initial_weight'] = ini_w\n", " pre_w = cur_w\n", " \n", " # drop row where closing price is none\n", " merged_df = merged_df[~pd.isna(merged_df['close'])]\n", " # drop index\n", " return merged_df" ] }, { "cell_type": "code", "execution_count": 149, "metadata": {}, "outputs": [], "source": [ "# TODO consider save the weight calculation\n", "portfolio_df = return_weighted_stock_df(portfolio_df, profile_df)\n", "benchmark_df = return_weighted_stock_df(benchmark_df)" ] }, { "cell_type": "code", "execution_count": 150, "metadata": {}, "outputs": [], "source": [ "# benchmark_df[benchmark_df.ticker =='000008.XSHE']" ] }, { "cell_type": "code", "execution_count": 151, "metadata": {}, "outputs": [], "source": [ "## normalize all weight\n", "def normalize_weight(stock_df):\n", " stock_df['current_weight'] = stock_df['current_weight'] / \\\n", " stock_df.groupby('date')['current_weight'].transform('sum')\n", "\n", " stock_df['previous_weight'] = stock_df['previous_weight'] / \\\n", " stock_df.groupby('date')['previous_weight'].transform('sum')\n", "\n", " stock_df['initial_weight'] = stock_df['initial_weight'] / \\\n", " stock_df.groupby('date')['initial_weight'].transform('sum')\n" ] }, { "cell_type": "code", "execution_count": 152, "metadata": {}, "outputs": [], "source": [ "normalize_weight(portfolio_df)\n", "normalize_weight(benchmark_df)" ] }, { "cell_type": "code", "execution_count": 153, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "260 0.032258\n", "258 0.064516\n", "262 0.129032\n", "263 0.258065\n", "259 0.322581\n", "261 0.193548\n", "Name: initial_weight, dtype: float64" ] }, "execution_count": 153, "metadata": {}, "output_type": "execute_result" } ], "source": [ "portfolio_df[portfolio_df.date == datetime(2021, 3, 12)]['initial_weight']" ] }, { "cell_type": "code", "execution_count": 154, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0\n", "1.0\n", "1.0\n" ] } ], "source": [ "print(benchmark_df[benchmark_df.date == datetime(2021, 3, 12)]['initial_weight'].sum())\n", "print(benchmark_df[benchmark_df.date == datetime(2021, 3, 12)]['current_weight'].sum())\n", "print(benchmark_df[benchmark_df.date == datetime(2021, 3, 12)]['previous_weight'].sum())" ] }, { "cell_type": "code", "execution_count": 155, "metadata": {}, "outputs": [], "source": [ "# step 3 sector wegiht\n", "\n", "# add sector information first\n", "def create_sector_weight(stock_df, profile_df=None):\n", " # if profile_df is none assume the aggregate_sector stock info already in stock_df\n", " merged_df = None\n", " if profile_df is not None:\n", " merged_df = stock_df.merge(profile_df[['ticker', 'aggregate_sector']], on='ticker', how='left')\n", " else:\n", " merged_df = stock_df.copy()\n", " # set null to others\n", " merged_df['aggregate_sector'] = merged_df['aggregate_sector'].fillna('其他')\n", " # calculate previous_sector_weight\n", " merged_df['previous_sector_weight'] = merged_df['previous_weight'] / \\\n", " merged_df.groupby(['date', 'aggregate_sector'])['previous_weight'].transform('sum')\n", " # calculate initial sectore weight\n", " merged_df['initial_sector_weight'] = merged_df['initial_weight'] / \\\n", " merged_df.groupby(['date', 'aggregate_sector'])['initial_weight'].transform('sum')\n", " \n", " return merged_df" ] }, { "cell_type": "code", "execution_count": 156, "metadata": {}, "outputs": [], "source": [ "portfolio_df = create_sector_weight(stock_df = portfolio_df, profile_df = profile_df)\n", "benchmark_df = create_sector_weight(benchmark_df)\n" ] }, { "cell_type": "code", "execution_count": 157, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "aggregate_sector\n", "信息与通信 1.0\n", "公用事业 1.0\n", "其他 1.0\n", "医药卫生 1.0\n", "原料与能源 1.0\n", "工业 1.0\n", "消费 1.0\n", "金融与地产 1.0\n", "Name: previous_sector_weight, dtype: float64\n", "aggregate_sector\n", "信息与通信 1.0\n", "公用事业 1.0\n", "其他 1.0\n", "医药卫生 1.0\n", "原料与能源 1.0\n", "工业 1.0\n", "消费 1.0\n", "金融与地产 1.0\n", "Name: initial_sector_weight, dtype: float64\n" ] } ], "source": [ "# check result \n", "print(benchmark_df[benchmark_df.date == datetime(2021, 3, 12)].groupby('aggregate_sector')['previous_sector_weight'].sum())\n", "print(benchmark_df[benchmark_df.date == datetime(2021, 3, 12)].groupby('aggregate_sector')['initial_sector_weight'].sum())\n" ] }, { "cell_type": "code", "execution_count": 158, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "aggregate_sector\n", "信息与通信 1.0\n", "医药卫生 1.0\n", "原料与能源 1.0\n", "工业 1.0\n", "消费 1.0\n", "Name: previous_sector_weight, dtype: float64\n", "aggregate_sector\n", "信息与通信 1.0\n", "医药卫生 1.0\n", "原料与能源 1.0\n", "工业 1.0\n", "消费 1.0\n", "Name: initial_sector_weight, dtype: float64\n" ] } ], "source": [ "# check result \n", "print(portfolio_df[portfolio_df.date == datetime(2021, 3, 12)].groupby('aggregate_sector')['previous_sector_weight'].sum())\n", "print(portfolio_df[portfolio_df.date == datetime(2021, 3, 12)].groupby('aggregate_sector')['initial_sector_weight'].sum())\n" ] }, { "cell_type": "code", "execution_count": 159, "metadata": {}, "outputs": [], "source": [ "## return define as the total return since the portfolio created\n", "def calcualte_return(stock_df):\n", " stock_df['return'] = stock_df['close'] / stock_df.groupby(['ticker'])['close'].transform('first') - 1" ] }, { "cell_type": "code", "execution_count": 160, "metadata": {}, "outputs": [], "source": [ "calcualte_return(portfolio_df)\n", "calcualte_return(benchmark_df)" ] }, { "cell_type": "code", "execution_count": 161, "metadata": {}, "outputs": [], "source": [ "def calculate_weighted_sector_return(stock_df):\n", " stock_df['weighted_sectore_return'] = stock_df['return'] * stock_df['initial_sector_weight']" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [], "source": [ "calculate_weighted_sector_return(portfolio_df)\n", "calculate_weighted_sector_return(benchmark_df)" ] }, { "cell_type": "code", "execution_count": 163, "metadata": {}, "outputs": [], "source": [ "## weighted return and sector weighred return \n", "def calculate_weighted_return(stock_df):\n", " stock_df['weighted_return'] = stock_df['return'] * stock_df['initial_weight']" ] }, { "cell_type": "code", "execution_count": 164, "metadata": {}, "outputs": [], "source": [ "# step\n", "calculate_weighted_return(portfolio_df)\n", "calculate_weighted_return(benchmark_df)" ] }, { "cell_type": "code", "execution_count": 165, "metadata": {}, "outputs": [], "source": [ "def calculate_weighted_sector_return(stock_df):\n", " stock_df['weighted_sector_return'] = stock_df['return'] * stock_df['initial_sector_weight']" ] }, { "cell_type": "code", "execution_count": 166, "metadata": {}, "outputs": [], "source": [ "calculate_weighted_sector_return(portfolio_df)\n", "calculate_weighted_sector_return(benchmark_df)" ] }, { "cell_type": "code", "execution_count": 167, "metadata": {}, "outputs": [], "source": [ "## calcualte weighted pc\n", "def calculate_weighted_pct(stock_df):\n", " stock_df['weighted_pct'] = stock_df['pct'] * stock_df['previous_weight']\n", "\n" ] }, { "cell_type": "code", "execution_count": 168, "metadata": {}, "outputs": [], "source": [ "def calculate_weighted_sector_pct(stock_df):\n", " stock_df['weighted_sector_pct'] = stock_df['pct'] * stock_df['previous_sector_weight']\n", " " ] }, { "cell_type": "code", "execution_count": 169, "metadata": {}, "outputs": [], "source": [ "calculate_weighted_sector_pct(portfolio_df)\n", "calculate_weighted_sector_pct(benchmark_df)" ] }, { "cell_type": "code", "execution_count": 170, "metadata": {}, "outputs": [], "source": [ "calculate_weighted_pct(portfolio_df)\n", "calculate_weighted_pct(benchmark_df)" ] }, { "cell_type": "code", "execution_count": 171, "metadata": {}, "outputs": [], "source": [ "calculate_weighted_sector_return(portfolio_df)\n", "calculate_weighted_sector_return(benchmark_df)" ] }, { "cell_type": "code", "execution_count": 172, "metadata": {}, "outputs": [], "source": [ "## aggregate by date\n", "\n", "# pct and weighted_return\n", "# def agg_by_date(stock_df)\n", "def agg_by_date(stock_df):\n", " agg_on_date_df = pd.DataFrame(stock_df.groupby('date')[['weighted_return','weighted_pct']].sum())\n", " agg_on_date_df.rename(columns={'weighted_return': 'return', 'weighted_pct': 'pct'}, inplace=True)\n", " return agg_on_date_df\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 173, "metadata": {}, "outputs": [], "source": [ "p_total_view = agg_by_date(portfolio_df)\n", "b_total_view = agg_by_date(benchmark_df)" ] }, { "cell_type": "code", "execution_count": 174, "metadata": {}, "outputs": [], "source": [ "## aggregate by sector\n", "def agg_by_sector(stock_df):\n", " agg_on_sector_df = pd.DataFrame(stock_df.groupby(['aggregate_sector','date'])[['weighted_sector_return','weighted_sector_pct']].sum())\n", " agg_on_sector_df.rename(columns={'weighted_sector_return': 'return', 'weighted_sector_pct': 'pct'}, inplace=True)\n", " return agg_on_sector_df" ] }, { "cell_type": "code", "execution_count": 175, "metadata": {}, "outputs": [], "source": [ "p_sector_view = agg_by_sector(portfolio_df)\n", "b_sector_view = agg_by_sector(benchmark_df)" ] }, { "cell_type": "code", "execution_count": 200, "metadata": {}, "outputs": [], "source": [ "def create_risk_table(portfolio_summary, benchmark_summary):\n", " # total risk tracking error \n", " merged_df = pd.merge(portfolio_summary, benchmark_summary, on='date', how='outer', suffixes=('_p', '_b'))\n", " merged_df['risk_p'] = merged_df['return_p'].expanding().std() * math.sqrt(252)\n", " merged_df['risk_b'] = merged_df['return_b'].expanding().std() * math.sqrt(252)\n", " merged_df['active_return'] = merged_df['return_p'] - merged_df['return_b']\n", " merged_df['tracking_error'] = merged_df['active_return'].expanding().std() * math.sqrt(252)\n", " merged_df['date'] = merged_df.index\n", " # drop index\n", " merged_df.reset_index(drop=True, inplace=True)\n", " return merged_df" ] }, { "cell_type": "code", "execution_count": 201, "metadata": {}, "outputs": [], "source": [ "portfolio_risk_by_date_df = create_risk_table(p_total_view, b_total_view)" ] }, { "cell_type": "code", "execution_count": 202, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
return_ppct_preturn_bpct_brisk_prisk_bactive_returntracking_errordate
00.0000000.0000000.0000000.000000NaNNaN0.000000NaN2021-01-05
10.0121460.012146-0.001934-0.0019340.1363410.0217050.0140800.1580462021-01-06
20.0868300.074233-0.0008110.0011250.7464020.0154140.0876410.7471272021-01-07
30.0894350.0024960.0025350.0033490.7563820.0301370.0869000.7409792021-01-08
40.1480630.029363-0.013015-0.0155110.9709840.0956540.1610781.0324232021-01-11
..............................
2420.028005-0.0710810.0868270.0001562.0976310.886298-0.0588221.8562132022-01-04
243-0.033053-0.0595820.067931-0.0173862.0990520.884891-0.1009841.8613472022-01-05
244-0.042238-0.0081120.0695220.0014902.1011180.883542-0.1117611.8674452022-01-06
245-0.073118-0.0310150.062056-0.0069812.1057600.881986-0.1351741.8759592022-01-07
246-0.0299440.0443000.0645880.0023842.1067490.880502-0.0945321.8800602022-01-10
\n", "

247 rows × 9 columns

\n", "
" ], "text/plain": [ " return_p pct_p return_b pct_b risk_p risk_b \\\n", "0 0.000000 0.000000 0.000000 0.000000 NaN NaN \n", "1 0.012146 0.012146 -0.001934 -0.001934 0.136341 0.021705 \n", "2 0.086830 0.074233 -0.000811 0.001125 0.746402 0.015414 \n", "3 0.089435 0.002496 0.002535 0.003349 0.756382 0.030137 \n", "4 0.148063 0.029363 -0.013015 -0.015511 0.970984 0.095654 \n", ".. ... ... ... ... ... ... \n", "242 0.028005 -0.071081 0.086827 0.000156 2.097631 0.886298 \n", "243 -0.033053 -0.059582 0.067931 -0.017386 2.099052 0.884891 \n", "244 -0.042238 -0.008112 0.069522 0.001490 2.101118 0.883542 \n", "245 -0.073118 -0.031015 0.062056 -0.006981 2.105760 0.881986 \n", "246 -0.029944 0.044300 0.064588 0.002384 2.106749 0.880502 \n", "\n", " active_return tracking_error date \n", "0 0.000000 NaN 2021-01-05 \n", "1 0.014080 0.158046 2021-01-06 \n", "2 0.087641 0.747127 2021-01-07 \n", "3 0.086900 0.740979 2021-01-08 \n", "4 0.161078 1.032423 2021-01-11 \n", ".. ... ... ... \n", "242 -0.058822 1.856213 2022-01-04 \n", "243 -0.100984 1.861347 2022-01-05 \n", "244 -0.111761 1.867445 2022-01-06 \n", "245 -0.135174 1.875959 2022-01-07 \n", "246 -0.094532 1.880060 2022-01-10 \n", "\n", "[247 rows x 9 columns]" ] }, "execution_count": 202, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# add mkt cap\n", "portfolio_risk_by_date_df" ] }, { "cell_type": "code", "execution_count": 217, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "date\n", "2021-01-05 600\n", "2021-01-10 1550\n", "Name: weight, dtype: int64" ] }, "execution_count": 217, "metadata": {}, "output_type": "execute_result" } ], "source": [ "profile_df.groupby('date')['weight'].sum()\n", "\n", "# for i in range(1, len(portfolio_risk_by_date_df)):\n", "# cur_mkt = portfolio_risk_by_date_df.loc[i, 'mkt_cap']\n", "# if pd.isna(cur_mkt):\n", "# portfolio_risk_by_date_df.loc[i, 'mkt_cap'] = portfolio_risk_by_date_df.loc[i-1, 'mkt_cap'] * (1 + portfolio_risk_by_date_df.loc[i, 'pct_p'])\n", " " ] }, { "cell_type": "code", "execution_count": 216, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
return_ppct_preturn_bpct_brisk_prisk_bactive_returntracking_errordatemkt_cap
00.00.00.00.0NaNNaN0.0NaN2021-01-05600.0
\n", "
" ], "text/plain": [ " return_p pct_p return_b pct_b risk_p risk_b active_return \\\n", "0 0.0 0.0 0.0 0.0 NaN NaN 0.0 \n", "\n", " tracking_error date mkt_cap \n", "0 NaN 2021-01-05 600.0 " ] }, "execution_count": 216, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# display row where mkt_cap is not nana\n", "portfolio_risk_by_date_df[portfolio_risk_by_date_df['mkt_cap'].notna()]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/v5/2108rh5964q9j741wg_s8r1w0000gn/T/ipykernel_23255/2871737262.py:10: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " pct['weighted_pct'] = pct['pct'] * pct['norm_weight']\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datereturnpct
02021-01-050.0000000.000000
12021-01-060.0070110.036439
22021-01-070.0475310.218707
32021-01-080.0471110.013639
42021-01-110.0527680.014559
............
2422022-01-040.363845-0.199827
2432022-01-050.306697-0.193598
2442022-01-060.3312910.023418
2452022-01-070.313726-0.080728
2462022-01-100.3132620.110254
\n", "

247 rows × 3 columns

\n", "
" ], "text/plain": [ " date return pct\n", "0 2021-01-05 0.000000 0.000000\n", "1 2021-01-06 0.007011 0.036439\n", "2 2021-01-07 0.047531 0.218707\n", "3 2021-01-08 0.047111 0.013639\n", "4 2021-01-11 0.052768 0.014559\n", ".. ... ... ...\n", "242 2022-01-04 0.363845 -0.199827\n", "243 2022-01-05 0.306697 -0.193598\n", "244 2022-01-06 0.331291 0.023418\n", "245 2022-01-07 0.313726 -0.080728\n", "246 2022-01-10 0.313262 0.110254\n", "\n", "[247 rows x 3 columns]" ] }, "execution_count": 191, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## aggregate by date\n", "# step 7 aggregate (get portfolio return and pct(change of daily return))by date\n", "def create_agg_by_date(stock_df):\n", " # sum up weighted return to get return \n", " agg_return = stock_df.groupby(['date'])['weighted_return'].sum().reset_index()\n", " agg_return.rename(columns={'weighted_return':'return'}, inplace=True)\n", "\n", " # sum up weighted pct to get pct\n", " pct = stock_df[['date','pct','norm_weight','ticker']]\n", " pct['weighted_pct'] = pct['pct'] * pct['norm_weight']\n", " agg_pct = pct.groupby(['date'])['pct'].sum().reset_index()\n", "\n", " agg_df = pd.merge(agg_return, agg_pct, on='date', how='outer')\n", " return agg_df\n", "\n", "\n", "\n", "p_perform_result = create_agg_by_date(p_stock_df)\n", "p_perform_result" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tickerdateopenclosehighlowvolumemoneypctweightreturnnorm_weightweighted_returnaggregate_sectordisplay_name
1452603882.XSHG2022-01-04106.8998.84106.8998.675140406.05.181929e+08-0.07626279.300385-0.2069960.107586-0.022270医药卫生金域医学
1453002709.XSHE2022-01-0457.6454.6457.8754.2942150916.02.333429e+09-0.028277161.2275010.6122750.2187350.133926工业天赐材料
1454600409.XSHG2022-01-048.168.218.258.1527288613.02.237925e+080.00736285.788924-0.1421110.116389-0.016540原料与能源三友化工
1455002920.XSHE2022-01-04139.71131.69140.91131.455410083.07.233361e+08-0.060833150.9340970.5093410.2047700.104298信息与通信德赛西威
1456300274.XSHE2022-01-04146.52134.96148.46134.6124205007.03.333125e+09-0.071291176.5336820.7653370.2395010.183299工业阳光电源
1457600415.XSHG2022-01-044.804.894.904.7858291943.02.832956e+080.02947483.304940-0.1669510.113019-0.018869消费小商品城
\n", "
" ], "text/plain": [ " ticker date open close high low volume \\\n", "1452 603882.XSHG 2022-01-04 106.89 98.84 106.89 98.67 5140406.0 \n", "1453 002709.XSHE 2022-01-04 57.64 54.64 57.87 54.29 42150916.0 \n", "1454 600409.XSHG 2022-01-04 8.16 8.21 8.25 8.15 27288613.0 \n", "1455 002920.XSHE 2022-01-04 139.71 131.69 140.91 131.45 5410083.0 \n", "1456 300274.XSHE 2022-01-04 146.52 134.96 148.46 134.61 24205007.0 \n", "1457 600415.XSHG 2022-01-04 4.80 4.89 4.90 4.78 58291943.0 \n", "\n", " money pct weight return norm_weight \\\n", "1452 5.181929e+08 -0.076262 79.300385 -0.206996 0.107586 \n", "1453 2.333429e+09 -0.028277 161.227501 0.612275 0.218735 \n", "1454 2.237925e+08 0.007362 85.788924 -0.142111 0.116389 \n", "1455 7.233361e+08 -0.060833 150.934097 0.509341 0.204770 \n", "1456 3.333125e+09 -0.071291 176.533682 0.765337 0.239501 \n", "1457 2.832956e+08 0.029474 83.304940 -0.166951 0.113019 \n", "\n", " weighted_return aggregate_sector display_name \n", "1452 -0.022270 医药卫生 金域医学 \n", "1453 0.133926 工业 天赐材料 \n", "1454 -0.016540 原料与能源 三友化工 \n", "1455 0.104298 信息与通信 德赛西威 \n", "1456 0.183299 工业 阳光电源 \n", "1457 -0.018869 消费 小商品城 " ] }, "execution_count": 194, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p_stock_df[p_stock_df.date==datetime(2022,1,4)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datemkt_cap
02021-01-05600
12021-01-101550
\n", "
" ], "text/plain": [ " date mkt_cap\n", "0 2021-01-05 600\n", "1 2021-01-10 1550" ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mkt_cap_df = pd.DataFrame(profile_df.groupby(['date'])['weight'].sum()).reset_index()\n", "mkt_cap_df.rename(columns={'weight':'mkt_cap'}, inplace=True)\n", "mkt_cap_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datereturnpctmkt_cap
02021-01-050.0000000.000000600.000000
12021-01-060.0070110.036439621.863161
22021-01-070.0475310.218707757.869005
32021-01-080.0471110.013639768.205269
\n", "
" ], "text/plain": [ " date return pct mkt_cap\n", "0 2021-01-05 0.000000 0.000000 600.000000\n", "1 2021-01-06 0.007011 0.036439 621.863161\n", "2 2021-01-07 0.047531 0.218707 757.869005\n", "3 2021-01-08 0.047111 0.013639 768.205269" ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# get mkt adjustment (weight is the fund in a stock)\n", "mkt_adjustment = pd.DataFrame(profile_df.groupby(['date'])['weight'].sum()).reset_index()\n", "mkt_adjustment.rename(columns={'weight':'mkt_cap'}, inplace=True)\n", "merge_df = p_perform_result.merge(mkt_adjustment, on='date', how='outer')\n", "\n", "\n", "for i in range(1, len(merge_df)):\n", " merge_df.loc[i, 'mkt_cap'] = merge_df.loc[i-1, 'mkt_cap'] * (1 + merge_df.loc[i, 'pct'])\n", "\n", "# # calculate daily mkt_cap\n", "# # initial_mkt_cap = merge_df.loc[0, 'mkt_cap']\n", "# for i in range(1, len(merge_df)):\n", "# row = merge_df.loc[i]\n", "# if pd.isna(row['mkt_cap']):\n", "# merge_df.loc[i, 'mkt_cap'] = merge_df.loc[i-1, 'mkt_cap'] * (1 + merge_df.loc[i, 'pct_portfolio'])\n", " \n", "# # step 8 calculate daily mkt cap\n", "\n", "merge_df[merge_df.date < datetime(2021,1,10)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tickerdateopenclosehighlowvolumemoneypctweightreturnnorm_weightweighted_returnaggregate_sectordisplay_name
0002709.XSHE2021-01-0532.5433.8934.2231.3959152352.01.942406e+09NaN100.0000000.0000000.1666670.000000工业天赐材料
1600415.XSHG2021-01-055.335.875.875.22180936477.01.010225e+09NaN100.0000000.0000000.1666670.000000消费小商品城
2600409.XSHG2021-01-059.239.579.669.0882669289.07.803391e+08NaN100.0000000.0000000.1666670.000000原料与能源三友化工
3300274.XSHE2021-01-0576.0376.4580.2075.2751384827.03.961995e+09NaN100.0000000.0000000.1666670.000000工业阳光电源
4002920.XSHE2021-01-0585.4487.2587.9584.073852674.03.322598e+08NaN100.0000000.0000000.1666670.000000信息与通信德赛西威
................................................
1477600409.XSHG2022-01-108.248.358.398.2132516017.02.699300e+080.01581587.251829-0.1274820.121949-0.015546原料与能源三友化工
1478002920.XSHE2022-01-10130.36138.43141.96130.115005400.06.901614e+080.046888158.6590260.5865900.2217520.130077信息与通信德赛西威
1479002709.XSHE2022-01-1051.6350.7351.9350.0329821246.01.518902e+09-0.019142149.6901740.4969020.2092160.103960工业天赐材料
1480600415.XSHG2022-01-104.704.754.854.6739278041.01.859827e+080.01063880.919932-0.1908010.113099-0.021579消费小商品城
1481603882.XSHG2022-01-1088.4595.5395.5988.396991445.06.468392e+080.08569276.644737-0.2335530.107123-0.025019医药卫生金域医学
\n", "

1482 rows × 15 columns

\n", "
" ], "text/plain": [ " ticker date open close high low volume \\\n", "0 002709.XSHE 2021-01-05 32.54 33.89 34.22 31.39 59152352.0 \n", "1 600415.XSHG 2021-01-05 5.33 5.87 5.87 5.22 180936477.0 \n", "2 600409.XSHG 2021-01-05 9.23 9.57 9.66 9.08 82669289.0 \n", "3 300274.XSHE 2021-01-05 76.03 76.45 80.20 75.27 51384827.0 \n", "4 002920.XSHE 2021-01-05 85.44 87.25 87.95 84.07 3852674.0 \n", "... ... ... ... ... ... ... ... \n", "1477 600409.XSHG 2022-01-10 8.24 8.35 8.39 8.21 32516017.0 \n", "1478 002920.XSHE 2022-01-10 130.36 138.43 141.96 130.11 5005400.0 \n", "1479 002709.XSHE 2022-01-10 51.63 50.73 51.93 50.03 29821246.0 \n", "1480 600415.XSHG 2022-01-10 4.70 4.75 4.85 4.67 39278041.0 \n", "1481 603882.XSHG 2022-01-10 88.45 95.53 95.59 88.39 6991445.0 \n", "\n", " money pct weight return norm_weight \\\n", "0 1.942406e+09 NaN 100.000000 0.000000 0.166667 \n", "1 1.010225e+09 NaN 100.000000 0.000000 0.166667 \n", "2 7.803391e+08 NaN 100.000000 0.000000 0.166667 \n", "3 3.961995e+09 NaN 100.000000 0.000000 0.166667 \n", "4 3.322598e+08 NaN 100.000000 0.000000 0.166667 \n", "... ... ... ... ... ... \n", "1477 2.699300e+08 0.015815 87.251829 -0.127482 0.121949 \n", "1478 6.901614e+08 0.046888 158.659026 0.586590 0.221752 \n", "1479 1.518902e+09 -0.019142 149.690174 0.496902 0.209216 \n", "1480 1.859827e+08 0.010638 80.919932 -0.190801 0.113099 \n", "1481 6.468392e+08 0.085692 76.644737 -0.233553 0.107123 \n", "\n", " weighted_return aggregate_sector display_name \n", "0 0.000000 工业 天赐材料 \n", "1 0.000000 消费 小商品城 \n", "2 0.000000 原料与能源 三友化工 \n", "3 0.000000 工业 阳光电源 \n", "4 0.000000 信息与通信 德赛西威 \n", "... ... ... ... \n", "1477 -0.015546 原料与能源 三友化工 \n", "1478 0.130077 信息与通信 德赛西威 \n", "1479 0.103960 工业 天赐材料 \n", "1480 -0.021579 消费 小商品城 \n", "1481 -0.025019 医药卫生 金域医学 \n", "\n", "[1482 rows x 15 columns]" ] }, "execution_count": 127, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## agg by sector and day\n", "p_stock_df['weight_in_sector'] = p_stock_df.groupby" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def creaet_portfolio_return(stock_df):\n", " portfolio_df = stock_df.groupby(['date'])['weighted_return'].sum().reset_index()\n", " portfolio_df.rename(columns={'weighted_return':'portfolio_return'}, inplace=True)\n", " return portfolio_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateportfolio_return
02021-01-050.000000
12021-01-060.007011
22021-01-070.047531
32021-01-080.047111
42021-01-110.052768
.........
2422022-01-040.363845
2432022-01-050.306697
2442022-01-060.331291
2452022-01-070.313726
2462022-01-100.313262
\n", "

247 rows × 2 columns

\n", "
" ], "text/plain": [ " date portfolio_return\n", "0 2021-01-05 0.000000\n", "1 2021-01-06 0.007011\n", "2 2021-01-07 0.047531\n", "3 2021-01-08 0.047111\n", "4 2021-01-11 0.052768\n", ".. ... ...\n", "242 2022-01-04 0.363845\n", "243 2022-01-05 0.306697\n", "244 2022-01-06 0.331291\n", "245 2022-01-07 0.313726\n", "246 2022-01-10 0.313262\n", "\n", "[247 rows x 2 columns]" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "portfolio_df = creaet_portfolio_return(p_stock_df)\n", "portfolio_df" ] } ], "metadata": { "kernelspec": { "display_name": "portfolio_risk_assesment", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }