{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import math\n",
    "from datetime import datetime\n",
    "import hvplot.pandas\n",
    "import math\n",
    "# load data\n",
    "profile_df = pd.read_pickle('../data/portfolio_portfile.pkl')\n",
    "benchmark_df = pd.read_pickle('../data/benchmark_portfolio.pkl')\n",
    "portfolio_df = pd.read_pickle('../data/portfolio_data.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [],
   "source": [
    "# to acoomodate the current pipe line\n",
    "min_dates = benchmark_df.groupby('ticker')['date'].min()\n",
    "\n",
    "for ticker, min_date in min_dates.items():\n",
    "   benchmark_df.loc[(benchmark_df['ticker'] == ticker) & (benchmark_df['date'] != min_date), 'weight'] = float('nan')\n",
    "\n",
    "benchmark_df['initial_weight'] = benchmark_df['weight']\n",
    "# drop weight\n",
    "benchmark_df = benchmark_df.drop(columns=['weight'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Series([], Name: initial_weight, dtype: int64)"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# check if all unique ticker has an weight\n",
    "count_list = benchmark_df.groupby('ticker')['initial_weight'].count().sort_values(ascending=False)\n",
    "count_list[count_list != 1]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [],
   "source": [
    "update_profile_df = profile_df.copy()\n",
    "update_profile_df['date'] = datetime(2021,1,10)\n",
    "update_profile_df['weight'] = [50,100,200,300,400,500]\n",
    "profile_df = pd.concat([profile_df, update_profile_df])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calculate_pct(stock_df):\n",
    "    stock_df['pct'] = stock_df.groupby(['ticker'])['close'].pct_change()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [],
   "source": [
    "# step 1 pct\n",
    "calculate_pct(portfolio_df)\n",
    "calculate_pct(benchmark_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [],
   "source": [
    "def return_weighted_stock_df(stock_price_df, profile_df=None):\n",
    "    # TODO change later this a temporary solution\n",
    "    # initialize weight if profile_df is not none\n",
    "    merged_df = pd.DataFrame()\n",
    "    if profile_df is not None:\n",
    "        merged_df = stock_price_df.merge(profile_df[['weight', 'date', 'ticker']], on=['ticker', 'date'], how='outer')\n",
    "        merged_df.sort_values(by=['date'], inplace=True)\n",
    "        merged_df.rename(columns={'weight': 'initial_weight'}, inplace=True)\n",
    "    else:\n",
    "        merged_df = stock_price_df.copy()\n",
    "    merged_df['current_weight'] = float('nan')\n",
    "    merged_df['previous_weight'] = float('nan')\n",
    "    df_grouped = merged_df.groupby('ticker')\n",
    "    for _, group in df_grouped:\n",
    "        pre_w = float('nan')\n",
    "        ini_w = float('nan')\n",
    "        for index, row in group.iterrows():\n",
    "            cur_w = float('nan')\n",
    "            # if has initial weight, the following row all use this initial weight\n",
    "            if not pd.isna(row['initial_weight']):\n",
    "                ini_w = row['initial_weight']\n",
    "                cur_w = ini_w\n",
    "            # just calculate current weight based on previous weight\n",
    "            else:\n",
    "                cur_w = pre_w * (1 + row['pct'])\n",
    "\n",
    "            merged_df.loc[index, 'current_weight'] = cur_w    \n",
    "            merged_df.loc[index, 'previous_weight'] = pre_w\n",
    "            merged_df.loc[index, 'initial_weight'] = ini_w\n",
    "            pre_w = cur_w\n",
    "            \n",
    "    # drop row where closing price is none\n",
    "    merged_df = merged_df[~pd.isna(merged_df['close'])]\n",
    "    # drop index\n",
    "    return merged_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO consider save the weight calculation\n",
    "portfolio_df = return_weighted_stock_df(portfolio_df, profile_df)\n",
    "benchmark_df = return_weighted_stock_df(benchmark_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [],
   "source": [
    "# benchmark_df[benchmark_df.ticker =='000008.XSHE']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [],
   "source": [
    "## normalize all weight\n",
    "def normalize_weight(stock_df):\n",
    "    stock_df['current_weight'] = stock_df['current_weight'] / \\\n",
    "                                    stock_df.groupby('date')['current_weight'].transform('sum')\n",
    "\n",
    "    stock_df['previous_weight'] = stock_df['previous_weight'] / \\\n",
    "                                    stock_df.groupby('date')['previous_weight'].transform('sum')\n",
    "\n",
    "    stock_df['initial_weight'] = stock_df['initial_weight'] / \\\n",
    "                                    stock_df.groupby('date')['initial_weight'].transform('sum')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [],
   "source": [
    "normalize_weight(portfolio_df)\n",
    "normalize_weight(benchmark_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "260    0.032258\n",
       "258    0.064516\n",
       "262    0.129032\n",
       "263    0.258065\n",
       "259    0.322581\n",
       "261    0.193548\n",
       "Name: initial_weight, dtype: float64"
      ]
     },
     "execution_count": 153,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "portfolio_df[portfolio_df.date == datetime(2021, 3, 12)]['initial_weight']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.0\n",
      "1.0\n",
      "1.0\n"
     ]
    }
   ],
   "source": [
    "print(benchmark_df[benchmark_df.date == datetime(2021, 3, 12)]['initial_weight'].sum())\n",
    "print(benchmark_df[benchmark_df.date == datetime(2021, 3, 12)]['current_weight'].sum())\n",
    "print(benchmark_df[benchmark_df.date == datetime(2021, 3, 12)]['previous_weight'].sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [],
   "source": [
    "# step 3 sector wegiht\n",
    "\n",
    "# add sector information first\n",
    "def create_sector_weight(stock_df, profile_df=None):\n",
    "    # if profile_df is none assume the aggregate_sector stock info already in stock_df\n",
    "    merged_df = None\n",
    "    if profile_df is not None:\n",
    "        merged_df = stock_df.merge(profile_df[['ticker', 'aggregate_sector']], on='ticker', how='left')\n",
    "    else:\n",
    "        merged_df = stock_df.copy()\n",
    "    # set null to others\n",
    "    merged_df['aggregate_sector'] = merged_df['aggregate_sector'].fillna('其他')\n",
    "    # calculate previous_sector_weight\n",
    "    merged_df['previous_sector_weight'] = merged_df['previous_weight'] / \\\n",
    "                                            merged_df.groupby(['date', 'aggregate_sector'])['previous_weight'].transform('sum')\n",
    "    # calculate initial sectore weight\n",
    "    merged_df['initial_sector_weight'] = merged_df['initial_weight'] / \\\n",
    "                                            merged_df.groupby(['date', 'aggregate_sector'])['initial_weight'].transform('sum')\n",
    "    \n",
    "    return merged_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [],
   "source": [
    "portfolio_df = create_sector_weight(stock_df = portfolio_df, profile_df = profile_df)\n",
    "benchmark_df = create_sector_weight(benchmark_df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "aggregate_sector\n",
      "信息与通信    1.0\n",
      "公用事业     1.0\n",
      "其他       1.0\n",
      "医药卫生     1.0\n",
      "原料与能源    1.0\n",
      "工业       1.0\n",
      "消费       1.0\n",
      "金融与地产    1.0\n",
      "Name: previous_sector_weight, dtype: float64\n",
      "aggregate_sector\n",
      "信息与通信    1.0\n",
      "公用事业     1.0\n",
      "其他       1.0\n",
      "医药卫生     1.0\n",
      "原料与能源    1.0\n",
      "工业       1.0\n",
      "消费       1.0\n",
      "金融与地产    1.0\n",
      "Name: initial_sector_weight, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# check result \n",
    "print(benchmark_df[benchmark_df.date == datetime(2021, 3, 12)].groupby('aggregate_sector')['previous_sector_weight'].sum())\n",
    "print(benchmark_df[benchmark_df.date == datetime(2021, 3, 12)].groupby('aggregate_sector')['initial_sector_weight'].sum())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "aggregate_sector\n",
      "信息与通信    1.0\n",
      "医药卫生     1.0\n",
      "原料与能源    1.0\n",
      "工业       1.0\n",
      "消费       1.0\n",
      "Name: previous_sector_weight, dtype: float64\n",
      "aggregate_sector\n",
      "信息与通信    1.0\n",
      "医药卫生     1.0\n",
      "原料与能源    1.0\n",
      "工业       1.0\n",
      "消费       1.0\n",
      "Name: initial_sector_weight, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# check result \n",
    "print(portfolio_df[portfolio_df.date == datetime(2021, 3, 12)].groupby('aggregate_sector')['previous_sector_weight'].sum())\n",
    "print(portfolio_df[portfolio_df.date == datetime(2021, 3, 12)].groupby('aggregate_sector')['initial_sector_weight'].sum())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [],
   "source": [
    "## return define as the total return since the portfolio created\n",
    "def calcualte_return(stock_df):\n",
    "    stock_df['return'] = stock_df['close'] / stock_df.groupby(['ticker'])['close'].transform('first') - 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {},
   "outputs": [],
   "source": [
    "calcualte_return(portfolio_df)\n",
    "calcualte_return(benchmark_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calculate_weighted_sector_return(stock_df):\n",
    "    stock_df['weighted_sectore_return'] = stock_df['return'] * stock_df['initial_sector_weight']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [],
   "source": [
    "calculate_weighted_sector_return(portfolio_df)\n",
    "calculate_weighted_sector_return(benchmark_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [],
   "source": [
    "## weighted return and sector weighred return \n",
    "def calculate_weighted_return(stock_df):\n",
    "    stock_df['weighted_return'] = stock_df['return'] * stock_df['initial_weight']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {},
   "outputs": [],
   "source": [
    "# step\n",
    "calculate_weighted_return(portfolio_df)\n",
    "calculate_weighted_return(benchmark_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calculate_weighted_sector_return(stock_df):\n",
    "    stock_df['weighted_sector_return'] = stock_df['return'] * stock_df['initial_sector_weight']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {},
   "outputs": [],
   "source": [
    "calculate_weighted_sector_return(portfolio_df)\n",
    "calculate_weighted_sector_return(benchmark_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [],
   "source": [
    "## calcualte weighted pc\n",
    "def calculate_weighted_pct(stock_df):\n",
    "    stock_df['weighted_pct'] = stock_df['pct'] * stock_df['previous_weight']\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calculate_weighted_sector_pct(stock_df):\n",
    "    stock_df['weighted_sector_pct'] = stock_df['pct'] * stock_df['previous_sector_weight']\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "metadata": {},
   "outputs": [],
   "source": [
    "calculate_weighted_sector_pct(portfolio_df)\n",
    "calculate_weighted_sector_pct(benchmark_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {},
   "outputs": [],
   "source": [
    "calculate_weighted_pct(portfolio_df)\n",
    "calculate_weighted_pct(benchmark_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [],
   "source": [
    "calculate_weighted_sector_return(portfolio_df)\n",
    "calculate_weighted_sector_return(benchmark_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [],
   "source": [
    "## aggregate by date\n",
    "\n",
    "# pct and weighted_return\n",
    "# def agg_by_date(stock_df)\n",
    "def agg_by_date(stock_df):\n",
    "    agg_on_date_df = pd.DataFrame(stock_df.groupby('date')[['weighted_return','weighted_pct']].sum())\n",
    "    agg_on_date_df.rename(columns={'weighted_return': 'return', 'weighted_pct': 'pct'}, inplace=True)\n",
    "    return agg_on_date_df\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [],
   "source": [
    "p_total_view = agg_by_date(portfolio_df)\n",
    "b_total_view = agg_by_date(benchmark_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [],
   "source": [
    "## aggregate by sector\n",
    "def agg_by_sector(stock_df):\n",
    "    agg_on_sector_df = pd.DataFrame(stock_df.groupby(['aggregate_sector','date'])[['weighted_sector_return','weighted_sector_pct']].sum())\n",
    "    agg_on_sector_df.rename(columns={'weighted_sector_return': 'return', 'weighted_sector_pct': 'pct'}, inplace=True)\n",
    "    return agg_on_sector_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [],
   "source": [
    "p_sector_view = agg_by_sector(portfolio_df)\n",
    "b_sector_view = agg_by_sector(benchmark_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 200,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_risk_table(portfolio_summary, benchmark_summary):\n",
    "    # total risk tracking error  \n",
    "    merged_df = pd.merge(portfolio_summary, benchmark_summary, on='date', how='outer', suffixes=('_p', '_b'))\n",
    "    merged_df['risk_p'] = merged_df['return_p'].expanding().std() * math.sqrt(252)\n",
    "    merged_df['risk_b'] = merged_df['return_b'].expanding().std() * math.sqrt(252)\n",
    "    merged_df['active_return'] = merged_df['return_p'] - merged_df['return_b']\n",
    "    merged_df['tracking_error'] = merged_df['active_return'].expanding().std() * math.sqrt(252)\n",
    "    merged_df['date'] = merged_df.index\n",
    "    # drop index\n",
    "    merged_df.reset_index(drop=True, inplace=True)\n",
    "    return merged_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "metadata": {},
   "outputs": [],
   "source": [
    "portfolio_risk_by_date_df = create_risk_table(p_total_view, b_total_view)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>return_p</th>\n",
       "      <th>pct_p</th>\n",
       "      <th>return_b</th>\n",
       "      <th>pct_b</th>\n",
       "      <th>risk_p</th>\n",
       "      <th>risk_b</th>\n",
       "      <th>active_return</th>\n",
       "      <th>tracking_error</th>\n",
       "      <th>date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-01-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.012146</td>\n",
       "      <td>0.012146</td>\n",
       "      <td>-0.001934</td>\n",
       "      <td>-0.001934</td>\n",
       "      <td>0.136341</td>\n",
       "      <td>0.021705</td>\n",
       "      <td>0.014080</td>\n",
       "      <td>0.158046</td>\n",
       "      <td>2021-01-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.086830</td>\n",
       "      <td>0.074233</td>\n",
       "      <td>-0.000811</td>\n",
       "      <td>0.001125</td>\n",
       "      <td>0.746402</td>\n",
       "      <td>0.015414</td>\n",
       "      <td>0.087641</td>\n",
       "      <td>0.747127</td>\n",
       "      <td>2021-01-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.089435</td>\n",
       "      <td>0.002496</td>\n",
       "      <td>0.002535</td>\n",
       "      <td>0.003349</td>\n",
       "      <td>0.756382</td>\n",
       "      <td>0.030137</td>\n",
       "      <td>0.086900</td>\n",
       "      <td>0.740979</td>\n",
       "      <td>2021-01-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.148063</td>\n",
       "      <td>0.029363</td>\n",
       "      <td>-0.013015</td>\n",
       "      <td>-0.015511</td>\n",
       "      <td>0.970984</td>\n",
       "      <td>0.095654</td>\n",
       "      <td>0.161078</td>\n",
       "      <td>1.032423</td>\n",
       "      <td>2021-01-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>242</th>\n",
       "      <td>0.028005</td>\n",
       "      <td>-0.071081</td>\n",
       "      <td>0.086827</td>\n",
       "      <td>0.000156</td>\n",
       "      <td>2.097631</td>\n",
       "      <td>0.886298</td>\n",
       "      <td>-0.058822</td>\n",
       "      <td>1.856213</td>\n",
       "      <td>2022-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>243</th>\n",
       "      <td>-0.033053</td>\n",
       "      <td>-0.059582</td>\n",
       "      <td>0.067931</td>\n",
       "      <td>-0.017386</td>\n",
       "      <td>2.099052</td>\n",
       "      <td>0.884891</td>\n",
       "      <td>-0.100984</td>\n",
       "      <td>1.861347</td>\n",
       "      <td>2022-01-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>244</th>\n",
       "      <td>-0.042238</td>\n",
       "      <td>-0.008112</td>\n",
       "      <td>0.069522</td>\n",
       "      <td>0.001490</td>\n",
       "      <td>2.101118</td>\n",
       "      <td>0.883542</td>\n",
       "      <td>-0.111761</td>\n",
       "      <td>1.867445</td>\n",
       "      <td>2022-01-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>245</th>\n",
       "      <td>-0.073118</td>\n",
       "      <td>-0.031015</td>\n",
       "      <td>0.062056</td>\n",
       "      <td>-0.006981</td>\n",
       "      <td>2.105760</td>\n",
       "      <td>0.881986</td>\n",
       "      <td>-0.135174</td>\n",
       "      <td>1.875959</td>\n",
       "      <td>2022-01-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>246</th>\n",
       "      <td>-0.029944</td>\n",
       "      <td>0.044300</td>\n",
       "      <td>0.064588</td>\n",
       "      <td>0.002384</td>\n",
       "      <td>2.106749</td>\n",
       "      <td>0.880502</td>\n",
       "      <td>-0.094532</td>\n",
       "      <td>1.880060</td>\n",
       "      <td>2022-01-10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>247 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     return_p     pct_p  return_b     pct_b    risk_p    risk_b  \\\n",
       "0    0.000000  0.000000  0.000000  0.000000       NaN       NaN   \n",
       "1    0.012146  0.012146 -0.001934 -0.001934  0.136341  0.021705   \n",
       "2    0.086830  0.074233 -0.000811  0.001125  0.746402  0.015414   \n",
       "3    0.089435  0.002496  0.002535  0.003349  0.756382  0.030137   \n",
       "4    0.148063  0.029363 -0.013015 -0.015511  0.970984  0.095654   \n",
       "..        ...       ...       ...       ...       ...       ...   \n",
       "242  0.028005 -0.071081  0.086827  0.000156  2.097631  0.886298   \n",
       "243 -0.033053 -0.059582  0.067931 -0.017386  2.099052  0.884891   \n",
       "244 -0.042238 -0.008112  0.069522  0.001490  2.101118  0.883542   \n",
       "245 -0.073118 -0.031015  0.062056 -0.006981  2.105760  0.881986   \n",
       "246 -0.029944  0.044300  0.064588  0.002384  2.106749  0.880502   \n",
       "\n",
       "     active_return  tracking_error       date  \n",
       "0         0.000000             NaN 2021-01-05  \n",
       "1         0.014080        0.158046 2021-01-06  \n",
       "2         0.087641        0.747127 2021-01-07  \n",
       "3         0.086900        0.740979 2021-01-08  \n",
       "4         0.161078        1.032423 2021-01-11  \n",
       "..             ...             ...        ...  \n",
       "242      -0.058822        1.856213 2022-01-04  \n",
       "243      -0.100984        1.861347 2022-01-05  \n",
       "244      -0.111761        1.867445 2022-01-06  \n",
       "245      -0.135174        1.875959 2022-01-07  \n",
       "246      -0.094532        1.880060 2022-01-10  \n",
       "\n",
       "[247 rows x 9 columns]"
      ]
     },
     "execution_count": 202,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# add mkt cap\n",
    "portfolio_risk_by_date_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 217,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "date\n",
       "2021-01-05     600\n",
       "2021-01-10    1550\n",
       "Name: weight, dtype: int64"
      ]
     },
     "execution_count": 217,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "profile_df.groupby('date')['weight'].sum()\n",
    "\n",
    "# for i in range(1, len(portfolio_risk_by_date_df)):\n",
    "#     cur_mkt = portfolio_risk_by_date_df.loc[i, 'mkt_cap']\n",
    "#     if pd.isna(cur_mkt):\n",
    "#         portfolio_risk_by_date_df.loc[i, 'mkt_cap'] = portfolio_risk_by_date_df.loc[i-1, 'mkt_cap'] * (1 + portfolio_risk_by_date_df.loc[i, 'pct_p'])\n",
    "  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>return_p</th>\n",
       "      <th>pct_p</th>\n",
       "      <th>return_b</th>\n",
       "      <th>pct_b</th>\n",
       "      <th>risk_p</th>\n",
       "      <th>risk_b</th>\n",
       "      <th>active_return</th>\n",
       "      <th>tracking_error</th>\n",
       "      <th>date</th>\n",
       "      <th>mkt_cap</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-01-05</td>\n",
       "      <td>600.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   return_p  pct_p  return_b  pct_b  risk_p  risk_b  active_return  \\\n",
       "0       0.0    0.0       0.0    0.0     NaN     NaN            0.0   \n",
       "\n",
       "   tracking_error       date  mkt_cap  \n",
       "0             NaN 2021-01-05    600.0  "
      ]
     },
     "execution_count": 216,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# display row where mkt_cap is not nana\n",
    "portfolio_risk_by_date_df[portfolio_risk_by_date_df['mkt_cap'].notna()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/v5/2108rh5964q9j741wg_s8r1w0000gn/T/ipykernel_23255/2871737262.py:10: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  pct['weighted_pct'] = pct['pct'] * pct['norm_weight']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>return</th>\n",
       "      <th>pct</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2021-01-05</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2021-01-06</td>\n",
       "      <td>0.007011</td>\n",
       "      <td>0.036439</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2021-01-07</td>\n",
       "      <td>0.047531</td>\n",
       "      <td>0.218707</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2021-01-08</td>\n",
       "      <td>0.047111</td>\n",
       "      <td>0.013639</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2021-01-11</td>\n",
       "      <td>0.052768</td>\n",
       "      <td>0.014559</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>242</th>\n",
       "      <td>2022-01-04</td>\n",
       "      <td>0.363845</td>\n",
       "      <td>-0.199827</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>243</th>\n",
       "      <td>2022-01-05</td>\n",
       "      <td>0.306697</td>\n",
       "      <td>-0.193598</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>244</th>\n",
       "      <td>2022-01-06</td>\n",
       "      <td>0.331291</td>\n",
       "      <td>0.023418</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>245</th>\n",
       "      <td>2022-01-07</td>\n",
       "      <td>0.313726</td>\n",
       "      <td>-0.080728</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>246</th>\n",
       "      <td>2022-01-10</td>\n",
       "      <td>0.313262</td>\n",
       "      <td>0.110254</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>247 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          date    return       pct\n",
       "0   2021-01-05  0.000000  0.000000\n",
       "1   2021-01-06  0.007011  0.036439\n",
       "2   2021-01-07  0.047531  0.218707\n",
       "3   2021-01-08  0.047111  0.013639\n",
       "4   2021-01-11  0.052768  0.014559\n",
       "..         ...       ...       ...\n",
       "242 2022-01-04  0.363845 -0.199827\n",
       "243 2022-01-05  0.306697 -0.193598\n",
       "244 2022-01-06  0.331291  0.023418\n",
       "245 2022-01-07  0.313726 -0.080728\n",
       "246 2022-01-10  0.313262  0.110254\n",
       "\n",
       "[247 rows x 3 columns]"
      ]
     },
     "execution_count": 191,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## aggregate by date\n",
    "# step 7 aggregate (get portfolio return and pct(change of daily return))by date\n",
    "def create_agg_by_date(stock_df):\n",
    "    # sum up weighted return to get return \n",
    "    agg_return = stock_df.groupby(['date'])['weighted_return'].sum().reset_index()\n",
    "    agg_return.rename(columns={'weighted_return':'return'}, inplace=True)\n",
    "\n",
    "    # sum up weighted pct to get pct\n",
    "    pct = stock_df[['date','pct','norm_weight','ticker']]\n",
    "    pct['weighted_pct'] = pct['pct'] * pct['norm_weight']\n",
    "    agg_pct = pct.groupby(['date'])['pct'].sum().reset_index()\n",
    "\n",
    "    agg_df = pd.merge(agg_return, agg_pct, on='date', how='outer')\n",
    "    return agg_df\n",
    "\n",
    "\n",
    "\n",
    "p_perform_result = create_agg_by_date(p_stock_df)\n",
    "p_perform_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>date</th>\n",
       "      <th>open</th>\n",
       "      <th>close</th>\n",
       "      <th>high</th>\n",
       "      <th>low</th>\n",
       "      <th>volume</th>\n",
       "      <th>money</th>\n",
       "      <th>pct</th>\n",
       "      <th>weight</th>\n",
       "      <th>return</th>\n",
       "      <th>norm_weight</th>\n",
       "      <th>weighted_return</th>\n",
       "      <th>aggregate_sector</th>\n",
       "      <th>display_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1452</th>\n",
       "      <td>603882.XSHG</td>\n",
       "      <td>2022-01-04</td>\n",
       "      <td>106.89</td>\n",
       "      <td>98.84</td>\n",
       "      <td>106.89</td>\n",
       "      <td>98.67</td>\n",
       "      <td>5140406.0</td>\n",
       "      <td>5.181929e+08</td>\n",
       "      <td>-0.076262</td>\n",
       "      <td>79.300385</td>\n",
       "      <td>-0.206996</td>\n",
       "      <td>0.107586</td>\n",
       "      <td>-0.022270</td>\n",
       "      <td>医药卫生</td>\n",
       "      <td>金域医学</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1453</th>\n",
       "      <td>002709.XSHE</td>\n",
       "      <td>2022-01-04</td>\n",
       "      <td>57.64</td>\n",
       "      <td>54.64</td>\n",
       "      <td>57.87</td>\n",
       "      <td>54.29</td>\n",
       "      <td>42150916.0</td>\n",
       "      <td>2.333429e+09</td>\n",
       "      <td>-0.028277</td>\n",
       "      <td>161.227501</td>\n",
       "      <td>0.612275</td>\n",
       "      <td>0.218735</td>\n",
       "      <td>0.133926</td>\n",
       "      <td>工业</td>\n",
       "      <td>天赐材料</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1454</th>\n",
       "      <td>600409.XSHG</td>\n",
       "      <td>2022-01-04</td>\n",
       "      <td>8.16</td>\n",
       "      <td>8.21</td>\n",
       "      <td>8.25</td>\n",
       "      <td>8.15</td>\n",
       "      <td>27288613.0</td>\n",
       "      <td>2.237925e+08</td>\n",
       "      <td>0.007362</td>\n",
       "      <td>85.788924</td>\n",
       "      <td>-0.142111</td>\n",
       "      <td>0.116389</td>\n",
       "      <td>-0.016540</td>\n",
       "      <td>原料与能源</td>\n",
       "      <td>三友化工</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1455</th>\n",
       "      <td>002920.XSHE</td>\n",
       "      <td>2022-01-04</td>\n",
       "      <td>139.71</td>\n",
       "      <td>131.69</td>\n",
       "      <td>140.91</td>\n",
       "      <td>131.45</td>\n",
       "      <td>5410083.0</td>\n",
       "      <td>7.233361e+08</td>\n",
       "      <td>-0.060833</td>\n",
       "      <td>150.934097</td>\n",
       "      <td>0.509341</td>\n",
       "      <td>0.204770</td>\n",
       "      <td>0.104298</td>\n",
       "      <td>信息与通信</td>\n",
       "      <td>德赛西威</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1456</th>\n",
       "      <td>300274.XSHE</td>\n",
       "      <td>2022-01-04</td>\n",
       "      <td>146.52</td>\n",
       "      <td>134.96</td>\n",
       "      <td>148.46</td>\n",
       "      <td>134.61</td>\n",
       "      <td>24205007.0</td>\n",
       "      <td>3.333125e+09</td>\n",
       "      <td>-0.071291</td>\n",
       "      <td>176.533682</td>\n",
       "      <td>0.765337</td>\n",
       "      <td>0.239501</td>\n",
       "      <td>0.183299</td>\n",
       "      <td>工业</td>\n",
       "      <td>阳光电源</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1457</th>\n",
       "      <td>600415.XSHG</td>\n",
       "      <td>2022-01-04</td>\n",
       "      <td>4.80</td>\n",
       "      <td>4.89</td>\n",
       "      <td>4.90</td>\n",
       "      <td>4.78</td>\n",
       "      <td>58291943.0</td>\n",
       "      <td>2.832956e+08</td>\n",
       "      <td>0.029474</td>\n",
       "      <td>83.304940</td>\n",
       "      <td>-0.166951</td>\n",
       "      <td>0.113019</td>\n",
       "      <td>-0.018869</td>\n",
       "      <td>消费</td>\n",
       "      <td>小商品城</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           ticker       date    open   close    high     low      volume  \\\n",
       "1452  603882.XSHG 2022-01-04  106.89   98.84  106.89   98.67   5140406.0   \n",
       "1453  002709.XSHE 2022-01-04   57.64   54.64   57.87   54.29  42150916.0   \n",
       "1454  600409.XSHG 2022-01-04    8.16    8.21    8.25    8.15  27288613.0   \n",
       "1455  002920.XSHE 2022-01-04  139.71  131.69  140.91  131.45   5410083.0   \n",
       "1456  300274.XSHE 2022-01-04  146.52  134.96  148.46  134.61  24205007.0   \n",
       "1457  600415.XSHG 2022-01-04    4.80    4.89    4.90    4.78  58291943.0   \n",
       "\n",
       "             money       pct      weight    return  norm_weight  \\\n",
       "1452  5.181929e+08 -0.076262   79.300385 -0.206996     0.107586   \n",
       "1453  2.333429e+09 -0.028277  161.227501  0.612275     0.218735   \n",
       "1454  2.237925e+08  0.007362   85.788924 -0.142111     0.116389   \n",
       "1455  7.233361e+08 -0.060833  150.934097  0.509341     0.204770   \n",
       "1456  3.333125e+09 -0.071291  176.533682  0.765337     0.239501   \n",
       "1457  2.832956e+08  0.029474   83.304940 -0.166951     0.113019   \n",
       "\n",
       "      weighted_return aggregate_sector display_name  \n",
       "1452        -0.022270             医药卫生         金域医学  \n",
       "1453         0.133926               工业         天赐材料  \n",
       "1454        -0.016540            原料与能源         三友化工  \n",
       "1455         0.104298            信息与通信         德赛西威  \n",
       "1456         0.183299               工业         阳光电源  \n",
       "1457        -0.018869               消费         小商品城  "
      ]
     },
     "execution_count": 194,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p_stock_df[p_stock_df.date==datetime(2022,1,4)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>mkt_cap</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2021-01-05</td>\n",
       "      <td>600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2021-01-10</td>\n",
       "      <td>1550</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        date  mkt_cap\n",
       "0 2021-01-05      600\n",
       "1 2021-01-10     1550"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mkt_cap_df =  pd.DataFrame(profile_df.groupby(['date'])['weight'].sum()).reset_index()\n",
    "mkt_cap_df.rename(columns={'weight':'mkt_cap'}, inplace=True)\n",
    "mkt_cap_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>return</th>\n",
       "      <th>pct</th>\n",
       "      <th>mkt_cap</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2021-01-05</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>600.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2021-01-06</td>\n",
       "      <td>0.007011</td>\n",
       "      <td>0.036439</td>\n",
       "      <td>621.863161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2021-01-07</td>\n",
       "      <td>0.047531</td>\n",
       "      <td>0.218707</td>\n",
       "      <td>757.869005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2021-01-08</td>\n",
       "      <td>0.047111</td>\n",
       "      <td>0.013639</td>\n",
       "      <td>768.205269</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        date    return       pct     mkt_cap\n",
       "0 2021-01-05  0.000000  0.000000  600.000000\n",
       "1 2021-01-06  0.007011  0.036439  621.863161\n",
       "2 2021-01-07  0.047531  0.218707  757.869005\n",
       "3 2021-01-08  0.047111  0.013639  768.205269"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get mkt adjustment (weight is the fund in a stock)\n",
    "mkt_adjustment =  pd.DataFrame(profile_df.groupby(['date'])['weight'].sum()).reset_index()\n",
    "mkt_adjustment.rename(columns={'weight':'mkt_cap'}, inplace=True)\n",
    "merge_df = p_perform_result.merge(mkt_adjustment, on='date', how='outer')\n",
    "\n",
    "\n",
    "for i in range(1, len(merge_df)):\n",
    "    merge_df.loc[i, 'mkt_cap'] = merge_df.loc[i-1, 'mkt_cap'] * (1 + merge_df.loc[i, 'pct'])\n",
    "\n",
    "# # calculate daily mkt_cap\n",
    "# # initial_mkt_cap = merge_df.loc[0, 'mkt_cap']\n",
    "# for i in range(1, len(merge_df)):\n",
    "#     row = merge_df.loc[i]\n",
    "#     if pd.isna(row['mkt_cap']):\n",
    "#         merge_df.loc[i, 'mkt_cap'] = merge_df.loc[i-1, 'mkt_cap'] * (1 + merge_df.loc[i, 'pct_portfolio'])\n",
    "    \n",
    "# # step 8 calculate daily mkt cap\n",
    "\n",
    "merge_df[merge_df.date < datetime(2021,1,10)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>date</th>\n",
       "      <th>open</th>\n",
       "      <th>close</th>\n",
       "      <th>high</th>\n",
       "      <th>low</th>\n",
       "      <th>volume</th>\n",
       "      <th>money</th>\n",
       "      <th>pct</th>\n",
       "      <th>weight</th>\n",
       "      <th>return</th>\n",
       "      <th>norm_weight</th>\n",
       "      <th>weighted_return</th>\n",
       "      <th>aggregate_sector</th>\n",
       "      <th>display_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>002709.XSHE</td>\n",
       "      <td>2021-01-05</td>\n",
       "      <td>32.54</td>\n",
       "      <td>33.89</td>\n",
       "      <td>34.22</td>\n",
       "      <td>31.39</td>\n",
       "      <td>59152352.0</td>\n",
       "      <td>1.942406e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.166667</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>工业</td>\n",
       "      <td>天赐材料</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>600415.XSHG</td>\n",
       "      <td>2021-01-05</td>\n",
       "      <td>5.33</td>\n",
       "      <td>5.87</td>\n",
       "      <td>5.87</td>\n",
       "      <td>5.22</td>\n",
       "      <td>180936477.0</td>\n",
       "      <td>1.010225e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.166667</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>消费</td>\n",
       "      <td>小商品城</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>600409.XSHG</td>\n",
       "      <td>2021-01-05</td>\n",
       "      <td>9.23</td>\n",
       "      <td>9.57</td>\n",
       "      <td>9.66</td>\n",
       "      <td>9.08</td>\n",
       "      <td>82669289.0</td>\n",
       "      <td>7.803391e+08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.166667</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>原料与能源</td>\n",
       "      <td>三友化工</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>300274.XSHE</td>\n",
       "      <td>2021-01-05</td>\n",
       "      <td>76.03</td>\n",
       "      <td>76.45</td>\n",
       "      <td>80.20</td>\n",
       "      <td>75.27</td>\n",
       "      <td>51384827.0</td>\n",
       "      <td>3.961995e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.166667</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>工业</td>\n",
       "      <td>阳光电源</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>002920.XSHE</td>\n",
       "      <td>2021-01-05</td>\n",
       "      <td>85.44</td>\n",
       "      <td>87.25</td>\n",
       "      <td>87.95</td>\n",
       "      <td>84.07</td>\n",
       "      <td>3852674.0</td>\n",
       "      <td>3.322598e+08</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.166667</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>信息与通信</td>\n",
       "      <td>德赛西威</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1477</th>\n",
       "      <td>600409.XSHG</td>\n",
       "      <td>2022-01-10</td>\n",
       "      <td>8.24</td>\n",
       "      <td>8.35</td>\n",
       "      <td>8.39</td>\n",
       "      <td>8.21</td>\n",
       "      <td>32516017.0</td>\n",
       "      <td>2.699300e+08</td>\n",
       "      <td>0.015815</td>\n",
       "      <td>87.251829</td>\n",
       "      <td>-0.127482</td>\n",
       "      <td>0.121949</td>\n",
       "      <td>-0.015546</td>\n",
       "      <td>原料与能源</td>\n",
       "      <td>三友化工</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1478</th>\n",
       "      <td>002920.XSHE</td>\n",
       "      <td>2022-01-10</td>\n",
       "      <td>130.36</td>\n",
       "      <td>138.43</td>\n",
       "      <td>141.96</td>\n",
       "      <td>130.11</td>\n",
       "      <td>5005400.0</td>\n",
       "      <td>6.901614e+08</td>\n",
       "      <td>0.046888</td>\n",
       "      <td>158.659026</td>\n",
       "      <td>0.586590</td>\n",
       "      <td>0.221752</td>\n",
       "      <td>0.130077</td>\n",
       "      <td>信息与通信</td>\n",
       "      <td>德赛西威</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1479</th>\n",
       "      <td>002709.XSHE</td>\n",
       "      <td>2022-01-10</td>\n",
       "      <td>51.63</td>\n",
       "      <td>50.73</td>\n",
       "      <td>51.93</td>\n",
       "      <td>50.03</td>\n",
       "      <td>29821246.0</td>\n",
       "      <td>1.518902e+09</td>\n",
       "      <td>-0.019142</td>\n",
       "      <td>149.690174</td>\n",
       "      <td>0.496902</td>\n",
       "      <td>0.209216</td>\n",
       "      <td>0.103960</td>\n",
       "      <td>工业</td>\n",
       "      <td>天赐材料</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1480</th>\n",
       "      <td>600415.XSHG</td>\n",
       "      <td>2022-01-10</td>\n",
       "      <td>4.70</td>\n",
       "      <td>4.75</td>\n",
       "      <td>4.85</td>\n",
       "      <td>4.67</td>\n",
       "      <td>39278041.0</td>\n",
       "      <td>1.859827e+08</td>\n",
       "      <td>0.010638</td>\n",
       "      <td>80.919932</td>\n",
       "      <td>-0.190801</td>\n",
       "      <td>0.113099</td>\n",
       "      <td>-0.021579</td>\n",
       "      <td>消费</td>\n",
       "      <td>小商品城</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1481</th>\n",
       "      <td>603882.XSHG</td>\n",
       "      <td>2022-01-10</td>\n",
       "      <td>88.45</td>\n",
       "      <td>95.53</td>\n",
       "      <td>95.59</td>\n",
       "      <td>88.39</td>\n",
       "      <td>6991445.0</td>\n",
       "      <td>6.468392e+08</td>\n",
       "      <td>0.085692</td>\n",
       "      <td>76.644737</td>\n",
       "      <td>-0.233553</td>\n",
       "      <td>0.107123</td>\n",
       "      <td>-0.025019</td>\n",
       "      <td>医药卫生</td>\n",
       "      <td>金域医学</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1482 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           ticker       date    open   close    high     low       volume  \\\n",
       "0     002709.XSHE 2021-01-05   32.54   33.89   34.22   31.39   59152352.0   \n",
       "1     600415.XSHG 2021-01-05    5.33    5.87    5.87    5.22  180936477.0   \n",
       "2     600409.XSHG 2021-01-05    9.23    9.57    9.66    9.08   82669289.0   \n",
       "3     300274.XSHE 2021-01-05   76.03   76.45   80.20   75.27   51384827.0   \n",
       "4     002920.XSHE 2021-01-05   85.44   87.25   87.95   84.07    3852674.0   \n",
       "...           ...        ...     ...     ...     ...     ...          ...   \n",
       "1477  600409.XSHG 2022-01-10    8.24    8.35    8.39    8.21   32516017.0   \n",
       "1478  002920.XSHE 2022-01-10  130.36  138.43  141.96  130.11    5005400.0   \n",
       "1479  002709.XSHE 2022-01-10   51.63   50.73   51.93   50.03   29821246.0   \n",
       "1480  600415.XSHG 2022-01-10    4.70    4.75    4.85    4.67   39278041.0   \n",
       "1481  603882.XSHG 2022-01-10   88.45   95.53   95.59   88.39    6991445.0   \n",
       "\n",
       "             money       pct      weight    return  norm_weight  \\\n",
       "0     1.942406e+09       NaN  100.000000  0.000000     0.166667   \n",
       "1     1.010225e+09       NaN  100.000000  0.000000     0.166667   \n",
       "2     7.803391e+08       NaN  100.000000  0.000000     0.166667   \n",
       "3     3.961995e+09       NaN  100.000000  0.000000     0.166667   \n",
       "4     3.322598e+08       NaN  100.000000  0.000000     0.166667   \n",
       "...            ...       ...         ...       ...          ...   \n",
       "1477  2.699300e+08  0.015815   87.251829 -0.127482     0.121949   \n",
       "1478  6.901614e+08  0.046888  158.659026  0.586590     0.221752   \n",
       "1479  1.518902e+09 -0.019142  149.690174  0.496902     0.209216   \n",
       "1480  1.859827e+08  0.010638   80.919932 -0.190801     0.113099   \n",
       "1481  6.468392e+08  0.085692   76.644737 -0.233553     0.107123   \n",
       "\n",
       "      weighted_return aggregate_sector display_name  \n",
       "0            0.000000               工业         天赐材料  \n",
       "1            0.000000               消费         小商品城  \n",
       "2            0.000000            原料与能源         三友化工  \n",
       "3            0.000000               工业         阳光电源  \n",
       "4            0.000000            信息与通信         德赛西威  \n",
       "...               ...              ...          ...  \n",
       "1477        -0.015546            原料与能源         三友化工  \n",
       "1478         0.130077            信息与通信         德赛西威  \n",
       "1479         0.103960               工业         天赐材料  \n",
       "1480        -0.021579               消费         小商品城  \n",
       "1481        -0.025019             医药卫生         金域医学  \n",
       "\n",
       "[1482 rows x 15 columns]"
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## agg by sector and day\n",
    "p_stock_df['weight_in_sector'] = p_stock_df.groupby"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def creaet_portfolio_return(stock_df):\n",
    "    portfolio_df = stock_df.groupby(['date'])['weighted_return'].sum().reset_index()\n",
    "    portfolio_df.rename(columns={'weighted_return':'portfolio_return'}, inplace=True)\n",
    "    return portfolio_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>portfolio_return</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2021-01-05</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2021-01-06</td>\n",
       "      <td>0.007011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2021-01-07</td>\n",
       "      <td>0.047531</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2021-01-08</td>\n",
       "      <td>0.047111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2021-01-11</td>\n",
       "      <td>0.052768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>242</th>\n",
       "      <td>2022-01-04</td>\n",
       "      <td>0.363845</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>243</th>\n",
       "      <td>2022-01-05</td>\n",
       "      <td>0.306697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>244</th>\n",
       "      <td>2022-01-06</td>\n",
       "      <td>0.331291</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>245</th>\n",
       "      <td>2022-01-07</td>\n",
       "      <td>0.313726</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>246</th>\n",
       "      <td>2022-01-10</td>\n",
       "      <td>0.313262</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>247 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          date  portfolio_return\n",
       "0   2021-01-05          0.000000\n",
       "1   2021-01-06          0.007011\n",
       "2   2021-01-07          0.047531\n",
       "3   2021-01-08          0.047111\n",
       "4   2021-01-11          0.052768\n",
       "..         ...               ...\n",
       "242 2022-01-04          0.363845\n",
       "243 2022-01-05          0.306697\n",
       "244 2022-01-06          0.331291\n",
       "245 2022-01-07          0.313726\n",
       "246 2022-01-10          0.313262\n",
       "\n",
       "[247 rows x 2 columns]"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "portfolio_df = creaet_portfolio_return(p_stock_df)\n",
    "portfolio_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "portfolio_risk_assesment",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}