{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8b63d618-1f46-49e8-b388-c4185624e58c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os \n",
    "import torch\n",
    "import numpy as np\n",
    "from tabulate import tabulate\n",
    "import random\n",
    "from matplotlib import pyplot as plt\n",
    "import pickle\n",
    "from scipy.signal import find_peaks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2129fb21-fcea-40b4-ba3c-473aa2e6f1e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "pre = \"../data/\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b2f4a411-a2b9-4711-964a-81e34de04c60",
   "metadata": {},
   "source": [
    "### Load the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "ae2009a3-a989-4756-849a-1180bb7ba087",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "x = {}\n",
    "y = {}\n",
    "var_vec = [\"Tprev\"]\n",
    "for var in var_vec:\n",
    "    x[var] = {}\n",
    "    y[var] = {}\n",
    "    \n",
    "for var in var_vec:\n",
    "    with open(pre + 'x_' + var + '.pkl', 'rb') as file: \n",
    "        x[var] = pickle.load(file) \n",
    "    with open(pre + 'y_' + var + '.pkl', 'rb') as file: \n",
    "        y[var] = pickle.load(file) \n",
    "        "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bff0409c-0adb-42df-96e3-2ace88051f6e",
   "metadata": {},
   "source": [
    "### Identify train/cv/test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "22cbeaae-7421-4ae9-8042-079591939e07",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "97\n",
      "15\n",
      "16\n"
     ]
    }
   ],
   "source": [
    "sims = torch.load(pre + \"/sims.pt\")\n",
    "\n",
    "extrapolation_sims = []\n",
    "interpolation_sims = []\n",
    "for si, sim in enumerate(sims):\n",
    "    if si !=39 and si!=8: # and (si==0 or si==100 or si==120):\n",
    "        #print(tabulate([[\"num\", \"dataset\", \"raq\", \"fkt\", \"fkp\", \"gr\", \"ar\"],\n",
    "        #                sim[:-1]\n",
    "        #               ]))\n",
    "        ignr, ignr, raq, fkt, fkp, gr, ar, ignr = sim\n",
    "\n",
    "        #if (fkt < 5e+5 or fkt > 5e+8) and (fkp < 15 or fkp > 85) and (raq < 1.5 or raq > 8.5):\n",
    "        if (fkt > 5e+9) or (fkp > 95) or (raq > 9.5):\n",
    "            extrapolation_sims.append(si)\n",
    "        else:\n",
    "            interpolation_sims.append(si)\n",
    "\n",
    "random.seed(1992)\n",
    "inds = {}\n",
    "inds[\"test\"] = extrapolation_sims #+ random.choices(interpolation_sims, k=9)\n",
    "\n",
    "remain_inds = []\n",
    "for inp in interpolation_sims:\n",
    "    if inp not in inds[\"test\"]:\n",
    "        remain_inds.append(inp)\n",
    "        \n",
    "inds[\"cv\"] = random.choices(remain_inds, k=16)\n",
    "\n",
    "inds[\"train\"] = []\n",
    "for inp in remain_inds:\n",
    "    if inp not in inds[\"test\"] and inp not in inds[\"cv\"]:\n",
    "        inds[\"train\"].append(inp)\n",
    "\n",
    "inds[\"train\"] = np.unique(inds[\"train\"])\n",
    "inds[\"cv\"] = np.unique(inds[\"cv\"])\n",
    "inds[\"test\"] = np.unique(inds[\"test\"])\n",
    "\n",
    "print(len(inds[\"train\"]))#, sorted(inds[\"train\"]))\n",
    "print(len(inds[\"cv\"]))#, sorted(inds[\"cv\"]))\n",
    "print(len(inds[\"test\"]))#, sorted(inds[\"test\"]))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7ade165c-b896-4192-a4e6-d4ac96a6dd07",
   "metadata": {},
   "source": [
    "### Write simulation parameters "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f3ec1278-58cb-445a-a8e4-112d0032c784",
   "metadata": {},
   "outputs": [],
   "source": [
    "sims_table = [[\"Simulation\", \"Dataset\", \"RaQ/Ra\", \"FKT\", \"FKV\"]] \n",
    "\n",
    "for sim in sims:\n",
    "    if sim[0] in inds[\"train\"]:\n",
    "        an = \"train\"\n",
    "    elif sim[0] in inds[\"cv\"]:\n",
    "        an = \"cv\"\n",
    "    elif sim[0] in inds[\"test\"]:\n",
    "        an = \"test\"\n",
    "    sims_table.append([sim[0], an, sim[2], sim[3], sim[4]])\n",
    "\n",
    "with open('../inputs/simulations.txt', 'w') as f:\n",
    "    f.write(tabulate(sims_table))\n",
    "\n",
    "with open('../Paper/simulations.pkl', 'wb') as f:\n",
    "    pickle.dump(sims_table, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f93a6bb-3b87-4822-a58a-a105ca049c0c",
   "metadata": {},
   "source": [
    "### Pointwise input preparation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "97e5f984-6dc9-4ec9-b770-3ca1a721f67c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(26966, 4) (26966, 1)\n",
      "(4170, 4) (4170, 1)\n",
      "(4448, 4) (4448, 1)\n",
      "(12416, 4) (12416, 1)\n",
      "(1920, 4) (1920, 1)\n",
      "(2048, 4) (2048, 1)\n"
     ]
    }
   ],
   "source": [
    "x_pointwise = {}\n",
    "y_pointwise = {}\n",
    "\n",
    "y_prof = torch.load(pre + \"/y_prof.pt\").flatten().numpy()\n",
    "\n",
    "y_prof = y_prof[::-1]\n",
    "y_new = np.sort(np.concatenate((np.linspace(1,y_prof[15],100), \n",
    "                                y_prof, np.linspace(y_prof[-10],y_prof[-1],50)), axis=0))[::-1]\n",
    "\n",
    "\n",
    "for an in [\"train\", \"cv\", \"test\"]:\n",
    "    x_pointwise[an] = np.zeros((len(inds[an])*y_new.shape[0], 4))\n",
    "    y_pointwise[an] = np.zeros((len(inds[an])*y_new.shape[0], 1))\n",
    "    \n",
    "\n",
    "    cntr = 0\n",
    "    \n",
    "    for i in inds[an]:\n",
    "        #print(an, i)\n",
    "        #u = y[\"uprev\"][i]*20\n",
    "        #v = y[\"vprev\"][i]*20\n",
    "        #vmag = np.sqrt(u[-50:,:]**2 + v[-50:,:]**2)\n",
    "        #vmag = np.mean(vmag, axis=0)\n",
    "        T = np.mean(y[\"Tprev\"][i], axis=0)\n",
    "        T_new = np.interp(y_new, y_prof[::-1], T[::-1])\n",
    "\n",
    "        #plt.figure()\n",
    "        #plt.plot(T, y_prof)\n",
    "        #plt.plot(T_new, y_new, 'kx')\n",
    "        #plt.ylim([1,0])\n",
    "        #plt.show()\n",
    "\n",
    "        for j in range(y_new.shape[0]):\n",
    "            x_pointwise[an][cntr,:3] = x[\"Tprev\"][i]\n",
    "            x_pointwise[an][cntr,3:4] = y_new[j]\n",
    "            \n",
    "            y_pointwise[an][cntr,0]   = T_new[j]\n",
    "            cntr += 1        \n",
    "    print(x_pointwise[an].shape, y_pointwise[an].shape)\n",
    "\n",
    "\n",
    "\n",
    "with open(pre + 'x_pointwise.pkl', 'wb') as file: \n",
    "    pickle.dump(x_pointwise, file) \n",
    "with open(pre + 'y_pointwise.pkl', 'wb') as file: \n",
    "    pickle.dump(y_pointwise, file) \n",
    "\n",
    "\n",
    "for an in [\"train\", \"cv\", \"test\"]:\n",
    "    x_pointwise[an] = np.zeros((len(inds[an])*y_prof.shape[0], 4))\n",
    "    y_pointwise[an] = np.zeros((len(inds[an])*y_prof.shape[0], 1))\n",
    "    \n",
    "    cntr = 0\n",
    "    \n",
    "    for i in inds[an]:\n",
    "        T = np.mean(y[\"Tprev\"][i], axis=0)\n",
    "\n",
    "        for j in range(y_prof.shape[0]):\n",
    "            x_pointwise[an][cntr,:3] = x[\"Tprev\"][i]\n",
    "            x_pointwise[an][cntr,3:4] = y_prof[j]\n",
    "            \n",
    "            y_pointwise[an][cntr,0]   = T[j]\n",
    "            cntr += 1        \n",
    "    print(x_pointwise[an].shape, y_pointwise[an].shape)\n",
    "\n",
    "with open(pre + 'x_pointwise_orgres.pkl', 'wb') as file: \n",
    "    pickle.dump(x_pointwise, file) \n",
    "with open(pre + 'y_pointwise_orgres.pkl', 'wb') as file: \n",
    "    pickle.dump(y_pointwise, file) "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9b08966c-31ef-4edf-b298-665cce954eb2",
   "metadata": {},
   "source": [
    "### Full profile input preparation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "40d86b1d-8db5-405c-a305-f7fb23c297dc",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train 0\n",
      "train 3\n",
      "train 4\n",
      "train 5\n",
      "train 6\n",
      "train 7\n",
      "train 9\n",
      "train 10\n",
      "train 11\n",
      "train 12\n",
      "train 13\n",
      "train 14\n",
      "train 16\n",
      "train 18\n",
      "train 19\n",
      "train 20\n",
      "train 21\n",
      "train 22\n",
      "train 23\n",
      "train 24\n",
      "train 25\n",
      "train 26\n",
      "train 27\n",
      "train 28\n",
      "train 29\n",
      "train 30\n",
      "train 31\n",
      "train 33\n",
      "train 34\n",
      "train 35\n",
      "train 36\n",
      "train 37\n",
      "train 41\n",
      "train 43\n",
      "train 44\n",
      "train 45\n",
      "train 46\n",
      "train 47\n",
      "train 48\n",
      "train 49\n",
      "train 50\n",
      "train 51\n",
      "train 52\n",
      "train 53\n",
      "train 54\n",
      "train 56\n",
      "train 61\n",
      "train 62\n",
      "train 63\n",
      "train 64\n",
      "train 65\n",
      "train 66\n",
      "train 67\n",
      "train 70\n",
      "train 71\n",
      "train 72\n",
      "train 73\n",
      "train 74\n",
      "train 75\n",
      "train 78\n",
      "train 79\n",
      "train 80\n",
      "train 81\n",
      "train 82\n",
      "train 84\n",
      "train 88\n",
      "train 89\n",
      "train 90\n",
      "train 91\n",
      "train 96\n",
      "train 97\n",
      "train 99\n",
      "train 100\n",
      "train 101\n",
      "train 102\n",
      "train 103\n",
      "train 104\n",
      "train 106\n",
      "train 107\n",
      "train 108\n",
      "train 109\n",
      "train 110\n",
      "train 111\n",
      "train 113\n",
      "train 114\n",
      "train 115\n",
      "train 116\n",
      "train 117\n",
      "train 119\n",
      "train 120\n",
      "train 121\n",
      "train 123\n",
      "train 124\n",
      "train 125\n",
      "train 126\n",
      "train 128\n",
      "train 129\n",
      "(97, 3) (97, 128)\n",
      "cv 2\n",
      "cv 17\n",
      "cv 32\n",
      "cv 38\n",
      "cv 40\n",
      "cv 57\n",
      "cv 59\n",
      "cv 60\n",
      "cv 76\n",
      "cv 83\n",
      "cv 92\n",
      "cv 95\n",
      "cv 98\n",
      "cv 105\n",
      "cv 122\n",
      "(15, 3) (15, 128)\n",
      "test 1\n",
      "test 15\n",
      "test 42\n",
      "test 55\n",
      "test 58\n",
      "test 68\n",
      "test 69\n",
      "test 77\n",
      "test 85\n",
      "test 86\n",
      "test 87\n",
      "test 93\n",
      "test 94\n",
      "test 112\n",
      "test 118\n",
      "test 127\n",
      "(16, 3) (16, 128)\n"
     ]
    }
   ],
   "source": [
    "x_p = {}\n",
    "y_p = {}\n",
    "\n",
    "for an in [\"train\", \"cv\", \"test\"]:\n",
    "    x_p[an] = np.zeros((len(inds[an]),3))\n",
    "    y_p[an] = np.zeros((len(inds[an]),128))\n",
    "\n",
    "    cntr = 0\n",
    "    for i in inds[an]:\n",
    "        print(an, i)\n",
    "        T = np.mean(y[\"Tprev\"][i], axis=0)\n",
    "\n",
    "        x_p[an][cntr,:] = x[\"Tprev\"][i]\n",
    "        y_p[an][cntr,:] = T\n",
    "        cntr += 1        \n",
    "    \n",
    "    print(x_p[an].shape, y_p[an].shape)\n",
    "\n",
    "\n",
    "\n",
    "with open(pre + 'x_p.pkl', 'wb') as file: \n",
    "    pickle.dump(x_p, file) \n",
    "with open(pre + 'y_p.pkl', 'wb') as file: \n",
    "    pickle.dump(y_p, file) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "960cc6f5-de5e-4788-aaaa-65e06aa33a42",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}