In [1]:
import os 
import torch
import numpy as np
from tabulate import tabulate
import random
from matplotlib import pyplot as plt
import pickle
from scipy.signal import find_peaks

In [2]:
pre = "../data/"

### Load the data

In [3]:
x = {}
y = {}
var_vec = ["Tprev"]
for var in var_vec:
 x[var] = {}
 y[var] = {}
 
for var in var_vec:
 with open(pre + 'x_' + var + '.pkl', 'rb') as file: 
 x[var] = pickle.load(file) 
 with open(pre + 'y_' + var + '.pkl', 'rb') as file: 
 y[var] = pickle.load(file) 
 

### Identify train/cv/test

In [4]:
sims = torch.load(pre + "/sims.pt")

extrapolation_sims = []
interpolation_sims = []
for si, sim in enumerate(sims):
 if si !=39 and si!=8: # and (si==0 or si==100 or si==120):
 #print(tabulate([["num", "dataset", "raq", "fkt", "fkp", "gr", "ar"],
 # sim[:-1]
 # ]))
 ignr, ignr, raq, fkt, fkp, gr, ar, ignr = sim

 #if (fkt < 5e+5 or fkt > 5e+8) and (fkp < 15 or fkp > 85) and (raq < 1.5 or raq > 8.5):
 if (fkt > 5e+9) or (fkp > 95) or (raq > 9.5):
 extrapolation_sims.append(si)
 else:
 interpolation_sims.append(si)

random.seed(1992)
inds = {}
inds["test"] = extrapolation_sims #+ random.choices(interpolation_sims, k=9)

remain_inds = []
for inp in interpolation_sims:
 if inp not in inds["test"]:
 remain_inds.append(inp)
 
inds["cv"] = random.choices(remain_inds, k=16)

inds["train"] = []
for inp in remain_inds:
 if inp not in inds["test"] and inp not in inds["cv"]:
 inds["train"].append(inp)

inds["train"] = np.unique(inds["train"])
inds["cv"] = np.unique(inds["cv"])
inds["test"] = np.unique(inds["test"])

print(len(inds["train"]))#, sorted(inds["train"]))
print(len(inds["cv"]))#, sorted(inds["cv"]))
print(len(inds["test"]))#, sorted(inds["test"]))

97
15
16


### Write simulation parameters 

In [5]:
sims_table = [["Simulation", "Dataset", "RaQ/Ra", "FKT", "FKV"]] 

for sim in sims:
 if sim[0] in inds["train"]:
 an = "train"
 elif sim[0] in inds["cv"]:
 an = "cv"
 elif sim[0] in inds["test"]:
 an = "test"
 sims_table.append([sim[0], an, sim[2], sim[3], sim[4]])

with open('../inputs/simulations.txt', 'w') as f:
 f.write(tabulate(sims_table))

with open('../Paper/simulations.pkl', 'wb') as f:
 pickle.dump(sims_table, f)

### Pointwise input preparation

In [6]:
x_pointwise = {}
y_pointwise = {}

y_prof = torch.load(pre + "/y_prof.pt").flatten().numpy()

y_prof = y_prof[::-1]
y_new = np.sort(np.concatenate((np.linspace(1,y_prof[15],100), 
 y_prof, np.linspace(y_prof[-10],y_prof[-1],50)), axis=0))[::-1]


for an in ["train", "cv", "test"]:
 x_pointwise[an] = np.zeros((len(inds[an])*y_new.shape[0], 4))
 y_pointwise[an] = np.zeros((len(inds[an])*y_new.shape[0], 1))
 

 cntr = 0
 
 for i in inds[an]:
 #print(an, i)
 #u = y["uprev"][i]*20
 #v = y["vprev"][i]*20
 #vmag = np.sqrt(u[-50:,:]**2 + v[-50:,:]**2)
 #vmag = np.mean(vmag, axis=0)
 T = np.mean(y["Tprev"][i], axis=0)
 T_new = np.interp(y_new, y_prof[::-1], T[::-1])

 #plt.figure()
 #plt.plot(T, y_prof)
 #plt.plot(T_new, y_new, 'kx')
 #plt.ylim([1,0])
 #plt.show()

 for j in range(y_new.shape[0]):
 x_pointwise[an][cntr,:3] = x["Tprev"][i]
 x_pointwise[an][cntr,3:4] = y_new[j]
 
 y_pointwise[an][cntr,0] = T_new[j]
 cntr += 1 
 print(x_pointwise[an].shape, y_pointwise[an].shape)



with open(pre + 'x_pointwise.pkl', 'wb') as file: 
 pickle.dump(x_pointwise, file) 
with open(pre + 'y_pointwise.pkl', 'wb') as file: 
 pickle.dump(y_pointwise, file) 


for an in ["train", "cv", "test"]:
 x_pointwise[an] = np.zeros((len(inds[an])*y_prof.shape[0], 4))
 y_pointwise[an] = np.zeros((len(inds[an])*y_prof.shape[0], 1))
 
 cntr = 0
 
 for i in inds[an]:
 T = np.mean(y["Tprev"][i], axis=0)

 for j in range(y_prof.shape[0]):
 x_pointwise[an][cntr,:3] = x["Tprev"][i]
 x_pointwise[an][cntr,3:4] = y_prof[j]
 
 y_pointwise[an][cntr,0] = T[j]
 cntr += 1 
 print(x_pointwise[an].shape, y_pointwise[an].shape)

with open(pre + 'x_pointwise_orgres.pkl', 'wb') as file: 
 pickle.dump(x_pointwise, file) 
with open(pre + 'y_pointwise_orgres.pkl', 'wb') as file: 
 pickle.dump(y_pointwise, file) 

(26966, 4) (26966, 1)
(4170, 4) (4170, 1)
(4448, 4) (4448, 1)
(12416, 4) (12416, 1)
(1920, 4) (1920, 1)
(2048, 4) (2048, 1)


### Full profile input preparation

In [9]:
x_p = {}
y_p = {}

for an in ["train", "cv", "test"]:
 x_p[an] = np.zeros((len(inds[an]),3))
 y_p[an] = np.zeros((len(inds[an]),128))

 cntr = 0
 for i in inds[an]:
 print(an, i)
 T = np.mean(y["Tprev"][i], axis=0)

 x_p[an][cntr,:] = x["Tprev"][i]
 y_p[an][cntr,:] = T
 cntr += 1 
 
 print(x_p[an].shape, y_p[an].shape)



with open(pre + 'x_p.pkl', 'wb') as file: 
 pickle.dump(x_p, file) 
with open(pre + 'y_p.pkl', 'wb') as file: 
 pickle.dump(y_p, file) 

train 0
train 3
train 4
train 5
train 6
train 7
train 9
train 10
train 11
train 12
train 13
train 14
train 16
train 18
train 19
train 20
train 21
train 22
train 23
train 24
train 25
train 26
train 27
train 28
train 29
train 30
train 31
train 33
train 34
train 35
train 36
train 37
train 41
train 43
train 44
train 45
train 46
train 47
train 48
train 49
train 50
train 51
train 52
train 53
train 54
train 56
train 61
train 62
train 63
train 64
train 65
train 66
train 67
train 70
train 71
train 72
train 73
train 74
train 75
train 78
train 79
train 80
train 81
train 82
train 84
train 88
train 89
train 90
train 91
train 96
train 97
train 99
train 100
train 101
train 102
train 103
train 104
train 106
train 107
train 108
train 109
train 110
train 111
train 113
train 114
train 115
train 116
train 117
train 119
train 120
train 121
train 123
train 124
train 125
train 126
train 128
train 129
(97, 3) (97, 128)
cv 2
cv 17
cv 32
cv 38
cv 40
cv 57
cv 59
cv 60
cv 76
cv 83
cv 92
cv 95
cv 98
cv 105
cv 122
