File size: 760 Bytes
dcfa366
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


from numerapi import NumerAPI
import os
import pandas as pd
import numpy as np


# In[ ]:


# Directory (relative to the current working directory) that holds the
# downloaded parquet file and the subset derived from it.
data_path = "./data/"

# Numerai API client, constructed with no explicit arguments.
napi = NumerAPI()


# In[ ]:


# Download the v4.2 validation dataset into data_path and load it.
source_file = data_path + "validation_int8.parquet"
napi.download_dataset("v4.2/validation_int8.parquet", source_file)
validation_data = pd.read_parquet(source_file)

# Pick the last two distinct eras among the rows tagged 'validation'.
# unique() keeps values in order of first appearance, so "last two" follows
# the row order of the file.
# NOTE(review): presumably the file is ordered chronologically by era, which
# would make these the two most recent eras -- confirm.
is_validation = validation_data["data_type"] == "validation"
validation_eras = validation_data.loc[is_validation, "era"].unique()
recent_eras = list(validation_eras[-2:])

# Keep every row that belongs to one of the selected eras.
in_recent_era = validation_data["era"].isin(recent_eras)
validation_subset = validation_data[in_recent_era]

# Write the subset next to the full file; index=False leaves the DataFrame
# index out of the written parquet.
# NOTE(review): if the index carries row ids they are dropped here -- confirm
# that this is intended.
subset_file = data_path + "validation_subset_int8.parquet"
validation_subset.to_parquet(subset_file, index=False)


# In[ ]:


# Tell the user how to copy the subset file to the server.
# The path is derived from data_path (where the subset was written) rather
# than hard-coded, and made absolute so that the suggested scp command works
# regardless of the directory it is run from.
subset_location = os.path.abspath(data_path + "validation_subset_int8.parquet")
print(
    "Now please copy the file to server via: "
    f"scp {subset_location} gms@gms1:/home/gms/numerai/data/."
)