hellno-o committed
Commit 2501733
1 parent: 44be985

try fix running in hf.co

Files changed (1): app.py (+6 -5)
app.py CHANGED
@@ -61,9 +61,10 @@ def download_dataset():
 
 @st.cache
 def load_dataset(feature_set: str):
-    print('load_dataset with feature_set', feature_set)
+    dataset_path = get_dataset_path()
+    print(f'load_dataset with feature_set {feature_set} and path {dataset_path}')
     # read the feature metadata and get a feature set (or all the features)
-    with open("v4/features.json", "r") as f:
+    with open(f"{dataset_path}/features.json", "r") as f:
         feature_metadata = json.load(f)
     # features = list(feature_metadata["feature_stats"].keys()) # get all the features
     # features = feature_metadata["feature_sets"]["small"] # get the small
@@ -75,9 +76,9 @@ def load_dataset(feature_set: str):
     # note: sometimes when trying to read the downloaded data you get an error about invalid magic parquet bytes...
     # if so, delete the file and rerun the napi.download_dataset to fix the
     # corrupted file
-    training_data = pd.read_parquet('v4/train.parquet',
+    training_data = pd.read_parquet(f'{dataset_path}/train.parquet',
                                     columns=read_columns)
-    validation_data = pd.read_parquet('v4/validation.parquet',
+    validation_data = pd.read_parquet(f'{dataset_path}/validation.parquet',
                                       columns=read_columns)
     live_data = pd.read_parquet(f'v4/live_{current_round}.parquet',
                                 columns=read_columns)
@@ -215,7 +216,7 @@ def get_model_preds(model_name, *params):
     validation_data["prediction"].to_csv(validation_prediction_fname)
     live_data["prediction"].to_csv(f"live_predictions_{current_round}.csv")
 
-    validation_preds = pd.read_parquet('v4/validation_example_preds.parquet')
+    validation_preds = pd.read_parquet(f'{get_dataset_path()}/validation_example_preds.parquet')
     validation_data[EXAMPLE_PREDS_COL] = validation_preds["prediction"]
 
     # get some stats about each of our models to compare...
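The helper get_dataset_path() that this commit calls from load_dataset() is not part of the diff, so its definition lives elsewhere in app.py. A minimal sketch of what such a helper could look like, assuming the point of the fix is to redirect the Numerai v4 downloads to a writable directory when the app runs on Hugging Face Spaces (the SPACE_ID check and the /tmp fallback are assumptions, not part of this commit):

import os

# Hypothetical helper, not shown in this commit. Assumption: Hugging Face
# Spaces sets the SPACE_ID environment variable, and /tmp is writable there
# while the cloned repo directory may not be.
def get_dataset_path() -> str:
    if os.environ.get("SPACE_ID"):
        # Running inside a Hugging Face Space: keep downloads in a temp dir.
        return "/tmp/v4"
    # Running locally: keep the relative v4 directory used before this commit.
    return "v4"

Note that the live data read in the second hunk is left as an unchanged context line with the literal v4/ prefix (f'v4/live_{current_round}.parquet'), so on Spaces that one read would bypass whatever directory get_dataset_path() resolves to.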