SaulLu committed on
Commit
2e8bd01
1 Parent(s): 4810cf9
Files changed (2) hide show
  1. app.py +16 -6
  2. dataset_configs.json +0 -0
app.py CHANGED
@@ -2,21 +2,21 @@ import os
2
  import pprint as pp
3
  from collections import OrderedDict, defaultdict
4
 
 
5
  import diff_viewer
6
  import pandas as pd
7
  import streamlit as st
8
  from datasets import load_dataset, get_dataset_config_names
9
 
10
- CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT = os.getenv("CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT")
11
- LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT = os.getenv("LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT")
12
- HF_API_TOKEN = os.getenv("HF_API_TOKEN")
13
-
14
  OPERATION_TYPES = [
15
  "Applied filter",
16
  "Applied deduplication function",
17
  "Applied map function",
18
  ]
19
- MAX_LEN_DS_CHECKS = os.getenv("MAX_LEN_DS_CHECKS")
20
 
21
 
22
  def get_ds(config):
@@ -261,9 +261,19 @@ st.set_page_config(page_title="Dataset explorer", page_icon=":hugging_face:", la
261
  st.write(
262
  "The purpose of this application is to sequentially view the changes made to a dataset."
263
  )
 
 
 
 
 
 
 
 
264
  col_option_clean, col_option_ds = st.columns(2)
265
 
266
- CHECK_CONFIGS = get_dataset_config_names(CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT, use_auth_token=HF_API_TOKEN)
 
 
267
 
268
  CLEANING_VERSIONS = set()
269
  dataset_names = set()
 
2
  import pprint as pp
3
  from collections import OrderedDict, defaultdict
4
 
5
+ import json
6
  import diff_viewer
7
  import pandas as pd
8
  import streamlit as st
9
  from datasets import load_dataset, get_dataset_config_names
10
 
11
+ CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT = st.secrets["CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT"]
12
+ LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT = st.secrets["LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT"]
13
+ HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
 
14
  OPERATION_TYPES = [
15
  "Applied filter",
16
  "Applied deduplication function",
17
  "Applied map function",
18
  ]
19
+ MAX_LEN_DS_CHECKS = st.secrets["MAX_LEN_DS_CHECKS"]
20
 
21
 
22
  def get_ds(config):
 
261
  st.write(
262
  "The purpose of this application is to sequentially view the changes made to a dataset."
263
  )
264
+
265
+
266
+ st.write(CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT)
267
+ ds_log = load_dataset(LOGS_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT, 'clean_v1_dsname_lm_en_multi_un_2', use_auth_token=HF_API_TOKEN)
268
+ st.write(ds_log)
269
+
270
+
271
+
272
  col_option_clean, col_option_ds = st.columns(2)
273
 
274
+ with open("dataset_configs.json", "r") as f:
275
+ CHECK_CONFIGS = json.load(f)
276
+ # CHECK_CONFIGS = get_dataset_config_names(CHECK_DATASET_DIR_PATH_BEFORE_CLEAN_SELECT, use_auth_token=HF_API_TOKEN)
277
 
278
  CLEANING_VERSIONS = set()
279
  dataset_names = set()
dataset_configs.json ADDED
The diff for this file is too large to render. See raw diff