friendshipkim committed on
Commit
d057d13
·
1 Parent(s): 83247fa
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -10,6 +10,7 @@ import time
10
  from annotated_text import annotated_text
11
 
12
  ORG_ID = "cornell-authorship"
 
13
 
14
  @st.cache
15
  def preprocess_text(s):
@@ -17,7 +18,7 @@ def preprocess_text(s):
17
 
18
  @st.cache
19
  def get_pairwise_distances(model):
20
- dataset = load_dataset(f"{ORG_ID}/{model}_distance", use_auth_token=True)["train"]
21
  df = pd.DataFrame(dataset).set_index('index')
22
  return df
23
 
@@ -32,7 +33,7 @@ def get_pairwise_distances_chunked(model, chunk):
32
  @st.cache
33
  def get_query_strings():
34
  # df = pd.read_json(hf_hub_download(repo_id=repo_id, filename="IUR_Reddit_test_queries_english.jsonl"), lines = True)
35
- dataset = load_dataset(f"{ORG_ID}/IUR_Reddit_test_queries_english", use_auth_token=True)["train"]
36
  df = pd.DataFrame(dataset)
37
  df['index'] = df.reset_index().index
38
  return df
@@ -44,7 +45,7 @@ def get_query_strings():
44
  @st.cache
45
  def get_candidate_strings():
46
  # df = pd.read_json(f"{ASSETS_PATH}/IUR_Reddit_test_candidates_english.jsonl", lines = True)
47
- dataset = load_dataset(f"{ORG_ID}/IUR_Reddit_test_candidates_english", use_auth_token=True)["train"]
48
  df = pd.DataFrame(dataset)
49
  df['index'] = df.reset_index().index
50
  return df
@@ -55,7 +56,7 @@ def get_candidate_strings():
55
  @st.cache
56
  def get_embedding_dataset(model):
57
  # data = load_from_disk(f"{ASSETS_PATH}/{model}/embedding")
58
- data = load_dataset(f"{ORG_ID}/{model}_embedding", use_auth_token=True)
59
  return data
60
 
61
  @st.cache
 
10
  from annotated_text import annotated_text
11
 
12
  ORG_ID = "cornell-authorship"
13
+ TOKEN = st.secrets["token"]
14
 
15
  @st.cache
16
  def preprocess_text(s):
 
18
 
19
  @st.cache
20
  def get_pairwise_distances(model):
21
+ dataset = load_dataset(f"{ORG_ID}/{model}_distance", use_auth_token=TOKEN)["train"]
22
  df = pd.DataFrame(dataset).set_index('index')
23
  return df
24
 
 
33
  @st.cache
34
  def get_query_strings():
35
  # df = pd.read_json(hf_hub_download(repo_id=repo_id, filename="IUR_Reddit_test_queries_english.jsonl"), lines = True)
36
+ dataset = load_dataset(f"{ORG_ID}/IUR_Reddit_test_queries_english", use_auth_token=TOKEN)["train"]
37
  df = pd.DataFrame(dataset)
38
  df['index'] = df.reset_index().index
39
  return df
 
45
  @st.cache
46
  def get_candidate_strings():
47
  # df = pd.read_json(f"{ASSETS_PATH}/IUR_Reddit_test_candidates_english.jsonl", lines = True)
48
+ dataset = load_dataset(f"{ORG_ID}/IUR_Reddit_test_candidates_english", use_auth_token=TOKEN)["train"]
49
  df = pd.DataFrame(dataset)
50
  df['index'] = df.reset_index().index
51
  return df
 
56
  @st.cache
57
  def get_embedding_dataset(model):
58
  # data = load_from_disk(f"{ASSETS_PATH}/{model}/embedding")
59
+ data = load_dataset(f"{ORG_ID}/{model}_embedding", use_auth_token=TOKEN)
60
  return data
61
 
62
  @st.cache