Spaces:
Runtime error
Runtime error
Commit
·
d057d13
1
Parent(s):
83247fa
use token
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ import time
|
|
10 |
from annotated_text import annotated_text
|
11 |
|
12 |
ORG_ID = "cornell-authorship"
|
|
|
13 |
|
14 |
@st.cache
|
15 |
def preprocess_text(s):
|
@@ -17,7 +18,7 @@ def preprocess_text(s):
|
|
17 |
|
18 |
@st.cache
|
19 |
def get_pairwise_distances(model):
|
20 |
-
dataset = load_dataset(f"{ORG_ID}/{model}_distance", use_auth_token=
|
21 |
df = pd.DataFrame(dataset).set_index('index')
|
22 |
return df
|
23 |
|
@@ -32,7 +33,7 @@ def get_pairwise_distances_chunked(model, chunk):
|
|
32 |
@st.cache
|
33 |
def get_query_strings():
|
34 |
# df = pd.read_json(hf_hub_download(repo_id=repo_id, filename="IUR_Reddit_test_queries_english.jsonl"), lines = True)
|
35 |
-
dataset = load_dataset(f"{ORG_ID}/IUR_Reddit_test_queries_english", use_auth_token=
|
36 |
df = pd.DataFrame(dataset)
|
37 |
df['index'] = df.reset_index().index
|
38 |
return df
|
@@ -44,7 +45,7 @@ def get_query_strings():
|
|
44 |
@st.cache
|
45 |
def get_candidate_strings():
|
46 |
# df = pd.read_json(f"{ASSETS_PATH}/IUR_Reddit_test_candidates_english.jsonl", lines = True)
|
47 |
-
dataset = load_dataset(f"{ORG_ID}/IUR_Reddit_test_candidates_english", use_auth_token=
|
48 |
df = pd.DataFrame(dataset)
|
49 |
df['index'] = df.reset_index().index
|
50 |
return df
|
@@ -55,7 +56,7 @@ def get_candidate_strings():
|
|
55 |
@st.cache
|
56 |
def get_embedding_dataset(model):
|
57 |
# data = load_from_disk(f"{ASSETS_PATH}/{model}/embedding")
|
58 |
-
data = load_dataset(f"{ORG_ID}/{model}_embedding", use_auth_token=
|
59 |
return data
|
60 |
|
61 |
@st.cache
|
|
|
10 |
from annotated_text import annotated_text
|
11 |
|
12 |
# Hugging Face organization that hosts the authorship datasets.
ORG_ID = "cornell-authorship"
# Access token pulled from Streamlit secrets (never hard-coded); forwarded
# to every load_dataset call below so private repos resolve.
TOKEN = st.secrets["token"]
|
14 |
|
15 |
@st.cache
|
16 |
def preprocess_text(s):
|
|
|
18 |
|
19 |
@st.cache
def get_pairwise_distances(model):
    """Load the precomputed pairwise-distance table for *model*.

    Pulls the 'train' split of the ``{ORG_ID}/{model}_distance`` dataset
    (authenticated via TOKEN) and returns it as a DataFrame keyed on its
    'index' column.
    """
    distance_split = load_dataset(f"{ORG_ID}/{model}_distance", use_auth_token=TOKEN)["train"]
    return pd.DataFrame(distance_split).set_index('index')
|
24 |
|
|
|
33 |
@st.cache
def get_query_strings():
    """Fetch the English Reddit test *query* texts as a DataFrame.

    Loads the 'train' split of ``{ORG_ID}/IUR_Reddit_test_queries_english``
    (authenticated via TOKEN) and adds a positional 'index' column so rows
    can be matched against the distance tables.
    """
    query_split = load_dataset(f"{ORG_ID}/IUR_Reddit_test_queries_english", use_auth_token=TOKEN)["train"]
    queries = pd.DataFrame(query_split)
    # Positional row number, materialized as a plain column.
    queries['index'] = queries.reset_index().index
    return queries
|
|
|
45 |
@st.cache
def get_candidate_strings():
    """Fetch the English Reddit test *candidate* texts as a DataFrame.

    Mirrors get_query_strings: loads the 'train' split of
    ``{ORG_ID}/IUR_Reddit_test_candidates_english`` (authenticated via
    TOKEN) and adds a positional 'index' column.
    """
    candidate_split = load_dataset(f"{ORG_ID}/IUR_Reddit_test_candidates_english", use_auth_token=TOKEN)["train"]
    candidates = pd.DataFrame(candidate_split)
    # Positional row number, materialized as a plain column.
    candidates['index'] = candidates.reset_index().index
    return candidates
|
|
|
56 |
@st.cache
def get_embedding_dataset(model):
    """Load the embedding dataset for *model*.

    NOTE(review): unlike the other loaders in this file, this returns the
    full load_dataset result (no ``["train"]`` selection) — presumably the
    caller indexes the split itself; confirm before changing.
    """
    embeddings = load_dataset(f"{ORG_ID}/{model}_embedding", use_auth_token=TOKEN)
    return embeddings
|
61 |
|
62 |
@st.cache
|