Spaces:
Runtime error
Runtime error
Neal Caren
committed on
Commit
•
032092a
1
Parent(s):
fb8ce76
Removed old file loading
Browse files
app.py
CHANGED
@@ -26,8 +26,6 @@ import pandas as pd
|
|
26 |
|
27 |
@st.cache
|
28 |
def load_data():
|
29 |
-
#df = pd.read_json('https://www.dropbox.com/s/82lwbaym3b1o6uq/passages.jsonl?raw=1', lines=True)
|
30 |
-
|
31 |
url = "https://drive.google.com/uc?export=download&id=1nIBS9is8YCeiPBqA7MifVC5xeaKWH8uL"
|
32 |
output = "passages.jsonl"
|
33 |
gdown.download(url, output, quiet=False)
|
@@ -46,17 +44,17 @@ st.write('This project is a work-in-progress that searches the text of recently-
|
|
46 |
with st.spinner(text="Loading data..."):
|
47 |
df = load_data()
|
48 |
passages = df['text'].values
|
49 |
-
|
50 |
no_of_graphs=len(df)
|
51 |
no_of_articles = len(df['cite'].value_counts())
|
52 |
|
53 |
|
54 |
notes = f'''Notes:
|
55 |
* I have found three types of searches that work best:
|
56 |
-
* Phrases or specific topics, such as "inequality in latin america", "race color skin tone measurement", "audit study experiment gender", or "logistic regression or linear probability model".
|
57 |
* Citations to well-known works, either using author year ("bourdieu 1984") or author idea ("Crenshaw intersectionality")
|
58 |
* Questions: "What is a topic model?" or "How did Weber define bureaucracy?"
|
59 |
-
* The search expands beyond exact matching, so "asia social movements" may return paragraphs on Asian-Americans politics and South Korean labor unions.
|
60 |
* The first search can take up to 10 seconds as the files load. After that, it's quicker to respond.
|
61 |
* The most relevant paragraph to your search is returned first, along with up to four other related paragraphs from that article.
|
62 |
* The most relevant sentence within each paragraph, as determined by math, is bolded.
|
@@ -86,18 +84,11 @@ def sent_cross_load():
|
|
86 |
|
87 |
@st.cache
|
88 |
def load_embeddings():
|
89 |
-
#efs = [np.load(f'embeddings_{i}.pt.npy') for i in range(0,5)]
|
90 |
-
#corpus_embeddings = np.concatenate(efs)
|
91 |
-
|
92 |
url = "https://drive.google.com/uc?export=download&id=1z9eoBI07p_YtrdK1ZWZeCRT5T5mu5nhV"
|
93 |
output = "embeddings.npy"
|
94 |
gdown.download(url, output, quiet=False)
|
95 |
|
96 |
-
corpus_embeddings = np.load(output)
|
97 |
-
#response = requests.get("https://www.dropbox.com/s/px8kjdd3p5mzw6j/corpus_embeddings.pt.npy?raw=1")
|
98 |
-
#corpus_embeddings = np.load(io.BytesIO(response.content))
|
99 |
|
100 |
-
|
101 |
return corpus_embeddings
|
102 |
|
103 |
with st.spinner(text="Loading embeddings..."):
|
|
|
26 |
|
27 |
@st.cache
|
28 |
def load_data():
|
|
|
|
|
29 |
url = "https://drive.google.com/uc?export=download&id=1nIBS9is8YCeiPBqA7MifVC5xeaKWH8uL"
|
30 |
output = "passages.jsonl"
|
31 |
gdown.download(url, output, quiet=False)
|
|
|
44 |
with st.spinner(text="Loading data..."):
|
45 |
df = load_data()
|
46 |
passages = df['text'].values
|
47 |
+
|
48 |
no_of_graphs=len(df)
|
49 |
no_of_articles = len(df['cite'].value_counts())
|
50 |
|
51 |
|
52 |
notes = f'''Notes:
|
53 |
* I have found three types of searches that work best:
|
54 |
+
* Phrases or specific topics, such as "inequality in latin america", "race color skin tone measurement", "audit study experiment gender", or "logistic regression or linear probability model".
|
55 |
* Citations to well-known works, either using author year ("bourdieu 1984") or author idea ("Crenshaw intersectionality")
|
56 |
* Questions: "What is a topic model?" or "How did Weber define bureaucracy?"
|
57 |
+
* The search expands beyond exact matching, so "asia social movements" may return paragraphs on Asian-Americans politics and South Korean labor unions.
|
58 |
* The first search can take up to 10 seconds as the files load. After that, it's quicker to respond.
|
59 |
* The most relevant paragraph to your search is returned first, along with up to four other related paragraphs from that article.
|
60 |
* The most relevant sentence within each paragraph, as determined by math, is bolded.
|
|
|
84 |
|
85 |
@st.cache
|
86 |
def load_embeddings():
|
|
|
|
|
|
|
87 |
url = "https://drive.google.com/uc?export=download&id=1z9eoBI07p_YtrdK1ZWZeCRT5T5mu5nhV"
|
88 |
output = "embeddings.npy"
|
89 |
gdown.download(url, output, quiet=False)
|
90 |
|
|
|
|
|
|
|
91 |
|
|
|
92 |
return corpus_embeddings
|
93 |
|
94 |
with st.spinner(text="Loading embeddings..."):
|