Neal Caren committed
Commit 032092a
1 Parent(s): fb8ce76

Removed old file loading

Files changed (1)
  1. app.py +3 -12
app.py CHANGED
@@ -26,8 +26,6 @@ import pandas as pd
 
 @st.cache
 def load_data():
-    #df = pd.read_json('https://www.dropbox.com/s/82lwbaym3b1o6uq/passages.jsonl?raw=1', lines=True)
-
     url = "https://drive.google.com/uc?export=download&id=1nIBS9is8YCeiPBqA7MifVC5xeaKWH8uL"
     output = "passages.jsonl"
     gdown.download(url, output, quiet=False)
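
The hunk above ends before load_data() returns anything, while later code expects a DataFrame with 'text' and 'cite' columns. A minimal sketch of the full cached loader, assuming the downloaded JSONL is read back with pandas in the spirit of the read_json call this commit removes (the return statement here is an assumption, not part of the diff):

import gdown
import pandas as pd
import streamlit as st

@st.cache
def load_data():
    # Download the passages file from Google Drive (same URL as in the diff).
    url = "https://drive.google.com/uc?export=download&id=1nIBS9is8YCeiPBqA7MifVC5xeaKWH8uL"
    output = "passages.jsonl"
    gdown.download(url, output, quiet=False)
    # Assumption: the JSONL is parsed into a DataFrame, mirroring the
    # pd.read_json(..., lines=True) call removed by this commit; the app
    # later uses df['text'] and df['cite'].
    return pd.read_json(output, lines=True)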
@@ -46,17 +44,17 @@ st.write('This project is a work-in-progress that searches the text of recently-
 with st.spinner(text="Loading data..."):
     df = load_data()
     passages = df['text'].values
-
+
 no_of_graphs=len(df)
 no_of_articles = len(df['cite'].value_counts())
 
 
 notes = f'''Notes:
 * I have found three types of searches that work best:
-    * Phrases or specific topics, such as "inequality in latin america", "race color skin tone measurement", "audit study experiment gender", or "logistic regression or linear probability model".
+    * Phrases or specific topics, such as "inequality in latin america", "race color skin tone measurement", "audit study experiment gender", or "logistic regression or linear probability model".
     * Citations to well-known works, either using author year ("bourdieu 1984") or author idea ("Crenshaw intersectionality")
     * Questions: "What is a topic model?" or "How did Weber define bureaucracy?"
-* The search expands beyond exact matching, so "asia social movements" may return paragraphs on Asian-Americans politics and South Korean labor unions.
+* The search expands beyond exact matching, so "asia social movements" may return paragraphs on Asian-Americans politics and South Korean labor unions.
 * The first search can take up to 10 seconds as the files load. After that, it's quicker to respond.
 * The most relevant paragraph to your search is returned first, along with up to four other related paragraphs from that article.
 * The most relevant sentence within each paragraph, as determined by math, is bolded.
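
The notes in this hunk describe behavior rather than mechanism: matching that goes beyond exact keywords, and a "most relevant" paragraph and sentence chosen "by math". A minimal sketch of how an embedding-based search over the downloaded passages and corpus embeddings could work, assuming sentence-transformers; the model name and the search() helper are illustrative and not taken from app.py:

from sentence_transformers import SentenceTransformer, util

# Illustrative bi-encoder; app.py may use a different model.
model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")

def search(query, passages, corpus_embeddings, top_k=5):
    # Embed the query into the same vector space as the precomputed corpus
    # embeddings, then rank passages by similarity instead of keyword overlap.
    query_embedding = model.encode(query, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=top_k)[0]
    return [(passages[hit["corpus_id"]], hit["score"]) for hit in hits]

# Example query from the notes: may surface Asian-American politics or
# South Korean labor unions even without those exact words.
# results = search("asia social movements", passages, corpus_embeddings)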
@@ -86,18 +84,11 @@ def sent_cross_load():
 
 @st.cache
 def load_embeddings():
-    #efs = [np.load(f'embeddings_{i}.pt.npy') for i in range(0,5)]
-    #corpus_embeddings = np.concatenate(efs)
-
     url = "https://drive.google.com/uc?export=download&id=1z9eoBI07p_YtrdK1ZWZeCRT5T5mu5nhV"
     output = "embeddings.npy"
     gdown.download(url, output, quiet=False)
 
-    corpus_embeddings = np.load(output)
-    #response = requests.get("https://www.dropbox.com/s/px8kjdd3p5mzw6j/corpus_embeddings.pt.npy?raw=1")
-    #corpus_embeddings = np.load(io.BytesIO(response.content))
 
-
     return corpus_embeddings
 
 with st.spinner(text="Loading embeddings..."):
 