Neal Caren committed
Commit 032092a
1 Parent(s): fb8ce76

Removed old file loading

Files changed (1)
  1. app.py +3 -12
app.py CHANGED
@@ -26,8 +26,6 @@ import pandas as pd
 
 @st.cache
 def load_data():
-    #df = pd.read_json('https://www.dropbox.com/s/82lwbaym3b1o6uq/passages.jsonl?raw=1', lines=True)
-
     url = "https://drive.google.com/uc?export=download&id=1nIBS9is8YCeiPBqA7MifVC5xeaKWH8uL"
     output = "passages.jsonl"
     gdown.download(url, output, quiet=False)
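
The hunk above ends before load_data() returns anything, while later code expects a DataFrame with 'text' and 'cite' columns. A minimal sketch of the full cached loader, assuming the downloaded JSONL is read back with pandas in the spirit of the read_json call this commit removes (the return statement here is an assumption, not part of the diff):

import gdown
import pandas as pd
import streamlit as st

@st.cache
def load_data():
    # Download the passages file from Google Drive (same URL as in the diff).
    url = "https://drive.google.com/uc?export=download&id=1nIBS9is8YCeiPBqA7MifVC5xeaKWH8uL"
    output = "passages.jsonl"
    gdown.download(url, output, quiet=False)
    # Assumption: the JSONL is parsed into a DataFrame, mirroring the
    # pd.read_json(..., lines=True) call removed by this commit; the app
    # later uses df['text'] and df['cite'].
    return pd.read_json(output, lines=True)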
@@ -46,17 +44,17 @@ st.write('This project is a work-in-progress that searches the text of recently-
 with st.spinner(text="Loading data..."):
     df = load_data()
     passages = df['text'].values
-
+
 no_of_graphs=len(df)
 no_of_articles = len(df['cite'].value_counts())
 
 
 notes = f'''Notes:
 * I have found three types of searches that work best:
-    * Phrases or specific topics, such as "inequality in latin america", "race color skin tone measurement", "audit study experiment gender", or "logistic regression or linear probability model".
+    * Phrases or specific topics, such as "inequality in latin america", "race color skin tone measurement", "audit study experiment gender", or "logistic regression or linear probability model".
     * Citations to well-known works, either using author year ("bourdieu 1984") or author idea ("Crenshaw intersectionality")
     * Questions: "What is a topic model?" or "How did Weber define bureaucracy?"
-* The search expands beyond exact matching, so "asia social movements" may return paragraphs on Asian-Americans politics and South Korean labor unions.
+* The search expands beyond exact matching, so "asia social movements" may return paragraphs on Asian-Americans politics and South Korean labor unions.
 * The first search can take up to 10 seconds as the files load. After that, it's quicker to respond.
 * The most relevant paragraph to your search is returned first, along with up to four other related paragraphs from that article.
 * The most relevant sentence within each paragraph, as determined by math, is bolded.
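
The notes in this hunk describe behavior rather than mechanism: matching that goes beyond exact keywords, and a "most relevant" paragraph and sentence chosen "by math". A minimal sketch of how an embedding-based search over the downloaded passages and corpus embeddings could work, assuming sentence-transformers; the model name and the search() helper are illustrative and not taken from app.py:

from sentence_transformers import SentenceTransformer, util

# Illustrative bi-encoder; app.py may use a different model.
model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")

def search(query, passages, corpus_embeddings, top_k=5):
    # Embed the query into the same vector space as the precomputed corpus
    # embeddings, then rank passages by similarity instead of keyword overlap.
    query_embedding = model.encode(query, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=top_k)[0]
    return [(passages[hit["corpus_id"]], hit["score"]) for hit in hits]

# Example query from the notes: may surface Asian-American politics or
# South Korean labor unions even without those exact words.
# results = search("asia social movements", passages, corpus_embeddings)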
@@ -86,18 +84,11 @@ def sent_cross_load():
 
 @st.cache
 def load_embeddings():
-    #efs = [np.load(f'embeddings_{i}.pt.npy') for i in range(0,5)]
-    #corpus_embeddings = np.concatenate(efs)
-
     url = "https://drive.google.com/uc?export=download&id=1z9eoBI07p_YtrdK1ZWZeCRT5T5mu5nhV"
     output = "embeddings.npy"
     gdown.download(url, output, quiet=False)
 
-    corpus_embeddings = np.load(output)
-    #response = requests.get("https://www.dropbox.com/s/px8kjdd3p5mzw6j/corpus_embeddings.pt.npy?raw=1")
-    #corpus_embeddings = np.load(io.BytesIO(response.content))
 
-
     return corpus_embeddings
 
 with st.spinner(text="Loading embeddings..."):
 