tombryan committed on
Commit
447c93d
1 Parent(s): 01ef847

Updating UI

Browse files
Files changed (1) hide show
  1. app.py +41 -7
app.py CHANGED
@@ -9,7 +9,16 @@ from typing import List
9
 
10
  NER_MODEL_PATH = 'dell-research-harvard/historical_newspaper_ner'
11
  EMBED_MODEL_PATH = 'dell-research-harvard/same-story'
12
- AZURE_VM_ALABAMA = os.environ.get('AZURE_VM_ALABAMA')
 
 
 
 
 
 
 
 
 
13
 
14
 
15
  def find_sep_token(tokenizer):
@@ -113,26 +122,51 @@ def embed(text: str) -> List[str]:
113
 
114
  return embedding
115
 
116
- def query(sentence: str) -> List[str]:
117
  mask_results = ner_and_mask([sentence])
118
  embedding = embed(mask_results)
119
 
 
120
  assert embedding.shape == (1, 768)
121
  embedding = embedding[0].astype(np.float64)
122
  req = {"vector": list(embedding), 'nn': 5}
123
 
124
- # Send embedding to Azure VM
125
- response = requests.post(f"http://{AZURE_VM_ALABAMA}/retrieve", json = req)
 
 
 
 
 
126
  doc = response.json()
127
  article = doc['bboxes'][int(doc['article_id'])]
128
- return article['raw_text']
 
 
 
 
 
 
 
 
 
129
 
130
 
131
  if __name__ == "__main__":
132
  demo = gr.Interface(
133
  fn=query,
134
- inputs=["text"],
135
- outputs=["text"],
 
 
 
 
 
 
 
 
 
 
136
  )
137
 
138
  demo.launch()
 
9
 
10
  NER_MODEL_PATH = 'dell-research-harvard/historical_newspaper_ner'
11
  EMBED_MODEL_PATH = 'dell-research-harvard/same-story'
12
+
13
+ AZURE_VMS = {}
14
+ AVAILABLE_STATES = ['All States']
15
+ for k, v in os.environ.items():
16
+ if 'AZURE_VM' in k:
17
+ AZURE_VMS[k.split('_')[-1]] = v
18
+ AVAILABLE_STATES.append(k.split('_')[-1].capitalize())
19
+
20
+ AVAILABLE_YEARS = ['All Years']
21
+
22
 
23
 
24
  def find_sep_token(tokenizer):
 
122
 
123
  return embedding
124
 
125
def query(sentence: str, state: str, years: List[str]) -> List[str]:
    """Look up the stored newspaper article that best matches ``sentence``.

    The sentence is run through ``ner_and_mask`` and ``embed``; the resulting
    vector is POSTed to the ``/retrieve`` endpoint of the VM registered for
    ``state`` in ``AZURE_VMS``, and selected fields of the JSON reply are
    returned for display.

    Args:
        sentence: Article text to search with.
        state: A state label from the UI dropdown, or ``'All States'``.
        years: Year filters selected in the UI. Not used by the lookup yet.

    Returns:
        Newspaper name, location, date, article text and PDF link, in that
        order (one value per output textbox).

    Raises:
        ValueError: If ``state`` is empty or ``'All States'`` (no cross-state
            search is implemented), or if no VM is registered for ``state``.
    """
    # Validate the target first: previously 'All States' fell through with
    # ``response`` never assigned, which surfaced as an unbound-local error
    # only after the (expensive) masking and embedding had already run.
    if not state or state == 'All States':
        raise ValueError(
            "Searching across all states is not supported yet; "
            "please choose a single state."
        )

    # AZURE_VMS is keyed by the upper-case suffix of the environment variable
    # name, while the dropdown shows the capitalized form.
    vm_address = AZURE_VMS.get(state.upper())
    if vm_address is None:
        raise ValueError(f"No retrieval server is configured for state {state!r}.")

    mask_results = ner_and_mask([sentence])
    embedding = embed(mask_results)

    # A single input sentence must yield exactly one 768-dimensional vector.
    assert embedding.shape == (1, 768)
    embedding = embedding[0].astype(np.float64)
    # NOTE(review): 'nn' is presumably the neighbour count requested from the
    # retrieval service -- confirm against the server implementation.
    req = {"vector": list(embedding), 'nn': 5}

    # Send embedding to Azure VM
    response = requests.post(f"http://{vm_address}/retrieve", json=req)

    doc = response.json()
    article = doc['bboxes'][int(doc['article_id'])]

    return (
        doc['lccn']['title'],
        # The id encodes ", " as "%2C_"; decode it for display.
        doc['lccn']['dbpedia_ids'][0].replace('%2C_', ', '),
        doc['scan']['date'],
        article['raw_text'],
        # The PDF link is derived from the JP2 scan URL.
        doc['scan']['jp2_url'].replace('jp2', 'pdf'),
    )
153
 
154
 
155
  if __name__ == "__main__":
156
  demo = gr.Interface(
157
  fn=query,
158
+ inputs=[
159
+ gr.Textbox(lines=10, label="News Article"),
160
+ gr.Dropdown(AVAILABLE_STATES, label="States to Search"),
161
+ gr.CheckboxGroup(AVAILABLE_YEARS, label="Years to Search")
162
+ ],
163
+ outputs=[
164
+ gr.Textbox(label="Newspaper Name"),
165
+ gr.Textbox(label="Location"),
166
+ gr.Textbox(label="Date"),
167
+ gr.Textbox(lines = 10, label="Article Text OCR"),
168
+ gr.Textbox(label="PDF Link")
169
+ ]
170
  )
171
 
172
  demo.launch()