Updating UI
Browse files
app.py
CHANGED
@@ -9,7 +9,16 @@ from typing import List
|
|
9 |
|
10 |
NER_MODEL_PATH = 'dell-research-harvard/historical_newspaper_ner'
|
11 |
EMBED_MODEL_PATH = 'dell-research-harvard/same-story'
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
|
15 |
def find_sep_token(tokenizer):
|
@@ -113,26 +122,51 @@ def embed(text: str) -> List[str]:
|
|
113 |
|
114 |
return embedding
|
115 |
|
116 |
-
def query(sentence: str) -> List[str]:
|
117 |
mask_results = ner_and_mask([sentence])
|
118 |
embedding = embed(mask_results)
|
119 |
|
|
|
120 |
assert embedding.shape == (1, 768)
|
121 |
embedding = embedding[0].astype(np.float64)
|
122 |
req = {"vector": list(embedding), 'nn': 5}
|
123 |
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
126 |
doc = response.json()
|
127 |
article = doc['bboxes'][int(doc['article_id'])]
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
|
131 |
if __name__ == "__main__":
|
132 |
demo = gr.Interface(
|
133 |
fn=query,
|
134 |
-
inputs=[
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
)
|
137 |
|
138 |
demo.launch()
|
|
|
9 |
|
10 |
# Model identifiers for the NER and embedding models.
NER_MODEL_PATH = 'dell-research-harvard/historical_newspaper_ner'
EMBED_MODEL_PATH = 'dell-research-harvard/same-story'

# Retrieval backends are discovered from the environment: every variable whose
# name contains 'AZURE_VM' contributes one entry, keyed by the text after the
# last underscore in its name, with the variable's value stored as the address.
AZURE_VMS = {}
# Choices offered in the state dropdown; 'All States' is always listed first.
AVAILABLE_STATES = ['All States']
for env_name, env_value in os.environ.items():
    if 'AZURE_VM' not in env_name:
        continue
    state_key = env_name.split('_')[-1]
    AZURE_VMS[state_key] = env_value
    AVAILABLE_STATES.append(state_key.capitalize())

# Choices offered in the year checkbox group.
AVAILABLE_YEARS = ['All Years']
|
21 |
+
|
22 |
|
23 |
|
24 |
def find_sep_token(tokenizer):
|
|
|
122 |
|
123 |
return embedding
|
124 |
|
125 |
+
def query(sentence: str, state: str, years: List[str]) -> List[str]:
    """Retrieve the stored newspaper article closest to *sentence* for one state.

    The sentence is passed through ``ner_and_mask`` and ``embed``; the
    resulting vector is POSTed to the ``/retrieve`` endpoint of the VM
    registered for ``state`` in ``AZURE_VMS``, and selected fields of the JSON
    reply are returned for display.

    Args:
        sentence: Article text to search with.
        state: One of the ``AVAILABLE_STATES`` entries. ``'All States'`` has no
            retrieval path yet and is rejected explicitly.
        years: Selected year filters. Accepted but currently unused.

    Returns:
        ``(newspaper_name, location, date, article_text, pdf_link)`` taken
        from the service reply, in that order.

    Raises:
        NotImplementedError: If ``state`` is ``'All States'``.
        ValueError: If no state is given or no VM is registered for it.
        requests.HTTPError: If the retrieval service answers with an error
            status.
    """
    # Validate the target before running the models: previously 'All States'
    # fell through a bare `pass` and failed later on an unbound name.
    if state == 'All States':
        raise NotImplementedError(
            "Searching 'All States' is not supported yet; please pick a single state."
        )
    if not state:
        raise ValueError("No state selected; please pick a state to search.")
    vm_address = AZURE_VMS.get(state.upper())
    if vm_address is None:
        raise ValueError(f"No retrieval server is configured for state {state!r}.")

    mask_results = ner_and_mask([sentence])
    embedding = embed(mask_results)

    assert embedding.shape == (1, 768)
    # NOTE(review): the float64 cast is presumably there so the values are
    # JSON-serialisable by `requests` -- confirm before changing.
    embedding = embedding[0].astype(np.float64)
    req = {"vector": list(embedding), 'nn': 5}

    # Send embedding to Azure VM
    response = requests.post(f"http://{vm_address}/retrieve", json=req)
    # Surface HTTP failures directly instead of as a JSON/KeyError further down.
    response.raise_for_status()

    doc = response.json()
    article = doc['bboxes'][int(doc['article_id'])]

    results = {
        'newspaper_name': doc['lccn']['title'],
        'location': doc['lccn']['dbpedia_ids'][0].replace('%2C_', ', '),
        'date': doc['scan']['date'],
        'article_text': article['raw_text'],
        'pdf_link': doc['scan']['jp2_url'].replace('jp2', 'pdf'),
    }

    return (
        results['newspaper_name'],
        results['location'],
        results['date'],
        results['article_text'],
        results['pdf_link'],
    )
|
153 |
|
154 |
|
155 |
if __name__ == "__main__":
    # Input widgets, listed in the positional order of
    # query(sentence, state, years).
    input_widgets = [
        gr.Textbox(lines=10, label="News Article"),
        gr.Dropdown(AVAILABLE_STATES, label="States to Search"),
        gr.CheckboxGroup(AVAILABLE_YEARS, label="Years to Search"),
    ]
    # Output widgets, listed in the order of the five values query() returns.
    output_widgets = [
        gr.Textbox(label="Newspaper Name"),
        gr.Textbox(label="Location"),
        gr.Textbox(label="Date"),
        gr.Textbox(lines=10, label="Article Text OCR"),
        gr.Textbox(label="PDF Link"),
    ]
    demo = gr.Interface(fn=query, inputs=input_widgets, outputs=output_widgets)

    demo.launch()
|