Spaces:
Runtime error
Runtime error
Atharva
committed on
Commit
•
5bc0741
1
Parent(s):
5ae066c
pipeline update
Browse files
- app.py +5 -4
- src/__init__.py +11 -14
app.py
CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
|
|
2 |
import streamlit as st
|
3 |
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
|
4 |
|
5 |
-
from src import GBRT,
|
6 |
|
7 |
TYPE = {
|
8 |
'LOC': ' location',
|
@@ -52,11 +52,12 @@ def get_candidates(mentions_tags):
|
|
52 |
if (mention, tag) in cache.keys():
|
53 |
candidates.append((mention, cache[(mention, tag)]))
|
54 |
else:
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
cache[(mention, tag)] = cands
|
59 |
candidates.append((mention, cands))
|
|
|
60 |
return candidates
|
61 |
|
62 |
|
|
|
2 |
import streamlit as st
|
3 |
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
|
4 |
|
5 |
+
from src import GBRT, wikipedia_search, wikidata_search
|
6 |
|
7 |
TYPE = {
|
8 |
'LOC': ' location',
|
|
|
52 |
if (mention, tag) in cache.keys():
|
53 |
candidates.append((mention, cache[(mention, tag)]))
|
54 |
else:
|
55 |
+
cands = wikidata_search(mention, limit=3)
|
56 |
+
if cands == []:
|
57 |
+
cands = wikipedia_search(mention, limit=3)
|
58 |
cache[(mention, tag)] = cands
|
59 |
candidates.append((mention, cands))
|
60 |
+
print(mention, cands)
|
61 |
return candidates
|
62 |
|
63 |
|
src/__init__.py
CHANGED
@@ -106,22 +106,19 @@ def wikidata_search(query, limit=3):
|
|
106 |
return [i for i in candidates if is_disamb_page(i) == False]
|
107 |
|
108 |
|
109 |
-
def
|
110 |
-
service_url =
|
111 |
params = {
|
112 |
-
'
|
113 |
-
'
|
114 |
-
'
|
115 |
-
'
|
116 |
-
'
|
117 |
}
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
return [i.replace(' ', '_') for i in cands]
|
123 |
-
except:
|
124 |
-
return []
|
125 |
|
126 |
|
127 |
def get_entity_extract(entity_title, num_sentences=0):
|
|
|
106 |
return [i for i in candidates if is_disamb_page(i) == False]
|
107 |
|
108 |
|
109 |
+
def wikipedia_search(query, limit=3):
|
110 |
+
service_url = 'https://en.wikipedia.org/w/api.php'
|
111 |
params = {
|
112 |
+
'action': 'opensearch',
|
113 |
+
'search': query,
|
114 |
+
'namespace': 0,
|
115 |
+
'limit': limit,
|
116 |
+
'redirects': 'resolve',
|
117 |
}
|
118 |
+
|
119 |
+
results = requests.get(service_url, params=params).json()[1]
|
120 |
+
results = [i.replace(' ', '_') for i in results if 'disambiguation' not in i.lower()]
|
121 |
+
return [i for i in results if is_disamb_page(i) == False]
|
|
|
|
|
|
|
122 |
|
123 |
|
124 |
def get_entity_extract(entity_title, num_sentences=0):
|