Spaces:
Build error
Build error
Update app.py
Browse files — Setting up basemap testing
app.py
CHANGED
@@ -39,49 +39,61 @@ import datamapplot
|
|
39 |
import numpy as np
|
40 |
import requests
|
41 |
import io
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
|
44 |
|
45 |
-
def predict(text_input):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
file_name = f"{datetime.utcnow().strftime('%s')}.html"
|
47 |
file_path = static_dir / file_name
|
48 |
print(file_path)
|
49 |
|
50 |
-
|
51 |
-
data_map_file = requests.get(
|
52 |
-
f"{base_url}/raw/main/examples/arxiv_ml_data_map.npy"
|
53 |
-
)
|
54 |
-
arxivml_data_map = np.load(io.BytesIO(data_map_file.content))
|
55 |
-
arxivml_label_layers = []
|
56 |
-
for layer_num in range(5):
|
57 |
-
label_file = requests.get(
|
58 |
-
f"{base_url}/raw/interactive/examples/arxiv_ml_layer{layer_num}_cluster_labels.npy"
|
59 |
-
)
|
60 |
-
arxivml_label_layers.append(np.load(io.BytesIO(label_file.content), allow_pickle=True))
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
hover_data_file = requests.get(
|
65 |
-
f"{base_url}/raw/interactive/examples/arxiv_ml_hover_data.npy"
|
66 |
-
)
|
67 |
-
arxiv_hover_data = np.load(io.BytesIO(hover_data_file.content), allow_pickle=True)
|
68 |
-
|
69 |
|
70 |
plot = datamapplot.create_interactive_plot(
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
arxivml_label_layers[4],
|
75 |
-
hover_text = arxiv_hover_data,
|
76 |
font_family="Roboto Condensed",
|
77 |
)
|
78 |
|
79 |
-
|
80 |
-
|
81 |
plot.save(file_path)
|
82 |
|
83 |
-
|
84 |
-
|
85 |
iframe = f"""<iframe src="/static/{file_name}" width="100%" height="500px"></iframe>"""
|
86 |
link = f'<a href="/static/{file_name}" target="_blank">{file_name}</a>'
|
87 |
return link, iframe
|
@@ -102,6 +114,34 @@ The Gradio app generates dynamic HTML files and stores them in a static director
|
|
102 |
|
103 |
new_btn.click(fn=predict, inputs=[text_input], outputs=[markdown, html])
|
104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
# mount Gradio app to FastAPI app
|
106 |
app = gr.mount_gradio_app(app, block, path="/")
|
107 |
|
|
|
39 |
import numpy as np
|
40 |
import requests
|
41 |
import io
|
42 |
+
import pandas as pd
|
43 |
+
from pyalex import Works, Authors, Sources, Institutions, Concepts, Publishers, Funders
|
44 |
+
from itertools import chain
|
45 |
+
from compress_pickle import load, dump
|
46 |
+
|
47 |
+
def query_records(search_term):
    """Fetch every work whose abstract matches *search_term* as a DataFrame.

    The query is issued through pyalex ``Works().search_filter(abstract=...)``
    and all result pages (200 records per page) are collected.

    Args:
        search_term: Text passed as the ``abstract`` search filter.

    Returns:
        A ``pandas.DataFrame`` with one row per record and an extra
        ``abstract`` column holding the plain-text abstract rebuilt from
        ``abstract_inverted_index``. Records without an inverted index get
        ``' '``. An empty result set yields an empty frame that still has
        the ``abstract`` column.
    """
    from collections.abc import Mapping

    def invert_abstract(inv_index):
        """Rebuild text from a ``{word: [positions]}`` inverted index."""
        # None (explicitly missing) and NaN (filled in by pandas when a
        # record lacks the key) both mean "no abstract available".
        if not isinstance(inv_index, Mapping):
            return ' '
        l_inv = [(w, p) for w, pos in inv_index.items() for p in pos]
        # Sort on position only, so words keep their original relative
        # order should two entries ever share a position.
        return " ".join(w for w, _ in sorted(l_inv, key=lambda x: x[1]))

    # Fetch records based on the search term
    query = Works().search_filter(abstract=search_term)

    # from_iterable walks the pages one at a time instead of unpacking the
    # whole paginator into call arguments first.
    records = list(chain.from_iterable(query.paginate(per_page=200)))

    records_df = pd.DataFrame(records)
    if 'abstract_inverted_index' in records_df.columns:
        records_df['abstract'] = [
            invert_abstract(t) for t in records_df['abstract_inverted_index']
        ]
    else:
        # No records (or none carrying the field): the column lookup would
        # raise KeyError, so fall back to the same placeholder used for a
        # missing abstract.
        records_df['abstract'] = [' '] * len(records_df)

    return records_df
|
66 |
+
|
67 |
|
68 |
|
69 |
|
70 |
+
def predict(text_input, progress=gr.Progress()):
    """Render the basemap as an interactive plot and return HTML pointing to it.

    Args:
        text_input: Search term forwarded to ``query_records``.
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.
            NOTE(review): the ``gr.Progress()`` default looks like the usual
            Gradio injection pattern — confirm before changing it.

    Returns:
        A ``(link, iframe)`` pair of HTML strings: an anchor to the saved
        plot under ``/static/`` and an iframe embedding the same file.
    """
    import time

    # get data.
    # NOTE: the fetched records are only printed for now; the plot below is
    # built solely from the module-level ``basedata_df``.
    records_df = query_records(text_input)
    print(records_df)

    # Epoch seconds as the file name. ``time.time()`` replaces
    # ``datetime.utcnow().strftime('%s')``: '%s' is a platform-specific
    # directive and only gave the real epoch when the host's local timezone
    # was UTC.
    file_name = f"{int(time.time())}.html"
    file_path = static_dir / file_name
    print(file_path)

    progress(0.7, desc="Loading hover data...")

    # Iterating the columns directly yields the same strings as iterrows()
    # without building a Series object for every row.
    hover_text = [
        str(ix) + ', ' + str(publication) + str(title)
        for ix, publication, title in zip(
            basedata_df.index,
            basedata_df['parsed_publication'],
            basedata_df['title'],
        )
    ]

    plot = datamapplot.create_interactive_plot(
        basedata_df[['x', 'y']].values,
        np.array(basedata_df['cluster_1_labels']),
        hover_text=hover_text,
        font_family="Roboto Condensed",
    )

    progress(0.9, desc="Saving plot...")
    plot.save(file_path)

    progress(1.0, desc="Done!")
    iframe = f"""<iframe src="/static/{file_name}" width="100%" height="500px"></iframe>"""
    link = f'<a href="/static/{file_name}" target="_blank">{file_name}</a>'
    return link, iframe
|
|
|
114 |
|
115 |
# On click, call predict with the text box value; its (link, iframe) return
# pair fills the markdown and html outputs respectively.
new_btn.click(fn=predict, inputs=[text_input], outputs=[markdown, html])
|
116 |
|
117 |
+
|
118 |
+
|
119 |
+
|
120 |
+
def setup_basemap_data():
    """Download the basemap sample, store it under ``static/`` and load it.

    The compressed file is fetched over HTTP, written to
    ``static/100k_filtered_OA_sample_cluster_and_positions.bz`` and then
    read back with ``compress_pickle.load``.

    Returns:
        The object stored in the downloaded file (presumably a pandas
        DataFrame, since ``predict`` indexes it by column — confirm).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If connecting or reading stalls past the timeout.
    """
    # get data.
    print("getting basemap data...")
    basedata_file = requests.get(
        "https://www.maxnoichl.eu/full/oa_project_on_scimap_background_data/100k_filtered_OA_sample_cluster_and_positions.bz",
        # (connect, read) seconds; the read value bounds the gap between
        # chunks, not the total download time.
        timeout=(10, 60),
    )
    # Fail here on a 4xx/5xx instead of saving an error page and crashing
    # later inside the decompressor.
    basedata_file.raise_for_status()

    # Write the response content to a .bz file in the static directory
    static_dir = Path("static")
    static_dir.mkdir(exist_ok=True)
    bz_file_name = "100k_filtered_OA_sample_cluster_and_positions.bz"
    bz_file_path = static_dir / bz_file_name
    bz_file_path.write_bytes(basedata_file.content)

    # Load the data from the saved .bz file
    # NOTE(security): this unpickles data downloaded from the network;
    # only safe while the host above is trusted.
    basedata_df = load(bz_file_path)

    return basedata_df
|
141 |
+
|
142 |
+
# Download and load the basemap once at import time; predict() reads this
# module-level name on every call.
basedata_df = setup_basemap_data()
|
143 |
+
|
144 |
+
|
145 |
# mount Gradio app to FastAPI app
|
146 |
app = gr.mount_gradio_app(app, block, path="/")
|
147 |
|