m7n committed on
Commit
955747f
·
verified ·
1 Parent(s): 05560e1

Update app.py

Browse files

Setting up basemap testing

Files changed (1) hide show
  1. app.py +68 -28
app.py CHANGED
@@ -39,49 +39,61 @@ import datamapplot
39
  import numpy as np
40
  import requests
41
  import io
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
 
44
 
45
- def predict(text_input):
 
 
 
 
 
 
 
46
  file_name = f"{datetime.utcnow().strftime('%s')}.html"
47
  file_path = static_dir / file_name
48
  print(file_path)
49
 
50
- base_url = "https://github.com/TutteInstitute/datamapplot"
51
- data_map_file = requests.get(
52
- f"{base_url}/raw/main/examples/arxiv_ml_data_map.npy"
53
- )
54
- arxivml_data_map = np.load(io.BytesIO(data_map_file.content))
55
- arxivml_label_layers = []
56
- for layer_num in range(5):
57
- label_file = requests.get(
58
- f"{base_url}/raw/interactive/examples/arxiv_ml_layer{layer_num}_cluster_labels.npy"
59
- )
60
- arxivml_label_layers.append(np.load(io.BytesIO(label_file.content), allow_pickle=True))
61
 
62
-
63
-
64
- hover_data_file = requests.get(
65
- f"{base_url}/raw/interactive/examples/arxiv_ml_hover_data.npy"
66
- )
67
- arxiv_hover_data = np.load(io.BytesIO(hover_data_file.content), allow_pickle=True)
68
-
69
 
70
  plot = datamapplot.create_interactive_plot(
71
- arxivml_data_map,
72
- arxivml_label_layers[0],
73
- arxivml_label_layers[2],
74
- arxivml_label_layers[4],
75
- hover_text = arxiv_hover_data,
76
  font_family="Roboto Condensed",
77
  )
78
 
79
-
80
-
81
  plot.save(file_path)
82
 
83
-
84
-
85
  iframe = f"""<iframe src="/static/{file_name}" width="100%" height="500px"></iframe>"""
86
  link = f'<a href="/static/{file_name}" target="_blank">{file_name}</a>'
87
  return link, iframe
@@ -102,6 +114,34 @@ The Gradio app generates dynamic HTML files and stores them in a static director
102
 
103
  new_btn.click(fn=predict, inputs=[text_input], outputs=[markdown, html])
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  # mount Gradio app to FastAPI app
106
  app = gr.mount_gradio_app(app, block, path="/")
107
 
 
39
  import numpy as np
40
  import requests
41
  import io
42
+ import pandas as pd
43
+ from pyalex import Works, Authors, Sources, Institutions, Concepts, Publishers, Funders
44
+ from itertools import chain
45
+ from compress_pickle import load, dump
46
+
47
def query_records(search_term):
    """Fetch OpenAlex works whose abstract matches *search_term*.

    Every page of the pyalex ``Works`` abstract search is collected into one
    DataFrame, and an ``abstract`` column holding the plain-text abstract is
    added, rebuilt from each record's ``abstract_inverted_index``.

    Args:
        search_term: Text passed to ``Works().search_filter(abstract=...)``.

    Returns:
        A ``pandas.DataFrame`` with one row per fetched record plus the
        ``abstract`` column. When the search yields no records the frame is
        empty but still has the ``abstract`` column.
    """

    def invert_abstract(inv_index):
        """Rebuild text from a ``{word: [positions]}`` mapping."""
        # Missing abstracts show up as None (or NaN once inside a DataFrame);
        # anything that is not a mapping becomes the single-space placeholder.
        if not isinstance(inv_index, dict):
            return ' '
        word_positions = [
            (word, position)
            for word, positions in inv_index.items()
            for position in positions
        ]
        word_positions.sort(key=lambda pair: pair[1])
        return " ".join(word for word, _ in word_positions)

    # Fetch records based on the search term
    query = Works().search_filter(abstract=search_term)

    # paginate() yields one list of records per page; flatten them lazily.
    records = list(chain.from_iterable(query.paginate(per_page=200)))

    records_df = pd.DataFrame(records)
    # A search with no hits gives a frame without any columns, so fall back
    # to an all-missing column instead of raising KeyError.
    inverted_indexes = records_df.get(
        'abstract_inverted_index', [None] * len(records_df)
    )
    records_df['abstract'] = [invert_abstract(t) for t in inverted_indexes]

    return records_df
66
+
67
 
68
 
69
 
70
def predict(text_input, progress=gr.Progress()):
    """Render the basemap as an interactive plot and return HTML to show it.

    The records for *text_input* are queried and printed, then a datamapplot
    interactive plot of the module-level ``basedata_df`` is saved as an HTML
    file inside ``static_dir``.

    Args:
        text_input: Search term forwarded to ``query_records``.
        progress: Gradio progress tracker used to report the current stage.

    Returns:
        A ``(link, iframe)`` tuple of HTML strings: an anchor pointing at the
        saved file and an iframe embedding it, both under ``/static/``.
    """
    # get data.
    records_df = query_records(text_input)
    print(records_df)

    # strftime('%s') is not a documented Python directive (it is a platform
    # libc extension) and it reads the naive utcnow() value as local time.
    # timestamp() gives the epoch seconds portably.
    file_name = f"{int(datetime.now().timestamp())}.html"
    file_path = static_dir / file_name
    print(file_path)

    progress(0.7, desc="Loading hover data...")

    # Zipping the columns avoids building a Series per row as iterrows() does.
    hover_text = [
        str(ix) + ', ' + str(publication) + str(title)
        for ix, publication, title in zip(
            basedata_df.index,
            basedata_df['parsed_publication'],
            basedata_df['title'],
        )
    ]

    plot = datamapplot.create_interactive_plot(
        basedata_df[['x', 'y']].values,
        np.array(basedata_df['cluster_1_labels']),
        hover_text=hover_text,
        font_family="Roboto Condensed",
    )

    progress(0.9, desc="Saving plot...")
    plot.save(file_path)

    progress(1.0, desc="Done!")
    iframe = f"""<iframe src="/static/{file_name}" width="100%" height="500px"></iframe>"""
    link = f'<a href="/static/{file_name}" target="_blank">{file_name}</a>'
    return link, iframe
 
114
 
115
  new_btn.click(fn=predict, inputs=[text_input], outputs=[markdown, html])
116
 
117
+
118
+
119
+
120
def setup_basemap_data():
    """Download the basemap file, store it under ``static/`` and load it.

    Returns:
        The object that ``compress_pickle.load`` reads from the saved file.
        Presumably a DataFrame with ``x``, ``y``, ``cluster_1_labels``,
        ``parsed_publication`` and ``title`` columns, judging by how
        ``predict`` uses it; confirm against the published file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or a timeout.
    """
    # get data.
    print("getting basemap data...")
    basedata_file = requests.get(
        "https://www.maxnoichl.eu/full/oa_project_on_scimap_background_data/100k_filtered_OA_sample_cluster_and_positions.bz",
        timeout=60,
    )
    # Fail here rather than writing an HTML error page to disk and handing
    # it to the unpickler below.
    basedata_file.raise_for_status()

    # Write the response content to a .bz file in the static directory
    static_dir = Path("static")
    static_dir.mkdir(exist_ok=True)
    bz_file_name = "100k_filtered_OA_sample_cluster_and_positions.bz"
    bz_file_path = static_dir / bz_file_name
    bz_file_path.write_bytes(basedata_file.content)

    # Load the data from the saved .bz file
    # NOTE(security): this unpickles data fetched over the network, which can
    # execute arbitrary code. Only keep this while the remote host is trusted.
    basedata_df = load(bz_file_path)

    return basedata_df
141
+
142
# Load the basemap once at import time; predict() reads this module-level
# object when it builds the plot.
basedata_df = setup_basemap_data()

# Attach the Gradio Blocks UI to the FastAPI app at the root path.
app = gr.mount_gradio_app(app, block, path="/")
147