Corran committed on
Commit
d494f69
1 Parent(s): 0f82ab6
Files changed (1) hide show
  1. app.py +262 -0
app.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import re
4
+ import os
5
+ import base64
6
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
+ import torch
8
+ import math
9
+
10
# Realistic placeholder dataset (includes an Abstract field).
# Ten hand-written sample papers; every column list is repeated 10 times so
# the app has 100 rows to filter and paginate. All columns must keep the
# same length so the dict can be turned into a DataFrame.
data = {
    "Title": [
        "The impact of climate change on biodiversity",
        "Deep learning algorithms for image classification",
        "Quantum computing and its applications in cryptography",
        "Machine learning approaches for natural language processing",
        "Modeling the effects of climate change on agricultural production",
        "Graph neural networks for social network analysis",
        "Biodiversity conservation strategies in the face of climate change",
        "Exploring the potential of quantum computing in drug discovery",
        "A survey of reinforcement learning algorithms and applications",
        "The role of artificial intelligence in combating climate change",
    ] * 10,
    "Authors": [
        "Smith, J.; Doe, J.; Brown, M.",
        "Garcia, L.; Johnson, N.; Patel, K.",
        "Kim, D.; Taylor, R.; Yamamoto, Y.",
        "Roberts, A.; Jackson, T.; Davis, M.",
        "Turner, B.; Adams, C.; Evans, D.",
        "Baker, E.; Stewart, F.; Roberts, G.",
        "Nelson, H.; Mitchell, I.; Cooper, J.",
        "Parker, K.; Lewis, L.; Jenkins, M.",
        "Edwards, N.; Harrison, O.; Simmons, P.",
        "Fisher, Q.; Grant, R.; Turner, S.",
    ] * 10,
    "Year": [2020, 2019, 2018, 2021, 2019, 2020, 2018, 2021, 2019, 2020] * 10,
    "Keywords": [
        "climate change, biodiversity, ecosystems",
        "deep learning, image classification, convolutional neural networks",
        "quantum computing, cryptography, Shor's algorithm",
        "machine learning, natural language processing, text analysis",
        "climate change, agriculture, crop modeling",
        "graph neural networks, social network analysis, machine learning",
        "biodiversity conservation, climate change, environmental management",
        "quantum computing, drug discovery, computational chemistry",
        "reinforcement learning, algorithms, applications",
        "artificial intelligence, climate change, mitigation strategies",
    ] * 10,
    "Subject_Area": [
        "Environmental Science",
        "Computer Science",
        "Physics",
        "Computer Science",
        "Environmental Science",
        "Computer Science",
        "Environmental Science",
        "Physics",
        "Computer Science",
        "Environmental Science",
    ] * 10,
    "Journal": [
        "Nature",
        "IEEE Transactions on Pattern Analysis and Machine Intelligence",
        "Physical Review Letters",
        "Journal of Machine Learning Research",
        "Agricultural Systems",
        "IEEE Transactions on Neural Networks and Learning Systems",
        "Conservation Biology",
        "Journal of Chemical Information and Modeling",
        "Neural Computing and Applications",
        "Science",
    ] * 10,
    "Is_Open_Access": [True, False, True, False, True, False, True, False, True, False] * 10,
    "Abstract": [
        "This study analyzes the impact of climate change on biodiversity and ecosystem health...",
        "We present novel deep learning algorithms for image classification using convolutional neural networks...",
        "Quantum computing has the potential to revolutionize cryptography, and in this paper, we discuss...",
        "Natural language processing is a growing field in machine learning, and in this review, we explore...",
        "Climate change poses significant challenges to agriculture, and this paper investigates...",
        "Graph neural networks have gained popularity in recent years for their ability to model complex...",
        "Biodiversity conservation is crucial in the face of climate change, and this study outlines...",
        "Quantum computing offers new opportunities for drug discovery, and in this paper, we analyze...",
        "Reinforcement learning is a powerful machine learning paradigm, and in this survey, we...",
        "Artificial intelligence has the potential to help combat climate change by providing new...",
    ] * 10,
}
87
+
88
+
89
def rank_results(query, filtered_papers):
    """Rank papers by how well their abstract matches ``query``.

    Every (query, abstract) pair is scored with the module-level
    ``tokenizer`` and ``model``; the score is stored in a
    ``'Similarity Score'`` column and the rows are sorted by it, best first.

    Args:
        query: Free-text search string paired with every abstract.
        filtered_papers: DataFrame with an ``'Abstract'`` column.

    Returns:
        A new DataFrame (the input is left untouched) with the added
        ``'Similarity Score'`` column, sorted in descending score order.
        An empty input yields an empty frame that still has the column.
    """
    # Work on a copy: the input is often a filtered slice of another frame,
    # and writing a column into it would mutate (or warn about) the parent.
    ranked_papers = filtered_papers.copy()
    abstracts = ranked_papers['Abstract'].tolist()

    # Nothing to score: skip the tokenizer/model, which cannot handle an
    # empty batch, and return a consistently shaped result.
    if not abstracts:
        ranked_papers['Similarity Score'] = pd.Series(dtype='float64')
        return ranked_papers

    # Generate one (query, abstract) pair per paper and score them in a batch
    features = tokenizer([query] * len(abstracts), abstracts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        scores = model(**features).logits

    # Logits come back as (n_papers, n_labels); drop the trailing axis so a
    # flat vector is stored. Assumes the model has a single output label.
    ranked_papers['Similarity Score'] = scores.squeeze(-1).numpy()

    # Rank papers based on similarity scores
    return ranked_papers.sort_values(by='Similarity Score', ascending=False)
101
+
102
+ # Function to generate a download link for a PDF file
103
def generate_pdf_link(pdf_file_path, link_text):
    """Build an HTML download link that embeds a file as a base64 data URI.

    Args:
        pdf_file_path: Path of the file to embed; it is read in binary mode.
        link_text: Text (or markup) shown inside the anchor. It is inserted
            verbatim, so escape it first if it comes from an untrusted source.

    Returns:
        An ``<a>`` element string whose ``href`` holds the whole file and
        whose ``download`` attribute is the file's base name.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Local import keeps this block self-contained; the file's import
    # section does not bring in ``html``.
    import html

    with open(pdf_file_path, "rb") as f:
        pdf_data = f.read()

    b64_pdf_data = base64.b64encode(pdf_data).decode("ascii")
    # The name lands inside a double-quoted attribute; escape quotes and
    # angle brackets so an unusual file name cannot break the markup.
    file_name = html.escape(os.path.basename(pdf_file_path), quote=True)
    return f'<a href="data:application/octet-stream;base64,{b64_pdf_data}" download="{file_name}">{link_text}</a>'
110
+
111
+ # Function to filter papers based on user input
112
def filter_papers(papers, year_range, is_open_access, abstract_query=None):
    """Filter papers by publication year and open-access status.

    Args:
        papers: DataFrame with ``'Year'`` and ``'Is_Open_Access'`` columns.
        year_range: ``(first_year, last_year)`` pair, inclusive on both
            ends. A falsy value (``None`` or an empty tuple) disables the
            year filter.
        is_open_access: ``True`` or ``False`` to keep only matching rows,
            ``None`` to keep both.
        abstract_query: Accepted for call compatibility but currently not
            used for filtering.

    Returns:
        The rows of ``papers`` that pass every active filter.
    """
    if year_range:
        first_year, last_year = year_range[0], year_range[1]
        within_range = (papers['Year'] >= first_year) & (papers['Year'] <= last_year)
        papers = papers[within_range]

    if is_open_access is not None:
        papers = papers[papers['Is_Open_Access'] == is_open_access]

    return papers
119
+
120
+ # Function to perform complex boolean search
121
def complex_boolean_search(text, query):
    """Evaluate a simple boolean keyword query against ``text``.

    Query syntax (operators must be upper-case, terms are whitespace-separated):

    * ``a b`` or ``a AND b`` - both terms must occur (AND is implicit).
    * ``a OR b`` - either side may match; OR binds looser than AND.
    * ``NOT a`` or ``-a`` - the term must not occur.

    Terms are matched as literal, case-insensitive substrings, so characters
    such as ``(`` or ``*`` in user input can never raise a regex error.
    Parentheses and quoted phrases are not supported.

    Args:
        text: The text to search in.
        query: The boolean query string.

    Returns:
        ``True`` if ``text`` satisfies ``query``. A query without any search
        term (for example an empty string) matches everything.
    """
    haystack = text.casefold()

    # Each inner list is one AND-group of (term, negated) pairs; the groups
    # themselves are OR-ed together.
    or_groups = [[]]
    negate_next = False
    for token in query.split():
        if token == 'OR':
            or_groups.append([])
            negate_next = False
        elif token == 'AND':
            # AND is the default connective between adjacent terms.
            continue
        elif token == 'NOT':
            negate_next = True
        else:
            if token.startswith('-') and len(token) > 1:
                token = token[1:]
                negate_next = True
            or_groups[-1].append((token.casefold(), negate_next))
            negate_next = False

    # Dangling operators ("a OR", "OR a") leave empty groups behind; drop
    # them so they do not count as trivially satisfied alternatives.
    or_groups = [group for group in or_groups if group]
    if not or_groups:
        return True

    return any(
        all((term in haystack) != negated for term, negated in group)
        for group in or_groups
    )
130
+
131
papers_df = pd.DataFrame(data)

CROSS_ENCODER_NAME = 'cross-encoder/ms-marco-MiniLM-L-6-v2'

# Streamlit re-executes this script on every interaction, so a check like
# `"model" not in locals()` is always true and would reload the model each
# time. Use Streamlit's resource cache instead; it is called
# `cache_resource` in newer releases and `experimental_singleton` in older
# ones. If neither exists the loader simply runs uncached.
_cache_resource = getattr(st, "cache_resource", None) or getattr(st, "experimental_singleton", None)


def _load_cross_encoder():
    """Load the tokenizer and the sequence-classification model in eval mode."""
    loaded_tokenizer = AutoTokenizer.from_pretrained(CROSS_ENCODER_NAME)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(CROSS_ENCODER_NAME)
    loaded_model.eval()
    return loaded_tokenizer, loaded_model


if _cache_resource is not None:
    # No spinner: this runs before st.set_page_config(), which has to be the
    # first Streamlit call that renders anything on the page.
    _load_cross_encoder = _cache_resource(show_spinner=False)(_load_cross_encoder)

tokenizer, model = _load_cross_encoder()
136
+
137
# Streamlit interface
#
# The script is re-run from top to bottom on every widget interaction, so
# anything that must survive a rerun (submitted filters, current page,
# page size) is kept in st.session_state.
st.set_page_config(page_title="Scientific Article Search", layout="wide")

hide_menu_style = """
<style>
#MainMenu {visibility: hidden;}
</style>
"""
st.markdown(hide_menu_style, unsafe_allow_html=True)

# Add custom CSS to scale the sidebar. The template uses doubled braces for
# literal CSS braces, so it has to be an f-string for {scale} to be filled in.
scale = 0.4
custom_css = f"""
<style>
.filterbar .sidebar-content {{
    transform: scale({scale});
    transform-origin: top left;
}}
</style>"""

st.markdown(custom_css, unsafe_allow_html=True)

COLUMN_WIDTHS = [5, 5, 2, 3]  # Title, Authors, Year, Journal
DEFAULT_PER_PAGE = 10


def _go_to_page(target_page):
    """Button callback: remember which result page the next rerun shows."""
    st.session_state["page"] = target_page


# Sidebar for filters
with st.sidebar:
    st.header("Filters")
    search_query = st.text_input("Query")
    # NOTE: `so` and `sites` are collected but do not influence the results yet.
    so = st.multiselect(
        label='Search Over',
        options=['Abstract', 'Everything', 'Authors'],
        default=['Everything'],
        help='Search and select multiple options from the dropdown menu')

    sites = st.multiselect(
        label='Sites',
        options=['OpenAlex', 'Google Scholar', 'Base Search', 'All Sites'],
        default=['All Sites'],
        help='Search and select multiple options from the dropdown menu')

    year_range = st.slider("Year Range", min_value=1900, max_value=2022, value=(1990, 2022), step=1)

    open_access_choice = st.multiselect(
        label='Open Access',
        options=["All", "Yes", "No"],
        default=["All"],
        help='Search and select multiple options from the dropdown menu')

    # A multiselect returns a list, so map the selection to True / False /
    # None (= no filtering). Nothing selected, "All", or both "Yes" and "No"
    # all mean that every paper is acceptable.
    chosen_access = set(open_access_choice)
    if not chosen_access or "All" in chosen_access or chosen_access == {"Yes", "No"}:
        is_open_access = None
    else:
        is_open_access = "Yes" in chosen_access

    # Filter button: snapshot the filters so they stay applied on later
    # reruns (for example when the user changes page).
    if st.button("Search"):
        st.session_state["applied_filters"] = (year_range, is_open_access)
        st.session_state["page"] = 1

applied_filters = st.session_state.get("applied_filters")
if applied_filters is None:
    # No search submitted yet: show every paper.
    filtered_papers = papers_df
else:
    applied_years, applied_open_access = applied_filters
    filtered_papers = filter_papers(papers_df, applied_years, applied_open_access, search_query)

# Ranking needs a query to compare against and at least one abstract to score.
# Pass a copy so the ranking never writes into papers_df or a slice of it.
if search_query.strip() and not filtered_papers.empty:
    filtered_papers = rank_results(search_query, filtered_papers.copy())

if not filtered_papers.empty:
    # Pagination state. The page-size widget is drawn below the results, so
    # its value is read from session state (filled in by the widget's key).
    per_page = st.session_state.get("per_page", DEFAULT_PER_PAGE)
    no_pages = math.ceil(len(filtered_papers) / per_page)
    page = min(max(st.session_state.get("page", 1), 1), no_pages)

    # Column headers
    title_col, authors_col, year_col, journal_col = st.columns(COLUMN_WIDTHS)
    with title_col:
        st.subheader("Title")
    with authors_col:
        st.subheader("Authors")
    with year_col:
        st.subheader("Year")
    with journal_col:
        st.subheader("Journal")

    # Display paginated results
    start_idx = (page - 1) * per_page
    end_idx = start_idx + per_page
    paginated_papers = filtered_papers.iloc[start_idx:end_idx]

    for _, paper in paginated_papers.iterrows():
        st.write("---")
        title_col, authors_col, year_col, journal_col = st.columns(COLUMN_WIDTHS)

        with title_col:
            st.write(f"{paper['Title']}")
        with authors_col:
            st.write(f"{paper['Authors']}")
        with year_col:
            st.write(f"{paper['Year']}")
        with journal_col:
            st.write(f"{paper['Journal']}")

        abstract = st.expander("Abstract")
        abstract.write(f"{paper['Abstract']}")

        # TODO: once each paper has an associated PDF path, render a link
        # with generate_pdf_link(pdf_file_path, "Show PDF") via st.markdown.

    st.write("---")

    # Pagination controls
    st.selectbox("Results per page", [10, 20, 30], index=0, key="per_page")
    if no_pages > 1:
        prev_col, current_col, next_col = st.columns(3)
        if page > 1:
            prev_col.button("Previous", key="prev_page", on_click=_go_to_page, args=(page - 1,))
        current_col.write(f"Page {page} of {no_pages}")
        if page < no_pages:
            next_col.button("Next", key="next_page", on_click=_go_to_page, args=(page + 1,))
else:
    st.header("No papers found.")