Spaces:

Corran
/

Scrapalot-Search-Demo

Runtime error

App Files Files Community

Corran committed on Jul 23, 2023

Commit

d494f69

1 Parent(s): 0f82ab6

app.py

Browse files

Files changed (1) hide show

app.py +262 -0

app.py ADDED Viewed

	@@ -0,0 +1,262 @@

+import streamlit as st
+import pandas as pd
+import re
+import os
+import base64
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import math
+# Placeholder corpus: ten hand-written paper records (including an abstract),
+# repeated ten times so the demo has 100 rows to filter, rank and paginate.
+_PAPER_FIELDS = (
+    "Title",
+    "Authors",
+    "Year",
+    "Keywords",
+    "Subject_Area",
+    "Journal",
+    "Is_Open_Access",
+    "Abstract",
+)
+# One tuple per paper, values in the same order as _PAPER_FIELDS.
+_PAPER_RECORDS = [
+    (
+        "The impact of climate change on biodiversity",
+        "Smith, J.; Doe, J.; Brown, M.",
+        2020,
+        "climate change, biodiversity, ecosystems",
+        "Environmental Science",
+        "Nature",
+        True,
+        "This study analyzes the impact of climate change on biodiversity and ecosystem health...",
+    ),
+    (
+        "Deep learning algorithms for image classification",
+        "Garcia, L.; Johnson, N.; Patel, K.",
+        2019,
+        "deep learning, image classification, convolutional neural networks",
+        "Computer Science",
+        "IEEE Transactions on Pattern Analysis and Machine Intelligence",
+        False,
+        "We present novel deep learning algorithms for image classification using convolutional neural networks...",
+    ),
+    (
+        "Quantum computing and its applications in cryptography",
+        "Kim, D.; Taylor, R.; Yamamoto, Y.",
+        2018,
+        "quantum computing, cryptography, Shor's algorithm",
+        "Physics",
+        "Physical Review Letters",
+        True,
+        "Quantum computing has the potential to revolutionize cryptography, and in this paper, we discuss...",
+    ),
+    (
+        "Machine learning approaches for natural language processing",
+        "Roberts, A.; Jackson, T.; Davis, M.",
+        2021,
+        "machine learning, natural language processing, text analysis",
+        "Computer Science",
+        "Journal of Machine Learning Research",
+        False,
+        "Natural language processing is a growing field in machine learning, and in this review, we explore...",
+    ),
+    (
+        "Modeling the effects of climate change on agricultural production",
+        "Turner, B.; Adams, C.; Evans, D.",
+        2019,
+        "climate change, agriculture, crop modeling",
+        "Environmental Science",
+        "Agricultural Systems",
+        True,
+        "Climate change poses significant challenges to agriculture, and this paper investigates...",
+    ),
+    (
+        "Graph neural networks for social network analysis",
+        "Baker, E.; Stewart, F.; Roberts, G.",
+        2020,
+        "graph neural networks, social network analysis, machine learning",
+        "Computer Science",
+        "IEEE Transactions on Neural Networks and Learning Systems",
+        False,
+        "Graph neural networks have gained popularity in recent years for their ability to model complex...",
+    ),
+    (
+        "Biodiversity conservation strategies in the face of climate change",
+        "Nelson, H.; Mitchell, I.; Cooper, J.",
+        2018,
+        "biodiversity conservation, climate change, environmental management",
+        "Environmental Science",
+        "Conservation Biology",
+        True,
+        "Biodiversity conservation is crucial in the face of climate change, and this study outlines...",
+    ),
+    (
+        "Exploring the potential of quantum computing in drug discovery",
+        "Parker, K.; Lewis, L.; Jenkins, M.",
+        2021,
+        "quantum computing, drug discovery, computational chemistry",
+        "Physics",
+        "Journal of Chemical Information and Modeling",
+        False,
+        "Quantum computing offers new opportunities for drug discovery, and in this paper, we analyze...",
+    ),
+    (
+        "A survey of reinforcement learning algorithms and applications",
+        "Edwards, N.; Harrison, O.; Simmons, P.",
+        2019,
+        "reinforcement learning, algorithms, applications",
+        "Computer Science",
+        "Neural Computing and Applications",
+        True,
+        "Reinforcement learning is a powerful machine learning paradigm, and in this survey, we...",
+    ),
+    (
+        "The role of artificial intelligence in combating climate change",
+        "Fisher, Q.; Grant, R.; Turner, S.",
+        2020,
+        "artificial intelligence, climate change, mitigation strategies",
+        "Environmental Science",
+        "Science",
+        False,
+        "Artificial intelligence has the potential to help combat climate change by providing new...",
+    ),
+]
+# Column-oriented mapping (field name -> 100 values) consumed by pd.DataFrame.
+data = {
+    field: [record[position] for record in _PAPER_RECORDS] * 10
+    for position, field in enumerate(_PAPER_FIELDS)
+}
+def rank_results(query, filtered_papers):
+    """Rank papers by how well their abstract matches ``query``.
+
+    Every (query, abstract) pair is scored with the module-level ``tokenizer``
+    and ``model``; rows with a higher score sort first.
+
+    Args:
+        query: Search string entered by the user.
+        filtered_papers: DataFrame with an ``'Abstract'`` column. It is left
+            untouched; scoring and sorting happen on a copy.
+
+    Returns:
+        A new DataFrame with an added ``'Similarity Score'`` column, sorted by
+        that column in descending order. An input without rows produces an
+        empty result that still carries the score column.
+    """
+    # Copy first: the caller may pass the shared papers table or a slice of
+    # it, and adding a column in place would leak into later reruns.
+    ranked_papers = filtered_papers.copy()
+    abstracts = ranked_papers['Abstract'].tolist()
+    if not abstracts:
+        # Nothing to score, so skip tokenization and inference entirely.
+        ranked_papers['Similarity Score'] = pd.Series(dtype='float64')
+        return ranked_papers
+    features = tokenizer([query] * len(abstracts), abstracts, padding=True, truncation=True, return_tensors="pt")
+    with torch.no_grad():
+        scores = model(**features).logits
+    # The column needs one value per row; drop the trailing size-1 logit axis.
+    # NOTE(review): assumes the checkpoint emits a single logit per pair.
+    ranked_papers['Similarity Score'] = scores.squeeze(-1).numpy()
+    return ranked_papers.sort_values(by='Similarity Score', ascending=False)
+# Build an HTML download anchor for a PDF stored on disk
+def generate_pdf_link(pdf_file_path, link_text):
+    """Return an ``<a>`` tag that downloads the file at ``pdf_file_path``.
+
+    The file is read in binary mode and embedded in the link as a base64
+    ``data:`` URI; the suggested download name is the file's base name.
+
+    Args:
+        pdf_file_path: Path of the file to embed.
+        link_text: Text shown for the link (inserted as-is, not escaped).
+
+    Returns:
+        The anchor element as an HTML string.
+    """
+    with open(pdf_file_path, "rb") as pdf_file:
+        encoded = base64.b64encode(pdf_file.read()).decode()
+    download_name = os.path.basename(pdf_file_path)
+    return f'<a href="data:application/octet-stream;base64,{encoded}" download="{download_name}">{link_text}</a>'
+# Narrow the paper table by publication year and open-access status
+def filter_papers(papers, year_range, is_open_access, abstract_query):
+    """Return the rows of ``papers`` that satisfy the selected filters.
+
+    Args:
+        papers: DataFrame with ``'Year'`` and ``'Is_Open_Access'`` columns.
+        year_range: ``(first, last)`` years, both inclusive; a falsy value
+            disables the year filter.
+        is_open_access: ``True``/``False`` keeps only rows with that flag;
+            ``None`` disables the open-access filter.
+        abstract_query: Accepted but not used by this function.
+
+    Returns:
+        The filtered DataFrame, or ``papers`` itself when no filter applies.
+    """
+    if year_range:
+        first_year, last_year = year_range[0], year_range[1]
+        papers = papers[papers['Year'].between(first_year, last_year)]
+    if is_open_access is None:
+        return papers
+    return papers[papers['Is_Open_Access'] == is_open_access]
+# Function to perform complex boolean search
+def complex_boolean_search(text, query):
+    """Evaluate a boolean keyword query against ``text``.
+
+    Query syntax:
+      * a bare word or a double-quoted phrase matches when it occurs in
+        ``text`` as a case-insensitive substring;
+      * terms written next to each other are implicitly ANDed;
+      * ``AND``, ``OR`` and ``NOT`` (upper case only) are operators, binding
+        in the order NOT, then AND, then OR;
+      * parentheses group sub-expressions.
+
+    The previous implementation rewrote the query into a regular expression,
+    which glued words together ("a b" became "aANDb") and turned ``AND`` into
+    the ``\\A`` anchor, so multi-term queries never matched.
+
+    Args:
+        text: The text to search in.
+        query: The boolean query string.
+
+    Returns:
+        ``True`` when ``text`` satisfies ``query``. An empty query matches
+        everything; a malformed query (dangling operator, unbalanced
+        parenthesis) returns ``False``.
+    """
+    tokens = re.findall(r'"[^"]*"|[()]|[^\s()"]+', query)
+    if not tokens:
+        return True
+    haystack = text.casefold()
+    pos = 0
+    def parse_or():
+        # or_expr := and_expr ("OR" and_expr)*
+        nonlocal pos
+        result = parse_and()
+        while pos < len(tokens) and tokens[pos] == 'OR':
+            pos += 1
+            # Always parse the right-hand side so the cursor advances even
+            # when the left-hand side already decided the outcome.
+            rhs = parse_and()
+            result = result or rhs
+        return result
+    def parse_and():
+        # and_expr := not_expr (["AND"] not_expr)*
+        nonlocal pos
+        result = parse_not()
+        while pos < len(tokens) and tokens[pos] not in ('OR', ')'):
+            if tokens[pos] == 'AND':
+                pos += 1
+            rhs = parse_not()
+            result = result and rhs
+        return result
+    def parse_not():
+        # not_expr := "NOT" not_expr | "(" or_expr ")" | term
+        nonlocal pos
+        if pos >= len(tokens):
+            raise ValueError("query ends where a term was expected")
+        token = tokens[pos]
+        pos += 1
+        if token == 'NOT':
+            return not parse_not()
+        if token == '(':
+            result = parse_or()
+            if pos >= len(tokens) or tokens[pos] != ')':
+                raise ValueError("missing closing parenthesis")
+            pos += 1
+            return result
+        if token in ('AND', 'OR', ')'):
+            raise ValueError(f"unexpected {token!r} in query")
+        return token.strip('"').casefold() in haystack
+    try:
+        matched = parse_or()
+    except ValueError:
+        # Malformed queries never match.
+        return False
+    # Leftover tokens (e.g. a stray ')') mean the query was malformed.
+    return matched and pos == len(tokens)
+# Table of all placeholder papers that the UI filters and ranks
+papers_df = pd.DataFrame(data)
+# Load the cross-encoder and its tokenizer unless `model` is already defined.
+# NOTE(review): Streamlit re-executes the script on every interaction, so this
+# guard likely never skips the load - confirm, and consider caching the model.
+if "model" not in locals():
+    checkpoint = 'cross-encoder/ms-marco-MiniLM-L-6-v2'
+    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    # Inference only: switch to evaluation mode
+    model.eval()
+# Streamlit interface
+st.set_page_config(page_title="Scientific Article Search", layout="wide")
+# Hide Streamlit's main menu
+hide_menu_style = """
+        <style>
+        #MainMenu {visibility: hidden;}
+        </style>
+        """
+st.markdown(hide_menu_style, unsafe_allow_html=True)
+# Add custom CSS to scale the sidebar
+scale = 0.4
+# This must be an f-string: the doubled braces are literal CSS braces and
+# {scale} is interpolated. Without the prefix the raw placeholders were sent
+# to the browser, producing an invalid rule.
+custom_css = f"""
+<style>
+    .filterbar .sidebar-content {{
+        transform: scale({scale});
+        transform-origin: top left;
+    }}
+</style>"""
+st.markdown(custom_css, unsafe_allow_html=True)
+# Defaults used by the results section further down
+page=1
+per_page=10
+title = ""
+filtered_papers = papers_df
+# Sidebar for filters
+with st.sidebar:
+    st.header("Filters")
+    search_query = st.text_input("Query")
+    so = st.multiselect(
+        label='Search Over',
+        options=['Abstract','Everything','Authors'],
+        default=['Everything'],
+        help='Search and select multiple options from the dropdown menu')
+    # Was also labelled 'Search Over' (copy-paste); this one selects the sites.
+    sites = st.multiselect(
+        label='Sites',
+        options=['OpenAlex','Google Scholar','Base Search','All Sites'],
+        default=['All Sites'],
+        help='Search and select multiple options from the dropdown menu')
+    year_range = st.slider("Year Range", min_value=1900, max_value=2022, value=(1990, 2022), step=1)
+    open_access_choice = st.multiselect(
+        label='Open Access',
+        options=["All","Yes","No"],
+        default="All",
+        help='Search and select multiple options from the dropdown menu')
+    # Convert the selection to boolean or None. multiselect returns a list,
+    # so comparing it with "Yes"/"No" directly was always false and the filter
+    # never applied. Only an unambiguous choice (exactly "Yes" or exactly
+    # "No") restricts the results.
+    chosen = set(open_access_choice)
+    if chosen == {"Yes"}:
+        is_open_access = True
+    elif chosen == {"No"}:
+        is_open_access = False
+    else:
+        is_open_access = None
+    # st.button is True only on the rerun caused by its own click. Remember
+    # that a search was submitted so later reruns (e.g. changing the page
+    # size) keep the filters instead of falling back to the full table.
+    if st.button("Search"):
+        st.session_state["search_submitted"] = True
+    if st.session_state.get("search_submitted", False):
+        filtered_papers = filter_papers(papers_df, year_range, is_open_access, search_query)
+    else:
+        filtered_papers = papers_df  # No search yet: show every paper
+    # Rank only when there is a query and something to score. A copy is
+    # passed so the shared papers table is never modified by the ranking.
+    if search_query.strip() and not filtered_papers.empty:
+        filtered_papers = rank_results(search_query, filtered_papers.copy())
+def _go_to_page(target_page):
+    """Button callback: store the results page to show on the next rerun."""
+    st.session_state["page"] = target_page
+if not filtered_papers.empty:
+    # The page-size selectbox is drawn below the results, so its current
+    # value is read through session state (keyed widget) before slicing.
+    per_page = st.session_state.get("per_page", per_page)
+    # Pagination
+    no_pages = math.ceil(len(filtered_papers) / per_page)
+    # The current page is kept in session state because plain variables are
+    # reset on every rerun; clamp it in case the result set shrank.
+    page = min(max(st.session_state.get("page", page), 1), no_pages)
+    # Header row
+    title_col, authors_col, year_col, journal_col = st.columns([5, 5, 2, 3])
+    with title_col:
+        st.subheader("Title")
+    with authors_col:
+        st.subheader("Authors")
+    with year_col:
+        st.subheader("Year")
+    with journal_col:
+        st.subheader("Journal")
+    # Display paginated results
+    start_idx = (page - 1) * per_page
+    end_idx = start_idx + per_page
+    paginated_papers = filtered_papers.iloc[start_idx:end_idx]
+    for _, paper in paginated_papers.iterrows():
+        st.write("---")
+        title_col, authors_col, year_col, journal_col = st.columns([5, 5, 2, 3])
+        with title_col:
+            st.write(f"{paper['Title']}")
+        with authors_col:
+            st.write(f"{paper['Authors']}")
+        with year_col:
+            st.write(f"{paper['Year']}")
+        with journal_col:
+            st.write(f"{paper['Journal']}")
+        abstract = st.expander("Abstract")
+        abstract.write(f"{paper['Abstract']}")
+        # TODO: link each paper's PDF with generate_pdf_link once real file
+        # paths are available (the old hard-coded path was a placeholder).
+    st.write("---")
+    # Page-size selector; keyed so the choice is available at the top of the
+    # next rerun.
+    st.selectbox("Results per page", [10, 20, 30], index=0, key="per_page")
+    # Display pagination controls
+    if no_pages > 1:
+        previous_col, current_col, next_col = st.columns(3)
+        with previous_col:
+            st.button("Previous", key="prev_page", disabled=page <= 1,
+                      on_click=_go_to_page, args=(page - 1,))
+        with current_col:
+            st.write(f"Page {page} of {no_pages}")
+        with next_col:
+            st.button("Next", key="next_page", disabled=page >= no_pages,
+                      on_click=_go_to_page, args=(page + 1,))
+else:
+    st.header("No papers found.")