Spaces:

INLEXIO
/

semantic-search

Sleeping

App Files Community

INLEXIO committed on Oct 23

Commit

2f71153

verified ·

1 Parent(s): 95564ef

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +51 -6

src/streamlit_app.py CHANGED Viewed

@@ -4,6 +4,12 @@ from sentence_transformers import SentenceTransformer
 import numpy as np
 from collections import defaultdict
 import time
 # Page config
 st.set_page_config(
@@ -16,12 +22,13 @@ st.set_page_config(
 @st.cache_resource
 def load_model():
     """Load the sentence transformer model"""
-    return SentenceTransformer('all-MiniLM-L6-v2')
 @st.cache_data(ttl=3600)
-def search_openalex_papers(query, num_results=50):
     """
     Search OpenAlex for papers related to the query
     """
     base_url = "https://api.openalex.org/works"
@@ -32,6 +39,10 @@ def search_openalex_papers(query, num_results=50):
         "mailto": "user@example.com"  # Polite pool
     }
     try:
         response = requests.get(base_url, params=params, timeout=30)
         response.raise_for_status()
@@ -185,13 +196,47 @@ def main():
     **How it works:**
     1. Enter your search terms (e.g., "machine learning for drug discovery")
-    2. The app finds relevant papers using semantic similarity
-    3. Authors are ranked by relevance, h-index, and citation metrics
     """)
     # Sidebar controls
     st.sidebar.header("Search Settings")
     num_papers = st.sidebar.slider(
         "Number of papers to fetch",
         min_value=20,
@@ -240,8 +285,8 @@ def main():
             model = load_model()
         # Search papers
-        with st.spinner(f"Searching OpenAlex for papers about '{query}'..."):
-            papers = search_openalex_papers(query, num_papers)
         if not papers:
             st.warning("No papers found. Try different search terms.")

 import numpy as np
 from collections import defaultdict
 import time
+import os
+# Set cache directory to writable location
+os.environ['HF_HOME'] = '/tmp/huggingface'
+os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface'
+os.environ['SENTENCE_TRANSFORMERS_HOME'] = '/tmp/huggingface'
 # Page config
 st.set_page_config(
 @st.cache_resource
 def load_model():
     """Load the sentence transformer model"""
+    return SentenceTransformer('all-MiniLM-L6-v2', cache_folder='/tmp/huggingface')
 @st.cache_data(ttl=3600)
+def search_openalex_papers(query, num_results=50, country_code=None):
     """
     Search OpenAlex for papers related to the query
+    Optionally filter by author's country
     """
     base_url = "https://api.openalex.org/works"
         "mailto": "user@example.com"  # Polite pool
     }
+    # Add country filter if specified
+    if country_code:
+        params["filter"] = f"authorships.countries:{country_code}"
     try:
         response = requests.get(base_url, params=params, timeout=30)
         response.raise_for_status()
     **How it works:**
     1. Enter your search terms (e.g., "machine learning for drug discovery")
+    2. Optionally filter by author country
+    3. The app finds relevant papers using semantic similarity
+    4. Authors are ranked by relevance, h-index, and citation metrics
     """)
     # Sidebar controls
     st.sidebar.header("Search Settings")
+    # Country filter
+    country_options = {
+        "Any Country": None,
+        "United States": "US",
+        "United Kingdom": "GB",
+        "Germany": "DE",
+        "France": "FR",
+        "Canada": "CA",
+        "Australia": "AU",
+        "China": "CN",
+        "Japan": "JP",
+        "India": "IN",
+        "South Korea": "KR",
+        "Netherlands": "NL",
+        "Switzerland": "CH",
+        "Sweden": "SE",
+        "Italy": "IT",
+        "Spain": "ES",
+        "Brazil": "BR",
+        "Singapore": "SG",
+        "Israel": "IL",
+        "Belgium": "BE",
+        "Austria": "AT",
+    }
+    selected_country = st.sidebar.selectbox(
+        "Filter by author country",
+        options=list(country_options.keys()),
+        help="Filter papers by the country of at least one author. Uses ISO country codes from OpenAlex data."
+    )
+    country_code = country_options[selected_country]
     num_papers = st.sidebar.slider(
         "Number of papers to fetch",
         min_value=20,
             model = load_model()
         # Search papers
+        with st.spinner(f"Searching OpenAlex for papers about '{query}'{' from ' + selected_country if country_code else ''}..."):
+            papers = search_openalex_papers(query, num_papers, country_code)
         if not papers:
             st.warning("No papers found. Try different search terms.")