Spaces:
Running
Running
import gradio as gr | |
import requests | |
from bs4 import BeautifulSoup | |
import time | |
# --- API Search Function (Semantic Scholar) ---
def search_semantic_scholar(query, limit=5):
    """Search the Semantic Scholar Graph API for papers matching *query*.

    Args:
        query: Free-text search string, sent as the ``query`` parameter.
        limit: Maximum number of papers requested from the API.

    Returns:
        A list of dicts with the keys ``title``, ``abstract``, ``url`` and
        ``authors`` (author names joined with ``", "``). An empty list is
        returned when the request fails or the response is not valid JSON.
    """
    base_url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "fields": "title,abstract,url,authors",
        "limit": limit,
    }
    try:
        # requests waits indefinitely unless a timeout is given; bound the
        # wait so a stalled connection cannot hang the caller.
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"Error during Semantic Scholar API request: {e}")
        return []

    results = []
    for paper in data.get("data") or []:
        # ``dict.get(key, default)`` only falls back when the key is absent;
        # a key that is present with a null value would come back as None,
        # so use ``or`` to apply the placeholder in both cases.
        author_names = ", ".join(
            author.get("name") or "Unknown"
            for author in paper.get("authors") or []
        )
        results.append({
            "title": paper.get("title") or "No Title",
            "abstract": paper.get("abstract") or "No Abstract",
            "url": paper.get("url") or "No URL",
            "authors": author_names,
        })
    return results
# --- Scraping Function (Google Scholar - use cautiously) ---
def search_google_scholar(query, limit=3):
    """Scrape a Google Scholar result page for entries matching *query*.

    This parses HTML rather than calling an API, so it depends on the page's
    current markup (the ``gs_r``/``gs_rt``/``gs_rs`` class names) and should
    be used cautiously.

    Args:
        query: Free-text search string, sent as the ``q`` URL parameter.
        limit: Maximum number of result entries to return.

    Returns:
        A list of dicts with the keys ``title``, ``link`` and ``snippet``.
        An empty list is returned when the request fails.
    """
    url = "https://scholar.google.com/scholar"
    try:
        # Pass the query through ``params`` so requests URL-encodes it;
        # interpolating it into the URL lets characters such as ``&`` or
        # ``#`` change the meaning of the request. The timeout keeps a
        # stalled connection from hanging the caller.
        response = requests.get(url, params={"q": query}, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error during Google Scholar scraping: {e}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    # A CSS selector matches the three classes regardless of their order or
    # of extra classes on the element, unlike an exact class-string match.
    entries = soup.select('div.gs_r.gs_or.gs_scl')

    results = []
    for entry in entries[:max(limit, 0)]:
        title_element = entry.find('h3', class_='gs_rt')
        link_element = title_element.find('a') if title_element else None
        snippet_element = entry.find('div', class_='gs_rs')
        # ``.get`` avoids a KeyError on an anchor that carries no href.
        href = link_element.get('href') if link_element else None
        results.append({
            "title": title_element.text if title_element else "No Title",
            "link": href or "No Link",
            "snippet": snippet_element.text if snippet_element else "No Snippet",
        })
    return results
# --- Combine Search Results ---
def search_multiple_sources(query):
    """Query every supported source and return one combined result list.

    Semantic Scholar results (up to 5) come first, followed by Google
    Scholar results (up to 3).
    """
    semantic_hits = search_semantic_scholar(query, limit=5)
    scholar_hits = search_google_scholar(query, limit=3)
    return [*semantic_hits, *scholar_hits]
# --- Gradio Search Function ---
def search(query):
    """Run a combined search and format the results as Markdown.

    Args:
        query: Search keywords entered by the user.

    Returns:
        A Markdown string with one entry per result (title, authors,
        abstract or snippet, link), or ``"No results found."`` when the
        query is blank or no source returned anything.
    """
    # A blank query cannot match anything; skip the network round-trips.
    if not query or not query.strip():
        return "No results found."

    results = search_multiple_sources(query)
    if not results:
        return "No results found."

    entries = []
    for result in results:
        # Results from different sources use different keys (``url`` vs
        # ``link``, ``abstract`` vs ``snippet``); ``or`` also replaces
        # None/empty values with a readable placeholder.
        title = result.get("title") or "No Title"
        authors = result.get("authors") or "Unknown"
        abstract = (
            result.get("abstract") or result.get("snippet") or "No Abstract"
        )
        link = result.get("url") or result.get("link") or "No Link"
        # Blank lines between fields make each one its own Markdown
        # paragraph, and keep the dashed line a horizontal rule instead of
        # a heading underline for the preceding text.
        entries.append(
            "\n\n".join([
                f"**Title:** {title}",
                f"**Authors:** {authors}",
                f"**Abstract:** {abstract}",
                f"**Link:** {link}",
                "----------------------------------",
            ])
            + "\n\n"
        )
    return "".join(entries)
# --- Gradio Interface ---
# Build the page: a heading, a two-line query box, a search button and a
# Markdown area that displays whatever `search` returns.
with gr.Blocks() as interface:
    gr.Markdown(value="# Research Paper Search")
    query_input = gr.Textbox(placeholder="Enter keywords...", lines=2)
    search_button = gr.Button(value="Search")
    result_output = gr.Markdown()
    # Clicking the button sends the textbox content to `search` and shows
    # the returned Markdown in the output area.
    search_button.click(
        fn=search,
        inputs=query_input,
        outputs=result_output,
    )

# --- Launch ---
interface.launch()