Spaces:
Paused
Paused
# arxiv_fetcher.py | |
import arxiv | |
from typing import List, Dict, Any | |
import logging | |
# Configure the root logger when this module is imported: INFO level and
# above, each record formatted as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | |
def fetch_arxiv_metadata(query: str, max_results: int = 10) -> List[Dict[str, Any]]:
    """Search arXiv and return metadata for the most recently submitted matches.

    Args:
        query: arXiv search query. ``None``, empty, or whitespace-only
            queries yield an empty list without contacting the API.
        max_results: Upper bound on the number of papers requested. Values
            less than 1 yield an empty list without contacting the API.

    Returns:
        One dictionary per paper with the keys ``title``, ``authors``,
        ``published``, ``updated``, ``pdf_url``, ``entry_id``, ``summary``,
        ``categories``, ``primary_category`` and ``html_url``. If an error
        occurs while fetching, it is logged and whatever was collected up to
        that point is returned (possibly an empty list); the error is not
        re-raised.
    """
    logging.info("Fetching arXiv metadata for query: %s", query)

    # `not query` also covers None, which would otherwise fail on .strip().
    if not query or not query.strip():
        logging.warning("Empty or whitespace-only query provided")
        return []

    # A non-positive limit means "nothing requested"; bail out before it is
    # used as a page size.
    # NOTE(review): the arxiv library appears to treat a falsy max_results as
    # "no limit" - confirm against the installed version.
    if max_results < 1:
        logging.warning("Non-positive max_results provided: %s", max_results)
        return []

    # The arXiv API serves at most 2000 results per request, so the page size
    # is capped; the client pages through larger result sets on its own.
    max_page_size = 2000
    client = arxiv.Client(
        page_size=min(max_results, max_page_size),
        delay_seconds=3,
        num_retries=3,
    )
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )

    results: List[Dict[str, Any]] = []
    try:
        for result in client.results(search):
            results.append(_result_to_metadata(result))
        logging.info("Fetched metadata for %d papers", len(results))
    except Exception as exc:
        # Deliberate best-effort: keep whatever was fetched before the
        # failure, but record the traceback so the cause is not lost.
        logging.exception("Error fetching metadata: %s", exc)
    return results


def _result_to_metadata(result: Any) -> Dict[str, Any]:
    """Flatten one arXiv search result into a plain dictionary."""
    # The last path segment of entry_id is reused to build the abstract URL.
    paper_id = result.entry_id.split('/')[-1]
    return {
        "title": result.title,
        "authors": [author.name for author in result.authors],
        "published": result.published.isoformat(),
        "updated": result.updated.isoformat(),
        "pdf_url": result.pdf_url,
        "entry_id": result.entry_id,
        "summary": result.summary,
        "categories": result.categories,
        "primary_category": result.primary_category,
        "html_url": f"http://arxiv.org/abs/{paper_id}",
    }