"""Gradio front end for extracting arXiv metadata and browsing the stored dataset.

The module wires two tabs:

* "Extract Metadata" -- runs a query through ``ArxivRetrievalService`` and
  hands the results to ``DatasetManagementService.update_dataset``.
* "View Dataset" -- shows one page of dataset records as JSON.
"""

import logging
from typing import Any, Dict, List

import gradio as gr

from config import DATASET_NAME
from arxiv_retrieval_service import ArxivRetrievalService
from dataset_management_service import DatasetManagementService

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

arxiv_service = ArxivRetrievalService()
dataset_service = DatasetManagementService(DATASET_NAME)


def _to_positive_int(value: Any, default: int) -> int:
    """Return *value* as an int of at least 1, or *default* when it is None.

    Raises whatever ``int()`` raises for values it cannot convert; callers
    invoke this inside their own ``try`` block so the error is reported in
    the UI like any other failure.
    """
    if value is None:
        return default
    return max(1, int(value))


def handle_metadata_extraction(query: str, max_results: int) -> str:
    """Fetch metadata for *query* and merge it into the dataset.

    Args:
        query: Search string forwarded to ``arxiv_service.fetch_metadata``.
        max_results: Upper bound on the number of results to request.

    Returns:
        The value returned by ``dataset_service.update_dataset`` on success,
        or a human-readable message when the query is blank, nothing was
        found, or an exception occurred. Exceptions are never propagated,
        so the UI always has something to display.
    """
    try:
        # Nothing to search for: answer immediately instead of calling the service.
        query = (query or "").strip()
        if not query:
            return "Please enter a query."

        # Gradio numeric widgets may deliver floats; the service is annotated
        # as taking an int, so normalise here (10 mirrors the slider default).
        max_results = _to_positive_int(max_results, 10)

        logging.info("Fetching metadata for query: %s, max_results: %s", query, max_results)
        metadata_list = arxiv_service.fetch_metadata(query, max_results)
        if not metadata_list:
            return "No metadata found for the given query."

        result = dataset_service.update_dataset(metadata_list)
        logging.info("Dataset update result: %s", result)
        return result
    except Exception as e:
        # UI boundary: report the failure to the user, keep the traceback in the log.
        error_msg = f"An error occurred during metadata extraction: {str(e)}"
        logging.exception(error_msg)
        return error_msg


def handle_dataset_view(page: int = 1, page_size: int = 10) -> Dict[str, Any]:
    """Return one page of dataset records plus paging information.

    Args:
        page: Page number to show. Values below 1 (or ``None``) are treated
            as 1 -- this assumes pages are 1-based, matching the default
            used here and in the UI; confirm against
            ``DatasetManagementService.get_dataset_records``.
        page_size: Number of records per page; ``None`` falls back to 10.

    Returns:
        A dict with ``total_records``, ``current_page``, ``page_size`` and
        ``records`` on success, or ``{"error": <message>}`` on failure.
    """
    logging.info("handle_dataset_view called with page=%s, page_size=%s", page, page_size)
    try:
        # The Number field may be None if cleared, and widget values may be
        # floats; normalise before handing them to the service.
        page = _to_positive_int(page, 1)
        page_size = _to_positive_int(page_size, 10)

        total_records = dataset_service.get_dataset_size()
        logging.info("Total records: %s", total_records)

        records = dataset_service.get_dataset_records(page, page_size)
        logging.info("Records type: %s", type(records))
        logging.info("Number of records returned: %s", len(records))

        result = {
            "total_records": total_records,
            "current_page": page,
            "page_size": page_size,
            "records": records,
        }
        # The full payload contains every record on the page; keep it out of
        # the INFO log and only emit it when DEBUG logging is enabled.
        logging.debug("Returning result: %s", result)
        return result
    except Exception as e:
        # UI boundary: surface the error as JSON, keep the traceback in the log.
        error_msg = f"Error loading dataset: {str(e)}"
        logging.exception(error_msg)
        return {"error": error_msg}


with gr.Blocks() as demo:
    gr.Markdown(
        f"""# ArXiv Metadata Extraction and Dataset Management
        This application extracts metadata from ArXiv papers and manages the dataset: [{DATASET_NAME}](https://huggingface.co/datasets/{DATASET_NAME}/viewer)
        """
    )

    with gr.Tab("Extract Metadata"):
        query_input = gr.Textbox(label="ArXiv Query")
        max_results = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Max Results")
        submit_button = gr.Button("Extract Metadata")
        output = gr.Textbox(label="Result")

        submit_button.click(
            fn=handle_metadata_extraction,
            inputs=[query_input, max_results],
            outputs=output,
        )

    with gr.Tab("View Dataset"):
        page_number = gr.Number(value=1, label="Page Number", precision=0)
        page_size = gr.Slider(minimum=5, maximum=50, value=10, step=5, label="Page Size")
        refresh_button = gr.Button("Refresh Dataset View")
        dataset_info = gr.JSON(label="Dataset Info")

        refresh_button.click(
            fn=handle_dataset_view,
            inputs=[page_number, page_size],
            outputs=dataset_info,
        )

if __name__ == "__main__":
    demo.launch()