import os import re import copy import datasets import pandas as pd import gradio as gr from datetime import datetime, timedelta from datasets import Dataset from huggingface_hub import HfApi from huggingface_hub import create_repo from huggingface_hub.utils import HfHubHTTPError import utils from paper.download import ( download_pdf_from_arxiv, get_papers_from_hf_daily_papers, get_papers_from_arxiv_ids ) from paper.parser import extract_text_and_figures from gen.gemini import get_basic_qa, get_deep_qa from constants.styles import STYLE from constants.js import UPDATE_SEARCH_RESULTS, UPDATE_IF_TYPE from apscheduler.schedulers.background import BackgroundScheduler gemini_api_key = os.getenv("GEMINI_API_KEY") hf_token = os.getenv("HF_TOKEN") dataset_repo_id = "chansung/auto-paper-qa2" request_arxiv_repo_id="chansung/requested-arxiv-ids-3" ds = datasets.load_dataset(dataset_repo_id) request_ds = datasets.load_dataset(request_arxiv_repo_id) requested_arxiv_ids = [] for request_d in request_ds['train']: arxiv_ids = request_d['Requested arXiv IDs'] requested_arxiv_ids = requested_arxiv_ids + arxiv_ids requested_arxiv_ids_df = pd.DataFrame({'Requested arXiv IDs': requested_arxiv_ids}) title2qna = {} date2qna = {} longest_qans = 0 def filter_function(example, ids): ids_e = example['Requested arXiv IDs'] for iid in ids: if iid in ids_e: ids_e.remove(iid) example['Requested arXiv IDs'] = ids_e print(example) return example def process_arxiv_ids(gemini_api, hf_repo_id, req_hf_repo_id, hf_token, how_many=10): arxiv_ids = [] ds1 = datasets.load_dataset(req_hf_repo_id) for d in ds1['train']: req_arxiv_ids = d['Requested arXiv IDs'] if len(req_arxiv_ids) > 0 and req_arxiv_ids[0] != "top": arxiv_ids = arxiv_ids + req_arxiv_ids arxiv_ids = arxiv_ids[:how_many] if arxiv_ids is not None and len(arxiv_ids) > 0: print(f"1. Get metadata for the papers [{arxiv_ids}]") papers = get_papers_from_arxiv_ids(arxiv_ids) print("...DONE") print("2. Generating QAs for the paper") for paper in papers: try: title = paper['title'] target_date = paper['target_date'] abstract = paper['paper']['summary'] arxiv_id = paper['paper']['id'] authors = paper['paper']['authors'] print(f"...PROCESSING ON[{arxiv_id}, {title}]") print(f"......Downloading the paper PDF") filename = download_pdf_from_arxiv(arxiv_id) print(f"......DONE") print(f"......Extracting text and figures") texts, figures = extract_text_and_figures(filename) text =' '.join(texts) print(f"......DONE") print(f"......Generating the seed(basic) QAs") qnas = get_basic_qa(text, gemini_api_key=gemini_api, trucate=30000) qnas['title'] = title qnas['abstract'] = abstract qnas['authors'] = ','.join(authors) qnas['arxiv_id'] = arxiv_id qnas['target_date'] = target_date qnas['full_text'] = text print(f"......DONE") print(f"......Generating the follow-up QAs") qnas = get_deep_qa(text, qnas, gemini_api_key=gemini_api, trucate=30000) del qnas["qna"] print(f"......DONE") print(f"......Exporting to HF Dataset repo at [{hf_repo_id}]") utils.push_to_hf_hub(qnas, hf_repo_id, hf_token) print(f"......DONE") print(f"......Updating request arXiv HF Dataset repo at [{req_hf_repo_id}]") ds1 = ds1['train'].map( lambda example: filter_function(example, [arxiv_id]) ).filter( lambda example: len(example['Requested arXiv IDs']) > 0 ) ds1.push_to_hub(req_hf_repo_id, token=hf_token) print(f"......DONE") except Exception as e: print(f".......failed due to exception {e}") continue HfApi(token=hf_token).restart_space( repo_id="chansung/paper_qa", token=hf_token ) def push_to_hf_hub( df, repo_id, token, append=True ): exist = False ds = Dataset.from_pandas(df) try: create_repo(request_arxiv_repo_id, repo_type="dataset", token=hf_token) except HfHubHTTPError as e: exist = True if exist and append: existing_ds = datasets.load_dataset(repo_id) ds = datasets.concatenate_datasets([existing_ds['train'], ds]) ds.push_to_hub(repo_id, token=token) def _filter_duplicate_arxiv_ids(arxiv_ids_to_be_added): ds1 = datasets.load_dataset("chansung/requested-arxiv-ids-3") ds2 = datasets.load_dataset("chansung/auto-paper-qa2") unique_arxiv_ids = set() for d in ds1['train']: arxiv_ids = d['Requested arXiv IDs'] unique_arxiv_ids = set(list(unique_arxiv_ids) + arxiv_ids) for d in ds2['train']: arxiv_id = d['arxiv_id'] unique_arxiv_ids.add(arxiv_id) return list(set(arxiv_ids_to_be_added) - unique_arxiv_ids) def _is_arxiv_id_valid(arxiv_id): pattern = r"^\d{4}\.\d{5}$" return bool(re.match(pattern, arxiv_id)) def _get_valid_arxiv_ids(arxiv_ids_str): valid_arxiv_ids = [] invalid_arxiv_ids = [] for arxiv_id in arxiv_ids_str.split(","): arxiv_id = arxiv_id.strip() if _is_arxiv_id_valid(arxiv_id): valid_arxiv_ids.append(arxiv_id) else: invalid_arxiv_ids.append(arxiv_id) return valid_arxiv_ids, invalid_arxiv_ids def add_arxiv_ids_to_queue(queue, arxiv_ids_str): print(0) valid_arxiv_ids, invalid_arxiv_ids = _get_valid_arxiv_ids(arxiv_ids_str) print("01") if len(invalid_arxiv_ids) > 0: gr.Warning(f"found invalid arXiv ids as in {invalid_arxiv_ids}") if len(valid_arxiv_ids) > 0: valid_arxiv_ids = _filter_duplicate_arxiv_ids(valid_arxiv_ids) if len(valid_arxiv_ids) > 0: valid_arxiv_ids = [[arxiv_id] for arxiv_id in valid_arxiv_ids] gr.Warning(f"Processing on [{valid_arxiv_ids}]. Other requested arXiv IDs not found on this list should be already processed or being processed...") valid_arxiv_ids = pd.DataFrame({'Requested arXiv IDs': valid_arxiv_ids}) queue = pd.concat([queue, valid_arxiv_ids]) queue.reset_index(drop=True) push_to_hf_hub(valid_arxiv_ids, request_arxiv_repo_id, hf_token) else: gr.Warning(f"All requested arXiv IDs are already processed or being processed...") else: gr.Warning(f"No valid arXiv IDs found...") return queue def count_nans(row): count = 0 for _, (k, v) in enumerate(data.items()): if v is None: count = count + 1 return count for data in ds["train"]: date = data["target_date"].strftime("%Y-%m-%d") if date in date2qna: papers = copy.deepcopy(date2qna[date]) for paper in papers: if paper["title"] == data["title"]: if count_nans(paper) > count_nans(data): date2qna[date].remove(paper) date2qna[date].append(data) del papers else: date2qna[date] = [data] for date in date2qna: papers = date2qna[date] for paper in papers: title2qna[paper["title"]] = paper titles = title2qna.keys() sorted_dates = sorted(date2qna.keys()) last_date = sorted_dates[-1] last_papers = date2qna[last_date] selected_paper = last_papers[0] def get_papers(date): papers = [paper["title"] for paper in date2qna[date]] return gr.Dropdown( papers, value=papers[0] ) def set_paper(date, paper_title): selected_paper = None for paper in date2qna[date]: if paper["title"] == paper_title: selected_paper = paper break return ( gr.Markdown(f"# {selected_paper['title']}"), gr.Markdown( "[![arXiv](https://img.shields.io/badge/arXiv-%s-b31b1b.svg)](https://arxiv.org/abs/%s)" % (selected_paper['arxiv_id'], selected_paper['arxiv_id']) + "[![Paper page](https://huggingface.co/datasets/huggingface/badges/resolve/main/paper-page-md.svg)](https://huggingface.co/papers/%s)" % selected_paper['arxiv_id'] ), gr.Markdown(selected_paper["summary"]), gr.Markdown(f"### 🙋 {selected_paper['0_question']}"), gr.Markdown(f"↪ **(ELI5)** {selected_paper['0_answers:eli5']}"), gr.Markdown(f"↪ **(Technical)** {selected_paper['0_answers:expert']}"), gr.Markdown(f"### 🙋🙋 {selected_paper['0_additional_depth_q:follow up question']}"), gr.Markdown(f"↪ **(ELI5)** {selected_paper['0_additional_depth_q:answers:eli5']}"), gr.Markdown(f"↪ **(Technical)** {selected_paper['0_additional_depth_q:answers:expert']}"), gr.Markdown(f"### 🙋🙋 {selected_paper['0_additional_breath_q:follow up question']}"), gr.Markdown(f"↪ **(ELI5)** {selected_paper['0_additional_breath_q:answers:eli5']}"), gr.Markdown(f"↪ **(Technical)** {selected_paper['0_additional_breath_q:answers:expert']}"), gr.Markdown(f"### 🙋 {selected_paper['1_question']}"), gr.Markdown(f"↪ **(ELI5)** {selected_paper['1_answers:eli5']}"), gr.Markdown(f"↪ **(Technical)** {selected_paper['1_answers:expert']}"), gr.Markdown(f"### 🙋🙋 {selected_paper['1_additional_depth_q:follow up question']}"), gr.Markdown(f"↪ **(ELI5)** {selected_paper['1_additional_depth_q:answers:eli5']}"), gr.Markdown(f"↪ **(Technical)** {selected_paper['1_additional_depth_q:answers:expert']}"), gr.Markdown(f"### 🙋🙋 {selected_paper['1_additional_breath_q:follow up question']}"), gr.Markdown(f"↪ **(ELI5)** {selected_paper['1_additional_breath_q:answers:eli5']}"), gr.Markdown(f"↪ **(Technical)** {selected_paper['1_additional_breath_q:answers:expert']}"), gr.Markdown(f"### 🙋 {selected_paper['2_question']}"), gr.Markdown(f"↪ **(ELI5)** {selected_paper['2_answers:eli5']}"), gr.Markdown(f"↪ **(Technical)** {selected_paper['2_answers:expert']}"), gr.Markdown(f"### 🙋🙋 {selected_paper['2_additional_depth_q:follow up question']}"), gr.Markdown(f"↪ **(ELI5)** {selected_paper['2_additional_depth_q:answers:eli5']}"), gr.Markdown(f"↪ **(Technical)** {selected_paper['2_additional_depth_q:answers:expert']}"), gr.Markdown(f"### 🙋🙋 {selected_paper['2_additional_breath_q:follow up question']}"), gr.Markdown(f"↪ **(ELI5)** {selected_paper['2_additional_breath_q:answers:eli5']}"), gr.Markdown(f"↪ **(Technical)** {selected_paper['2_additional_breath_q:answers:expert']}"), ) def change_exp_type(exp_type): if exp_type == "ELI5": return ( gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), ) else: return ( gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), gr.Markdown(visible=False), gr.Markdown(visible=True), ) def search(search_in, max_results=3): results = [] for title in titles: if len(results) > 3: break else: if search_in in title: results.append(title) return ( gr.Textbox( visible=True if len(results) > 0 else False, value=results[0] if len(results) > 0 else "" ), gr.Textbox( visible=True if len(results) > 1 else False, value=results[1] if len(results) > 1 else "" ), gr.Textbox( visible=True if len(results) > 2 else False, value=results[2] if len(results) > 2 else "" ) ) def set_date(title): paper = title2qna[title] date = paper["target_date"].strftime("%Y-%m-%d") return date def set_papers(date, title): papers = [paper["title"] for paper in date2qna[date]] return ( gr.Dropdown(choices=papers, value=title), gr.Textbox("") ) with gr.Blocks(css=STYLE, theme=gr.themes.Soft()) as demo: gr.Markdown("# Let's explore papers with auto generated Q&As") with gr.Column(elem_classes=["group"]): with gr.Row(): date_dd = gr.Dropdown( sorted_dates, value=last_date, label="Select date", interactive=True, scale=3, filterable=False ) papers_dd = gr.Dropdown( [paper["title"] for paper in last_papers], value=selected_paper["title"], label="Select paper title", interactive=True, scale=7, filterable=False ) with gr.Column(elem_classes=["no-gap"]): search_in = gr.Textbox("", placeholder="Enter keywords to search...", elem_classes=["textbox-no-label"]) search_r1 = gr.Button(visible=False, elem_id="search_r1", elem_classes=["no-radius"]) search_r2 = gr.Button(visible=False, elem_id="search_r2", elem_classes=["no-radius"]) search_r3 = gr.Button(visible=False, elem_id="search_r3", elem_classes=["no-radius"]) search_r4 = gr.Button(visible=False, elem_id="search_r4", elem_classes=["no-radius"]) search_r5 = gr.Button(visible=False, elem_id="search_r5", elem_classes=["no-radius"]) search_r6 = gr.Button(visible=False, elem_id="search_r6", elem_classes=["no-radius"]) search_r7 = gr.Button(visible=False, elem_id="search_r7", elem_classes=["no-radius"]) search_r8 = gr.Button(visible=False, elem_id="search_r8", elem_classes=["no-radius"]) search_r9 = gr.Button(visible=False, elem_id="search_r9", elem_classes=["no-radius"]) search_r10 = gr.Button(visible=False, elem_id="search_r10", elem_classes=["no-radius"]) conv_type = gr.Radio(choices=["Q&As", "Chat"], value="Q&As", interactive=True, visible=False, elem_classes=["conv-type"]) with gr.Column(scale=7): title = gr.Markdown(f"# {selected_paper['title']}") # with gr.Row(): arxiv_link = gr.Markdown( "[![arXiv](https://img.shields.io/badge/arXiv-%s-b31b1b.svg)](https://arxiv.org/abs/%s)" % (selected_paper['arxiv_id'], selected_paper['arxiv_id']) + "[![Paper page](https://huggingface.co/datasets/huggingface/badges/resolve/main/paper-page-md.svg)](https://huggingface.co/papers/%s)" % selected_paper['arxiv_id'] ) # hf_paper_link = gr.Markdown( # ) summary = gr.Markdown(f"{selected_paper['summary']}", elem_classes=["small-font"]) with gr.Column(elem_id="chat_block", visible=False): gr.Chatbot([("hello", "world"), ("how", "are you?")]) with gr.Column(elem_id="qna_block", visible=True): with gr.Row(): with gr.Column(scale=7): gr.Markdown("## Auto generated Questions & Answers") exp_type = gr.Radio(choices=["ELI5", "Technical"], value="ELI5", elem_classes=["exp-type"], scale=3) # 1 with gr.Column(elem_classes=["group"], visible=True) as q_0: basic_q_0 = gr.Markdown(f"### 🙋 {selected_paper['0_question']}") basic_q_eli5_0 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['0_answers:eli5']}", elem_classes=["small-font"]) basic_q_expert_0 = gr.Markdown(f"↪ **(Technical)** {selected_paper['0_answers:expert']}", visible=False, elem_classes=["small-font"]) with gr.Accordion("Additional question #1", open=False, elem_classes=["accordion"]) as aq_0_0: depth_q_0 = gr.Markdown(f"### 🙋🙋 {selected_paper['0_additional_depth_q:follow up question']}") depth_q_eli5_0 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['0_additional_depth_q:answers:eli5']}", elem_classes=["small-font"]) depth_q_expert_0 = gr.Markdown(f"↪ **(Technical)** {selected_paper['0_additional_depth_q:answers:expert']}", visible=False, elem_classes=["small-font"]) with gr.Accordion("Additional question #2", open=False, elem_classes=["accordion"]) as aq_0_1: breath_q_0 = gr.Markdown(f"### 🙋🙋 {selected_paper['0_additional_breath_q:follow up question']}") breath_q_eli5_0 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['0_additional_breath_q:answers:eli5']}", elem_classes=["small-font"]) breath_q_expert_0 = gr.Markdown(f"↪ **(Technical)** {selected_paper['0_additional_breath_q:answers:expert']}", visible=False, elem_classes=["small-font"]) # 2 with gr.Column(elem_classes=["group"], visible=True) as q_1: basic_q_1 = gr.Markdown(f"### 🙋 {selected_paper['1_question']}") basic_q_eli5_1 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['1_answers:eli5']}", elem_classes=["small-font"]) basic_q_expert_1 = gr.Markdown(f"↪ **(Technical)** {selected_paper['1_answers:expert']}", visible=False, elem_classes=["small-font"]) with gr.Accordion("Additional question #1", open=False, elem_classes=["accordion"]) as aq_1_0: depth_q_1 = gr.Markdown(f"### 🙋🙋 {selected_paper['1_additional_depth_q:follow up question']}") depth_q_eli5_1 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['1_additional_depth_q:answers:eli5']}", elem_classes=["small-font"]) depth_q_expert_1 = gr.Markdown(f"↪ **(Technical)** {selected_paper['1_additional_depth_q:answers:expert']}", visible=False, elem_classes=["small-font"]) with gr.Accordion("Additional question #2", open=False, elem_classes=["accordion"]) as aq_1_1: breath_q_1 = gr.Markdown(f"### 🙋🙋 {selected_paper['1_additional_breath_q:follow up question']}") breath_q_eli5_1 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['1_additional_breath_q:answers:eli5']}", elem_classes=["small-font"]) breath_q_expert_1 = gr.Markdown(f"↪ **(Technical)** {selected_paper['1_additional_breath_q:answers:expert']}", visible=False, elem_classes=["small-font"]) # 3 with gr.Column(elem_classes=["group"], visible=True) as q_2: basic_q_2 = gr.Markdown(f"### 🙋 {selected_paper['2_question']}") basic_q_eli5_2 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['2_answers:eli5']}", elem_classes=["small-font"]) basic_q_expert_2 = gr.Markdown(f"↪ **(Technical)** {selected_paper['2_answers:expert']}", visible=False, elem_classes=["small-font"]) with gr.Accordion("Additional question #1", open=False, elem_classes=["accordion"]) as aq_2_0: depth_q_2 = gr.Markdown(f"### 🙋🙋 {selected_paper['2_additional_depth_q:follow up question']}") depth_q_eli5_2 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['2_additional_depth_q:answers:eli5']}", elem_classes=["small-font"]) depth_q_expert_2 = gr.Markdown(f"↪ **(Technical)** {selected_paper['2_additional_depth_q:answers:expert']}", visible=False, elem_classes=["small-font"]) with gr.Accordion("Additional question #2", open=False, elem_classes=["accordion"]) as aq_2_1: breath_q_2 = gr.Markdown(f"### 🙋🙋 {selected_paper['2_additional_breath_q:follow up question']}") breath_q_eli5_2 = gr.Markdown(f"↪ **(ELI5)** {selected_paper['2_additional_breath_q:answers:eli5']}", elem_classes=["small-font"]) breath_q_expert_2 = gr.Markdown(f"↪ **(Technical)** {selected_paper['2_additional_breath_q:answers:expert']}", visible=False, elem_classes=["small-font"]) gr.Markdown("## Request any arXiv ids") arxiv_queue = gr.Dataframe( headers=["Requested arXiv IDs"], col_count=(1, "fixed"), value=requested_arxiv_ids_df, datatype=["str"], interactive=False ) arxiv_id_enter = gr.Textbox(placeholder="Enter comma separated arXiv IDs...", elem_classes=["textbox-no-label"]) arxiv_id_enter.submit( add_arxiv_ids_to_queue, [arxiv_queue, arxiv_id_enter], arxiv_queue ) gr.Markdown("The target papers are collected from [Hugging Face 🤗 Daily Papers](https://huggingface.co/papers) on a daily basis. " "The entire data is generated by [Google's Gemini 1.0](https://deepmind.google/technologies/gemini/) Pro. " "If you are curious how it is done, visit the [Auto Paper Q&A Generation project repository](https://github.com/deep-diver/auto-paper-analysis) " "Also, the generated dataset is hosted on Hugging Face 🤗 Dataset repository as well([Link](https://huggingface.co/datasets/chansung/auto-paper-qa2)). ") search_r1.click(set_date, search_r1, date_dd).then( set_papers, inputs=[date_dd, search_r1], outputs=[papers_dd, search_in] ) search_r2.click(set_date, search_r2, date_dd).then( set_papers, inputs=[date_dd, search_r2], outputs=[papers_dd, search_in] ) search_r3.click(set_date, search_r3, date_dd).then( set_papers, inputs=[date_dd, search_r3], outputs=[papers_dd, search_in] ) search_r4.click(set_date, search_r4, date_dd).then( set_papers, inputs=[date_dd, search_r4], outputs=[papers_dd, search_in] ) search_r5.click(set_date, search_r5, date_dd).then( set_papers, inputs=[date_dd, search_r5], outputs=[papers_dd, search_in] ) search_r6.click(set_date, search_r6, date_dd).then( set_papers, inputs=[date_dd, search_r6], outputs=[papers_dd, search_in] ) search_r7.click(set_date, search_r7, date_dd).then( set_papers, inputs=[date_dd, search_r7], outputs=[papers_dd, search_in] ) search_r8.click(set_date, search_r8, date_dd).then( set_papers, inputs=[date_dd, search_r8], outputs=[papers_dd, search_in] ) search_r9.click(set_date, search_r9, date_dd).then( set_papers, inputs=[date_dd, search_r9], outputs=[papers_dd, search_in] ) search_r10.click(set_date, search_r10, date_dd).then( set_papers, inputs=[date_dd, search_r10], outputs=[papers_dd, search_in] ) date_dd.input(get_papers, date_dd, papers_dd).then( set_paper, [date_dd, papers_dd], [ title, arxiv_link, summary, basic_q_0, basic_q_eli5_0, basic_q_expert_0, depth_q_0, depth_q_eli5_0, depth_q_expert_0, breath_q_0, breath_q_eli5_0, breath_q_expert_0, basic_q_1, basic_q_eli5_1, basic_q_expert_1, depth_q_1, depth_q_eli5_1, depth_q_expert_1, breath_q_1, breath_q_eli5_1, breath_q_expert_1, basic_q_2, basic_q_eli5_2, basic_q_expert_2, depth_q_2, depth_q_eli5_2, depth_q_expert_2, breath_q_2, breath_q_eli5_2, breath_q_expert_2 ] ) papers_dd.change( set_paper, [date_dd, papers_dd], [ title, arxiv_link, summary, basic_q_0, basic_q_eli5_0, basic_q_expert_0, depth_q_0, depth_q_eli5_0, depth_q_expert_0, breath_q_0, breath_q_eli5_0, breath_q_expert_0, basic_q_1, basic_q_eli5_1, basic_q_expert_1, depth_q_1, depth_q_eli5_1, depth_q_expert_1, breath_q_1, breath_q_eli5_1, breath_q_expert_1, basic_q_2, basic_q_eli5_2, basic_q_expert_2, depth_q_2, depth_q_eli5_2, depth_q_expert_2, breath_q_2, breath_q_eli5_2, breath_q_expert_2 ] ) search_in.change( inputs=[search_in], outputs=[ search_r1, search_r2, search_r3, search_r4, search_r5, search_r6, search_r7, search_r8, search_r9, search_r10 ], js=UPDATE_SEARCH_RESULTS % str(list(titles)), fn=None ) exp_type.select( change_exp_type, exp_type, [ basic_q_eli5_0, basic_q_expert_0, depth_q_eli5_0, depth_q_expert_0, breath_q_eli5_0, breath_q_expert_0, basic_q_eli5_1, basic_q_expert_1, depth_q_eli5_1, depth_q_expert_1, breath_q_eli5_1, breath_q_expert_1, basic_q_eli5_2, basic_q_expert_2, depth_q_eli5_2, depth_q_expert_2, breath_q_eli5_2, breath_q_expert_2 ] ) conv_type.select( inputs=[conv_type], js=UPDATE_IF_TYPE, outputs=None, fn=None ) start_date = datetime.now() + timedelta(minutes=1) scheduler = BackgroundScheduler() scheduler.add_job( process_arxiv_ids, trigger='interval', seconds=3600, args=[ gemini_api_key, dataset_repo_id, request_arxiv_repo_id, hf_token ], start_date=start_date ) scheduler.start() demo.launch(share=True, debug=True)