chansung committed on
Commit
4bad4d8
•
1 Parent(s): 73cb5b4

fix empty data

Browse files
Files changed (3) hide show
  1. app.py +2 -2
  2. init.py +1 -1
  3. ui.py +4 -3
app.py CHANGED
@@ -16,9 +16,9 @@ from background import process_arxiv_ids
16
  from apscheduler.schedulers.background import BackgroundScheduler
17
 
18
  gemini_api_key, hf_token, dataset_repo_id, request_arxiv_repo_id, restart_repo_id = get_secrets()
19
- empty_src_dataset = initialize_repos(dataset_repo_id, request_arxiv_repo_id, hf_token)
20
 
21
- titles, date_dict, requested_arxiv_ids_df, arxivid2data = initialize_data(dataset_repo_id, request_arxiv_repo_id, empty_src_dataset)
22
 
23
  from ui import (
24
  get_paper_by_year, get_paper_by_month, get_paper_by_day,
 
16
  from apscheduler.schedulers.background import BackgroundScheduler
17
 
18
  gemini_api_key, hf_token, dataset_repo_id, request_arxiv_repo_id, restart_repo_id = get_secrets()
19
+ initialize_repos(dataset_repo_id, request_arxiv_repo_id, hf_token)
20
 
21
+ titles, date_dict, requested_arxiv_ids_df, arxivid2data = initialize_data(dataset_repo_id, request_arxiv_repo_id)
22
 
23
  from ui import (
24
  get_paper_by_year, get_paper_by_month, get_paper_by_day,
init.py CHANGED
@@ -66,7 +66,7 @@ def _initialize_paper_info(source_ds):
66
  else:
67
  return [], {}, {}
68
 
69
- def initialize_data(source_data_repo_id, request_data_repo_id, empty_src_dataset):
70
  global date_dict, arxivid2data
71
  global requested_arxiv_ids_df
72
 
 
66
  else:
67
  return [], {}, {}
68
 
69
+ def initialize_data(source_data_repo_id, request_data_repo_id):
70
  global date_dict, arxivid2data
71
  global requested_arxiv_ids_df
72
 
ui.py CHANGED
@@ -166,9 +166,10 @@ def _filter_duplicate_arxiv_ids(arxiv_ids_to_be_added):
166
  arxiv_ids = d['Requested arXiv IDs']
167
  unique_arxiv_ids = set(list(unique_arxiv_ids) + arxiv_ids)
168
 
169
- for d in ds2['train']:
170
- arxiv_id = d['arxiv_id']
171
- unique_arxiv_ids.add(arxiv_id)
 
172
 
173
  return list(set(arxiv_ids_to_be_added) - unique_arxiv_ids)
174
 
 
166
  arxiv_ids = d['Requested arXiv IDs']
167
  unique_arxiv_ids = set(list(unique_arxiv_ids) + arxiv_ids)
168
 
169
+ if len(ds2) > 1:
170
+ for d in ds2['train']:
171
+ arxiv_id = d['arxiv_id']
172
+ unique_arxiv_ids.add(arxiv_id)
173
 
174
  return list(set(arxiv_ids_to_be_added) - unique_arxiv_ids)
175