m97j committed on
Commit
6aaa57e
·
1 Parent(s): 3c9754e

Initial codes commit

Browse files
Files changed (2) hide show
  1. db/initializer.py +5 -0
  2. modules/corpus.py +1 -1
db/initializer.py CHANGED
@@ -26,6 +26,11 @@ def initialize_dbs():
26
  corpus.prepare_corpus()
27
  # 2) 인덱스/ID 매핑 메모리 로드
28
  _load_index_in_memory()
 
 
 
 
 
29
 
30
  def force_update():
31
  _load_index_in_memory()
 
26
  corpus.prepare_corpus()
27
  # 2) 인덱스/ID 매핑 메모리 로드
28
  _load_index_in_memory()
29
+ # 3) Arrow 캐시 생성
30
+ datasets = corpus._get_datasets()
31
+ for _subset, ds in datasets.items():
32
+ # dummy 호출로 캐시 생성
33
+ _ = ds.filter(lambda r: False)
34
 
35
  def force_update():
36
  _load_index_in_memory()
modules/corpus.py CHANGED
@@ -1,4 +1,4 @@
1
- # rag/modules/corpus_store.py
2
  from typing import List, Dict, Any
3
  from datasets import load_dataset, DatasetDict, Dataset
4
  from config import HF_CORPUS_REPO, HF_CORPUS_SUBSET, HF_CORPUS_SPLIT, MARKER_DIR, CORPUS_READY_MARK
 
1
+ # rag/modules/corpus.py
2
  from typing import List, Dict, Any
3
  from datasets import load_dataset, DatasetDict, Dataset
4
  from config import HF_CORPUS_REPO, HF_CORPUS_SUBSET, HF_CORPUS_SPLIT, MARKER_DIR, CORPUS_READY_MARK