alperensn committed on
Commit
59992e2
·
verified ·
1 Parent(s): e375b3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -11
app.py CHANGED
@@ -10,21 +10,23 @@ import os
10
  LLAMA_INDEX_DATASET_ID = os.getenv("HF_INDEX_DATASET_ID", "alperensn/llamaIndexVectorBase_fda")
11
  LLAMA_INDEX_SUBDIR = os.getenv("HF_INDEX_SUBDIR", "").strip() # dataset içinde alt klasör kullanıyorsan burada belirt
12
 
 
 
 
 
 
13
  def _persist_path(base_dir: str) -> str:
14
  return os.path.join(base_dir, LLAMA_INDEX_SUBDIR) if LLAMA_INDEX_SUBDIR else base_dir
15
 
16
  def llama_index_exists(base_dir: str) -> bool:
17
- """LlamaIndex'in persist formatındaki belirgin dosyalardan birine bak."""
18
  path = _persist_path(base_dir)
19
- markers = ["index_store.json", "docstore.json", "vector_store.json"]
20
- return any(os.path.exists(os.path.join(path, m)) for m in markers)
 
 
 
21
 
22
  def download_llama_index_if_needed(base_dir: str):
23
- """
24
- Persist edilmiş LlamaIndex dosyaları yerelde yoksa dataset'ten indir.
25
- - Public dataset ise token gerekmiyor
26
- - Private ise HUGGINGFACEHUB_API_TOKEN secret'ı otomatik kullanılır
27
- """
28
  path = _persist_path(base_dir)
29
  os.makedirs(path, exist_ok=True)
30
  if llama_index_exists(base_dir):
@@ -34,10 +36,22 @@ def download_llama_index_if_needed(base_dir: str):
34
  repo_type="dataset",
35
  local_dir=path,
36
  local_dir_use_symlinks=False,
37
- # gerekirse allow_patterns ile kısıtlayabilirsin:
38
- # allow_patterns=["*.json", "*.bin", "*.pkl", "*.npy"]
39
  )
40
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  # Load environment variables from .env file
42
  load_dotenv()
43
 
@@ -128,6 +142,9 @@ def main():
128
  try:
129
  status.write(f"Downloading persisted index from: {LLAMA_INDEX_DATASET_ID}")
130
  download_llama_index_if_needed(config.LLAMA_INDEX_STORE_PATH)
 
 
 
131
  status.update(label="Index downloaded from dataset.", state="complete", expanded=False)
132
  time.sleep(1)
133
  except Exception as e:
 
# Hugging Face dataset repo that holds the persisted index; override with
# the HF_INDEX_DATASET_ID environment variable.
LLAMA_INDEX_DATASET_ID = os.getenv("HF_INDEX_DATASET_ID", "alperensn/llamaIndexVectorBase_fda")
# If the index lives in a subfolder inside the dataset, name it via
# HF_INDEX_SUBDIR; surrounding whitespace is stripped.
LLAMA_INDEX_SUBDIR = os.getenv("HF_INDEX_SUBDIR", "").strip()

# Check the old and the new ("default__"-prefixed) file namings together.
# A directory counts as a persisted index only when it contains EVERY file
# of at least one of these two sets.
MARKERS_CLASSIC = {
    "index_store.json",
    "docstore.json",
    "graph_store.json",
    "default__vector_store.json",
    "image__vector_store.json",
}
# NOTE(review): "default_image__vector_store.json" has a single underscore
# after "default", unlike its siblings -- confirm this is the intended name.
MARKERS_DEFAULT = {
    "default__index_store.json",
    "default__docstore.json",
    "default__vector_store.json",
    "default_image__vector_store.json",
    "default__graph_store.json",
}
16
+
17
+
def _persist_path(base_dir: str) -> str:
    """Return the directory where the persisted index files are expected.

    When LLAMA_INDEX_SUBDIR is non-empty it is joined onto *base_dir*;
    otherwise *base_dir* is returned unchanged.
    """
    if not LLAMA_INDEX_SUBDIR:
        return base_dir
    return os.path.join(base_dir, LLAMA_INDEX_SUBDIR)
20
 
def llama_index_exists(base_dir: str) -> bool:
    """Report whether a complete persisted index is already on disk.

    The persist directory (see ``_persist_path``) must exist and must
    contain every file name of MARKERS_CLASSIC or every file name of
    MARKERS_DEFAULT. A missing directory, or one holding only part of a
    marker set, yields ``False``.
    """
    persist_dir = _persist_path(base_dir)
    if os.path.isdir(persist_dir):
        present = set(os.listdir(persist_dir))
        # One fully satisfied marker set is enough.
        return any(
            required <= present
            for required in (MARKERS_CLASSIC, MARKERS_DEFAULT)
        )
    return False
27
+
28
 
29
  def download_llama_index_if_needed(base_dir: str):
 
 
 
 
 
30
  path = _persist_path(base_dir)
31
  os.makedirs(path, exist_ok=True)
32
  if llama_index_exists(base_dir):
 
36
  repo_type="dataset",
37
  local_dir=path,
38
  local_dir_use_symlinks=False,
 
 
39
  )
40
+
def find_llama_index_dir(base_dir: str) -> str:
    """Locate the directory at or below *base_dir* holding a persisted index.

    Useful when the downloaded persist folder ended up in a subdirectory of
    the download target. A directory qualifies when its regular files
    include every name of MARKERS_CLASSIC or every name of MARKERS_DEFAULT.

    Args:
        base_dir: Root directory to search.

    Returns:
        The first qualifying directory met during a top-down walk
        (*base_dir* itself is examined first), or *base_dir* unchanged when
        nothing qualifies or *base_dir* is not a directory.
    """
    wanted_sets = (MARKERS_CLASSIC, MARKERS_DEFAULT)
    # os.walk yields base_dir itself first, so no separate top-level check
    # is needed; it yields nothing at all when base_dir is not a directory.
    for root, dirnames, filenames in os.walk(base_dir):
        # Sorting in place fixes the order in which siblings are visited,
        # so the result is reproducible when several directories qualify.
        dirnames.sort()
        present = set(filenames)
        if any(wanted.issubset(present) for wanted in wanted_sets):
            return root
    return base_dir
53
+
54
+
55
  # Load environment variables from .env file
56
  load_dotenv()
57
 
 
142
  try:
143
  status.write(f"Downloading persisted index from: {LLAMA_INDEX_DATASET_ID}")
144
  download_llama_index_if_needed(config.LLAMA_INDEX_STORE_PATH)
145
+ detected_dir = find_llama_index_dir(config.LLAMA_INDEX_STORE_PATH)
146
+ if detected_dir != config.LLAMA_INDEX_STORE_PATH:
147
+ config.LLAMA_INDEX_STORE_PATH = detected_dir
148
  status.update(label="Index downloaded from dataset.", state="complete", expanded=False)
149
  time.sleep(1)
150
  except Exception as e: