Update app.py
Browse files
app.py
CHANGED
|
@@ -10,21 +10,23 @@ import os
|
|
| 10 |
LLAMA_INDEX_DATASET_ID = os.getenv("HF_INDEX_DATASET_ID", "alperensn/llamaIndexVectorBase_fda")
|
| 11 |
LLAMA_INDEX_SUBDIR = os.getenv("HF_INDEX_SUBDIR", "").strip() # set this if the index lives in a sub-folder inside the dataset
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
def _persist_path(base_dir: str) -> str:
    """Return the directory that holds the persisted index files.

    ``LLAMA_INDEX_SUBDIR`` is joined onto ``base_dir`` when it is non-empty;
    otherwise ``base_dir`` is returned unchanged.
    """
    if not LLAMA_INDEX_SUBDIR:
        return base_dir
    return os.path.join(base_dir, LLAMA_INDEX_SUBDIR)
|
| 15 |
|
| 16 |
def llama_index_exists(base_dir: str) -> bool:
|
| 17 |
-
"""LlamaIndex'in persist formatındaki belirgin dosyalardan birine bak."""
|
| 18 |
path = _persist_path(base_dir)
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
def download_llama_index_if_needed(base_dir: str):
|
| 23 |
-
"""
|
| 24 |
-
Persist edilmiş LlamaIndex dosyaları yerelde yoksa dataset'ten indir.
|
| 25 |
-
- Public dataset ise token gerekmiyor
|
| 26 |
-
- Private ise HUGGINGFACEHUB_API_TOKEN secret'ı otomatik kullanılır
|
| 27 |
-
"""
|
| 28 |
path = _persist_path(base_dir)
|
| 29 |
os.makedirs(path, exist_ok=True)
|
| 30 |
if llama_index_exists(base_dir):
|
|
@@ -34,10 +36,22 @@ def download_llama_index_if_needed(base_dir: str):
|
|
| 34 |
repo_type="dataset",
|
| 35 |
local_dir=path,
|
| 36 |
local_dir_use_symlinks=False,
|
| 37 |
-
# gerekirse allow_patterns ile kısıtlayabilirsin:
|
| 38 |
-
# allow_patterns=["*.json", "*.bin", "*.pkl", "*.npy"]
|
| 39 |
)
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
# Load environment variables from .env file
|
| 42 |
load_dotenv()
|
| 43 |
|
|
@@ -128,6 +142,9 @@ def main():
|
|
| 128 |
try:
|
| 129 |
status.write(f"Downloading persisted index from: {LLAMA_INDEX_DATASET_ID}")
|
| 130 |
download_llama_index_if_needed(config.LLAMA_INDEX_STORE_PATH)
|
|
|
|
|
|
|
|
|
|
| 131 |
status.update(label="Index downloaded from dataset.", state="complete", expanded=False)
|
| 132 |
time.sleep(1)
|
| 133 |
except Exception as e:
|
|
|
|
| 10 |
LLAMA_INDEX_DATASET_ID = os.getenv("HF_INDEX_DATASET_ID", "alperensn/llamaIndexVectorBase_fda")
|
| 11 |
LLAMA_INDEX_SUBDIR = os.getenv("HF_INDEX_SUBDIR", "").strip() # set this if the index lives in a sub-folder inside the dataset
|
| 12 |
|
| 13 |
+
# Check both the old and the new (default__) file-naming schemes
|
| 14 |
+
# Marker file names checked for the classic naming scheme.
MARKERS_CLASSIC = {
    "index_store.json",
    "docstore.json",
    "graph_store.json",
    "default__vector_store.json",
    "image__vector_store.json",
}

# Marker file names checked for the "default"-prefixed naming scheme.
MARKERS_DEFAULT = {
    "default__index_store.json",
    "default__docstore.json",
    "default__vector_store.json",
    "default_image__vector_store.json",
    "default__graph_store.json",
}
|
| 16 |
+
|
| 17 |
+
|
| 18 |
def _persist_path(base_dir: str) -> str:
    """Return the directory that holds the persisted index files.

    ``LLAMA_INDEX_SUBDIR`` is joined onto ``base_dir`` when it is non-empty;
    otherwise ``base_dir`` is returned unchanged.
    """
    if not LLAMA_INDEX_SUBDIR:
        return base_dir
    return os.path.join(base_dir, LLAMA_INDEX_SUBDIR)
|
| 20 |
|
| 21 |
def llama_index_exists(base_dir: str) -> bool:
    """Report whether a persisted index is already present locally.

    The persist directory derived from ``base_dir`` (via ``_persist_path``)
    must exist and must contain every file name of at least one marker set,
    ``MARKERS_CLASSIC`` or ``MARKERS_DEFAULT``.

    Returns:
        True when one complete marker set is found, False otherwise
        (including when the persist directory does not exist).
    """
    persist_dir = _persist_path(base_dir)
    if os.path.isdir(persist_dir):
        present = set(os.listdir(persist_dir))
        # A match requires ALL names of a marker set, not just one of them.
        return any(
            markers.issubset(present)
            for markers in (MARKERS_CLASSIC, MARKERS_DEFAULT)
        )
    return False
|
| 27 |
+
|
| 28 |
|
| 29 |
def download_llama_index_if_needed(base_dir: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
path = _persist_path(base_dir)
|
| 31 |
os.makedirs(path, exist_ok=True)
|
| 32 |
if llama_index_exists(base_dir):
|
|
|
|
| 36 |
repo_type="dataset",
|
| 37 |
local_dir=path,
|
| 38 |
local_dir_use_symlinks=False,
|
|
|
|
|
|
|
| 39 |
)
|
| 40 |
+
|
| 41 |
+
# In case the downloaded LlamaIndex persist folder has to be located in a sub-directory:
|
| 42 |
+
def find_llama_index_dir(base_dir: str, *, marker_sets=None) -> str:
    """Locate the directory under ``base_dir`` that holds a persisted index.

    ``base_dir`` and all of its sub-directories are scanned top-down. The
    first directory whose files include every name of at least one marker
    set is returned.

    Args:
        base_dir: Root directory to search.
        marker_sets: Optional iterable of collections of file names; a
            directory matches when it contains all names of any one
            collection. Defaults to ``(MARKERS_CLASSIC, MARKERS_DEFAULT)``.

    Returns:
        The path of the first matching directory, or ``base_dir`` unchanged
        when nothing matches (including when ``base_dir`` does not exist).
    """
    if marker_sets is None:
        marker_sets = (MARKERS_CLASSIC, MARKERS_DEFAULT)
    # Materialize once so generators and non-set iterables are safe to reuse
    # for every directory visited.
    wanted_sets = [set(markers) for markers in marker_sets]

    # os.walk yields base_dir itself first and yields nothing at all when
    # base_dir is not a directory, so no separate top-level check is needed.
    for root, dirs, files in os.walk(base_dir):
        # Sorting in place fixes the traversal order, so the same directory
        # is returned on every run when several candidates match.
        dirs.sort()
        present = set(files)
        if any(wanted.issubset(present) for wanted in wanted_sets):
            return root
    return base_dir
|
| 53 |
+
|
| 54 |
+
|
| 55 |
# Load environment variables from .env file
|
| 56 |
load_dotenv()
|
| 57 |
|
|
|
|
| 142 |
try:
|
| 143 |
status.write(f"Downloading persisted index from: {LLAMA_INDEX_DATASET_ID}")
|
| 144 |
download_llama_index_if_needed(config.LLAMA_INDEX_STORE_PATH)
|
| 145 |
+
detected_dir = find_llama_index_dir(config.LLAMA_INDEX_STORE_PATH)
|
| 146 |
+
if detected_dir != config.LLAMA_INDEX_STORE_PATH:
|
| 147 |
+
config.LLAMA_INDEX_STORE_PATH = detected_dir
|
| 148 |
status.update(label="Index downloaded from dataset.", state="complete", expanded=False)
|
| 149 |
time.sleep(1)
|
| 150 |
except Exception as e:
|