Commit 1586c67
Parent(s): 70dea09
Use older compatible versions: sentence-transformers 2.2.2, transformers 4.30.2, torch 2.0.1, huggingface-hub 0.16.4 - fix dataset loader for older API
Files changed:
- requirements.txt +7 -7
- src/dataset_loader.py +9 -6
requirements.txt CHANGED

@@ -1,9 +1,9 @@
-# Core ML/NLP
-sentence-transformers==2.
+# Core ML/NLP - using older compatible versions
+sentence-transformers==2.2.2
 hnswlib==0.8.0
-transformers==4.
-torch==2.
-numpy==1.
+transformers==4.30.2
+torch==2.0.1
+numpy==1.24.3
 scikit-learn==1.5.0
 scipy==1.13.0
 
@@ -26,8 +26,8 @@ pandas==2.2.2
 rdflib==7.0.0
 SPARQLWrapper==2.0.0
 
-# Hugging Face -
-huggingface-hub==0.
+# Hugging Face - pin to version compatible with transformers 4.30.2
+huggingface-hub==0.16.4
 
 # Web interface
 streamlit==1.36.0
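Why these pins travel together: sentence-transformers 2.2.2 still imports helpers (notably cached_download) that newer huggingface-hub releases removed, so the hub pin has to stay old alongside it. A quick sanity check one could run after pip install -r requirements.txt — this script is illustrative, not part of the commit:

# sanity_check.py - illustrative only, not from this repo
import importlib.metadata as md

PINS = {
    "sentence-transformers": "2.2.2",
    "transformers": "4.30.2",
    "torch": "2.0.1",
    "huggingface-hub": "0.16.4",
}

# Confirm the installed versions match the pins in requirements.txt.
for package, expected in PINS.items():
    installed = md.version(package)
    assert installed == expected, f"{package}: got {installed}, want {expected}"

# Importing sentence-transformers exercises the huggingface-hub surface it
# depends on; on an incompatible hub version this import is what fails.
from sentence_transformers import SentenceTransformer  # noqa: F401

print("Pinned stack resolves and imports cleanly.")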
src/dataset_loader.py CHANGED

@@ -51,6 +51,7 @@ def ensure_dataset_files(dataset_dir: str = "dataset/wikipedia_ireland") -> bool
     print(f"[INFO] Missing {len(missing_files)} files, downloading from HF Datasets...")
 
     # Download missing files
+    import shutil
     try:
         for filename in missing_files:
             print(f"[INFO] Downloading {filename}...")
@@ -59,19 +60,21 @@ def ensure_dataset_files(dataset_dir: str = "dataset/wikipedia_ireland") -> bool
                 downloaded_path = hf_hub_download(
                     repo_id=DATASET_REPO,
                     filename=filename,
-                    repo_type="dataset",
-                    local_dir=dataset_dir,
-                    local_dir_use_symlinks=False
+                    repo_type="dataset"
                 )
+                # Move to target directory
+                target_path = dataset_path / filename
+                shutil.copy2(downloaded_path, target_path)
                 status.update(label=f"✓ Downloaded {filename}", state="complete")
             else:
                 downloaded_path = hf_hub_download(
                     repo_id=DATASET_REPO,
                     filename=filename,
-                    repo_type="dataset",
-                    local_dir=dataset_dir,
-                    local_dir_use_symlinks=False
+                    repo_type="dataset"
                 )
+                # Move to target directory
+                target_path = dataset_path / filename
+                shutil.copy2(downloaded_path, target_path)
                 print(f"[SUCCESS] Downloaded {filename}")
 
     print("[SUCCESS] All dataset files downloaded successfully!")
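The loader change, distilled: instead of passing local_dir arguments, it lets hf_hub_download place the file in the shared Hugging Face cache and then copies it to where the app expects it. A self-contained sketch of that pattern — fetch_to_dir is a hypothetical helper name, not code from the repo:

# Sketch of the download-then-copy pattern this commit adopts.
import shutil
from pathlib import Path

from huggingface_hub import hf_hub_download


def fetch_to_dir(repo_id: str, filename: str, target_dir: str) -> Path:
    """Download one dataset file into target_dir via the HF cache.

    hf_hub_download returns a path inside the shared cache; copying it
    out avoids the local_dir/local_dir_use_symlinks keyword arguments
    the commit removes.
    """
    cached_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type="dataset",
    )
    destination = Path(target_dir) / filename
    # Create parents in case filename includes a subdirectory.
    destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(cached_path, destination)  # copy2 also preserves metadata
    return destination

One trade-off of this approach: each file now exists twice on disk (cache plus target directory), which is usually acceptable in a Space but worth knowing for large datasets.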