Commit 1586c67
Parent(s): 70dea09
Use older compatible versions: sentence-transformers 2.2.2, transformers 4.30.2, torch 2.0.1, huggingface-hub 0.16.4 - fix dataset loader for older API
Files changed:
- requirements.txt +7 -7
- src/dataset_loader.py +9 -6
requirements.txt CHANGED

@@ -1,9 +1,9 @@
-# Core ML/NLP
-sentence-transformers==2.
+# Core ML/NLP - using older compatible versions
+sentence-transformers==2.2.2
 hnswlib==0.8.0
-transformers==4.
-torch==2.
-numpy==1.
+transformers==4.30.2
+torch==2.0.1
+numpy==1.24.3
 scikit-learn==1.5.0
 scipy==1.13.0
 
@@ -26,8 +26,8 @@ pandas==2.2.2
 rdflib==7.0.0
 SPARQLWrapper==2.0.0
 
-# Hugging Face -
-huggingface-hub==0.
+# Hugging Face - pin to version compatible with transformers 4.30.2
+huggingface-hub==0.16.4
 
 # Web interface
 streamlit==1.36.0
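Why these pins travel together: sentence-transformers 2.2.2 still imports helpers (notably cached_download) that newer huggingface-hub releases removed, so the hub pin has to stay old alongside it. A quick sanity check one could run after pip install -r requirements.txt — this script is illustrative, not part of the commit:

# sanity_check.py - illustrative only, not from this repo
import importlib.metadata as md

PINS = {
    "sentence-transformers": "2.2.2",
    "transformers": "4.30.2",
    "torch": "2.0.1",
    "huggingface-hub": "0.16.4",
}

# Confirm the installed versions match the pins in requirements.txt.
for package, expected in PINS.items():
    installed = md.version(package)
    assert installed == expected, f"{package}: got {installed}, want {expected}"

# Importing sentence-transformers exercises the huggingface-hub surface it
# depends on; on an incompatible hub version this import is what fails.
from sentence_transformers import SentenceTransformer  # noqa: F401

print("Pinned stack resolves and imports cleanly.")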
src/dataset_loader.py CHANGED

@@ -51,6 +51,7 @@ def ensure_dataset_files(dataset_dir: str = "dataset/wikipedia_ireland") -> bool
     print(f"[INFO] Missing {len(missing_files)} files, downloading from HF Datasets...")
 
     # Download missing files
+    import shutil
     try:
         for filename in missing_files:
             print(f"[INFO] Downloading {filename}...")
@@ -59,19 +60,21 @@ def ensure_dataset_files(dataset_dir: str = "dataset/wikipedia_ireland") -> bool
                 downloaded_path = hf_hub_download(
                     repo_id=DATASET_REPO,
                     filename=filename,
-                    repo_type="dataset",
-                    local_dir=dataset_dir,
-                    local_dir_use_symlinks=False
+                    repo_type="dataset"
                 )
+                # Move to target directory
+                target_path = dataset_path / filename
+                shutil.copy2(downloaded_path, target_path)
                 status.update(label=f"✓ Downloaded {filename}", state="complete")
             else:
                 downloaded_path = hf_hub_download(
                     repo_id=DATASET_REPO,
                     filename=filename,
-                    repo_type="dataset",
-                    local_dir=dataset_dir,
-                    local_dir_use_symlinks=False
+                    repo_type="dataset"
                 )
+                # Move to target directory
+                target_path = dataset_path / filename
+                shutil.copy2(downloaded_path, target_path)
                 print(f"[SUCCESS] Downloaded {filename}")
 
     print("[SUCCESS] All dataset files downloaded successfully!")
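The loader change, distilled: instead of passing local_dir arguments, it lets hf_hub_download place the file in the shared Hugging Face cache and then copies it to where the app expects it. A self-contained sketch of that pattern — fetch_to_dir is a hypothetical helper name, not code from the repo:

# Sketch of the download-then-copy pattern this commit adopts.
import shutil
from pathlib import Path

from huggingface_hub import hf_hub_download


def fetch_to_dir(repo_id: str, filename: str, target_dir: str) -> Path:
    """Download one dataset file into target_dir via the HF cache.

    hf_hub_download returns a path inside the shared cache; copying it
    out avoids the local_dir/local_dir_use_symlinks keyword arguments
    the commit removes.
    """
    cached_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type="dataset",
    )
    destination = Path(target_dir) / filename
    # Create parents in case filename includes a subdirectory.
    destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(cached_path, destination)  # copy2 also preserves metadata
    return destination

One trade-off of this approach: each file now exists twice on disk (cache plus target directory), which is usually acceptable in a Space but worth knowing for large datasets.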