hirthickraj2015 committed on
Commit
1586c67
·
1 Parent(s): 70dea09

Pin older, mutually compatible versions (sentence-transformers 2.2.2, transformers 4.30.2, torch 2.0.1, huggingface-hub 0.16.4) and fix the dataset loader to work with the older huggingface-hub API

Browse files
Files changed (2) hide show
  1. requirements.txt +7 -7
  2. src/dataset_loader.py +9 -6
requirements.txt CHANGED
@@ -1,9 +1,9 @@
1
- # Core ML/NLP
2
- sentence-transformers==2.7.0
3
  hnswlib==0.8.0
4
- transformers==4.40.2
5
- torch==2.3.0
6
- numpy==1.26.4
7
  scikit-learn==1.5.0
8
  scipy==1.13.0
9
 
@@ -26,8 +26,8 @@ pandas==2.2.2
26
  rdflib==7.0.0
27
  SPARQLWrapper==2.0.0
28
 
29
- # Hugging Face - use older version for compatibility
30
- huggingface-hub==0.23.4
31
 
32
  # Web interface
33
  streamlit==1.36.0
 
1
+ # Core ML/NLP - using older compatible versions
2
+ sentence-transformers==2.2.2
3
  hnswlib==0.8.0
4
+ transformers==4.30.2
5
+ torch==2.0.1
6
+ numpy==1.24.3
7
  scikit-learn==1.5.0
8
  scipy==1.13.0
9
 
 
26
  rdflib==7.0.0
27
  SPARQLWrapper==2.0.0
28
 
29
+ # Hugging Face - pin to version compatible with transformers 4.30.2
30
+ huggingface-hub==0.16.4
31
 
32
  # Web interface
33
  streamlit==1.36.0
src/dataset_loader.py CHANGED
@@ -51,6 +51,7 @@ def ensure_dataset_files(dataset_dir: str = "dataset/wikipedia_ireland") -> bool
51
  print(f"[INFO] Missing {len(missing_files)} files, downloading from HF Datasets...")
52
 
53
  # Download missing files
 
54
  try:
55
  for filename in missing_files:
56
  print(f"[INFO] Downloading {filename}...")
@@ -59,19 +60,21 @@ def ensure_dataset_files(dataset_dir: str = "dataset/wikipedia_ireland") -> bool
59
  downloaded_path = hf_hub_download(
60
  repo_id=DATASET_REPO,
61
  filename=filename,
62
- repo_type="dataset",
63
- local_dir=dataset_dir,
64
- local_dir_use_symlinks=False
65
  )
 
 
 
66
  status.update(label=f"✓ Downloaded {filename}", state="complete")
67
  else:
68
  downloaded_path = hf_hub_download(
69
  repo_id=DATASET_REPO,
70
  filename=filename,
71
- repo_type="dataset",
72
- local_dir=dataset_dir,
73
- local_dir_use_symlinks=False
74
  )
 
 
 
75
  print(f"[SUCCESS] Downloaded {filename}")
76
 
77
  print("[SUCCESS] All dataset files downloaded successfully!")
 
51
  print(f"[INFO] Missing {len(missing_files)} files, downloading from HF Datasets...")
52
 
53
  # Download missing files
54
+ import shutil
55
  try:
56
  for filename in missing_files:
57
  print(f"[INFO] Downloading {filename}...")
 
60
  downloaded_path = hf_hub_download(
61
  repo_id=DATASET_REPO,
62
  filename=filename,
63
+ repo_type="dataset"
 
 
64
  )
65
+ # Copy to target directory
66
+ target_path = dataset_path / filename
67
+ shutil.copy2(downloaded_path, target_path)
68
  status.update(label=f"✓ Downloaded {filename}", state="complete")
69
  else:
70
  downloaded_path = hf_hub_download(
71
  repo_id=DATASET_REPO,
72
  filename=filename,
73
+ repo_type="dataset"
 
 
74
  )
75
+ # Copy to target directory
76
+ target_path = dataset_path / filename
77
+ shutil.copy2(downloaded_path, target_path)
78
  print(f"[SUCCESS] Downloaded {filename}")
79
 
80
  print("[SUCCESS] All dataset files downloaded successfully!")