Sami Ali committed on
Commit
c48531b
·
1 Parent(s): b098829

fix dir issue

Browse files
Files changed (2) hide show
  1. src/download_data.py +6 -3
  2. src/embedding.py +1 -1
src/download_data.py CHANGED
@@ -17,11 +17,14 @@ def download_pmc_docs(
17
  target_dir=TARGET_DIR,
18
  limit=1000
19
  ):
20
- if (len(os.listdir(target_dir)) > 0):
21
- return
22
-
23
  os.makedirs(target_dir, exist_ok=True)
24
 
 
 
 
 
 
25
  s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED))
26
  paginator = s3.get_paginator('list_objects_v2')
27
 
 
17
  target_dir=TARGET_DIR,
18
  limit=1000
19
  ):
20
+
 
 
21
  os.makedirs(target_dir, exist_ok=True)
22
 
23
+ existing_files = os.listdir(target_dir)
24
+ if len(existing_files) >= limit:
25
+ print(f"✅ Found {len(existing_files)} existing files. Skipping download.")
26
+ return True
27
+
28
  s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED))
29
  paginator = s3.get_paginator('list_objects_v2')
30
 
src/embedding.py CHANGED
@@ -14,7 +14,7 @@ class EmbeddingManager:
14
  def load_model(self):
15
  print("Loading embedding model:", self.model_name)
16
  print('Using device', self.device)
17
- self.model = SentenceTransformer(model_name=self.model_name, device=self.device)
18
  print("Model loaded.")
19
 
20
  def get_model(self):
 
14
  def load_model(self):
15
  print("Loading embedding model:", self.model_name)
16
  print('Using device', self.device)
17
+ self.model = SentenceTransformer(model_name_or_path=self.model_name, device=self.device)
18
  print("Model loaded.")
19
 
20
  def get_model(self):