Spaces:

helal94hb1
/

backend_chatbot

Sleeping

App Files Community

helal94hb1 committed on Aug 15

Commit

757abfc

1 Parent(s): 9d8e329

feat: Update application with new changes

Browse files

Files changed (2) hide show

app/services/reranker_service.py +22 -9
app/services/retrieval.py +24 -12

app/services/reranker_service.py CHANGED Viewed

@@ -14,37 +14,50 @@ logger = logging.getLogger(__name__)
 def load_reranker_model():
     """
-    Loads the custom-trained ExpertJudgeCrossEncoder model and its tokenizer
-    into the state object.
     """
     if state.reranker_model_loaded:
         logger.info("Re-ranker model already loaded in state.")
         return True
     model_path = settings.RERANKER_MODEL_PATH
     base_model_name = settings.RERANKER_MODEL_NAME
     logger.info(f"Loading custom ExpertJudgeCrossEncoder from: {model_path}")
     logger.info(f"Using base model architecture: {base_model_name}")
     try:
         # 1. Instantiate the model structure
         model = ExpertJudgeCrossEncoder(model_name=base_model_name)
         # 2. Load the saved weights (the state_dict) into the model structure
         model.load_state_dict(torch.load(model_path, map_location=state.device))
         # 3. Set up the model for inference
         model.to(state.device)
         model.eval()
         # 4. Load the corresponding tokenizer
         tokenizer = get_tokenizer(model_name=base_model_name)
         # 5. Store both in the state
         state.reranker_model = model
         state.reranker_tokenizer = tokenizer
         state.reranker_model_loaded = True
         logger.info("Custom ExpertJudgeCrossEncoder model and tokenizer loaded successfully.")
         return True
     except Exception as e:

 def load_reranker_model():
     """
+    Loads the custom-trained ExpertJudgeCrossEncoder model. If running on a
+    new server, it first downloads the model from S3.
     """
     if state.reranker_model_loaded:
         logger.info("Re-ranker model already loaded in state.")
         return True
+    # --- ADDED: Download from S3 if file doesn't exist ---
     model_path = settings.RERANKER_MODEL_PATH
+    if not os.path.exists(model_path) and settings.S3_RERANKER_URL:
+        logger.info(f"Re-ranker model not found at {model_path}. Downloading from S3...")
+        try:
+            # Create the 'data' directory if it doesn't exist
+            os.makedirs(os.path.dirname(model_path), exist_ok=True)
+            with requests.get(settings.S3_RERANKER_URL, stream=True) as r:
+                r.raise_for_status()
+                with open(model_path, 'wb') as f:
+                    for chunk in r.iter_content(chunk_size=8192):
+                        f.write(chunk)
+            logger.info("Successfully downloaded re-ranker model from S3.")
+        except Exception as e:
+            logger.exception(f"FATAL: Failed to download re-ranker model from S3: {e}")
+            return False
+    # --- END OF ADDITION ---
     base_model_name = settings.RERANKER_MODEL_NAME
     logger.info(f"Loading custom ExpertJudgeCrossEncoder from: {model_path}")
     logger.info(f"Using base model architecture: {base_model_name}")
     try:
         # 1. Instantiate the model structure
         model = ExpertJudgeCrossEncoder(model_name=base_model_name)
         # 2. Load the saved weights (the state_dict) into the model structure
         model.load_state_dict(torch.load(model_path, map_location=state.device))
         # 3. Set up the model for inference
         model.to(state.device)
         model.eval()
         # 4. Load the corresponding tokenizer
         tokenizer = get_tokenizer(model_name=base_model_name)
         # 5. Store both in the state
         state.reranker_model = model
         state.reranker_tokenizer = tokenizer
         state.reranker_model_loaded = True
         logger.info("Custom ExpertJudgeCrossEncoder model and tokenizer loaded successfully.")
         return True
     except Exception as e:

app/services/retrieval.py CHANGED Viewed

@@ -17,42 +17,55 @@ logger = logging.getLogger(__name__)
 def load_retrieval_artifacts():
     """
-    Loads all necessary artifacts for retrieval from the pre-computed NPZ file.
-    This includes the query encoder, pre-transformed chunk embeddings, and the
-    query transformation matrix (Wq).
     """
     if state.artifacts_loaded:
         logger.info("Retrieval artifacts already loaded in state.")
         return True
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     logger.info(f"Using device for retrieval: {device}")
     state.device = device
     # 1. Load the pre-computed artifacts file
-    artifacts_path = settings.RETRIEVAL_ARTIFACTS_PATH
     logger.info(f"Loading retrieval artifacts from {artifacts_path}...")
     try:
         if not os.path.exists(artifacts_path):
             logger.error(f"FATAL: Artifacts file not found at {artifacts_path}")
             return False
         artifacts = np.load(artifacts_path, allow_pickle=True)
         # Load into state
         state.transformed_chunk_embeddings = artifacts['transformed_chunk_embeddings']
         state.chunk_ids_in_order = artifacts['chunk_ids']
         state.wq_weights = torch.from_numpy(artifacts['wq_weights']).to(device)
         state.temperature = artifacts['temperature'][0] # Extract scalar from array
         logger.info(f"Successfully loaded {len(state.chunk_ids_in_order)} transformed embeddings.")
         logger.info(f"Loaded Wq matrix of shape: {state.wq_weights.shape}")
         logger.info(f"Loaded temperature value: {state.temperature:.4f}")
     except Exception as e:
         logger.exception(f"Failed to load and process retrieval artifacts: {e}")
         return False
     # 2. Load the Sentence Transformer model for encoding queries
     logger.info(f"Loading Sentence Transformer model: {settings.QUERY_ENCODER_MODEL_NAME}...")
     try:
@@ -63,7 +76,6 @@ def load_retrieval_artifacts():
     except Exception as e:
         logger.exception(f"Failed to load Sentence Transformer model: {e}")
         return False
     state.artifacts_loaded = True
     return True

 def load_retrieval_artifacts():
     """
+    Loads all necessary artifacts for retrieval. If running on a new server,
+    it first downloads the artifacts from S3.
     """
     if state.artifacts_loaded:
         logger.info("Retrieval artifacts already loaded in state.")
         return True
+    # --- ADDED: Download from S3 if file doesn't exist ---
+    artifacts_path = settings.RETRIEVAL_ARTIFACTS_PATH
+    if not os.path.exists(artifacts_path) and settings.S3_ARTIFACTS_URL:
+        logger.info(f"Artifacts file not found at {artifacts_path}. Downloading from S3...")
+        try:
+            # Create the 'data' directory if it doesn't exist
+            os.makedirs(os.path.dirname(artifacts_path), exist_ok=True)
+            with requests.get(settings.S3_ARTIFACTS_URL, stream=True) as r:
+                r.raise_for_status()
+                with open(artifacts_path, 'wb') as f:
+                    for chunk in r.iter_content(chunk_size=8192):
+                        f.write(chunk)
+            logger.info("Successfully downloaded artifacts from S3.")
+        except Exception as e:
+            logger.exception(f"FATAL: Failed to download artifacts from S3: {e}")
+            return False
+    # --- END OF ADDITION ---
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     logger.info(f"Using device for retrieval: {device}")
     state.device = device
     # 1. Load the pre-computed artifacts file
     logger.info(f"Loading retrieval artifacts from {artifacts_path}...")
     try:
         if not os.path.exists(artifacts_path):
             logger.error(f"FATAL: Artifacts file not found at {artifacts_path}")
             return False
         artifacts = np.load(artifacts_path, allow_pickle=True)
         # Load into state
         state.transformed_chunk_embeddings = artifacts['transformed_chunk_embeddings']
         state.chunk_ids_in_order = artifacts['chunk_ids']
         state.wq_weights = torch.from_numpy(artifacts['wq_weights']).to(device)
         state.temperature = artifacts['temperature'][0] # Extract scalar from array
         logger.info(f"Successfully loaded {len(state.chunk_ids_in_order)} transformed embeddings.")
         logger.info(f"Loaded Wq matrix of shape: {state.wq_weights.shape}")
         logger.info(f"Loaded temperature value: {state.temperature:.4f}")
     except Exception as e:
         logger.exception(f"Failed to load and process retrieval artifacts: {e}")
         return False
     # 2. Load the Sentence Transformer model for encoding queries
     logger.info(f"Loading Sentence Transformer model: {settings.QUERY_ENCODER_MODEL_NAME}...")
     try:
     except Exception as e:
         logger.exception(f"Failed to load Sentence Transformer model: {e}")
         return False
     state.artifacts_loaded = True
     return True