Update agents/research_agent.py
agents/research_agent.py (+13 -8)
@@ -9,16 +9,21 @@ class ResearchAgent:
         """
         Initialize the research agent with local Ollama LLM.
         """
-        print("Initializing ResearchAgent with Hugging Face Transformers...")
-        model_name = getattr(settings, "HF_MODEL_RESEARCH", "google/flan-t5-large")
-
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16)
 
-
-        self.model.to(self.device)
+        print("Initializing RelevanceChecker with lightweight Hugging Face model...")
 
-
+        # Use a smaller, CPU-friendly model by default
+        model_name = getattr(settings, "HF_MODEL_RELEVANCE", "google/flan-t5-small")
+
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        # Use float32 on CPU (fp16 only works on GPU)
+        torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch_dtype).to(self.device)
+
+        print(f"Model '{model_name}' loaded on {self.device} with dtype={torch_dtype}.")
 
 
     def sanitize_response(self, response_text: str) -> str:
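For context, the device- and dtype-aware loading pattern this commit introduces can be exercised end to end roughly as below. This is a minimal sketch using the standard transformers and torch APIs; the check_relevance() helper, its prompt wording, and the sample call are illustrative assumptions, not code from the repository.

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Same selection logic as the commit: fp16 only pays off on GPU,
# so CPU-only hosts fall back to fp32.
model_name = "google/flan-t5-small"
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch_dtype).to(device)

# Hypothetical helper (not from the repo): a yes/no relevance probe
# run through the seq2seq model loaded above.
def check_relevance(question: str, passage: str) -> str:
    prompt = (
        "Is the passage relevant to the question? Answer yes or no.\n"
        f"Question: {question}\nPassage: {passage}"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=5)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

print(check_relevance("What year did Apollo 11 land?",
                      "Apollo 11 landed on the Moon in 1969."))

Deciding the dtype next to the device check, as the new code does, avoids the failure mode in the removed lines: loading unconditionally in float16 and then moving the model with .to(self.device), which misbehaves on CPU-only hosts where float16 kernels are poorly supported.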