changed class interface with iterator
Files changed:
- backend.py (+3 -3)
- interface.py (+11 -6)
backend.py CHANGED

@@ -20,7 +20,7 @@ login(huggingface_token)
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-model_id = "google/gemma-2-2b-it"
+"""model_id = "google/gemma-2-2b-it"
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
@@ -28,12 +28,12 @@ model = AutoModelForCausalLM.from_pretrained(
     token=True)
 
 model.tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
-model.eval()
+model.eval()"""
 
 # what models will be used by LlamaIndex:
 Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
 
-Settings.llm = GemmaLLMInterface(
+Settings.llm = GemmaLLMInterface()
 #Settings.llm = GemmaLLMInterface(model_name=model_id)
 
 ############################---------------------------------
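The net effect in backend.py: the module-level Gemma load is commented out and Settings.llm is now constructed with no arguments, so the model is loaded inside GemmaLLMInterface.__init__ instead. For context, a minimal sketch of how LlamaIndex picks these globals up downstream; the data path and query string are hypothetical, not taken from this repo:

from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex

# Any index or query engine built without an explicit llm/embed_model
# falls back to the globals set in backend.py above.
documents = SimpleDirectoryReader("./data").load_data()  # "./data" is an assumed path
index = VectorStoreIndex.from_documents(documents)       # embeds via Settings.embed_model
query_engine = index.as_query_engine()                   # generates via Settings.llm
print(query_engine.query("What is this document about?"))  # hypothetical query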
interface.py CHANGED

@@ -9,11 +9,17 @@ from pydantic import Field, field_validator
 
 # for transformers 2
 class GemmaLLMInterface(CustomLLM):
-
-
-
-
-
+    def __init__(self, model_name: str = "google/gemma-2b-it", **kwargs):
+        super().__init__(**kwargs)
+        self.model_name = model_name
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model_name,
+            device_map="auto",
+            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+        self.context_window = 8192
+        self.num_output = 2048
 
     def _format_prompt(self, message: str) -> str:
         return (
@@ -23,7 +29,6 @@ class GemmaLLMInterface(CustomLLM):
 
     @property
     def metadata(self) -> LLMMetadata:
-        """Get LLM metadata."""
         return LLMMetadata(
             context_window=self.context_window,
             num_output=self.num_output,
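Two things are worth noting about the new __init__. First, CustomLLM is a Pydantic model, so attributes assigned in __init__ (model, tokenizer, context_window, num_output) generally need matching field declarations to be accepted. Second, the "iterator" in the commit title refers to the streaming half of the CustomLLM contract: stream_complete must return a generator of CompletionResponse chunks. A hedged sketch of how the rest of the class plausibly fits together; the field declarations, generation parameters, and chunking strategy below are assumptions, not taken from this diff:

from typing import Any
from llama_index.core.llms import (
    CustomLLM,
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.core.llms.callbacks import llm_completion_callback

class GemmaLLMInterface(CustomLLM):
    # Assumed field declarations: CustomLLM is a Pydantic model, so the
    # attributes assigned in the __init__ from the diff must be declared.
    # model/tokenizer are populated by that __init__ at construction time.
    model: Any = None
    tokenizer: Any = None
    model_name: str = "google/gemma-2b-it"
    context_window: int = 8192
    num_output: int = 2048

    @property
    def metadata(self) -> LLMMetadata:
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        # Generation parameters here are illustrative, not from the diff.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        output = self.model.generate(**inputs, max_new_tokens=self.num_output)
        new_tokens = output[0][inputs["input_ids"].shape[1]:]
        text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
        return CompletionResponse(text=text)

    @llm_completion_callback()
    def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
        # The iterator half of the contract: yield incremental chunks.
        # Naive word-level chunking of the full completion, for illustration only.
        text = self.complete(prompt, **kwargs).text
        running = ""
        for word in text.split(" "):
            running += word + " "
            yield CompletionResponse(text=running, delta=word + " ")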