Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -8,6 +8,7 @@ from huggingface_hub import login
8
  from transformers import (
9
  AutoTokenizer,
10
  AutoModelForCausalLM,
 
11
  )
12
  import os
13
 
@@ -28,24 +29,27 @@ class BasicAgent:
28
  if token is None:
29
  print("Hugging Face API token not found in environment variables.")
30
 
31
- model_name = "gpt2" # or "gpt2-medium", "gpt2-large"
32
 
33
  # Load tokenizer and inject a minimal chat_template
34
  tokenizer = AutoTokenizer.from_pretrained(model_name)
35
  if not getattr(tokenizer, "chat_template", None):
36
  tokenizer.chat_template = (
37
- "<|im_start|>system\n"
38
- "{system}\n"
39
- "<|im_end|>\n"
40
- "<|im_start|>user\n"
41
- "{user}\n"
42
- "<|im_end|>\n"
43
- "<|im_start|>assistant\n"
44
  )
45
-
 
 
 
 
 
46
  # Load the GPT-2 model (FP16 if you like, but default is fine)
47
  model = AutoModelForCausalLM.from_pretrained(
48
  model_name,
 
49
  device_map="auto"
50
  )
51
 
 
8
  from transformers import (
9
  AutoTokenizer,
10
  AutoModelForCausalLM,
11
+ BitsAndBytesConfig
12
  )
13
  import os
14
 
 
29
  if token is None:
30
  print("Hugging Face API token not found in environment variables.")
31
 
32
+ model_name = "pankajmathur/orca_mini_3b"  # switched from "gpt2"; Orca-style 3B instruct model
33
 
34
  # Load tokenizer and inject a minimal chat_template
35
  tokenizer = AutoTokenizer.from_pretrained(model_name)
36
  if not getattr(tokenizer, "chat_template", None):
37
  tokenizer.chat_template = (
38
+ "### System:\n{system}\n\n"
+ "### User:\n{instruction}\n\n"
+ "### Input:\n{input}\n\n"
+ "### Response:\n"  # NOTE(review): transformers treats chat_template as a Jinja2 template over `messages`; single-brace {system}/{instruction}/{input} placeholders are NOT substituted by apply_chat_template — confirm this string is only used with manual str.format
 
 
 
42
  )
43
+ quant_config = BitsAndBytesConfig(
44
+ load_in_4bit=True,
45
+ bnb_4bit_quant_type="nf4",
46
+ bnb_4bit_use_double_quant=True,
47
+ bnb_4bit_compute_dtype=torch.float32  # NOTE(review): `torch` is referenced here but this diff adds no `import torch` — verify it is imported at the top of app.py or this raises NameError
48
+ )
49
  # Load the model with the 4-bit quantization config (comment was stale: no longer GPT-2)
50
  model = AutoModelForCausalLM.from_pretrained(
51
  model_name,
52
+ quantization_config=quant_config,
53
  device_map="auto"
54
  )
55