Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -8,6 +8,7 @@ from huggingface_hub import login
8
  from transformers import (
9
  AutoTokenizer,
10
  AutoModelForCausalLM,
 
11
  )
12
  import os
13
 
@@ -28,24 +29,27 @@ class BasicAgent:
28
  if token is None:
29
  print("Hugging Face API token not found in environment variables.")
30
 
31
- model_name = "gpt2" # or "gpt2-medium", "gpt2-large"
32
 
33
  # Load tokenizer and inject a minimal chat_template
34
  tokenizer = AutoTokenizer.from_pretrained(model_name)
35
  if not getattr(tokenizer, "chat_template", None):
36
  tokenizer.chat_template = (
37
- "<|im_start|>system\n"
38
- "{system}\n"
39
- "<|im_end|>\n"
40
- "<|im_start|>user\n"
41
- "{user}\n"
42
- "<|im_end|>\n"
43
- "<|im_start|>assistant\n"
44
  )
45
-
 
 
 
 
 
46
  # Load the GPT-2 model (FP16 if you like, but default is fine)
47
  model = AutoModelForCausalLM.from_pretrained(
48
  model_name,
 
49
  device_map="auto"
50
  )
51
 
 
8
  from transformers import (
9
  AutoTokenizer,
10
  AutoModelForCausalLM,
11
+ BitsAndBytesConfig
12
  )
13
  import os
14
 
 
29
  if token is None:
30
  print("Hugging Face API token not found in environment variables.")
31
 
32
+ model_name = "pankajmathur/orca_mini_3b"  # switched from "gpt2"; Orca-style 3B instruct model
33
 
34
  # Load tokenizer and inject a minimal chat_template
35
  tokenizer = AutoTokenizer.from_pretrained(model_name)
36
  if not getattr(tokenizer, "chat_template", None):
37
  tokenizer.chat_template = (
38
+ "### System:\n{system}\n\n"
+ "### User:\n{instruction}\n\n"
+ "### Input:\n{input}\n\n"
+ "### Response:\n"  # NOTE(review): transformers treats chat_template as a Jinja2 template over `messages`; single-brace {system}/{instruction}/{input} placeholders are NOT substituted by apply_chat_template — confirm this string is only used with manual str.format
 
 
 
42
  )
43
+ quant_config = BitsAndBytesConfig(
44
+ load_in_4bit=True,
45
+ bnb_4bit_quant_type="nf4",
46
+ bnb_4bit_use_double_quant=True,
47
+ bnb_4bit_compute_dtype=torch.float32  # NOTE(review): `torch` is referenced here but this diff adds no `import torch` — verify it is imported at the top of app.py or this raises NameError
48
+ )
49
  # Load the model with the 4-bit quantization config (comment was stale: no longer GPT-2)
50
  model = AutoModelForCausalLM.from_pretrained(
51
  model_name,
52
+ quantization_config=quant_config,
53
  device_map="auto"
54
  )
55