loubnabnl (HF staff) committed
Commit
eaa8ac9
1 Parent(s): 5c5e0d3
Files changed (1)
  1. README.md +2 -2
README.md CHANGED
@@ -61,7 +61,7 @@ checkpoint = "bigcode/starcoder2-15b"
 device = "cuda" # for GPU usage or "cpu" for CPU usage
 
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-# to use Multiple GPUs do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
+# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
 model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
 
 inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt").to(device)
@@ -100,7 +100,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 # to use 4bit use `load_in_4bit=True` instead
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 
-checkpoint = "bigcode/starcoder2-15b_16k"
+checkpoint = "bigcode/starcoder2-15b"
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 model = AutoModelForCausalLM.from_pretrained("bigcode/starcoder2-15b_16k", quantization_config=quantization_config)
 
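The comment touched by the first hunk points at multi-GPU loading via `device_map="auto"`. A minimal end-to-end sketch of that path, assuming `torch`, `transformers`, and `accelerate` are installed; the prompt matches the README, while `max_new_tokens` is an illustrative value not taken from the diff:

from transformers import AutoTokenizer, AutoModelForCausalLM

checkpoint = "bigcode/starcoder2-15b"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# device_map="auto" lets accelerate shard the layers across all visible GPUs,
# so no explicit .to(device) call is needed on the model
model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")

# send the prompt to the device holding the model's first layers
inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt").to(model.device)
outputs = model.generate(inputs, max_new_tokens=32)  # max_new_tokens is illustrative
print(tokenizer.decode(outputs[0]))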
 
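The second hunk corrects the `checkpoint` name in the quantization example. A minimal sketch of the 4-bit variant that the README's own comment mentions (`load_in_4bit=True`), assuming `bitsandbytes` and `accelerate` are installed, and passing the fixed `checkpoint` variable throughout rather than a hardcoded string:

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# swap load_in_8bit=True for load_in_4bit=True, per the README comment
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

checkpoint = "bigcode/starcoder2-15b"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# quantized weights are placed on GPU automatically during loading
model = AutoModelForCausalLM.from_pretrained(checkpoint, quantization_config=quantization_config)

inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt").to(model.device)
outputs = model.generate(inputs, max_new_tokens=32)  # max_new_tokens is illustrative
print(tokenizer.decode(outputs[0]))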