dahara1 committed
Commit ca9fb88 (1 parent: 5f482cd)

Update README.md

Files changed (1)
  1. README.md +4 -3
README.md CHANGED
@@ -30,9 +30,8 @@ pip install -vvv --no-build-isolation -e .
 
 ### Sample code
 ```
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
-from optimum.gptq import GPTQQuantizer, load_quantized_model
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 model_name = "webbigdata/C3TR-Adapter_gptq"
 
 # thanks to tk-master
@@ -41,9 +40,11 @@ config = AutoConfig.from_pretrained(model_name)
 config.quantization_config["use_exllama"] = False
 config.quantization_config["exllama_config"] = {"version":2}
 
+# adjust to your GPU memory size. 0 means the first GPU.
 max_memory={0: "12GiB", "cpu": "10GiB"}
+
 quantized_model = AutoModelForCausalLM.from_pretrained(model_name
-, torch_dtype=torch.bfloat16 # chage float16 if you use free colab or something not support bfloat16.
+, torch_dtype=torch.bfloat16 # change to torch.float16 if you use free Colab or something that does not support bfloat16.
 , device_map="auto", max_memory=max_memory
 , config=config)
 tokenizer = AutoTokenizer.from_pretrained(model_name)