psinger committed
Commit 73d0cbf
1 Parent(s): 3131b76

Update README.md

Files changed (1):
  1. README.md +37 -10

README.md CHANGED
@@ -43,7 +43,7 @@ model_kwargs = {}
 # optional quantization
 quantization_config = BitsAndBytesConfig(
     load_in_8bit=True,
-    llm_int8_threshold=3.0,
+    llm_int8_threshold=6.0,
 )
 model_kwargs["quantization_config"] = quantization_config
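
A minimal sketch of how the tweaked `quantization_config` is typically consumed; the `pipeline(...)` call below is an assumption based on the standard `transformers` API and the surrounding `model_kwargs = {}` context, not a line from this diff:

```python
import torch
from transformers import BitsAndBytesConfig, pipeline

model_kwargs = {}

# optional 8-bit quantization, mirroring the hunk above
model_kwargs["quantization_config"] = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=6.0,  # value taken from the updated README line
)

# assumption: model_kwargs is forwarded to the underlying from_pretrained call
generate_text = pipeline(
    task="text-generation",
    model="psinger/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map={"": "cuda:0"},
    model_kwargs=model_kwargs,
)
```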
 
@@ -93,18 +93,28 @@ Alternatively, if you prefer to not use `trust_remote_code=True` you can downloa
 ```python
 import torch
 from h2oai_pipeline import H2OTextGenerationPipeline
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+quantization_config = None
+# optional quantization
+quantization_config = BitsAndBytesConfig(
+    load_in_8bit=True,
+    llm_int8_threshold=6.0,
+)
 
 tokenizer = AutoTokenizer.from_pretrained(
     "psinger/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
     use_fast=False,
-    padding_side="left"
+    padding_side="left",
+    trust_remote_code=True,
 )
 model = AutoModelForCausalLM.from_pretrained(
     "psinger/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
+    trust_remote_code=True,
     torch_dtype=torch.float16,
-    device_map={"": "cuda:0"}
-)
+    device_map={"": "cuda:0"},
+    quantization_config=quantization_config
+).eval()
 generate_text = H2OTextGenerationPipeline(model=model, tokenizer=tokenizer)
 
 res = generate_text(
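
The context line `res = generate_text(` is cut off at the hunk boundary; a sketch of how that call usually continues, with illustrative generation parameters (the exact values are not part of this diff), followed by the `print` shown in the next hunk's header:

```python
# hypothetical generation settings; tune to your needs
res = generate_text(
    "Why is drinking water so healthy?",
    min_new_tokens=2,
    max_new_tokens=256,
    do_sample=False,
    num_beams=1,
    repetition_penalty=1.2,
    renormalize_logits=True,
)
print(res[0]["generated_text"])
```
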
@@ -124,16 +134,33 @@ print(res[0]["generated_text"])
 You may also construct the pipeline from the loaded model and tokenizer yourself and consider the preprocessing steps:
 
 ```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
-model_name = "psinger/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1"  # either local folder or huggingface model name
 # Important: The prompt needs to be in the same format the model was trained with.
 # You can find an example prompt in the experiment logs.
 prompt = "<|prompt|>How are you?<|endoftext|><|answer|>"
 
-tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
-model = AutoModelForCausalLM.from_pretrained(model_name)
-model.cuda().eval()
+quantization_config = None
+# optional quantization
+quantization_config = BitsAndBytesConfig(
+    load_in_8bit=True,
+    llm_int8_threshold=6.0,
+)
+
+tokenizer = AutoTokenizer.from_pretrained(
+    "psinger/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
+    use_fast=False,
+    padding_side="left",
+    trust_remote_code=True,
+)
+model = AutoModelForCausalLM.from_pretrained(
+    "psinger/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
+    trust_remote_code=True,
+    torch_dtype=torch.float16,
+    device_map={"": "cuda:0"},
+    quantization_config=quantization_config
+).eval()
+
 inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to("cuda")
 
 # generate configuration can be modified to your needs
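
The snippet ends at the "# generate configuration can be modified to your needs" comment; a sketch of the generation and decoding step that typically follows, continuing the `model`, `tokenizer`, and `inputs` defined above (parameter values are illustrative, not taken from this diff):

```python
# run generation on the tokenized prompt; settings are illustrative
tokens = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    min_new_tokens=2,
    max_new_tokens=256,
    do_sample=False,
    num_beams=1,
    repetition_penalty=1.2,
    renormalize_logits=True,
)[0]

# strip the prompt tokens and decode only the newly generated answer
tokens = tokens[inputs["input_ids"].shape[1]:]
answer = tokenizer.decode(tokens, skip_special_tokens=True)
print(answer)
```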
 