Commit: Update README.md

File changed: README.md — hunk `@@ -29,14 +29,31 @@` (follows `pip install torch==2.0.0`)
**Before (README lines 29-42):**

```python
import torch
from transformers import pipeline

generate_text = pipeline(
    model="psinger/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    use_fast=False,
    device_map={"": "cuda:0"},
)

res = generate_text(
```
**After (README lines 29-59):**

```python
import torch
from transformers import pipeline, BitsAndBytesConfig, AutoTokenizer

quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=3.0,
)

model_kwargs = {}
model_kwargs["quantization_config"] = quantization_config

tokenizer = AutoTokenizer.from_pretrained(
    "psinger/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
    use_fast=False,
    padding_side="left",
    trust_remote_code=True,
)

generate_text = pipeline(
    model="psinger/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    use_fast=False,
    device_map={"": "cuda:0"},
    model_kwargs=model_kwargs,
)

res = generate_text(
```