Commit: Update README.md

File changed: README.md — hunk `@@ -29,14 +29,31 @@` (follows `pip install torch==2.0.0`)
**Before (README lines 29-42):**

```python
import torch
from transformers import pipeline

generate_text = pipeline(
    model="psinger/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    use_fast=False,
    device_map={"": "cuda:0"},
)

res = generate_text(
```
**After (README lines 29-59):**

```python
import torch
from transformers import pipeline, BitsAndBytesConfig, AutoTokenizer

quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=3.0,
)

model_kwargs = {}
model_kwargs["quantization_config"] = quantization_config

tokenizer = AutoTokenizer.from_pretrained(
    "psinger/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
    use_fast=False,
    padding_side="left",
    trust_remote_code=True,
)

generate_text = pipeline(
    model="psinger/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    use_fast=False,
    device_map={"": "cuda:0"},
    model_kwargs=model_kwargs,
)

res = generate_text(
```