readme
README.md CHANGED
@@ -1,4 +1,5 @@
 ---
+base_model: google/gemma-2-27b-it
 license: gemma
 library_name: transformers
 pipeline_tag: text-generation
@@ -8,10 +9,13 @@ extra_gated_prompt: >-
   Google’s usage license. To do this, please ensure you’re logged in to Hugging
   Face and click below. Requests are processed immediately.
 extra_gated_button_content: Acknowledge license
+tags:
+- conversational
 ---
 
 
-#
+# SystemGemma2 27B model card
+This is a version of [Gemma 2 27B](https://huggingface.co/google/gemma-2-27b-it) with system prompts enabled.
 
 **Model Page**: [Gemma](https://ai.google.dev/gemma/docs)
 
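The added description above is the headline change: this checkpoint accepts the `system` role that the stock Gemma 2 chat template rejects. A minimal sketch of what that permits, using a hypothetical repo id (the diff does not name the repository this README belongs to):

```python
from transformers import AutoTokenizer

# Hypothetical repo id -- replace with the actual SystemGemma2 repository.
tokenizer = AutoTokenizer.from_pretrained("your-org/SystemGemma2-27b-it")

chat = [
    # The stock google/gemma-2 template raises "System role not supported" here;
    # with system prompts enabled, this turn is rendered into the prompt instead.
    {"role": "system", "content": "You are a terse assistant. Answer in one sentence."},
    {"role": "user", "content": "What is Gemma 2?"},
]

prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
print(prompt)
```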
@@ -21,7 +25,7 @@ extra_gated_button_content: Acknowledge license
 * [Gemma on Kaggle][kaggle-gemma]
 * [Gemma on Vertex Model Garden][vertex-mg-gemma]
 
-**Terms of Use**: [Terms](https://www.kaggle.com/models/google/gemma/license/consent/verify/huggingface?returnModelRepoId=google/gemma-2-
+**Terms of Use**: [Terms](https://www.kaggle.com/models/google/gemma/license/consent/verify/huggingface?returnModelRepoId=google/gemma-2-9b-it)
 
 **Authors**: Google
 
@@ -58,7 +62,7 @@ from transformers import pipeline
 
 pipe = pipeline(
     "text-generation",
-    model="google/gemma-2-
+    model="google/gemma-2-9b-it",
     model_kwargs={"torch_dtype": torch.bfloat16},
     device="cuda",  # replace with "mps" to run on a Mac device
 )
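This hunk covers only the `pipeline(...)` constructor; the call site (whose `print(assistant_response)` line appears as context in the next hunk) is unchanged and not shown. A rough sketch of how such a pipe is typically driven with chat-style messages, not necessarily the README's exact code:

```python
messages = [
    {"role": "user", "content": "Who are you?"},
]

outputs = pipe(messages, max_new_tokens=256)
# With chat-style input, "generated_text" holds the full conversation,
# so the assistant's reply is the last message in the list.
assistant_response = outputs[0]["generated_text"][-1]["content"].strip()
print(assistant_response)
```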
@@ -80,9 +84,9 @@ print(assistant_response)
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-9b-it",
     device_map="auto",
     torch_dtype=torch.bfloat16,
 )
@@ -118,9 +122,9 @@ You can also use `float32` if you skip the dtype, but no precision increase will
 # pip install accelerate
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-9b-it",
     device_map="auto",
 )
 
@@ -138,7 +142,7 @@ for running Gemma 2 through a command line interface, or CLI. Follow the [instal
 for getting started, then launch the CLI through the following command:
 
 ```shell
-local-gemma --model
+local-gemma --model 9b --preset speed
 ```
 
 #### Quantized Versions through `bitsandbytes`
@@ -154,9 +158,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-9b-it",
     quantization_config=quantization_config,
 )
 
@@ -179,9 +183,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 quantization_config = BitsAndBytesConfig(load_in_4bit=True)
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-
+    "google/gemma-2-9b-it",
     quantization_config=quantization_config,
 )
 
@@ -216,8 +220,8 @@ import torch
 torch.set_float32_matmul_precision("high")
 
 # load the model + tokenizer
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-
-model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
+model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-9b-it", torch_dtype=torch.bfloat16)
 model.to("cuda")
 
 # apply the torch compile transformation
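The hunk ends at the `# apply the torch compile transformation` comment; the compile call itself sits outside the changed lines. One common way that step looks with the objects loaded above (a sketch, not necessarily the README's exact continuation):

```python
# Compile the forward pass; "reduce-overhead" enables CUDA-graph capture.
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)

# A static KV cache keeps shapes fixed, which suits the compiled graph.
inputs = tokenizer("The theory of special relativity states ", return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=64, cache_implementation="static")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```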
@@ -267,15 +271,14 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import transformers
 import torch
 
-model_id = "google/gemma-2-
+model_id = "google/gemma-2-9b-it"
 dtype = torch.bfloat16
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="cuda",
-    torch_dtype=dtype,
-)
+    torch_dtype=dtype,)
 
 chat = [
     { "role": "user", "content": "Write a hello world program" },
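The final hunk stops while the `chat` list is still being built; the rest of that snippet is unchanged and therefore omitted from the diff. A sketch of how such a chat is usually completed with the `tokenizer` and `model` defined above (again, not necessarily the README's exact continuation):

```python
# Render the chat through the model's template and generate a reply.
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)

outputs = model.generate(input_ids=inputs, max_new_tokens=150)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
```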