lunahr committed
Commit: 8ea1897
Parent(s): f6c533e
Files changed (1):
  1. README.md +20 -17
README.md CHANGED
````diff
@@ -1,4 +1,5 @@
 ---
+base_model: google/gemma-2-27b-it
 license: gemma
 library_name: transformers
 pipeline_tag: text-generation
@@ -8,10 +9,13 @@ extra_gated_prompt: >-
   Google’s usage license. To do this, please ensure you’re logged in to Hugging
   Face and click below. Requests are processed immediately.
 extra_gated_button_content: Acknowledge license
+tags:
+- conversational
 ---
 
 
-# Gemma 2 model card
+# SystemGemma2 27B model card
+This is a version of [Gemma 2 27B](https://huggingface.co/google/gemma-2-27b-it) with system prompts enabled.
 
 **Model Page**: [Gemma](https://ai.google.dev/gemma/docs)
 
@@ -21,7 +25,7 @@ extra_gated_button_content: Acknowledge license
 * [Gemma on Kaggle][kaggle-gemma]
 * [Gemma on Vertex Model Garden][vertex-mg-gemma]
 
-**Terms of Use**: [Terms](https://www.kaggle.com/models/google/gemma/license/consent/verify/huggingface?returnModelRepoId=google/gemma-2-27b-it)
+**Terms of Use**: [Terms](https://www.kaggle.com/models/google/gemma/license/consent/verify/huggingface?returnModelRepoId=google/gemma-2-9b-it)
 
 **Authors**: Google
 
@@ -58,7 +62,7 @@ from transformers import pipeline
 
 pipe = pipeline(
     "text-generation",
-    model="google/gemma-2-27b-it",
+    model="google/gemma-2-9b-it",
     model_kwargs={"torch_dtype": torch.bfloat16},
     device="cuda", # replace with "mps" to run on a Mac device
 )
@@ -80,9 +84,9 @@ print(assistant_response)
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-27b-it",
+    "google/gemma-2-9b-it",
     device_map="auto",
     torch_dtype=torch.bfloat16,
 )
@@ -118,9 +122,9 @@ You can also use `float32` if you skip the dtype, but no precision increase will
 # pip install accelerate
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-27b-it",
+    "google/gemma-2-9b-it",
     device_map="auto",
 )
 
@@ -138,7 +142,7 @@ for running Gemma 2 through a command line interface, or CLI. Follow the [instal
 for getting started, then launch the CLI through the following command:
 
 ```shell
-local-gemma --model 27b --preset speed
+local-gemma --model 9b --preset speed
 ```
 
 #### Quantized Versions through `bitsandbytes`
@@ -154,9 +158,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-27b-it",
+    "google/gemma-2-9b-it",
     quantization_config=quantization_config,
 )
 
@@ -179,9 +183,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 quantization_config = BitsAndBytesConfig(load_in_4bit=True)
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-27b-it",
+    "google/gemma-2-9b-it",
     quantization_config=quantization_config,
 )
 
@@ -216,8 +220,8 @@ import torch
 torch.set_float32_matmul_precision("high")
 
 # load the model + tokenizer
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it")
-model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-27b-it", torch_dtype=torch.bfloat16)
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
+model = Gemma2ForCausalLM.from_pretrained("google/gemma-2-9b-it", torch_dtype=torch.bfloat16)
 model.to("cuda")
 
 # apply the torch compile transformation
@@ -267,15 +271,14 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import transformers
 import torch
 
-model_id = "google/gemma-2-27b-it"
+model_id = "google/gemma-2-9b-it"
 dtype = torch.bfloat16
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="cuda",
-    torch_dtype=dtype,
-)
+    torch_dtype=dtype,)
 
 chat = [
     { "role": "user", "content": "Write a hello world program" },
````