llama-3.2-3B-Instruct

Running on Zero

reach-vb HF Staff committed on Sep 23, 2024

Commit

625f637

verified ·

1 Parent(s): 2479537

Update app.py (#1)

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,11 +5,12 @@ from typing import Iterator
 import gradio as gr
 import spaces
 import torch
-from transformers import AutoModelForCausalLM, GemmaTokenizerFast, TextIteratorStreamer
 DESCRIPTION = """\
-# Gemma 2 2B IT
 Gemma 2 is Google's latest iteration of open LLMs.
 This is a demo of [`google/gemma-2-2b-it`](https://huggingface.co/google/gemma-2-2b-it), fine-tuned for instruction following.
 For more details, please check [our post](https://huggingface.co/blog/gemma2).
@@ -23,14 +24,13 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-model_id = "google/gemma-2-2b-it"
-tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
     torch_dtype=torch.bfloat16,
 )
-model.config.sliding_window = 4096
 model.eval()

 import gradio as gr
 import spaces
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 DESCRIPTION = """\
+# L3.2 3B Instruct
+Need to Update the below text
 Gemma 2 is Google's latest iteration of open LLMs.
 This is a demo of [`google/gemma-2-2b-it`](https://huggingface.co/google/gemma-2-2b-it), fine-tuned for instruction following.
 For more details, please check [our post](https://huggingface.co/blog/gemma2).
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+model_id = "nltpt/Llama-3.2-3B-Instruct"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
     torch_dtype=torch.bfloat16,
 )
 model.eval()