placed the model in float16
app.py CHANGED

@@ -4,6 +4,7 @@ import spaces
 from transformers import GemmaTokenizer, AutoModelForCausalLM
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from threading import Thread
+import torch
 
 # Set an environment variable
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
@@ -46,7 +47,7 @@ h1 {
 
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
-model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto")  # to("cuda:0")
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", torch_dtype=torch.float16)  # to("cuda:0")
 terminators = [
     tokenizer.eos_token_id,
     tokenizer.convert_tokens_to_ids("<|eot_id|>")