Spaces:
Runtime error
Runtime error
Commit
•
ddaaa7d
1
Parent(s):
79ecda9
Update app.py
Browse files
app.py
CHANGED
@@ -2,19 +2,17 @@ import os
|
|
2 |
HF_TOKEN = os.getenv('HF_TOKEN')
|
3 |
print("Token loaded")
|
4 |
|
5 |
-
from transformers import GemmaTokenizer, AutoModelForCausalLM
|
6 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
7 |
-
from threading import Thread
|
8 |
import transformers
|
9 |
import torch
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
18 |
|
19 |
print("llama download successfully")
|
20 |
|
|
|
"""Hugging Face Space entrypoint: build a Llama-3-8B-Instruct text-generation pipeline.

Reconstructed, cleaned-up form of the new `app.py` introduced by this commit.
"""
import os

# Access token for gated model downloads, supplied via the Space's secrets.
HF_TOKEN = os.getenv('HF_TOKEN')
print("Token loaded")

import transformers
import torch

# Single source of truth for the checkpoint name — the original defined this
# constant and then repeated the string literal inside the pipeline() call.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,  # reuse the constant instead of duplicating the repo id
    model_kwargs={"torch_dtype": torch.bfloat16},
    # Fall back to CPU so the script can still start on GPU-less hardware
    # instead of crashing at import time (the Space shows "Runtime error").
    device="cuda" if torch.cuda.is_available() else "cpu",
    # Meta-Llama-3 is a gated repository: pass the token so the download is
    # authorized; the original read HF_TOKEN but never used it.
    token=HF_TOKEN,
)

print("llama download successfully")