stack-llama

Runtime error

lewtun HF Staff committed on Apr 4, 2023

Commit

1d89618

1 Parent(s): ebddf99

Pass token

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,13 +26,15 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # Load peft config for pre-trained checkpoint etc.
 device = "cuda" if torch.cuda.is_available() else "cpu"
-model_id = "HuggingFaceH4/llama-se-rl-ed"
 if device == "cpu":
-    model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True)
 else:
     # torch_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
     # model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, device_map="auto")
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True)
 tokenizer = AutoTokenizer.from_pretrained(model_id)

 # Load peft config for pre-trained checkpoint etc.
 device = "cuda" if torch.cuda.is_available() else "cpu"
+model_id = "trl-lib/llama-se-rl-merged"
 if device == "cpu":
+    model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True, use_auth_token=HF_TOKEN)
 else:
     # torch_dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
     # model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, device_map="auto")
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id, device_map="auto", load_in_8bit=True, use_auth_token=HF_TOKEN
+    )
 tokenizer = AutoTokenizer.from_pretrained(model_id)