Update README.md
README.md CHANGED
@@ -80,7 +80,7 @@ print ("ANSWER:\n\n", answer_only)
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Load reasoning model
-model_name='lamm-mit/
+model_name='lamm-mit/PRefLexOR_ORPO_DPO_EXO_REFLECT_10222024'
 model = AutoModelForCausalLM.from_pretrained(model_name,
                                              torch_dtype=torch.bfloat16,
                                              attn_implementation="flash_attention_2", device_map="auto", trust_remote_code=True,
@@ -89,10 +89,10 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True,
                                           use_fast=False,
 )
 # Load critic model
-
+model_name_critic = "meta-llama/Llama-3.2-3B-Instruct"
 
 critic_model = AutoModelForCausalLM.from_pretrained(
-
+    model_name_critic,
     torch_dtype=torch.bfloat16,
     attn_implementation="flash_attention_2",
     device_map="auto",
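For quick reference, the updated README snippet reads roughly as follows once this change is applied. This is a minimal sketch, not the verbatim README: the `import torch` line, the argument layout, and the placement of the tokenizer call (which appears only as hunk context above) are assumptions.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load reasoning model (repository name completed in this commit)
model_name = 'lamm-mit/PRefLexOR_ORPO_DPO_EXO_REFLECT_10222024'
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    use_fast=False,
)

# Load critic model (identifier added in this commit)
model_name_critic = "meta-llama/Llama-3.2-3B-Instruct"
critic_model = AutoModelForCausalLM.from_pretrained(
    model_name_critic,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)
```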