Load the base model weights from the "half" revision
Browse files
app.py
CHANGED
@@ -10,6 +10,8 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
10 |
return_dict=True,
|
11 |
load_in_8bit=True,
|
12 |
device_map="auto",
|
|
|
|
|
13 |
)
|
14 |
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
|
15 |
# Load the Lora model
|
|
|
10 |
return_dict=True,
|
11 |
load_in_8bit=True,
|
12 |
device_map="auto",
|
13 |
+
revision="half",
|
14 |
+
# low_cpu_mem_usage=True
|
15 |
)
|
16 |
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
|
17 |
# Load the Lora model
|