Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,7 @@ import torch
|
|
5 |
|
6 |
# Device configuration (prioritize GPU if available)
|
7 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
8 |
-
model_id = "phearion/bigbrain-v0.0.
|
9 |
|
10 |
bnb_config = BitsAndBytesConfig(
|
11 |
load_in_4bit=True,
|
@@ -20,7 +20,7 @@ tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
|
|
20 |
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config)
|
21 |
|
22 |
# Load the Lora model
|
23 |
-
model = PeftModel.from_pretrained(
|
24 |
|
25 |
def greet(text):
|
26 |
with torch.no_grad(): # Disable gradient calculation for inference
|
|
|
5 |
|
6 |
# Device configuration (prioritize GPU if available)
|
7 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
8 |
+
model_id = "phearion/bigbrain-v0.0.2"
|
9 |
|
10 |
bnb_config = BitsAndBytesConfig(
|
11 |
load_in_4bit=True,
|
|
|
20 |
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config)
|
21 |
|
22 |
# Load the Lora model
|
23 |
+
model = PeftModel.from_pretrained(model, model_id)
|
24 |
|
25 |
def greet(text):
|
26 |
with torch.no_grad(): # Disable gradient calculation for inference
|