Spaces:
Runtime error
Runtime error
zetavg
committed on
preload base model on colab
Browse files
- LLaMA_LoRA.ipynb +6 -2
- llama_lora/globals.py +2 -0
- llama_lora/models.py +24 -5
LLaMA_LoRA.ipynb
CHANGED
@@ -116,7 +116,7 @@
|
|
116 |
"# @markdown You can customize the location of the stored data here.\n",
|
117 |
"\n",
|
118 |
"# @markdown The folder in Google Drive where Colab Notebook data are stored<br /> **(WARNING: The content of this folder will be modified by this notebook)**:\n",
|
119 |
-
"google_drive_folder = \"Colab Data/LLaMA
|
120 |
"# google_drive_colab_data_folder = \"Colab Notebooks/Notebook Data\"\n",
|
121 |
"\n",
|
122 |
"# Where Google Drive will be mounted in the Colab runtime.\n",
|
@@ -289,7 +289,11 @@
|
|
289 |
"# Prepare Data Dir\n",
|
290 |
"import os\n",
|
291 |
"from llama_lora.llama_lora.utils.data import init_data_dir\n",
|
292 |
-
"init_data_dir()"
|
|
|
|
|
|
|
|
|
293 |
],
|
294 |
"metadata": {
|
295 |
"id": "Yf6g248ylteP"
|
|
|
116 |
"# @markdown You can customize the location of the stored data here.\n",
|
117 |
"\n",
|
118 |
"# @markdown The folder in Google Drive where Colab Notebook data are stored<br /> **(WARNING: The content of this folder will be modified by this notebook)**:\n",
|
119 |
+
"google_drive_folder = \"Colab Data/LLaMA-LoRA Tuner\" # @param {type:\"string\"}\n",
|
120 |
"# google_drive_colab_data_folder = \"Colab Notebooks/Notebook Data\"\n",
|
121 |
"\n",
|
122 |
"# Where Google Drive will be mounted in the Colab runtime.\n",
|
|
|
289 |
"# Prepare Data Dir\n",
|
290 |
"import os\n",
|
291 |
"from llama_lora.llama_lora.utils.data import init_data_dir\n",
|
292 |
+
"init_data_dir()",
|
293 |
+
"\n",
|
294 |
+
"# Load the Base Model\n",
|
295 |
+
"from llama_lora.llama_lora.models import prepare_base_model\n",
|
296 |
+
"prepare_base_model()\n"
|
297 |
],
|
298 |
"metadata": {
|
299 |
"id": "Yf6g248ylteP"
|
llama_lora/globals.py
CHANGED
@@ -31,6 +31,8 @@ class Global:
|
|
31 |
# Model related
|
32 |
loaded_models = LRUCache(1)
|
33 |
loaded_tokenizers = LRUCache(1)
|
|
|
|
|
34 |
|
35 |
# GPU Info
|
36 |
gpu_cc = None # GPU compute capability
|
|
|
31 |
# Model related
|
32 |
loaded_models = LRUCache(1)
|
33 |
loaded_tokenizers = LRUCache(1)
|
34 |
+
new_base_model_that_is_ready_to_be_used = None
|
35 |
+
name_of_new_base_model_that_is_ready_to_be_used = None
|
36 |
|
37 |
# GPU Info
|
38 |
gpu_cc = None # GPU compute capability
|
llama_lora/models.py
CHANGED
@@ -26,6 +26,17 @@ def get_new_base_model(base_model_name):
|
|
26 |
if Global.ui_dev_mode:
|
27 |
return
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
device = get_device()
|
30 |
|
31 |
if device == "cuda":
|
@@ -34,7 +45,8 @@ def get_new_base_model(base_model_name):
|
|
34 |
load_in_8bit=Global.load_8bit,
|
35 |
torch_dtype=torch.float16,
|
36 |
# device_map="auto",
|
37 |
-
|
|
|
38 |
)
|
39 |
elif device == "mps":
|
40 |
model = LlamaForCausalLM.from_pretrained(
|
@@ -69,8 +81,8 @@ def get_tokenizer(base_model_name):
|
|
69 |
|
70 |
|
71 |
def get_model(
|
72 |
-
|
73 |
-
|
74 |
if Global.ui_dev_mode:
|
75 |
return
|
76 |
|
@@ -88,7 +100,8 @@ def get_model(
|
|
88 |
peft_model_name_or_path = peft_model_name
|
89 |
|
90 |
lora_models_directory_path = os.path.join(Global.data_dir, "lora_models")
|
91 |
-
possible_lora_model_path = os.path.join(
|
|
|
92 |
if os.path.isdir(possible_lora_model_path):
|
93 |
peft_model_name_or_path = possible_lora_model_path
|
94 |
|
@@ -105,7 +118,8 @@ def get_model(
|
|
105 |
model,
|
106 |
peft_model_name_or_path,
|
107 |
torch_dtype=torch.float16,
|
108 |
-
|
|
|
109 |
)
|
110 |
elif device == "mps":
|
111 |
model = PeftModel.from_pretrained(
|
@@ -138,6 +152,11 @@ def get_model(
|
|
138 |
return model
|
139 |
|
140 |
|
|
|
|
|
|
|
|
|
|
|
141 |
def clear_cache():
|
142 |
gc.collect()
|
143 |
|
|
|
26 |
if Global.ui_dev_mode:
|
27 |
return
|
28 |
|
29 |
+
if Global.new_base_model_that_is_ready_to_be_used:
|
30 |
+
if Global.name_of_new_base_model_that_is_ready_to_be_used == base_model_name:
|
31 |
+
model = Global.new_base_model_that_is_ready_to_be_used
|
32 |
+
Global.new_base_model_that_is_ready_to_be_used = None
|
33 |
+
Global.name_of_new_base_model_that_is_ready_to_be_used = None
|
34 |
+
return model
|
35 |
+
else:
|
36 |
+
Global.new_base_model_that_is_ready_to_be_used = None
|
37 |
+
Global.name_of_new_base_model_that_is_ready_to_be_used = None
|
38 |
+
clear_cache()
|
39 |
+
|
40 |
device = get_device()
|
41 |
|
42 |
if device == "cuda":
|
|
|
45 |
load_in_8bit=Global.load_8bit,
|
46 |
torch_dtype=torch.float16,
|
47 |
# device_map="auto",
|
48 |
+
# ? https://github.com/tloen/alpaca-lora/issues/21
|
49 |
+
device_map={'': 0},
|
50 |
)
|
51 |
elif device == "mps":
|
52 |
model = LlamaForCausalLM.from_pretrained(
|
|
|
81 |
|
82 |
|
83 |
def get_model(
|
84 |
+
base_model_name,
|
85 |
+
peft_model_name=None):
|
86 |
if Global.ui_dev_mode:
|
87 |
return
|
88 |
|
|
|
100 |
peft_model_name_or_path = peft_model_name
|
101 |
|
102 |
lora_models_directory_path = os.path.join(Global.data_dir, "lora_models")
|
103 |
+
possible_lora_model_path = os.path.join(
|
104 |
+
lora_models_directory_path, peft_model_name)
|
105 |
if os.path.isdir(possible_lora_model_path):
|
106 |
peft_model_name_or_path = possible_lora_model_path
|
107 |
|
|
|
118 |
model,
|
119 |
peft_model_name_or_path,
|
120 |
torch_dtype=torch.float16,
|
121 |
+
# ? https://github.com/tloen/alpaca-lora/issues/21
|
122 |
+
device_map={'': 0},
|
123 |
)
|
124 |
elif device == "mps":
|
125 |
model = PeftModel.from_pretrained(
|
|
|
152 |
return model
|
153 |
|
154 |
|
155 |
+
def prepare_base_model(base_model_name=Global.default_base_model_name):
|
156 |
+
Global.new_base_model_that_is_ready_to_be_used = get_new_base_model(base_model_name)
|
157 |
+
Global.name_of_new_base_model_that_is_ready_to_be_used = base_model_name
|
158 |
+
|
159 |
+
|
160 |
def clear_cache():
|
161 |
gc.collect()
|
162 |
|