masanorihirano committed
Commit b555022 · 1 parent: dc63c9c
update

Browse files:
- Dockerfile +1 -1
- app.py +10 -6
- model_pull.py +7 -4
Dockerfile CHANGED
@@ -33,4 +33,4 @@ RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true \
     git config --global credential.helper store && \
     huggingface-cli login --token $(cat /run/secrets/HF_TOKEN) --add-to-git-credential
 RUN poetry run python model_pull.py
-ENTRYPOINT ["poetry", "run", "python", "app.py", "--host", "0.0.0.0", "--port", "7860"]
+ENTRYPOINT ["/root/.local/bin/poetry", "run", "python", "app.py", "--host", "0.0.0.0", "--port", "7860"]
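The only Dockerfile change is that the ENTRYPOINT now invokes poetry by absolute path. Presumably the poetry executable lives in /root/.local/bin (where the official installer puts it) and was not being found on PATH when the container started. An alternative with the same effect (an assumption, not part of this commit) would be to extend the search path instead, e.g. ENV PATH="/root/.local/bin:${PATH}" before the ENTRYPOINT line.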
app.py CHANGED
@@ -3,15 +3,19 @@ from typing import Optional
 import gradio as gr
 import torch
 from peft import PeftModel
-from transformers import AutoModelForCausalLM
-from transformers import AutoTokenizer
 from transformers import GenerationConfig
 
 print("starting server ...")
+
+assert (
+    "LlamaTokenizer" in transformers._import_structure["models.llama"]
+), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
+from transformers import LlamaTokenizer, LlamaForCausalLM
+
 BASE_MODEL = "decapoda-research/llama-13b-hf"
 LORA_WEIGHTS = "izumi-lab/llama-13b-japanese-lora-v0-1ep"
 
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)
 
 if torch.cuda.is_available():
     device = "cuda"
@@ -25,7 +29,7 @@ except Exception:
     pass
 
 if device == "cuda":
-    model = AutoModelForCausalLM.from_pretrained(
+    model = LlamaForCausalLM.from_pretrained(
         BASE_MODEL,
         load_in_8bit=False,
         torch_dtype=torch.float16,
@@ -35,7 +39,7 @@ if device == "cuda":
         model, LORA_WEIGHTS, torch_dtype=torch.float16
     )
 elif device == "mps":
-    model = AutoModelForCausalLM.from_pretrained(
+    model = LlamaForCausalLM.from_pretrained(
         BASE_MODEL,
         device_map={"": device},
         torch_dtype=torch.float16,
@@ -47,7 +51,7 @@ elif device == "mps":
         torch_dtype=torch.float16,
     )
 else:
-    model = AutoModelForCausalLM.from_pretrained(
+    model = LlamaForCausalLM.from_pretrained(
         BASE_MODEL, device_map={"": device}, low_cpu_mem_usage=True
     )
     model = PeftModel.from_pretrained(
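One observation on the new guard in app.py: the assert reads transformers._import_structure, a private attribute, and it needs a bare `import transformers` somewhere above the hunk; whether that import exists in app.py is not visible in this diff. A hedged alternative that checks the same thing through the public import machinery (a sketch, not the committed code):

# Sketch only (not the committed code): raise the same reinstall hint when
# the installed transformers predates LLaMA support, without reading the
# private transformers._import_structure attribute.
try:
    from transformers import LlamaForCausalLM, LlamaTokenizer
except ImportError as e:
    raise ImportError(
        "LLaMA is now in HuggingFace's main branch.\n"
        "Please reinstall it: pip uninstall transformers && "
        "pip install git+https://github.com/huggingface/transformers.git"
    ) from e

This variant also keeps working if a future transformers release drops or renames _import_structure.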
model_pull.py CHANGED
@@ -1,13 +1,16 @@
 import torch
 from peft import PeftModel
-from transformers import AutoModelForCausalLM
-from transformers import AutoTokenizer
+
+assert (
+    "LlamaTokenizer" in transformers._import_structure["models.llama"]
+), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
+from transformers import LlamaTokenizer, LlamaForCausalLM
 
 BASE_MODEL = "decapoda-research/llama-13b-hf"
 LORA_WEIGHTS = "izumi-lab/llama-13b-japanese-lora-v0-1ep"
 
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
-model = AutoModelForCausalLM.from_pretrained(
+tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)
+model = LlamaForCausalLM.from_pretrained(
     BASE_MODEL,
     load_in_8bit=False,
     torch_dtype=torch.float16,
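model_pull.py mirrors the app.py change, with one caveat: within the lines this diff shows, the script never imports the transformers module itself, so the new assert would raise NameError: name 'transformers' is not defined before its check even runs. The Dockerfile executes this script at build time (RUN poetry run python model_pull.py), presumably to pre-download the base model, tokenizer, and LoRA weights into the image's Hugging Face cache, so such a failure would surface as a broken image build. A minimal repair under that reading (an assumption, not part of this commit):

# Assumed fix, not in this commit: import the module object so the guard
# below can inspect transformers._import_structure without a NameError.
import transformers

assert (
    "LlamaTokenizer" in transformers._import_structure["models.llama"]
), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"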