masanorihirano committed
Commit b555022
1 Parent(s): dc63c9c
Files changed (3)
  1. Dockerfile +1 -1
  2. app.py +10 -6
  3. model_pull.py +7 -4
Dockerfile CHANGED
@@ -33,4 +33,4 @@ RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true \
     git config --global credential.helper store && \
     huggingface-cli login --token $(cat /run/secrets/HF_TOKEN) --add-to-git-credential
 RUN poetry run python model_pull.py
-ENTRYPOINT ["poetry", "run", "python", "app.py", "--host", "0.0.0.0", "--port", "7860"]
+ENTRYPOINT ["/root/.local/bin/poetry", "run", "python", "app.py", "--host", "0.0.0.0", "--port", "7860"]
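The entrypoint now invokes Poetry by absolute path. Poetry's installer places the executable under /root/.local/bin, and an exec-form ENTRYPOINT does not go through a login shell, so unless that directory is on the image's PATH the bare "poetry" likely fails with an "executable file not found" style error at container start. An equivalent fix would be to add ENV PATH="/root/.local/bin:$PATH" earlier in the Dockerfile and keep the short entrypoint.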
app.py CHANGED
@@ -3,15 +3,19 @@ from typing import Optional
 import gradio as gr
 import torch
 from peft import PeftModel
-from transformers import AutoModelForCausalLM
-from transformers import AutoTokenizer
 from transformers import GenerationConfig
 
 print("starting server ...")
+
+assert (
+    "LlamaTokenizer" in transformers._import_structure["models.llama"]
+), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
+from transformers import LlamaTokenizer, LlamaForCausalLM
+
 BASE_MODEL = "decapoda-research/llama-13b-hf"
 LORA_WEIGHTS = "izumi-lab/llama-13b-japanese-lora-v0-1ep"
 
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)
 
 if torch.cuda.is_available():
     device = "cuda"
@@ -25,7 +29,7 @@ except Exception:
     pass
 
 if device == "cuda":
-    model = AutoModelForCausalLM.from_pretrained(
+    model = LlamaForCausalLM.from_pretrained(
         BASE_MODEL,
         load_in_8bit=False,
         torch_dtype=torch.float16,
@@ -35,7 +39,7 @@ if device == "cuda":
         model, LORA_WEIGHTS, torch_dtype=torch.float16
     )
 elif device == "mps":
-    model = AutoModelForCausalLM.from_pretrained(
+    model = LlamaForCausalLM.from_pretrained(
         BASE_MODEL,
         device_map={"": device},
         torch_dtype=torch.float16,
@@ -47,7 +51,7 @@ elif device == "mps":
         torch_dtype=torch.float16,
     )
 else:
-    model = AutoModelForCausalLM.from_pretrained(
+    model = LlamaForCausalLM.from_pretrained(
         BASE_MODEL, device_map={"": device}, low_cpu_mem_usage=True
     )
     model = PeftModel.from_pretrained(
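Note: the new version check introspects transformers' private _import_structure table to confirm the installed build ships the LLaMA classes before importing them, but neither hunk adds a top-level "import transformers". The surviving "from transformers import GenerationConfig" binds only GenerationConfig, not the module name, so as committed the assert raises NameError: name 'transformers' is not defined. A minimal corrected sketch of the guard:

    import transformers  # bind the module name used by the introspection below

    # _import_structure is a private attribute of the transformers package; this
    # check mirrors the commit and may break in later transformers releases.
    assert (
        "LlamaTokenizer" in transformers._import_structure["models.llama"]
    ), (
        "LLaMA is now in HuggingFace's main branch.\n"
        "Please reinstall it: pip uninstall transformers && "
        "pip install git+https://github.com/huggingface/transformers.git"
    )
    from transformers import LlamaTokenizer, LlamaForCausalLM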
model_pull.py CHANGED
@@ -1,13 +1,16 @@
 import torch
 from peft import PeftModel
-from transformers import AutoModelForCausalLM
-from transformers import AutoTokenizer
+
+assert (
+    "LlamaTokenizer" in transformers._import_structure["models.llama"]
+), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
+from transformers import LlamaTokenizer, LlamaForCausalLM
 
 BASE_MODEL = "decapoda-research/llama-13b-hf"
 LORA_WEIGHTS = "izumi-lab/llama-13b-japanese-lora-v0-1ep"
 
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
-model = AutoModelForCausalLM.from_pretrained(
+tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)
+model = LlamaForCausalLM.from_pretrained(
     BASE_MODEL,
     load_in_8bit=False,
     torch_dtype=torch.float16,
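model_pull.py exists so that "RUN poetry run python model_pull.py" in the Dockerfile downloads the base model and LoRA weights into the image at build time rather than at container start; instantiating the models is only a means of warming the cache. The same caveat as app.py applies here: transformers itself is never imported, so the assert would fail with a NameError before anything is downloaded. If the goal is only to prefetch the files, a lighter alternative (an assumption, not what the commit does) is to fetch the repos without loading the weights into memory:

    # Hypothetical alternative: prefetch both repos into the local
    # Hugging Face cache without instantiating the 13B-parameter model.
    from huggingface_hub import snapshot_download

    snapshot_download(repo_id="decapoda-research/llama-13b-hf")
    snapshot_download(repo_id="izumi-lab/llama-13b-japanese-lora-v0-1ep")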