facat committed
Commit 2e598e3
1 Parent(s): d240110
Files changed (1)
  1. app.py +9 -5
app.py CHANGED
@@ -4,8 +4,12 @@ from transformers import LlamaTokenizer
 from transformers import LlamaForCausalLM, GenerationConfig
 from peft import PeftModel
 import torch
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
 
-
+device_map={'': 0}
 def generate_instruction_prompt(instruction, input=None):
     if input:
         return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
@@ -45,7 +49,7 @@ def evaluate(
     )
     prompt = generate_instruction_prompt(instruction, input)
     inputs = tokenizer(prompt, return_tensors="pt")
-    input_ids = inputs["input_ids"].cuda()
+    input_ids = inputs["input_ids"].to(device)
     generation_output = model.generate(
         input_ids=input_ids,
         generation_config=generation_config,
@@ -64,19 +68,19 @@ def load_lora(lora_path, base_model="decapoda-research/llama-7b-hf"):
     model = LlamaForCausalLM.from_pretrained(
         base_model,
         # load_in_8bit=True,
-        device_map="auto",
+        device_map=device_map,
         low_cpu_mem_usage=True,
     )
     lora = PeftModel.from_pretrained(
         model,
         lora_path,
-        device_map="auto",
+        device_map=device_map,
     )
     return lora
 
 
 base_model = "decapoda-research/llama-13b-hf"
-tokenizer = LlamaTokenizer.from_pretrained(base_model)
+tokenizer = LlamaTokenizer.from_pretrained(base_model, device_map=device_map)
 # question = "ε¦‚ζžœδ»Šε€©ζ˜―ζ˜ŸζœŸδΊ”, ι‚£δΉˆεŽε€©ζ˜―ζ˜ŸζœŸε‡ ?"
 model = load_lora(lora_path="facat/alpaca-lora-cn-13b", base_model=base_model)
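Taken together, the commit makes app.py pick a device once and route both model placement and the input tensors through it, instead of hard-coding .cuda() and device_map="auto". Below is a minimal sketch of the resulting load-and-generate flow, assuming a CUDA GPU is present (device_map={'': 0} pins every module to GPU 0). The prompt text, max_new_tokens, and the final decode step are illustrative additions, not part of the commit, and the instruction-prompt template from app.py is skipped here.

import torch
from transformers import LlamaForCausalLM, LlamaTokenizer, GenerationConfig
from peft import PeftModel

# Device chosen once, as in the commit; {'': 0} places the whole model on GPU 0.
device = "cuda" if torch.cuda.is_available() else "cpu"
device_map = {'': 0}

base_model = "decapoda-research/llama-13b-hf"
tokenizer = LlamaTokenizer.from_pretrained(base_model)

# Load the base LLaMA weights, then apply the LoRA adapter on top via PEFT.
model = LlamaForCausalLM.from_pretrained(
    base_model,
    device_map=device_map,
    low_cpu_mem_usage=True,
)
model = PeftModel.from_pretrained(model, "facat/alpaca-lora-cn-13b", device_map=device_map)

# Illustrative prompt (not from the commit); the tensor is moved to the same
# device the model was placed on instead of calling .cuda() unconditionally.
inputs = tokenizer("If today is Friday, what day is the day after tomorrow?", return_tensors="pt")
input_ids = inputs["input_ids"].to(device)
output = model.generate(
    input_ids=input_ids,
    generation_config=GenerationConfig(max_new_tokens=64),
)
print(tokenizer.decode(output[0], skip_special_tokens=True))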