facat committed on
Commit
7a10940
1 Parent(s): cbf9b34
Files changed (1)
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
+ # %%
+ import gradio as gr
+ from transformers import LlamaTokenizer
+ from transformers import LlamaForCausalLM, GenerationConfig
+ from peft import PeftModel
+ import torch
+
+
+ # Build an Alpaca-style prompt, with or without an additional input field.
+ def generate_instruction_prompt(instruction, input=None):
+     if input:
+         return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+ ### Instruction:
+ {instruction}
+
+ ### Input:
+ {input}
+
+ ### Response:"""
+     else:
+         return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+ ### Instruction:
+ {instruction}
+
+ ### Response:"""
+
+
+ # Generate a response for one instruction/input pair with the given decoding settings.
+ def evaluate(
+     model,
+     tokenizer,
+     instruction,
+     input=None,
+     temperature=0.1,
+     top_p=0.75,
+     num_beams=4,
+     max_token=256,
+ ):
+     generation_config = GenerationConfig(
+         temperature=temperature,
+         top_p=top_p,
+         num_beams=num_beams,
+         top_k=40,
+         no_repeat_ngram_size=3,
+     )
+     prompt = generate_instruction_prompt(instruction, input)
+     inputs = tokenizer(prompt, return_tensors="pt")
+     input_ids = inputs["input_ids"].cuda()
+     generation_output = model.generate(
+         input_ids=input_ids,
+         generation_config=generation_config,
+         return_dict_in_generate=True,
+         output_scores=True,
+         max_new_tokens=max_token,
+     )
+     s = generation_output.sequences[0]
+     output = tokenizer.decode(s)
+     # Keep only the text after the "### Response:" marker.
+     res = output.split("### Response:")[1].strip()
+     print("Response:", res)
+     return res
+
+
+ # Load the base LLaMA model and apply the LoRA adapter weights on top of it.
+ def load_lora(lora_path, base_model="decapoda-research/llama-7b-hf"):
+     model = LlamaForCausalLM.from_pretrained(
+         base_model,
+         # load_in_8bit=True,
+         device_map="auto",
+         low_cpu_mem_usage=True,
+     )
+     lora = PeftModel.from_pretrained(
+         model,
+         lora_path,
+         device_map="auto",
+     )
+     return lora
+
+
+ base_model = "decapoda-research/llama-13b-hf"
+ tokenizer = LlamaTokenizer.from_pretrained(base_model, cache_dir="data/hf")
+ # question = "If today is Friday, what day of the week will the day after tomorrow be?"
+ model = load_lora(lora_path="facat/alpaca-lora-cn-13b", base_model=base_model)
+
+ eval = lambda question, input, temperature, beams, max_token: evaluate(
+     model,
+     tokenizer,
+     question,
+     input=input,
+     temperature=temperature,
+     num_beams=beams,
+     max_token=max_token,
+ )
+
+ gr.Interface(
+     fn=eval,
+     inputs=[
+         gr.components.Textbox(
+             lines=2, label="Instruction", placeholder="Tell me about alpacas."
+         ),
+         gr.components.Textbox(lines=2, label="Input", placeholder="none"),
+         gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
+         # gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
+         # gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
+         gr.components.Slider(minimum=1, maximum=4, step=1, value=4, label="Beams"),
+         gr.components.Slider(
+             minimum=1, maximum=512, step=1, value=256, label="Max tokens"
+         ),
+     ],
+     outputs=[
+         gr.components.Textbox(
+             lines=8,
+             label="Output",
+         )
+     ],
+     title="Alpaca-LoRA",
+     description="Alpaca-LoRA",
+ ).launch()
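
For quick testing, the evaluate function added above can also be called directly, without going through the Gradio interface. The snippet below is a minimal usage sketch and not part of this commit; it assumes model and tokenizer have already been loaded exactly as in app.py.

# Minimal usage sketch (not part of this commit): call evaluate() directly,
# assuming `model` and `tokenizer` were loaded as in app.py above.
answer = evaluate(
    model,
    tokenizer,
    "Tell me about alpacas.",  # instruction
    temperature=0.1,
    num_beams=4,
    max_token=256,
)
print(answer)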