love2poppy committed

Commit 6ebf55c · 1 Parent(s): 3a634f5

add application file
README.md CHANGED
@@ -1,13 +1,13 @@
 ---
-title: Alpaca Cot
-emoji: 😻
-colorFrom: red
-colorTo: purple
+title: Alpaca CoT
+emoji: 👍
+colorFrom: indigo
+colorTo: yellow
 sdk: gradio
 sdk_version: 3.23.0
 app_file: app.py
 pinned: false
-license: gpl-2.0
+license: gpl
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,160 @@
+import argparse
+from collections import namedtuple
+
+import gradio as gr
+import torch
+from peft import PeftModel
+from transformers import (
+    LlamaForCausalLM, LlamaTokenizer,
+    AutoModel, AutoTokenizer,
+    BloomForCausalLM, BloomTokenizerFast, GenerationConfig)
+
+tokenizer = None
+model = None
+LOAD_8BIT = False
+
+# Map each supported model type to its (tokenizer, model) classes.
+ModelClass = namedtuple("ModelClass", ('tokenizer', 'model'))
+
+_MODEL_CLASSES = {
+    "llama": ModelClass(**{
+        "tokenizer": LlamaTokenizer,
+        "model": LlamaForCausalLM,
+    }),
+    "bloom": ModelClass(**{
+        "tokenizer": AutoTokenizer,
+        "model": BloomForCausalLM,
+    })
+}
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+def get_model_class(model_type, model_name_or_path, lora_model_path):
+    """Load the base model and tokenizer, then attach the LoRA adapter."""
+    global model, tokenizer
+
+    model_class = _MODEL_CLASSES[model_type]  # tokenizer, model
+
+    model_base = model_class.model.from_pretrained(model_name_or_path,
+                                                   load_in_8bit=LOAD_8BIT,
+                                                   torch_dtype=torch.float16,
+                                                   device_map="auto")
+    tokenizer = model_class.tokenizer.from_pretrained(model_name_or_path)  # default add_eos_token=False
+
+    model = PeftModel.from_pretrained(
+        model_base,
+        lora_model_path,
+        torch_dtype=torch.float16,
+    )
+    if not LOAD_8BIT:
+        model.half()
+
+
+def predict(
+    instruction,
+    top_p=0.75,
+    temperature=0.1,
+    history=None,
+    top_k=40,
+    num_beams=4,
+    max_new_tokens=512,
+    **kwargs,
+):
+    history = history or []
+
+    # Standard Alpaca-style prompt template.
+    prompt = (
+        "Below is an instruction that describes a task. "
+        "Write a response that appropriately completes the request.\n\n"
+        "### Instruction:\n{0}\n\n### Response:"
+    ).format(instruction)
+
+    inputs = tokenizer(prompt, return_tensors="pt")
+    input_ids = inputs["input_ids"].to(device)
+    generation_config = GenerationConfig(
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        num_beams=num_beams,
+        **kwargs,
+    )
+    with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids,
+            generation_config=generation_config,
+            return_dict_in_generate=True,
+            output_scores=True,
+            max_new_tokens=max_new_tokens,
+        )
+    s = generation_output.sequences[0]
+    output = tokenizer.decode(s)
+
+    print('Model response:', output)
+
+    # Keep only the text after the "### Response:" marker.
+    bot_response = output.split("### Response:")[1].strip()
+
+    history.append((instruction, bot_response))
+
+    # Return the history twice: once for the Chatbot display, once for the State.
+    return history, history
+
+
+def predict_test(message, top_p, temperature, history):
+    history = history or []
+
+    user_message = f"{message} {top_p}, {temperature}"
+    print(user_message)
+
+    history.append((message, user_message))
+    return history, history
+
+
+def clear_session():
+    return '', None
+
+
+parser = argparse.ArgumentParser(description='Alpaca-CoT Gradio demo')
+parser.add_argument('--size', default=7, type=int, help='the size of the llama model')
+parser.add_argument('--data', default="", type=str, help='the data used for instruction tuning')
+parser.add_argument('--local_rank', default=-1, type=int, help='node rank for distributed training')
+# Note: only 'llama' and 'bloom' are mapped in _MODEL_CLASSES above.
+parser.add_argument('--model_type', default="llama", choices=['llama', 'chatglm', 'bloom'])
+parser.add_argument('--model_name_or_path', default="decapoda-research/llama-7b-hf", type=str)
+parser.add_argument('--lora_name_or_path', default="./saved-alpaca-belle-cot7b", type=str)
+
+args = parser.parse_args()
+
+get_model_class(args.model_type, args.model_name_or_path, args.lora_name_or_path)
+
+block = gr.Blocks(css="""#col_container { margin-left: auto; margin-right: auto;} #chatbot {height: 520px; overflow: auto;}""")
+
+with block as demo:
+
+    # top_p, temperature
+    with gr.Accordion("Parameters", open=False):
+        top_p = gr.Slider(minimum=0, maximum=1.0, value=0.75, step=0.05, interactive=True, label="Top-p (nucleus sampling)")
+        temperature = gr.Slider(minimum=0, maximum=5.0, value=0.1, step=0.1, interactive=True, label="Temperature")
+
+    chatbot = gr.Chatbot(label="Alpaca-CoT")
+    message = gr.Textbox()
+    state = gr.State()
+
+    message.submit(predict, inputs=[message, top_p, temperature, state], outputs=[chatbot, state], queue=False)
+
+    with gr.Row():
+        clear_history = gr.Button("🗑 清除历史对话 | Clear History")
+        clear = gr.Button('🧹 清除输入 | Clear Input')
+        send = gr.Button("🚀 发送 | Send")
+        regenerate = gr.Button("🚗 重新生成 | Regenerate")
+
+    # regenerate.click(regenerate, inputs=[message], outputs=[chatbot])
+    regenerate.click(fn=clear_session, inputs=[], outputs=[chatbot, state], queue=False)
+    send.click(predict, inputs=[message, top_p, temperature, state], outputs=[chatbot, state])
+    clear.click(lambda: None, None, message, queue=False)
+    clear_history.click(fn=clear_session, inputs=[], outputs=[chatbot, state], queue=False)
+
+demo.queue(max_size=20, concurrency_count=20).launch()
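
For reference, the load-and-generate path that app.py wires into Gradio can also be exercised standalone. The sketch below mirrors the defaults above (the decapoda-research/llama-7b-hf base plus the ./saved-alpaca-belle-cot7b adapter); the example instruction is only an illustration and is not part of the commit.

import torch
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer, GenerationConfig

# Load the base model and attach the LoRA adapter, as get_model_class() does above.
base = LlamaForCausalLM.from_pretrained("decapoda-research/llama-7b-hf",
                                        torch_dtype=torch.float16, device_map="auto")
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = PeftModel.from_pretrained(base, "./saved-alpaca-belle-cot7b", torch_dtype=torch.float16)

# Build the same Alpaca-style prompt used by predict(); the instruction is a placeholder.
prompt = ("Below is an instruction that describes a task. "
          "Write a response that appropriately completes the request.\n\n"
          "### Instruction:\nGive three tips for staying healthy.\n\n### Response:")
input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(base.device)

with torch.no_grad():
    output = model.generate(input_ids=input_ids,
                            generation_config=GenerationConfig(temperature=0.1, top_p=0.75, num_beams=4),
                            max_new_tokens=256)
print(tokenizer.decode(output[0]).split("### Response:")[1].strip())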
requirements.txt ADDED
@@ -0,0 +1,14 @@
+# chatglm
+icetk
+cpm_kernels==1.0.11
+torch>=1.13
+
+datasets
+loralib
+sentencepiece
+git+https://github.com/huggingface/transformers.git
+accelerate
+bitsandbytes
+git+https://github.com/huggingface/peft.git
+gradio
+appdirs
saved-alpaca-belle-cot7b/adapter_config.json ADDED
@@ -0,0 +1,18 @@
+{
+  "base_model_name_or_path": "/mnt/bn/qingyi-bn-lq/llama/llama-7b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
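
The adapter settings above correspond to a standard peft LoRA setup. A minimal sketch of the equivalent LoraConfig, reconstructed from the JSON above (not taken from the original training script):

from peft import LoraConfig

# Reconstructed from adapter_config.json above; a sketch, not the committed training code.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)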
saved-alpaca-belle-cot7b/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:330494735335477a234baf27e22b489c89e7e7a34d26a212ccec73a8434164e3
+size 16822989