Commit 6ebf55c
Parent(s): 3a634f5
add application file

Files changed:
- README.md +5 -5
- app.py +160 -0
- requirements.txt +14 -0
- saved-alpaca-belle-cot7b/adapter_config.json +18 -0
- saved-alpaca-belle-cot7b/adapter_model.bin +3 -0
README.md
CHANGED
@@ -1,13 +1,13 @@
 ---
-title: Alpaca
-emoji:
-colorFrom:
-colorTo:
+title: Alpaca CoT
+emoji: π
+colorFrom: indigo
+colorTo: yellow
 sdk: gradio
 sdk_version: 3.23.0
 app_file: app.py
 pinned: false
-license: gpl
+license: gpl
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,160 @@
import gradio as gr
from transformers import AutoModel, AutoTokenizer
import sys
import torch
import argparse
from peft import PeftModel
import transformers
from collections import namedtuple

from transformers import (
    LlamaForCausalLM, LlamaTokenizer,
    AutoModel, AutoTokenizer,
    BloomForCausalLM, BloomTokenizerFast, GenerationConfig)

tokenizer = None
model = None
LOAD_8BIT = False

ModelClass = namedtuple("ModelClass", ('tokenizer', 'model'))

_MODEL_CLASSES = {
    "llama": ModelClass(**{
        "tokenizer": LlamaTokenizer,
        "model": LlamaForCausalLM,
    }),
    "bloom": ModelClass(**{
        "tokenizer": AutoTokenizer,
        "model": BloomForCausalLM,
    })
}

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

def get_model_class(model_type,
                    model_name_or_path,
                    lora_model_path):
    global model, tokenizer

    model_class = _MODEL_CLASSES[model_type]  # tokenizer, model

    model_base = model_class.model.from_pretrained(model_name_or_path,
                                                   load_in_8bit=LOAD_8BIT,
                                                   torch_dtype=torch.float16,
                                                   device_map="auto")
    tokenizer = model_class.tokenizer.from_pretrained(model_name_or_path)  # default add_eos_token=False

    model = PeftModel.from_pretrained(
        model_base,
        lora_model_path,
        torch_dtype=torch.float16,
    )
    if not LOAD_8BIT:
        model.half()


def predict(
    instruction,
    top_p=0.75,
    temperature=0.1,
    history=None,
    top_k=40,
    num_beams=4,
    max_new_tokens=512,
    **kwargs,
):

    history = history or []

    prompt = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{0}\n\n### Response:"
    ).format(instruction)

    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(device)
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        **kwargs,
    )
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
    s = generation_output.sequences[0]
    output = tokenizer.decode(s)

    print('Model response:', output)

    bot_response = output.split("### Response:")[1].strip()

    history.append((instruction, bot_response))

    return "", history


def predict_test(message, top_p, temperature, history):

    history = history or []

    user_message = f"{message} {top_p}, {temperature}"
    print(user_message)

    history.append((message, user_message))
    return history, history

def clear_session():
    return '', None

parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('--size', default=7, type=int, help='the size of llama model')
parser.add_argument('--data', default="", type=str, help='the data used for instruction tuning')
parser.add_argument('--local_rank', default=-1, type=int, help='node rank for distributed training')
parser.add_argument('--model_type', default="llama", choices=['llama', 'chatglm', 'bloom'])
parser.add_argument('--model_name_or_path', default="decapoda-research/llama-7b-hf", type=str)
parser.add_argument('--lora_name_or_path', default="./saved-alpaca-belle-cot7b", type=str)

args = parser.parse_args()

get_model_class(args.model_type, args.model_name_or_path, args.lora_name_or_path)

block = gr.Blocks(css="""#col_container { margin-left: auto; margin-right: auto;} #chatbot {height: 520px; overflow: auto;}""")

with block as demo:

    # top_p, temperature
    with gr.Accordion("Parameters", open=False):
        top_p = gr.Slider(minimum=0, maximum=1.0, value=0.75, step=0.05, interactive=True, label="Top-p (nucleus sampling)")
        temperature = gr.Slider(minimum=0, maximum=5.0, value=0.1, step=0.1, interactive=True, label="Temperature")

    chatbot = gr.Chatbot(label="Alpaca-CoT")
    message = gr.Textbox()
    state = gr.State()

    message.submit(predict, inputs=[message, top_p, temperature, state], outputs=[chatbot, state], queue=False)

    with gr.Row():
        clear_history = gr.Button("Clear History")
        clear = gr.Button("Clear Input")
        send = gr.Button("Send")
        regenerate = gr.Button("Regenerate")

    # regenerate.click(regenerate, inputs=[message], outputs=[chatbot])
    regenerate.click(fn=clear_session, inputs=[], outputs=[chatbot, state], queue=False)
    send.click(predict, inputs=[message, top_p, temperature, state], outputs=[chatbot, state])
    clear.click(lambda: None, None, message, queue=False)
    clear_history.click(fn=clear_session, inputs=[], outputs=[chatbot, state], queue=False)

demo.queue(max_size=20, concurrency_count=20).launch()
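For readers who want to exercise the model outside the Gradio UI, the following is a minimal, illustrative sketch that mirrors app.py's prompt template and decoding settings. The base-model and adapter paths are the argument-parser defaults above, and the sample instruction is only a placeholder; adjust both for your environment.

# Sketch: run one prompt through the same base-model + LoRA stack as app.py.
# Paths are the defaults from app.py's argument parser (assumptions if run elsewhere).
import torch
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer, GenerationConfig

base_name = "decapoda-research/llama-7b-hf"   # base checkpoint (default in app.py)
lora_path = "./saved-alpaca-belle-cot7b"      # LoRA adapter shipped in this commit

tokenizer = LlamaTokenizer.from_pretrained(base_name)
base = LlamaForCausalLM.from_pretrained(base_name, torch_dtype=torch.float16, device_map="auto")
model = PeftModel.from_pretrained(base, lora_path, torch_dtype=torch.float16)
model.eval()

# Same Alpaca-style prompt template as predict() in app.py; the instruction is a placeholder.
prompt = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\nWhat is instruction tuning?\n\n### Response:"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(
        **inputs,
        generation_config=GenerationConfig(temperature=0.1, top_p=0.75, top_k=40, num_beams=4),
        max_new_tokens=256,
    )
print(tokenizer.decode(out[0]).split("### Response:")[1].strip())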
requirements.txt
ADDED
@@ -0,0 +1,14 @@
# chatglm
icetk
cpm_kernels==1.0.11
torch>=1.13

datasets
loralib
sentencepiece
git+https://github.com/huggingface/transformers.git
accelerate
bitsandbytes
git+https://github.com/huggingface/peft.git
gradio
appdirs
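Because transformers and peft are installed straight from their git repositories (app.py relies on the LLaMA classes and PeftModel loading path available there), a purely illustrative post-install sanity check could look like the snippet below; it is not part of the Space itself.

# Illustrative check: the git installs must expose the symbols app.py imports.
import transformers, peft, gradio
from transformers import LlamaForCausalLM, LlamaTokenizer, GenerationConfig
from peft import PeftModel

print("transformers", transformers.__version__)
print("peft", peft.__version__)
print("gradio", gradio.__version__)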
saved-alpaca-belle-cot7b/adapter_config.json
ADDED
@@ -0,0 +1,18 @@
{
  "base_model_name_or_path": "/mnt/bn/qingyi-bn-lq/llama/llama-7b-hf",
  "bias": "none",
  "enable_lora": null,
  "fan_in_fan_out": false,
  "inference_mode": true,
  "lora_alpha": 16,
  "lora_dropout": 0.05,
  "merge_weights": false,
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 8,
  "target_modules": [
    "q_proj",
    "v_proj"
  ],
  "task_type": "CAUSAL_LM"
}
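The JSON above maps field-for-field to a peft LoraConfig: rank-8 adapters (alpha 16, dropout 0.05) on the q_proj and v_proj attention projections, no bias terms, causal-LM task. As an illustrative sketch only (training code is not part of this commit), the equivalent config object would look roughly like this:

# Sketch: a LoraConfig mirroring saved-alpaca-belle-cot7b/adapter_config.json.
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,         # "task_type": "CAUSAL_LM"
    r=8,                                  # "r": 8
    lora_alpha=16,                        # "lora_alpha": 16
    lora_dropout=0.05,                    # "lora_dropout": 0.05
    bias="none",                          # "bias": "none"
    target_modules=["q_proj", "v_proj"],  # adapted attention projections
)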
saved-alpaca-belle-cot7b/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:330494735335477a234baf27e22b489c89e7e7a34d26a212ccec73a8434164e3
size 16822989
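The adapter weights themselves are stored in Git LFS; only this pointer (SHA-256 and byte size) is committed. The roughly 16 MB size is in line with small rank-8 adapters on two projection matrices rather than full 7B model weights. A small illustrative check that a downloaded copy matches the pointer:

# Sketch: verify a downloaded adapter_model.bin against the LFS pointer above.
import hashlib, os

path = "saved-alpaca-belle-cot7b/adapter_model.bin"
expected_oid = "330494735335477a234baf27e22b489c89e7e7a34d26a212ccec73a8434164e3"
expected_size = 16822989

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha256.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha256.hexdigest() == expected_oid, "sha256 mismatch"
print("adapter_model.bin matches the LFS pointer")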