import torch

from transformers import AutoTokenizer, GenerationConfig, AutoModel

# Load the base ChatGLM-6B model pinned to a known-good revision. Full
# precision is required here (see the note below), and the original lm_head is
# kept under a second name before the PEFT wrapper is applied.
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True, revision="658202d").float()
setattr(model, "lm_head_raw", model.lm_head)
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True, revision="658202d")

from peft import PeftModel
peft_path = 'ljsabc/Fujisaki_GLM'      # change this to your own LoRA adapter
model = PeftModel.from_pretrained(
    model,
    peft_path,
    torch_dtype=torch.float,
)

# Dump the adapter config to make sure everything loaded correctly.
print(model.peft_config)
# We have to use full precision: some token ids exceed 65535, which would
# overflow in half precision.
model.eval()
print(model)
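
# Optional sketch, not part of the original flow: depending on your peft
# version, the LoRA weights can be merged into the base model for slightly
# faster inference once adapter swapping is no longer needed:
#   model = model.merge_and_unload()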

# Default new tensors to fp32 on CPU.
torch.set_default_tensor_type(torch.FloatTensor)
def evaluate(context, temperature, top_p, top_k):
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        #repetition_penalty=1.1,
        num_beams=1,
        do_sample=True,
    )
    with torch.no_grad():
        # The prompt format must match the one used for training.
        input_text = f"Context: {context}Answer: "
        ids = tokenizer.encode(input_text)
        input_ids = torch.LongTensor([ids]).to('cpu')
        out = model.generate(
            input_ids=input_ids,
            max_length=160,
            generation_config=generation_config
        )
        # Keep only the text after the "Answer: " marker.
        out_text = tokenizer.decode(out[0]).split("Answer: ")[1]
        return out_text
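
# Hypothetical one-off use of evaluate() outside the UI; the sampling values
# are just the slider defaults below, not anything prescribed by the model:
#   print(evaluate("最近过得怎么样?", temperature=0.8, top_p=0.975, top_k=25))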
    
def evaluate_stream(msg, history, temperature, top_p):
    # Sampling parameters are passed straight to stream_chat below, which does
    # not accept a GenerationConfig, so none is built here.

    history.append([msg, None])

    # Keep only a short rolling window of recent turns.
    context = ""
    if len(history) > 4:
        history.pop(0)

    # Strip the <br> tags Gradio inserts into displayed messages.
    for j in range(len(history)):
        history[j][0] = history[j][0].replace("<br>", "")

    # Concatenate past turns into a single context string.
    for h in history[:-1]:
        context += h[0] + "||" + h[1] + "||"

    context += history[-1][0]
    context = context.replace(r'<br>', '')

    # TODO: handle over-long prompts more gracefully.
    CUTOFF = 224
    while len(tokenizer.encode(context)) > CUTOFF:
        # Trim 15 characters from the front at a time until the prompt fits,
        # leaving part of the length budget for the answer.
        context = context[15:]
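    # Note (a sketch, not used here): character trimming is only approximate;
    # an exact token-level cut would be something like
    #   context = tokenizer.decode(tokenizer.encode(context)[-CUTOFF:])
    # at the risk of cutting through the "||" turn separators.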

    h = []
    print("History:", history)
    print("Context:", context)
    # stream_chat yields progressively longer responses; mirror each one into
    # the last history slot so Gradio re-renders the reply as it grows.
    for response, h in model.stream_chat(tokenizer, context, h, max_length=CUTOFF, top_p=top_p, temperature=temperature):
        history[-1][1] = response
        yield history, ""

    #return response
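
# Hypothetical manual run of the streaming generator outside Gradio, assuming
# an empty history and the slider defaults:
#   for chat_history, _ in evaluate_stream("最近过得怎么样?", [], 0.8, 0.975):
#       print(chat_history[-1][1])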

import gradio as gr

title = """<h1 align="center">李萌萌 (Alter Ego)</h1>
<h3 align='center'>This is a digital alter ego of 李萌萌 trained with the ChatGLM model. You can chat with her, or simply press Enter on the empty textbox to see what she comes up with on her own.</h3>
<p align='center'>Probably due to the training data, declarative context tends to produce better results than direct questions.</p>"""

footer = """<p align='center'>The project is hosted on <a href='https://github.com/ljsabc/Fujisaki' target='_blank'>GitHub</a> and built on Tsinghua's <a href='https://huggingface.co/THUDM/chatglm-6b' target='_blank'>THUDM/chatglm-6b</a>.</p>
<p align='center'><em>"I'm... a boy." --Chihiro Fujisaki</em></p>"""

with gr.Blocks() as demo:
    gr.HTML(title)
    state = gr.State()
    with gr.Row():
        with gr.Column(scale=2):
            temp = gr.components.Slider(minimum=0, maximum=1.1, value=0.8, label="Temperature",
                info="Higher temperatures make the output more varied but can introduce grammatical errors; lower temperatures also help keep answers on topic.")
            top_p = gr.components.Slider(minimum=0.5, maximum=1.0, value=0.975, label="Top-p",
                info="Nucleus sampling: only the most probable tokens whose cumulative probability reaches top-p are kept. Larger values make the output more varied but can introduce grammatical errors; smaller values seem to keep the reply closer to the context.")
            #code = gr.Textbox(label="temp_output", info="decoder output")
            #top_k = gr.components.Slider(minimum=1, maximum=200, step=1, value=25, label="Top k",
            #    info="The next token is sampled from the top-k candidates. Larger values make the output more varied but can introduce grammatical errors; smaller values seem to keep the reply closer to the context.")
            
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="Chat")
            msg = gr.Textbox(label="Input", placeholder="最近过得怎么样?",
                info="Type your message and press [Enter] to send, or leave it empty to sample something at random. Conversations should not run too long, or the model starts repeating itself; clearing the chat is recommended.")
            clear = gr.Button("Clear chat")

    # On Enter: stream the reply into the chatbot and clear the textbox.
    msg.submit(evaluate_stream, [msg, chatbot, temp, top_p], [chatbot, msg])
    clear.click(lambda: None, None, chatbot, queue=False)
    gr.HTML(footer)

demo.queue()
demo.launch(debug=False)
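
# Assumption, not in the original: when running locally rather than as a hosted
# Space, gradio's standard share flag can expose a temporary public link:
#   demo.launch(debug=False, share=True)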