File size: 8,449 Bytes
75c75e6
94d4320
aac6381
 
75c75e6
123fa30
75c75e6
 
5b0f31e
 
75c75e6
aac6381
75c75e6
7838c74
 
aac6381
 
ebecce3
75c75e6
035a734
123fa30
 
 
 
 
 
46d51d2
 
 
 
 
 
 
 
 
 
 
75c75e6
 
 
aac6381
7838c74
75c75e6
 
aac6381
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75c75e6
46d51d2
94d4320
9031adc
aac6381
 
 
94d4320
 
7838c74
 
 
de103d5
7838c74
 
 
 
 
75c75e6
 
 
 
aac6381
 
 
 
 
793de46
 
 
 
 
aac6381
 
 
 
 
 
 
 
 
 
 
 
7838c74
793de46
aac6381
 
25be74c
7838c74
 
 
 
25be74c
793de46
7838c74
 
 
 
793de46
 
25be74c
 
7838c74
 
 
25be74c
 
75c75e6
 
 
 
aac6381
75c75e6
 
 
 
 
 
 
 
 
aac6381
24aec20
 
 
aac6381
 
 
5b0f31e
262234a
5b0f31e
 
75c75e6
 
 
 
24aec20
75c75e6
24aec20
75c75e6
 
aac6381
75c75e6
aac6381
 
 
 
 
 
 
 
75c75e6
aac6381
 
75c75e6
aac6381
 
 
 
 
7838c74
aac6381
7838c74
 
262234a
24aec20
262234a
aac6381
7838c74
 
aac6381
75c75e6
be256e8
75c75e6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import openai
import tiktoken

import datetime
import json
import time
import os

from datasets import load_dataset

# API key comes from the environment; never hard-code secrets in source.
openai.api_key = os.getenv('API_KEY')
# Ad-hoc counter stashed on the openai module: total requests served this run.
openai.request_times = 0

# In-memory log of every conversation, each entry a JSON-encoded history list.
all_dialogue = []

def ask(question, history, behavior):
    """Send one user question to the ChatGPT API and return the updated history.

    Parameters:
        question: the new user utterance.
        history: flat list alternating [user, assistant, user, assistant, ...].
        behavior: list of system-prompt strings that set the assistant persona.

    Returns:
        ``history`` extended with ``[question, response]``; the result is also
        recorded via record_dialogue. On any API error the response is a
        retry notice instead of raising.
    """
    openai.request_times += 1
    print(f"request times {openai.request_times}: {datetime.datetime.now()}: {question}")
    try:
        # System messages first, then the dialogue turns: even indices are
        # user turns, odd indices are assistant turns.
        messages = [
            {"role":"system", "content":content}
            for content in behavior
        ] + [
            {"role":"user" if i%2==0 else "assistant", "content":content}
            for i,content in enumerate(history + [question])
        ]
        raw_length = num_tokens_from_messages(messages)
        messages = forget_long_term(messages)
        if len(messages) == 0:
            # BUG FIX: this literal was missing the f prefix, so the user saw
            # the text "{raw_length}" instead of the actual token count.
            response = f'Your query is too long and expensive: {raw_length}>1000 tokens'
        else:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=messages
            )["choices"][0]["message"]["content"]
            # Strip leading blank lines the model sometimes emits.
            while response.startswith("\n"):
                response = response[1:]
    except Exception as e:
        # Best-effort UX: surface a generic retry message rather than crash
        # the Gradio callback on rate limits / network errors.
        print(e)
        response = 'Timeout! Please wait a few minutes and retry'
    history = history + [question, response]
    record_dialogue(history)
    return history

def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
    """Estimate how many tokens a chat-completion request will consume.

    Mirrors OpenAI's published accounting for gpt-3.5-turbo: four tokens of
    framing per message, the encoded length of every field value, minus one
    when a "name" field is present (it replaces the role token), plus two
    tokens that prime the assistant's reply.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name: fall back to the encoding gpt-3.5 family uses.
        encoding = tiktoken.get_encoding("cl100k_base")
    if model != "gpt-3.5-turbo":  # note: future models may deviate from this
        raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
    total = 2  # every reply is primed with <im_start>assistant
    for message in messages:
        total += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
        for field, text in message.items():
            total += len(encoding.encode(text))
            if field == "name":
                total -= 1  # the name token replaces the role token
    return total

def forget_long_term(messages, max_num_tokens=1000):
    """Trim the oldest turns until the conversation fits the token budget.

    The leading system message is preserved (the second message is dropped
    instead) as long as the system prompt itself is under the budget; may
    return an empty list when nothing fits.
    """
    while num_tokens_from_messages(messages) > max_num_tokens:
        system_fits = (messages[0]["role"] == "system"
                       and len(messages[0]["content"]) < max_num_tokens)
        if system_fits:
            # Keep the persona prompt, sacrifice the oldest dialogue turn.
            messages = messages[:1] + messages[2:]
        else:
            messages = messages[1:]
    return messages

def record_dialogue(history):
    """Store a conversation in the module-level ``all_dialogue`` log.

    If an already-recorded conversation is a prefix of this one (i.e. this is
    the same chat, continued), the old entry is replaced in place; otherwise
    the conversation is appended as a new entry.
    """
    dialogue = json.dumps(history, ensure_ascii=False)
    body = dialogue[1:-1]  # inner items, without the surrounding [ ]
    for idx, earlier in enumerate(all_dialogue):
        if body.startswith(earlier[1:-1]):
            all_dialogue[idx] = dialogue
            return
    all_dialogue.append(dialogue)


import gradio as gr


def to_md(content):
    """Prepare raw chat text for Gradio's markdown renderer.

    Outside fenced code blocks a newline becomes ``<br>``; inside a fence it
    becomes a blank line (``\\n\\n``). Every third backtick seen so far —
    counted cumulatively, not necessarily consecutively — toggles the
    code-block state; backticks themselves are passed through unchanged.
    """
    inside_code = False
    tick_count = 0
    pieces = []
    for ch in content:
        if ch == "\n":
            pieces.append("\n\n" if inside_code else "<br>")
            continue
        if ch == "`":
            tick_count += 1
            if tick_count == 3:
                tick_count = 0
                inside_code = not inside_code
        pieces.append(ch)
    return "".join(pieces)


def predict(question, history=[], behavior=[]):
    """Gradio submit callback: answer ``question`` and refresh the chat view.

    Messages prefixed with "<api_key>:" are routed to the admin handler.
    Returns (cleared textbox, updated history state, chatbot pairs, hidden
    download-file component).
    """
    admin_prefix = f"{openai.api_key}:"
    if question.startswith(admin_prefix):
        return adminInstruct(question, history)
    history = ask(question, history, behavior)
    # Pair up (user, assistant) turns and render each side as markdown.
    pairs = []
    for i in range(0, len(history) - 1, 2):
        pairs.append((to_md(history[i]), to_md(history[i + 1])))
    return "", history, pairs, gr.File.update(value=None, visible=False)


def retry(question, history=[], behavior=[]):
    """Re-ask the most recent question, discarding its previous answer.

    With fewer than two entries there is nothing to retry, so the UI is
    returned unchanged (empty chatbot, hidden download file).
    """
    if len(history) < 2:
        return "", history, [], gr.File.update(value=None, visible=False)
    last_question = history[-2]
    trimmed = history[:-2]
    return predict(last_question, trimmed, behavior)


def adminInstruct(question, history):
    """Handle admin commands sent through the chat box.

    Currently supports "download all dialogue", which dumps every recorded
    conversation to a JSONL file and exposes it via the download component.
    Returns the same 4-tuple shape as predict().
    """
    # BUG FIX: ``response`` was only assigned inside the if-branch, so any
    # other admin message hit the final return with an undefined name
    # (NameError). Build the chatbot view up front for both paths.
    response = [(to_md(history[i]), to_md(history[i + 1]))
                for i in range(0, len(history) - 1, 2)]
    if "download all dialogue" in question:
        filename = f"./all_dialogue_{len(all_dialogue)}.jsonl"
        with open(filename, "w", encoding="utf-8") as f:
            for dialogue in all_dialogue:
                f.write(dialogue + "\n")
        return "", history, response, gr.File.update(value=filename, visible=True)
    return "", history, response, gr.File.update(value=None, visible=False)


# ---------------------------------------------------------------------------
# Gradio UI wiring. Order matters: components must exist before the event
# bindings below reference them. NOTE(review): the `.style(...)` component
# calls used here belong to older Gradio releases — confirm the pinned
# gradio version before upgrading.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    
    # Example user questions offered below the textbox.
    examples_txt = [
        ['200字介绍一下凯旋门:'],
        ['网上购物有什么小窍门?'],
        ['补全下述对三亚的介绍:\n三亚位于海南岛的最南端,是'],
        ['将这句文言文翻译成英语:"逝者如斯夫,不舍昼夜。"'],
        ['Question: What\'s the best winter resort city? User: A 10-year professional traveler. Answer: '],
        ['How to help my child to make friends with his classmates? answer this question step by step:'],
        ['polish the following statement for a paper: In this section, we perform case study to give a more intuitive demonstration of our proposed strategies and corresponding explanation.'],
    ]
    
    # Example system-prompt personas for the behavior textbox.
    examples_bhv = [
        "你现在是一个带有批判思维的导游,会对景点的优缺点进行中肯的分析。",
        "你现在是一名佛教信仰者,但同时又对世界上其它的宗教和文化保持着包容、尊重和交流的态度。",
        f"You are a helpful assistant. You will answer all the questions step-by-step.",
        f"You are a helpful assistant. Today is {datetime.date.today()}.",
    ]
    
    # Extra example prompts pulled from a public HF dataset at startup
    # (requires network access when the app launches).
    prompt_dataset = load_dataset("fka/awesome-chatgpt-prompts")
    examples_more = prompt_dataset['train'].to_dict()['prompt']
    
    
    gr.Markdown(
        """
        朋友你好,
        
        这是我利用[gradio](https://gradio.app/creating-a-chatbot/)编写的一个小网页,用于以网页的形式给大家分享ChatGPT请求服务,希望你玩的开心。关于使用技巧或学术研讨,欢迎在[Community](https://huggingface.co/spaces/zhangjf/chatbot/discussions)中和我交流。
        
        p.s. 响应时间和聊天内容长度正相关,一般能在5秒~30秒内响应。
        """)
    
    # Per-session system-prompt list (fed to ask() as `behavior`).
    behavior = gr.State([])
    
    # Persona entry: a textbox plus a Set button on one row.
    with gr.Column(variant="panel"):
        with gr.Row().style(equal_height=True):
            with gr.Column(scale=0.85):
                bhv = gr.Textbox(show_label=False, placeholder="输入你想让ChatGPT扮演的人设").style(container=False)
            with gr.Column(scale=0.15, min_width=0):
                button_set = gr.Button("Set")
    # Both Enter and the Set button store the persona as a one-element list.
    bhv.submit(fn=lambda x:(x,[x]), inputs=[bhv], outputs=[bhv, behavior])
    button_set.click(fn=lambda x:(x,[x]), inputs=[bhv], outputs=[bhv, behavior])
    

    # Per-session dialogue history (flat [user, assistant, ...] list).
    state = gr.State([])
    
    # Main chat area: chatbot display, question box, action buttons.
    with gr.Column(variant="panel"):
        chatbot = gr.Chatbot()
        txt = gr.Textbox(show_label=False, placeholder="输入你想让ChatGPT回答的问题").style(container=False)
        with gr.Row():
            button_gen = gr.Button("Submit")
            button_rtr = gr.Button("Retry")
            button_clr = gr.Button("Clear")
    
    # Hidden file component, shown only for the admin dialogue download.
    downloadfile = gr.File(None, interactive=False, show_label=False, visible=False)
    gr.Examples(examples=examples_bhv, inputs=bhv, label="Examples for setting behavior")
    gr.Examples(examples=examples_txt, inputs=txt, label="Examples for asking question")
    gr.Examples(examples=examples_more, inputs=txt, label="More Examples from https://huggingface.co/datasets/fka/awesome-chatgpt-prompts")
    # NOTE(review): predict() returns four values but this binding lists only
    # three outputs (downloadfile is missing) — looks like an oversight;
    # compare with button_gen.click below. Confirm before changing.
    txt.submit(predict, [txt, state, behavior], [txt, state, chatbot])
    button_gen.click(fn=predict, inputs=[txt, state, behavior], outputs=[txt, state, chatbot, downloadfile])
    button_rtr.click(fn=retry, inputs=[txt, state, behavior], outputs=[txt, state, chatbot, downloadfile])
    button_clr.click(fn=lambda :([],[]), inputs=None, outputs=[chatbot, state])

# Optional request queueing (rate limiting) left disabled.
#demo.queue(concurrency_count=3, max_size=10)
demo.launch()