File size: 7,866 Bytes
04bdc20
 
 
 
e3788ae
04bdc20
 
7fb1b6e
04bdc20
 
 
50df3a5
 
7fb1b6e
 
04bdc20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3788ae
04bdc20
 
fbd8943
04bdc20
e3788ae
04bdc20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3788ae
 
04bdc20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3788ae
04bdc20
 
 
 
 
 
 
21f1de2
 
 
04bdc20
 
21f1de2
04bdc20
b1d9002
04bdc20
 
 
21f1de2
 
 
04bdc20
 
21f1de2
e3788ae
04bdc20
 
 
 
 
 
21f1de2
 
 
04bdc20
21f1de2
04bdc20
 
 
 
 
 
 
 
 
 
 
 
 
 
21f1de2
04bdc20
 
 
 
 
 
 
 
 
 
 
 
 
21f1de2
04bdc20
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import openai
import tiktoken

import collections
import threading
import datetime
import time
import pytz
import json
import os

# The OpenAI client is configured from the API_KEY environment variable.
openai.api_key = os.getenv('API_KEY')

# Timezone used when rendering timestamps for the user.
timezone = pytz.timezone('Asia/Shanghai')


def timestamp2string(timestamp):
    """Render a POSIX timestamp as 'YYYY-MM-DD HH:MM:SS' in ``timezone``."""
    moment = datetime.datetime.fromtimestamp(timestamp)
    return moment.astimezone(timezone).strftime('%Y-%m-%d %H:%M:%S')

def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
    """Count the tokens a list of chat messages occupies.

    Args:
        messages: iterable of dicts whose values are strings
            (e.g. ``{"role": ..., "content": ...}``).
        model: model name; only "gpt-3.5-turbo" is supported.

    Returns:
        The token count, including per-message framing and reply priming.

    Raises:
        NotImplementedError: if ``model`` is anything but "gpt-3.5-turbo".
    """
    # Resolve the tokenizer first; unknown model names fall back to cl100k_base.
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")

    if model != "gpt-3.5-turbo":  # note: future models may deviate from this
        raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")

    # every reply is primed with <im_start>assistant
    total = 2
    for message in messages:
        # every message follows <im_start>{role/name}\n{content}<im_end>\n
        total += 4
        for field, text in message.items():
            total += len(encoding.encode(text))
            if field == "name":
                # if there's a name, the role is omitted;
                # role is always required and always 1 token
                total -= 1
    return total


# Module-level state shared by the background worker and the Gradio callbacks.
# qs:  query strings (filled by read_qs)
# qas: {"q": ..., "a": ...} records (appended by askingChatGPT)
qs, qas = [], []
# Reference point for the elapsed / remaining time shown by display().
start_time = time.time()

def read_qs(qs, qas):
    """Load the queries to ask and resume from a previous run if possible.

    Every line of each dialogue file under ``./dialogues_set`` is a JSON
    object mapping the line index (as a string) to a query; each query is
    appended to ``qs``.  If ``qas.json`` exists, its records replace the
    contents of ``qas`` and one occurrence of each already-answered query is
    removed from ``qs``.

    Both lists are updated IN PLACE so that callers holding references to
    them (and callers that ignore the return value) observe the result.

    Args:
        qs: list that receives the queries still to be asked.
        qas: list that receives previously saved ``{"q": ..., "a": ...}``
            records.

    Returns:
        ``qs`` (the same list object), now holding only unanswered queries.
    """
    directory = "./dialogues_set"
    filenames = [
        'dialogues_film.json',
        'dialogues_jindong.json',
        'dialogues_music.json',
        'dialogues_natural.json',
        'dialogues_taobao.json',
        'dialogues_travel_kd.json',
    ]
    for filename in filenames:
        with open(f"{directory}/{filename}", "r", encoding="utf-8") as f:
            for idx, line in enumerate(f):
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                idx2query = json.loads(line)
                qs.append(idx2query[str(idx)])
    print(f"read {len(qs)} queries from files")

    if os.path.exists("qas.json"):
        with open("qas.json", "r", encoding="utf-8") as f:
            # Slice assignment keeps the caller's list object; a plain
            # `qas = ...` would only rebind the local name.
            qas[:] = json.load(f)
        print(f"read {len(qas)} query-responses from qas.json")

        # A Counter (not a set) so duplicated queries are skipped only as
        # many times as they were actually answered.
        existed_qs = collections.Counter(qa["q"] for qa in qas)
        remained_qs = []
        for q in qs:
            if existed_qs[q] > 0:
                existed_qs[q] -= 1
            else:
                remained_qs.append(q)
        print(f"filter out {len(qs)-len(remained_qs)} with reference to qas.json")
        qs[:] = remained_qs

    return qs


def ask(query, max_attempt_times=3):
    """Send ``query`` to gpt-3.5-turbo as a single user message.

    Failed attempts are retried with a linearly growing back-off
    (10 s, 20 s, ...) until ``max_attempt_times`` attempts have been made.

    Args:
        query: the user message content.
        max_attempt_times: maximum number of API calls to attempt.

    Returns:
        The assistant's reply text, or ``None`` if every attempt failed
        (or if ``max_attempt_times`` is not positive).
    """
    answer = None
    attempt_times = 0
    while answer is None and attempt_times < max_attempt_times:
        attempt_times += 1
        try:
            # Store the reply in `answer`; otherwise the loop condition never
            # sees a result and the function always returns None.
            answer = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "user", "content": query}
                ]
            )["choices"][0]["message"]["content"]
        except Exception as e:
            # Best-effort: any failure (network, rate limit, malformed
            # response) is reported and retried rather than propagated.
            print(e)
            if attempt_times < max_attempt_times:
                # Only wait when another attempt will actually follow.
                print(f"retry in {attempt_times*10} seconds...")
                time.sleep(attempt_times*10)
    return answer


def askingChatGPT(qs, qas, min_interval_seconds=10):
    """Ask every pending query in turn and record the responses in ``qas``.

    Args:
        qs: list that ``read_qs`` fills with queries.
        qas: list that receives one ``{"q": query, "a": answer}`` record per
            query; ``a`` is ``None`` when ``ask`` gave up on that query.
        min_interval_seconds: minimum wall-clock time spent per query; the
            loop sleeps for whatever is left after the request returns.
    """
    # Iterate over what read_qs RETURNS: that is the list with queries
    # already answered in a previous run filtered out.
    pending = read_qs(qs, qas)
    total = len(pending)

    for i, q in enumerate(pending, start=1):
        ask_start_time = time.time()

        a = ask(q)
        qas.append({"q": q, "a": a})

        elapsed_time = time.time() - ask_start_time
        delayTime = min_interval_seconds - elapsed_time
        if delayTime > 0:
            time.sleep(delayTime)

        # 1-based counter so the final line reads "N / N".
        print(f"{timestamp2string(time.time())}:  iterations:  {i} / {total} | elapsed time of this query (s):  {elapsed_time:.2f}")


# Run the querying loop in a daemon thread so it does not block the UI
# set-up below and does not keep the process alive on exit.
thread = threading.Thread(target=lambda: askingChatGPT(qs, qas), daemon=True)
thread.start()


import gradio as gr


def showcase(access_key):
    """Build chatbot rows showing the most recent query-responses.

    Args:
        access_key: key typed by the user; must equal the ``access_key``
            environment variable.

    Returns:
        A list of ``(left, right)`` string tuples for the chatbot: a
        rejection message if the key is wrong, otherwise a header row
        followed by up to ten of the latest ``(query, response)`` pairs with
        newlines converted to ``<br>``.
    """
    if access_key != os.getenv('access_key'):
        return [(f"Your entered Access Key:<br>{access_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]

    # Records are appended chronologically, so the latest ones are at the end.
    recent_qas = qas[-10:]
    chatbot_ret = [(f"Your entered Access Key is correct.", f"The latest {len(recent_qas)} query-responses are displayed below.")]
    for qa in recent_qas:
        question = qa["q"].replace("\n", "<br>")
        # The answer is None when every API attempt failed; show it as empty.
        answer = (qa["a"] or "").replace("\n", "<br>")
        chatbot_ret.append((question, answer))
    return chatbot_ret


def download(access_key):
    """Dump all collected query-responses to a JSON file for download.

    Args:
        access_key: key typed by the user; must equal the ``access_key``
            environment variable.

    Returns:
        ``(chatbot_rows, file_update)``: the chatbot message and a
        ``gr.File`` update that either hides the file widget (wrong key) or
        points it at the freshly written ``qas-<count>.json``.
    """
    if access_key != os.getenv('access_key'):
        chatbot_ret = [(f"Your entered Access Key:<br>{access_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]
        file_ret = gr.File.update(value=None, visible=False)
        return chatbot_ret, file_ret

    # Take one snapshot: the worker thread keeps appending to `qas`, and the
    # count in the message, the filename and the file content must agree.
    snapshot = list(qas)
    count = len(snapshot)

    chatbot_ret = [(f"Your entered Access Key is correct.", f"The file containing all processed query-responses ({count} in total) can be downloaded below.")]
    filename = f"qas-{count}.json"
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(snapshot, f, ensure_ascii=False, indent=2)
    file_ret = gr.File.update(value=filename, visible=True)
    return chatbot_ret, file_ret


def display(access_key):
    """Build chatbot rows summarising progress, timing and token cost.

    Totals are extrapolated linearly from the records processed so far.

    Args:
        access_key: key typed by the user; must equal the ``access_key``
            environment variable.

    Returns:
        A list of ``(left, right)`` string tuples for the chatbot: a
        rejection message, a "nothing to report yet" message, or the
        progress report.
    """
    if access_key != os.getenv('access_key'):
        return [(f"Your entered Access Key:<br>{access_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]

    # Freeze the processed records once; the worker thread appends to `qas`
    # concurrently and every figure below should describe the same state.
    processed = list(qas)
    num_processed = len(processed)
    if num_processed < 1:
        return [(f"Your entered Access Key is correct.", f"But the progress has just started for a while and has no useful progress information to provide.")]

    num_total = len(qs)
    now = time.time()
    time_takes = now - start_time
    # Clamp at zero so the estimate never goes negative.
    time_remains = time_takes * max(num_total - num_processed, 0) / num_processed
    # The finish time is "now + remaining", i.e. start + elapsed + remaining.
    end_time = now + time_remains

    messages = []
    for qa in processed:
        messages.append({"role": "user", "content": qa["q"]})
        # The answer is None when every API attempt failed; count it as empty.
        messages.append({"role": "assistant", "content": qa["a"] or ""})
    num_tokens_processed = num_tokens_from_messages(messages)
    num_tokens_total = num_tokens_processed * num_total / num_processed
    # 0.002 dollars per 1000 tokens, counting whole thousands only.
    dollars_tokens_processed = 0.002 * int(num_tokens_processed/1000)
    dollars_tokens_total = 0.002 * int(num_tokens_total/1000)

    chatbot_ret = [(f"Your entered Access Key is correct.", f"The information of progress is displayed below.")]
    chatbot_ret += [(f"The number of processed / total queries:", f"{num_processed} / {num_total}")]
    chatbot_ret += [(f"The hours already takes / est. remains:", f"{time_takes/3600:.2f} / {time_remains/3600:.2f}")]
    chatbot_ret += [(f"The time starts / est. ends:", f"{timestamp2string(start_time)} / {timestamp2string(end_time)}")]
    chatbot_ret += [(f"The number of processed / est. total tokens:", f"{num_tokens_processed} / {num_tokens_total}")]
    chatbot_ret += [(f"The dollars of processed / est. total tokens:", f"{dollars_tokens_processed} / {dollars_tokens_total}")]

    return chatbot_ret


# Gradio front-end: one access-key textbox and three buttons, each wired to
# one of the callbacks above.
with gr.Blocks() as demo:

    with gr.Column(variant="panel"):
        # Callback results are rendered as chat rows.
        chatbot = gr.Chatbot()
        txt = gr.Textbox(show_label=False, placeholder="Enter your Access Key to access this private space").style(container=False)
        with gr.Row():
            button_showcase = gr.Button("Show Recent Query-Responses")
            button_download = gr.Button("Download All Query-Responses")
            button_display = gr.Button("Display Progress Information")

    # Created hidden; download() returns an update that makes it visible.
    downloadfile = gr.File(None, interactive=False, show_label=False, visible=False)

    button_showcase.click(fn=showcase, inputs=[txt], outputs=[chatbot])
    button_download.click(fn=download, inputs=[txt], outputs=[chatbot, downloadfile])
    button_display.click(fn=display, inputs=[txt], outputs=[chatbot])

demo.launch()