import openai
import tiktoken
import numpy as np
import concurrent.futures
import threading
import datetime
import time
import pytz
import json
import os
openai.api_key = os.environ["api_key"]
timezone = pytz.timezone('Asia/Shanghai')
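# Format a Unix timestamp as an Asia/Shanghai "YYYY-MM-DD HH:MM:SS" string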
timestamp2string = lambda timestamp: datetime.datetime.fromtimestamp(timestamp).astimezone(timezone).strftime('%Y-%m-%d %H:%M:%S')
def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
"""Returns the number of tokens used by a list of messages."""
try:
encoding = tiktoken.encoding_for_model(model)
except KeyError:
encoding = tiktoken.get_encoding("cl100k_base")
if model == "gpt-3.5-turbo": # note: future models may deviate from this
num_tokens = 0
len_values = 0
for message in messages:
num_tokens += 4 # every message follows <im_start>{role/name}\n{content}<im_end>\n
for key, value in message.items():
                try:
                    num_tokens += len(encoding.encode(value))
                except Exception:
                    # fallback: linear estimate from the tokens-per-character ratio seen so far
                    # (assumes roughly 4 characters per token when no history is available yet)
                    num_tokens += int(num_tokens / len_values * len(value)) if len_values > 0 else len(value) // 4
                len_values += len(value)
if key == "name": # if there's a name, the role is omitted
num_tokens += -1 # role is always required and always 1 token
num_tokens += 2 # every reply is primed with <im_start>assistant
return num_tokens
else:
raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
def read_tasks(cache_file=None):
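    """Load query-answer pairs from qas.json (plus an optional cache file) and split
    them into unanswered queries (qs) and already-answered pairs (qas)."""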
#from make_qas import input_dir
#from make_qas_comparison import input_dir
input_dir = "."
file = f"{input_dir}/qas.json"
with open(file, "r", encoding="utf-8") as f:
qas = json.loads(f.read())
if cache_file is not None:
with open(cache_file, "r", encoding="utf-8") as f:
cache_qas = json.loads(f.read())
cache_q2a = {qa["q"]:qa["a"] for qa in cache_qas}
else:
cache_q2a = {}
    qs = [qa["q"] for qa in qas if qa["a"] is None and qa["q"] not in cache_q2a] # queries not yet processed
    qas = [{"q":qa["q"], "a":qa["a"] if qa["a"] is not None else cache_q2a[qa["q"]]}
           for qa in qas if qa["a"] is not None or qa["q"] in cache_q2a] # queries already processed
print(f"read {len(qs)} queries without responses from {file} or {cache_file}")
print(f"read {len(qas)} queries with responses from {file} or {cache_file}")
return qs, qas
qs, qas = read_tasks()
start_time = time.time()
num_read_qas = len(qas)
def ask(query, timeout=600):
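    """Send a single query to gpt-3.5-turbo, retrying with linearly growing backoff
    until an answer arrives, `timeout` seconds elapse, or 10 attempts fail."""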
answer = None
dead_time = time.time() + timeout
attempt_times = 0
while answer is None and time.time()<dead_time and attempt_times<10:
try:
messages=[
{"role": "user", "content": query}
]
if num_tokens_from_messages(messages)>4096:
return None
answer = openai.ChatCompletion.create(
model="gpt-3.5-turbo-0301",
messages=messages,
temperature=0.1,
)["choices"][0]["message"]["content"]
        except Exception as e:
            if time.time() < dead_time:
                print(e)
                if "Please reduce the length of the messages." in str(e):
                    return None
                attempt_times += 1
                wait_time = int(attempt_times * 10)
                print(f"retrying in {wait_time} seconds...")
                time.sleep(wait_time)
return answer
def askingChatGPT(qs, qas, min_interval_seconds=3, max_interval_seconds=15, max_retry_times=3):
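    """Process each query in qs sequentially, appending {"q", "a"} records to qas.
    Each ask() call runs in a worker thread with an adaptive timeout derived from
    recent per-query elapsed times, and queries are spaced at least
    min_interval_seconds apart."""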
history_elapsed_time = [max_interval_seconds]*10
for i, q in enumerate(qs):
ask_start_time = time.time()
        # The most direct approach is to call ask() directly, but that call can block due to timeouts etc.
        #a = ask(q)
        # Below is the timeout-then-retry wrapper I designed for this:
        def ask_(q, timeout):
            executor = concurrent.futures.ThreadPoolExecutor()
            future = executor.submit(ask, q, timeout) # submit the ask() call as a background task
            try:
                a = future.result(timeout=timeout) # wait for completion, at most `timeout` seconds
                return a
            except concurrent.futures.TimeoutError:
                print(f"ask call timed out after {timeout:.2f} seconds, retrying...")
                executor.shutdown(wait=False) # abandon the hung worker thread
                return ask_(q, timeout*2) # retry with a doubled timeout
retry_times = 0
a = None
while a is None and retry_times<max_retry_times:
            a = ask_(q, timeout=max(max_interval_seconds, np.mean(sorted(history_elapsed_time)[:8]))) # adaptive timeout: mean of the 8 smallest recent elapsed times
retry_times += 1
qas.append({"q":q, "a":a})
ask_end_time = time.time()
elapsed_time = ask_end_time - ask_start_time
history_elapsed_time = history_elapsed_time[1:] + [elapsed_time]
delayTime = min_interval_seconds - elapsed_time
if delayTime>0:
time.sleep(delayTime)
print(f"{timestamp2string(time.time())}: iterations: {i+1} / {len(qs)} | elapsed time of this query (s): {elapsed_time:.2f}")
return
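# Process the queries on a background daemon thread so the Gradio app below stays
# responsive, and the worker dies together with the main process.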
thread = threading.Thread(target=lambda :askingChatGPT(qs, qas))
thread.daemon = True
thread.start()
import gradio as gr
def showcase(access_key):
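    """Gradio handler: show the 10 most recent query-response pairs, gated by the access key."""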
if not access_key==os.getenv('access_key'):
chatbot_ret = [(f"Your entered Access Key:<br>{access_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]
else:
recent_qas = qas[-10:]
chatbot_ret = [(f"Your entered Access Key is correct.", f"The latest {len(recent_qas)} query-responses are displayed below.")]
for qa in recent_qas:
chatbot_ret += [(qa["q"].replace("\n","<br>"), str(qa["a"]).replace("\n","<br>"))]
return chatbot_ret
def download(access_key):
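    """Gradio handler: dump all processed query-response pairs to a JSON file and offer it for download."""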
if not access_key.startswith(os.getenv('access_key')):
chatbot_ret = [(f"Your entered Access Key:<br>{access_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]
file_ret = gr.File.update(value=None, visible=False)
else:
chatbot_ret = [(f"Your entered Access Key is correct.", f"The file containing all processed query-responses ({len(qas)} in total) can be downloaded below.")]
#from make_qas import input_dir
input_dir = "."
filename = f"{input_dir}/qas-{len(qas)}.json"
with open(filename, "w", encoding="utf-8") as f:
f.write(json.dumps(qas, ensure_ascii=False, indent=4))
file_ret = gr.File.update(value=filename, visible=True)
return chatbot_ret, file_ret
def display(access_key):
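    """Gradio handler: report progress statistics, including counts, timing, token usage, and estimated cost."""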
if not access_key==os.getenv('access_key'):
chatbot_ret = [(f"Your entered Access Key:<br>{access_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]
elif len(qas)-num_read_qas<1:
chatbot_ret = [(f"Your entered Access Key is correct.", f"But the progress has just started for a while and has no useful progress information to provide.")]
else:
num_total_qs, num_processed_qs = len(qs), len(qas) - num_read_qas
time_takes = time.time() - start_time
time_remains = time_takes * (num_total_qs-num_processed_qs) / num_processed_qs
end_time = start_time + time_takes + time_remains
messages = []
for qa in qas:
messages.append({"role":"user", "content":qa["q"]})
messages.append({"role":"assistant", "content":qa["a"] or ""})
num_tokens_processed = num_tokens_from_messages(messages)
num_tokens_total = int(num_tokens_processed * (num_total_qs+num_read_qas) / (num_processed_qs+num_read_qas))
        dollars_tokens_processed = 0.002 * num_tokens_processed / 1000 # $0.002 per 1K tokens for gpt-3.5-turbo
        dollars_tokens_total = 0.002 * num_tokens_total / 1000
chatbot_ret = [(f"Your entered Access Key is correct.", f"The information of progress is displayed below.")]
chatbot_ret += [(f"The number of processed / total queries:", f"{num_processed_qs} / {num_total_qs} (+{num_read_qas})")]
chatbot_ret += [(f"The hours already takes / est. remains:", f"{time_takes/3600:.2f} / {time_remains/3600:.2f}")]
chatbot_ret += [(f"The time starts / est. ends:", f"{timestamp2string(start_time)} / {timestamp2string(end_time)}")]
chatbot_ret += [(f"The number of processed / est. total tokens:", f"{num_tokens_processed} / {num_tokens_total}")]
chatbot_ret += [(f"The dollars of processed / est. total tokens:", f"{dollars_tokens_processed:.2f} / {dollars_tokens_total:.2f}")]
return chatbot_ret
with gr.Blocks() as demo:
gr.Markdown(
"""
Hello friends,
    Thanks for your interest in this space. However, this space is for my own use, i.e., building a dataset with answers from ChatGPT, and the access key for runtime feedback is shared only with my colleagues.
    If you just want to chat with ChatGPT on Hugging Face as the title suggests, you can try this [one](https://huggingface.co/spaces/zhangjf/chatbot) I built for public use.
"""
)
with gr.Column(variant="panel"):
chatbot = gr.Chatbot()
txt = gr.Textbox(show_label=False, container=False,
placeholder="Enter your Access Key to access this private space")
with gr.Row():
button_showcase = gr.Button("Show Recent Query-Responses")
button_download = gr.Button("Download All Query-Responses")
            button_display = gr.Button("Display Progress Information")
downloadfile = gr.File(None, interactive=False, show_label=False, visible=False)
button_showcase.click(fn=showcase, inputs=[txt], outputs=[chatbot])
button_download.click(fn=download, inputs=[txt], outputs=[chatbot, downloadfile])
button_display.click(fn=display, inputs=[txt], outputs=[chatbot])
demo.launch()