import openai import tiktoken import datetime import time import json import os openai.api_key = os.getenv('API_KEY') openai.request_times = 0 def ask(question, history, behavior): openai.request_times += 1 print(f"request times {openai.request_times}: {datetime.datetime.now()}: {question}") try: messages = [ {"role":"system", "content":content} for content in behavior ] + [ {"role":"user" if i%2==0 else "assistant", "content":content} for i,content in enumerate(history + [question]) ] raw_length = num_tokens_from_messages(messages) messages=forget_long_term(messages) if len(messages)==0: response = f'Your query is too long and expensive: {raw_length}>2000 tokens' else: response = openai.ChatCompletion.create( model="gpt-3.5-turbo-0301", messages=messages, temperature=0.1, )["choices"][0]["message"]["content"] while response.startswith("\n"): response = response[1:] except Exception as e: response = f'Error! You may wait a few minutes and retry:\n{e}' history = history + [question, response] return history def num_tokens_from_messages(messages, model="gpt-3.5-turbo"): """Returns the number of tokens used by a list of messages.""" try: encoding = tiktoken.encoding_for_model(model) except KeyError: encoding = tiktoken.get_encoding("cl100k_base") if model == "gpt-3.5-turbo": # note: future models may deviate from this num_tokens = 0 for message in messages: num_tokens += 4 # every message follows {role/name}\n{content}\n for key, value in message.items(): num_tokens += len(encoding.encode(value)) if key == "name": # if there's a name, the role is omitted num_tokens += -1 # role is always required and always 1 token num_tokens += 2 # every reply is primed with assistant return num_tokens else: raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""") def forget_long_term(messages, max_num_tokens=3000): while num_tokens_from_messages(messages)>max_num_tokens: if messages[0]["role"]=="system" and not len(messages[0]["content"])>=max_num_tokens: messages = messages[:1] + messages[2:] else: messages = messages[1:] return messages import gradio as gr def to_md(content): is_inside_code_block = False output_spans = [] for i in range(len(content)): if content[i]=="\n" and not is_inside_code_block: if len(output_spans)>0 and output_spans[-1].endswith("```"): output_spans.append("\n") else: output_spans.append("
") elif content[i]=="`": output_spans.append(content[i]) if len(output_spans)>=3 and all([output_spans[j]=="`" for j in [-3,-2,-1]]): is_inside_code_block = not is_inside_code_block output_spans = output_spans[:-3] if is_inside_code_block: if len(output_spans)==0: output_spans.append("```") elif output_spans[-1]=="
": output_spans[-1] = "\n" output_spans.append("```") elif output_spans[-1].endswith("\n"): output_spans.append("```") else: output_spans.append("\n```") if i+1