BaseChat / app.py
yuchenlin's picture
urial chat initial version
0231e6a
raw
history blame
4.66 kB
import gradio as gr
from openai import OpenAI
import os
from typing import List
import logging
# Send INFO-level records to the console so every request can be traced.
logging.basicConfig(level=logging.INFO)

# Endpoint handed to the OpenAI client as base_url, and the fallback API key
# (None when the TOGETHER_API_KEY environment variable is not set).
BASE_URL = "https://api.together.xyz/v1"
DEFAULT_API_KEY = os.getenv("TOGETHER_API_KEY")

import urllib.request

# The URIAL few-shot prompt is downloaded once, when the module is imported.
URIAL_VERSION = "inst_1k_v4.help"
urial_url = f"https://raw.githubusercontent.com/Re-Align/URIAL/main/urial_prompts/{URIAL_VERSION}.txt"
# Use a context manager so the HTTP response is always closed, and bound the
# wait so a stalled connection cannot hang start-up indefinitely.
with urllib.request.urlopen(urial_url, timeout=30) as _urial_response:
    urial_prompt = _urial_response.read().decode('utf-8')
# Swap Markdown code fences for triple quotes, the delimiter that the turn
# template and the stop strings below rely on.
urial_prompt = urial_prompt.replace("```", '"""')
# Generation is cut as soon as any of these markers shows up in the output.
stop_str = ['"""', '# Query:', '# Answer:']
def urial_template(urial_prompt, history, message):
    """Assemble the complete URIAL completion prompt for the next turn.

    The few-shot ``urial_prompt`` comes first, then every past
    ``(user_msg, ai_msg)`` pair from ``history`` rendered as a
    ``# Query:`` / ``# Answer:`` exchange, and finally ``message`` as a new
    query whose answer block is left open for the model to continue.

    Returns the assembled prompt as a single string.
    """
    # Collect the segments and join once instead of growing a string.
    segments = [urial_prompt + "\n"]
    segments.extend(
        f'# Query:\n"""\n{user_msg}\n"""\n\n# Answer:\n"""\n{ai_msg}\n"""\n\n'
        for user_msg, ai_msg in history
    )
    # The trailing answer block is deliberately left unterminated.
    segments.append(f'# Query:\n"""\n{message}\n"""\n\n# Answer:\n"""\n')
    return "".join(segments)
def openai_base_request(
    model: str=None,
    temperature: float=0,
    max_tokens: int=512,
    top_p: float=1.0,
    prompt: str=None,
    n: int=1,
    repetition_penalty: float=1.0,
    stop: List[str]=None,
    api_key: str=None,
):
    """Open a streaming text-completion request against ``BASE_URL``.

    Args:
        model: Model identifier forwarded to the completions endpoint.
        temperature: Sampling temperature; coerced with ``float``.
        max_tokens: Generation length limit; coerced with ``int``.
        top_p: Nucleus-sampling value; coerced with ``float``.
        prompt: Raw prompt text to complete.
        n: Number of completions requested.
        repetition_penalty: Coerced with ``float`` and sent through
            ``extra_body`` rather than as a regular keyword argument.
        stop: Stop sequences forwarded to the endpoint.
        api_key: Key to authenticate with; ``DEFAULT_API_KEY`` is used
            when this is ``None``.

    Returns:
        Whatever ``client.completions.create`` returns for ``stream=True``;
        the caller iterates over it to receive chunks.
    """
    effective_key = DEFAULT_API_KEY if api_key is None else api_key
    client = OpenAI(api_key=effective_key, base_url=BASE_URL)

    # Record the request parameters before anything is sent.
    log_lines = (
        f"Requesting chat completion from OpenAI API with model {model}",
        f"Prompt: {prompt}",
        f"Temperature: {temperature}",
        f"Max tokens: {max_tokens}",
        f"Top-p: {top_p}",
        f"Repetition penalty: {repetition_penalty}",
        f"Stop: {stop}",
    )
    for line in log_lines:
        logging.info(line)

    # Numeric settings may arrive as text, hence the explicit coercions.
    completion_args = {
        "model": model,
        "prompt": prompt,
        "temperature": float(temperature),
        "max_tokens": int(max_tokens),
        "top_p": float(top_p),
        "n": n,
        "extra_body": {'repetition_penalty': float(repetition_penalty)},
        "stop": stop,
        "stream": True,
    }
    return client.completions.create(**completion_args)
def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
    top_p,
    rp,
    model_name,
    together_api_key
):
    """Stream the model's answer to ``message`` for the chat interface.

    Builds a URIAL prompt from the module-level few-shot prefix, ``history``
    and ``message``, requests a streaming completion from the selected base
    model, and yields the accumulated answer text after each received chunk.
    Streaming ends as soon as the accumulated text contains any entry of
    ``stop_str``; the text containing the stop marker is not yielded.

    Args:
        message: The user's new query.
        history: Earlier ``(user_msg, ai_msg)`` turns.
        max_tokens: Generation length limit, forwarded unchanged.
        temperature: Sampling temperature, forwarded unchanged.
        top_p: Nucleus-sampling value, forwarded unchanged.
        rp: Repetition penalty from the caller (currently overridden, see
            the note in the body).
        model_name: Either ``"Llama-3-8B"`` or ``"Llama-3-70B"``.
        together_api_key: Optional user key; used only when it is exactly
            64 characters long, otherwise ``DEFAULT_API_KEY`` is used.

    Yields:
        The answer text accumulated so far.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names
            (raised when the generator is first advanced).
    """
    # NOTE(review): the caller-supplied repetition penalty is discarded and
    # 1.0 is always sent. Kept as found - confirm whether this is intended.
    rp = 1.0
    prompt = urial_template(urial_prompt, history, message)

    if model_name == "Llama-3-8B":
        _model_name = "meta-llama/Llama-3-8b-hf"
    elif model_name == "Llama-3-70B":
        _model_name = "meta-llama/Llama-3-70b-hf"
    else:
        raise ValueError("Invalid model name")

    # Anything that is not a 64-character key silently falls back to the default.
    if together_api_key and len(together_api_key) == 64:
        api_key = together_api_key
    else:
        api_key = DEFAULT_API_KEY

    request = openai_base_request(prompt=prompt, model=_model_name,
                                  temperature=temperature,
                                  max_tokens=max_tokens,
                                  top_p=top_p,
                                  repetition_penalty=rp,
                                  stop=stop_str, api_key=api_key)

    response = ""
    for msg in request:
        # Defensive: a chunk without choices carries no text to append.
        if not msg.choices:
            continue
        # `delta` is indexed as a mapping here; presumably it is supplied by
        # the Together endpoint on completion chunks - verify against the API.
        token = msg.choices[0].delta["content"]
        # Defensive: skip chunks with no content instead of failing on
        # `str + None`.
        if token is None:
            continue
        response += token
        # Stop locally as well, in case a marker slips through in the stream.
        if any(_stop in response for _stop in stop_str):
            break
        yield response
# Page layout: a settings row on top and the chat interface underneath.
# NOTE(review): the nesting below was reconstructed from the order of the
# widgets; confirm the intended column/row arrangement.
with gr.Blocks() as demo:
    with gr.Row():
        # First column: banner, base-model picker and optional user API key.
        with gr.Column():
            gr.Label("Welcome to the URIAL Chatbot!")
            model_name = gr.Radio(
                ["Llama-3-8B", "Llama-3-70B"],
                value="Llama-3-8B",
                label="Base model name",
            )
            together_api_key = gr.Textbox(
                label="Together API Key",
                placeholder="Enter your Together API Key. Leave it blank if you want to use the default API key.",
                type="password",
            )
        # Second column: generation settings, entered as free text and
        # passed to `respond` as additional inputs.
        with gr.Column():
            with gr.Column():
                with gr.Row():
                    max_tokens = gr.Textbox(
                        value=1024,
                        label="Max tokens",
                    )
                    temperature = gr.Textbox(
                        value=0.5,
                        label="Temperature",
                    )
            with gr.Column():
                with gr.Row():
                    top_p = gr.Textbox(
                        value=0.9,
                        label="Top-p",
                    )
                    rp = gr.Textbox(
                        value=1.1,
                        label="Repetition penalty",
                    )

    # The order of this list must match the extra parameters of `respond`.
    extra_inputs = [max_tokens, temperature, top_p, rp, model_name, together_api_key]
    chat = gr.ChatInterface(respond, additional_inputs=extra_inputs)
    chat.chatbot.height = 600

# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()