import os

import gradio as gr
from openai import AsyncOpenAI

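# Default model list and connection settings; OPENAI_BASE_URL and
# OPENAI_API_KEY can be overridden at runtime via the UI fields below.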
default_model = "llama3:8b-instruct-q4_K_M"
models = ["llama3:8b-instruct-q4_K_M", "codestral:22b-v0.1-q4_K_M"]
description = "Learn more at https://replicantzk.com."
base_url = os.getenv("OPENAI_BASE_URL") or "https://platform.replicantzk.com"
api_key = os.getenv("OPENAI_API_KEY")

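# Chat handler: converts Gradio's (user, assistant) history pairs into the
# OpenAI messages format and relays the request to the configured endpoint.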
async def predict(message, history, model, temperature, stream, base_url, api_key):
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})
    try:
        # Create the client inside the try block so an invalid base URL or
        # missing API key is surfaced in the UI instead of crashing unhandled.
        client = AsyncOpenAI(base_url=base_url, api_key=api_key)
        response = await client.chat.completions.create(
            model=model,
            messages=history_openai_format,
            temperature=temperature,
            stream=stream,
        )
        if stream:
            # Accumulate deltas and yield the growing message so Gradio
            # re-renders the reply as tokens arrive.
            partial_message = ""
            async for chunk in response:
                if chunk.choices[0].delta.content is not None:
                    partial_message += chunk.choices[0].delta.content
                    yield partial_message
        else:
            yield response.choices[0].message.content
    except Exception as e:
        # Surface any API error as a Gradio error toast.
        raise gr.Error(str(e))

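# Extra controls rendered beneath the chat box; their values are passed to
# predict in the order listed in additional_inputs.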
model = gr.Dropdown(label="Model", choices=models, value=default_model)
temperature = gr.Slider(0, 1, value=0, label="Temperature")
stream = gr.Checkbox(value=True, label="Stream")
base_url = gr.Textbox(label="OpenAI-compatible base URL", value=base_url)
api_key = gr.Textbox(label="OpenAI-compatible API key", type="password", value=api_key)

demo = gr.ChatInterface(
    fn=predict,
    additional_inputs=[model, temperature, stream, base_url, api_key],
    description=description,
)

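# Launch the local Gradio server when run directly (e.g. `python app.py`);
# on Hugging Face Spaces the `demo` object is typically picked up automatically.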
if __name__ == "__main__":
    demo.launch()