# Spaces: (HuggingFace Space page header — scrape artifact, kept as a comment)
# Runtime error (status badge shown on the Space page; not part of the program)
import llama_cpp
import llama_cpp.llama_tokenizer

import gradio as gr

# Load the q8_0-quantized Qwen1.5 chat model from the HF Hub. The HF
# tokenizer is supplied explicitly so tokenization matches the original
# Qwen model rather than the GGUF's embedded vocabulary.
llama = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",  # glob: pick the q8_0 quantization file in the repo
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
        "Qwen/Qwen1.5-0.5B"
    ),
    verbose=False,
)

# Model name forwarded through the OpenAI-compatible API. llama-cpp-python
# serves the single local model regardless, so this value is only a label.
model = "gpt-3.5-turbo"
def predict(message, history):
    """Stream an assistant reply for *message*, given the prior chat *history*.

    Args:
        message: The latest user message (str).
        history: Prior turns as (user_message, assistant_message) pairs —
            gradio's tuple-style chat history. NOTE(review): newer gradio
            versions pass "messages"-style dicts instead; confirm the
            installed gradio version still uses tuples.

    Yields:
        str: The accumulated assistant reply so far, so gradio can render
        the text as it streams in.
    """
    # Rebuild the OpenAI-style message list from the tuple history.
    messages = []
    for user_message, assistant_message in history:
        messages.append({"role": "user", "content": user_message})
        messages.append({"role": "assistant", "content": assistant_message})
    messages.append({"role": "user", "content": message})

    # OpenAI-v1-compatible streaming completion from the local model.
    response = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        stream=True,
    )

    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        # The final chunk's delta carries no content — skip empty deltas.
        if content:
            text += content
            yield text
# Client-side JS run once on page load: force gradio's dark theme by
# appending the ?__theme=dark query parameter and reloading if it is absent.
# (The scraped version embedded " | |" artifacts inside this string, which
# made the JavaScript itself invalid — removed here.)
js = """function () {
    gradioURL = window.location.href
    if (!gradioURL.endsWith('?__theme=dark')) {
        window.location.replace(gradioURL + '?__theme=dark');
    }
}"""
# Page CSS: hide the gradio footer and define a full-height rule.
# NOTE(review): the bare `full-height` selector targets a <full-height>
# element, not a class — if a class was intended it should read
# `.full-height`; confirm against the markup before changing. Kept as-is
# to preserve behavior; only the " | |" scrape artifacts (which were
# inside the string and corrupted the CSS) are removed.
css = """
footer {
    visibility: hidden;
}
full-height {
    height: 100%;
}
"""
# Assemble the UI: a streaming chat interface with the dark theme forced
# via the `js` snippet and the footer hidden via `css`.
with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
    gr.ChatInterface(
        predict,
        fill_height=True,
        examples=[
            "What is the capital of France?",
            "Who was the first person on the moon?",
        ],
    )

if __name__ == "__main__":
    demo.launch()