Spaces:
Runtime error
Runtime error
import gradio as gr | |
import copy | |
from llama_cpp import Llama | |
from huggingface_hub import hf_hub_download # load from huggingfaces | |
# Hugging Face Hub coordinates of the quantized checkpoint served by this app.
CONST_REPO_ID = "TheBloke/Llama-2-7B-Chat-GGML"
CONST_FILENAME = "llama-2-7b-chat.ggmlv3.q6_K.bin"

# Value handed to llama.cpp as `n_ctx` when the model is created.
N_CTX = 4096

# The result of hf_hub_download() is used directly as the model path, so the
# download and the model load both happen once, when this module is imported.
llm = Llama(
    model_path=hf_hub_download(
        repo_id=CONST_REPO_ID,
        filename=CONST_FILENAME,
    ),
    n_ctx=N_CTX,
)

# NOTE(review): nothing visible in this file reads this module-level name;
# generate_text() works on its own `history` argument instead.
history = N_CTX

# Text inserted after "SYSTEM:" at the start of a conversation prompt.
pre_prompt = (
    " The user and the AI are having a conversation : <|endoftext|> \n"
)
# Tokens kept free for the model's answer when old turns are trimmed.
_MIN_REPLY_TOKENS = 256


def _format_prompt(input_text, turns):
    """Render the system preamble, earlier turns and the new message as one prompt."""
    if not turns:
        # First message of a chat: exactly the text the original code built
        # (including the space that follows the user message).
        return (
            f"SYSTEM:{pre_prompt}" + "\n"
            + f"USER: {input_text} " + "\n"
            + " ASSISTANT:"
        )

    lines = [f"SYSTEM:{pre_prompt}"]
    for user_msg, bot_msg in turns:
        # Either side of a stored turn may be missing; render it as empty text.
        lines.append(f"USER: {user_msg or ''}")
        lines.append(f" ASSISTANT:{bot_msg or ''}")
    lines.append(f"USER: {input_text}")
    lines.append(" ASSISTANT:")
    return "\n".join(lines)


def _fit_prompt(input_text, history):
    """Return (prompt, token_count), dropping the oldest turns until a reply fits."""
    turns = list(history or [])
    while True:
        prompt = _format_prompt(input_text, turns)
        used = len(llm.tokenize(prompt.encode("utf-8")))
        # Stop once a reply fits, or when no older turn is left to drop.
        if not turns or used + _MIN_REPLY_TOKENS <= N_CTX:
            return prompt, used
        del turns[0]


def generate_text(input_text, history):
    """Stream the model's reply to ``input_text`` as a growing string.

    The prompt contains the system preamble, the earlier turns of the
    conversation and the new user message. Previously only the last assistant
    reply was forwarded, so the model lost the preamble and every older turn.

    Args:
        input_text: The new user message.
        history: Earlier turns of the chat, oldest first.
            Assumes each entry is a ``(user_message, assistant_reply)`` pair,
            as the original ``history[-1][1]`` access implies -- confirm
            against the Gradio version in use. Empty or ``None`` starts a
            fresh conversation. The argument is not modified.

    Yields:
        The reply accumulated so far, once per streamed chunk.

    Note:
        The oldest turns are dropped when the prompt would not leave
        ``_MIN_REPLY_TOKENS`` tokens of the ``N_CTX`` window free. A single
        message that is too large on its own is passed through unchanged;
        how that is handled is left to ``llm``.
    """
    prompt, used = _fit_prompt(input_text, history)

    # Never request more tokens than the context window still has room for.
    budget = max(1, min(4096, N_CTX - used))

    chunks = llm(
        prompt,
        max_tokens=budget,
        stop=[
            "<|prompter|>", "<|endoftext|>", "<|endoftext|> \n",
            "ASSISTANT:", "USER:", "SYSTEM:",
        ],
        stream=True,
    )

    reply = ""
    for chunk in chunks:
        # Chunks are only read, so no defensive copy is needed.
        reply += chunk["choices"][0]["text"]
        yield reply
# Chat UI wired to generate_text; the title and description show which
# checkpoint is being served.
demo = gr.ChatInterface(
    generate_text,
    title=f"Lama2 on CPU: {CONST_FILENAME}",
    description=(
        "Running Llama2 with llama_cpp: "
        f"\r\n<i>{CONST_REPO_ID} {CONST_FILENAME}</i>"
    ),
    examples=[
        "Hi!",
        "Does it hard to be machine?",
        "When i am need a doctor?",
        "Ты говоришь по русски? Я злой.",
    ],
    cache_examples=True,
    undo_btn="Undo",
    clear_btn="Clear",
)

# Configure the request queue, then start serving.
demo.queue(concurrency_count=10, max_size=50)
demo.launch()