Hugging Face Spaces — Space status: Runtime error (the app crashed on startup; see fixes below).
import os

import ctransformers
import gradio as gr
# --- Model setup ----------------------------------------------------------
# Configure generation stop sequences, then load the 4-bit quantized
# llama-2-7b-chat GGML weights for CPU inference via ctransformers.
configObj = ctransformers.Config(stop=["\n", 'User'])
config = ctransformers.AutoConfig(config=configObj, model_type='llama')
# NOTE(review): this overwrites the stop list above, dropping 'User' —
# presumably intentional since complete() re-checks stop words manually,
# but confirm.
config.config.stop = ["\n"]

# Resolve the model file relative to the working directory (needs `import os`).
path_to_llm = os.path.abspath("llama-2-7b-chat.ggmlv3.q4_1.bin")
llm = ctransformers.AutoModelForCausalLM.from_pretrained(path_to_llm, config=config)
def complete(prompt, stop=None):
    """Generate a streamed completion for *prompt* until a stop word appears.

    Tokens are printed to stdout as they are generated and accumulated into
    the returned string.

    Args:
        prompt: The user prompt to complete.
        stop: Substrings that terminate generation once seen in the
            accumulated output. Defaults to ["User", "Assistant"].
            (A ``None`` sentinel replaces the original mutable-list default,
            which would have been shared across calls.)

    Returns:
        The accumulated output text; it may contain the stop word that
        triggered termination.
    """
    if stop is None:
        stop = ["User", "Assistant"]
    tokens = llm.tokenize(prompt)
    output = ''
    for token in llm.generate(tokens):
        result = llm.detokenize(token)
        output += result
        # Terminate as soon as any stop word shows up in the running output.
        if any(word in output for word in stop):
            print('\n')
            return output
        print(result, end='', flush=True)
    print('\n')
    return output
# --- Gradio UI -------------------------------------------------------------
title = "llama2-7b-chat-ggml"
description = "This space is an attempt to run the GGML 4 bit quantized version of 'llama2-7b-chat' on a CPU"
example_1 = "Write a 7 line poem on AI"
example_2 = "Tell me a joke"
examples = [example_1, example_2]

UI = gr.Interface(
    # BUG FIX: the original passed `fn=generate_code`, a name never defined
    # in this file (the cause of the Space's runtime error); the handler
    # actually defined above is `complete`.
    fn=complete,
    inputs=gr.Textbox(label="user_prompt", placeholder="Ask your queries here...."),
    outputs=gr.Textbox(label="Assistant"),
    title=title,
    description=description,
    examples=examples,
)
UI.launch()