# Install llama-cpp-python with CUDA (cuBLAS) support; run once, e.g. from a notebook:
# !CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir
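# Note (assumption about your build): newer llama.cpp releases renamed this flag,
# so recent llama-cpp-python versions expect CMAKE_ARGS="-DGGML_CUDA=on" instead
# of -DLLAMA_CUBLAS=on. A plain `pip install llama-cpp-python` gives a CPU-only build.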
from llama_cpp import Llama
import torch
import gradio as gr
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
print("Running on device:", torch_device)
print("CPU threads:", torch.get_num_threads())
# Load the quantized GGUF model. n_ctx must cover the prompt plus the longest
# completion (the slider below allows up to 200 new tokens).
llm = Llama(
    model_path='Llama-2-ko-7B-chat-gguf-q4_0.bin',
    n_ctx=512,       # context window (prompt + generated tokens)
    n_threads=8,     # CPU threads used for inference
    n_batch=5,       # tokens processed per batch during prompt evaluation
    n_gpu_layers=-1 if torch_device == "cuda" else 0,  # offload all layers when CUDA is available
)
def gen(x, max_new_tokens):
    # Korean instruction prompt: "### 명령어:" = "### Instruction:", "### 응답:" = "### Response:".
    output = llm(f"### 명령어:\n{x}\n\n### 응답:", max_tokens=max_new_tokens,
                 stop=["###", "\n", ":"], echo=True)
    # Map SentencePiece's "▁" word-boundary marker back to a plain space.
    return output['choices'][0]['text'].replace('▁', ' ')
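# Example call (the response text is hypothetical; echo=True means the prompt
# comes back as part of the returned text):
#   gen("우리 여행 갈래?", 20)   # "Shall we go on a trip?"
#   -> '### 명령어:\n우리 여행 갈래?\n\n### 응답: 좋아요, 어디로 갈까요?'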
def reset_textbox():
    # Clear the user-input textbox (defined here but not wired to any event below).
    return gr.update(value='')
with gr.Blocks() as demo:
    gr.Markdown(
        "Duplicated from beomi/KoRWKV-1.5B. Base model: Llama-2-ko-7B-chat-gguf-q4_0."
    )
    with gr.Row():
        with gr.Column(scale=4):
            user_text = gr.Textbox(
                placeholder='우리 여행 갈래?',  # "Shall we go on a trip?"
                label="User input"
            )
            model_output = gr.Textbox(label="Model output", lines=10, interactive=False)
            button_submit = gr.Button(value="Submit")
        with gr.Column(scale=1):
            max_new_tokens = gr.Slider(
                minimum=1, maximum=200, value=20, step=1, interactive=True,
                label="Max New Tokens",
            )

    button_submit.click(gen, [user_text, max_new_tokens], model_output)
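    # Optional sketch: reset_textbox (defined above) could clear the input after
    # each submission by registering a second handler on the same button, e.g.:
    # button_submit.click(reset_textbox, [], user_text)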
# queue() already enables request queuing; the enable_queue argument to launch()
# is deprecated and removed in recent Gradio versions.
demo.queue(max_size=32).launch()