import gradio as gr
from llama_cpp import Llama
model_path = "mythologic-mini-7b.Q4_0.gguf"
model_path = "llama-2-7b-chat.Q4_0.gguf"
model_path = "tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf"
llm = Llama(
    model_path=model_path,
    n_ctx=1024,       # context window size
    n_gpu_layers=-1,  # offload all layers to the GPU (if available)
    use_mlock=False,  # do not lock the model in RAM, so it may be swapped out
)
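# Note: n_gpu_layers=-1 offloads every layer, but this only takes effect when
# llama-cpp-python was built with GPU support (e.g. CUDA or Metal); on a
# CPU-only build the setting is silently ignored.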
# Example transcript-style prompt for the raw completion API (unused by the
# chat interface below; kept for reference):
DIALOG_PROMPT = '''
Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
User: Hello, Bob.
Bob: Hello. How may I help you today?
User: Please tell me the largest city in Europe.
Bob: Sure. The largest city in Europe is Moscow, the capital of Russia.
User: Why is the sky blue?
'''
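# The transcript above could drive the raw completion API roughly like this
# (a sketch; max_tokens and the stop sequence are assumed values):
# output = llm(DIALOG_PROMPT, max_tokens=256, stop=["User:"])
# answer = output["choices"][0]["text"]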
def infer(question):
    output = llm.create_chat_completion(
        messages=[
            {
                "role": "system",
                "content": "You are an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.",
            },
            {"role": "user", "content": question},
        ],
    )
    print(output)
    # Chat completions return the generated text under message.content;
    # the 'text' key is only used by the raw completion API.
    return output["choices"][0]["message"]["content"]
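# A streaming variant of the same call; a sketch using the stream=True option
# of llama-cpp-python's OpenAI-compatible chat API:
# for chunk in llm.create_chat_completion(messages=[...], stream=True):
#     delta = chunk["choices"][0]["delta"]
#     if "content" in delta:
#         print(delta["content"], end="", flush=True)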
gradio_interface = gr.Interface(
fn=infer,
inputs="text",
outputs="text",
cache_examples=False,
examples=[
["Jill"],
["Sam"]
],
)
gradio_interface.launch()
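# On Hugging Face Spaces the default launch() is sufficient; elsewhere the
# bind address and port can be set explicitly (a sketch, not required here):
# gradio_interface.launch(server_name="0.0.0.0", server_port=7860)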