import gradio as gr
from llama_cpp import Llama
model_path = "mythologic-mini-7b.Q4_0.gguf" | |
model_path = "llama-2-7b-chat.Q4_0.gguf" | |
model_path = "tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf" | |
llm = Llama(model_path,
            n_ctx=1024,        # context window size in tokens
            n_gpu_layers=-1,   # offload all layers to the GPU if one is available
            use_mlock=False)   # don't lock the model in RAM, so the OS may swap it
# Example of the raw text-completion API with a dialog-style prompt (kept for reference):
# output = llm(
#     '''
# Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
# User: Hello, Bob.
# Bob: Hello. How may I help you today?
# User: Please tell me the largest city in Europe.
# Bob: Sure. The largest city in Europe is Moscow, the capital of Russia.
# User: Why is the sky blue?
# ''',
#     stop=["User:"],  # assumed: stop generating when the model starts the next User turn
# )
def infer(question):
    # Note: "role" and "content" must come in separate dicts per message;
    # putting them all in one dict silently overwrites the system message.
    output = llm.create_chat_completion(
        messages=[
            {"role": "system",
             "content": "You are an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision."},
            {"role": "user", "content": question},
        ],
    )
    print(output)
    # Chat completions return the generated text under choices[0]['message']['content'].
    return output['choices'][0]['message']['content']
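# Quick sanity check before wiring up the UI (hypothetical example question):
# print(infer("Why is the sky blue?"))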
gradio_interface = gr.Interface(
    fn=infer,
    inputs="text",
    outputs="text",
    cache_examples=False,
    examples=[
        ["Why is the sky blue?"],
        ["List 10 simple steps in building a website. Do not elaborate each step."],
    ],
)

gradio_interface.launch()
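# To expose a temporary public link when running outside Hugging Face Spaces:
# gradio_interface.launch(share=True)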