import gradio as gr
from llama_cpp import Llama
model_path = "mythologic-mini-7b.Q4_0.gguf"
model_path = "llama-2-7b-chat.Q4_0.gguf"
model_path = "tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf"
llm = Llama(model_path,
            n_ctx=1024,       # context window size in tokens
            n_gpu_layers=-1,  # offload all layers to the GPU when one is available
            use_mlock=False)  # leave memory unlocked, so model pages may be swapped out
# Legacy raw-completion call, kept for reference (the chat API below supersedes it):
# output = llm(
#     '''
# Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
# User: Hello, Bob.
# Bob: Hello. How may I help you today?
# User: Please tell me the largest city in Europe.
# Bob: Sure. The largest city in Europe is Moscow, the capital of Russia.
# User: Why is the sky blue?
# '''
# )
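# Note: a raw completion returns its text at output['choices'][0]['text'],
# while create_chat_completion (used below) returns it at
# output['choices'][0]['message']['content'].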
def infer(question):
    # Each role needs its own message dict; packing both roles into one dict
    # would let the later keys silently overwrite the earlier ones.
    output = llm.create_chat_completion(messages=[
        {"role": "system",
         "content": "You are an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision."},
        {"role": "user",
         "content": question},
    ])
    print(output)  # log the full response dict for debugging
    return output['choices'][0]['message']['content']
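# Generation defaults are used above; create_chat_completion also accepts
# sampling parameters such as max_tokens and temperature if tighter control
# over the reply is needed.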
gradio_interface = gr.Interface(
    fn=infer,
    inputs="text",
    outputs="text",
    cache_examples=False,
    examples=[
        ["List 10 simple steps in building a website. Do not elaborate each step."],
        ["Why is the sky blue?"],
    ],
)
gradio_interface.launch()
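# To run locally (assuming this file is saved as app.py, llama-cpp-python and
# gradio are installed, and the GGUF model file is in the working directory):
#   python app.py
# Gradio serves the interface at http://127.0.0.1:7860 by default.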