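# Minimal Gradio demo: a local GGUF chat model served through llama-cpp-python.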
import gradio as gr
from llama_cpp import Llama

# Earlier model candidates, kept for reference (only the last assignment takes effect):
# model_path = "mythologic-mini-7b.Q4_0.gguf"
# model_path = "llama-2-7b-chat.Q4_0.gguf"
model_path = "tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf"
llm = Llama(model_path,
            n_ctx=1024,          # context window size in tokens
            n_gpu_layers=-1,     # offload all layers to the GPU
            use_mlock=False)     # do NOT lock the model in RAM (set True to prevent swapping)
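# Note: n_gpu_layers=-1 only has an effect with a GPU-enabled build of
# llama-cpp-python; on a CPU-only build the layers simply stay on the CPU.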

# Few-shot transcript prompt for the raw completion API, kept for reference.
PROMPT = '''
Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
User: Hello, Bob.
Bob: Hello. How may I help you today?
User: Please tell me the largest city in Europe.
Bob: Sure. The largest city in Europe is Moscow, the capital of Russia.
User: Why is the sky blue?
'''
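# A minimal sketch of how this prompt would drive the raw completion API
# (the max_tokens and stop values here are illustrative assumptions):
# output = llm(PROMPT, max_tokens=64, stop=["User:"], echo=False)
# print(output['choices'][0]['text'])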


def infer(question):
    # The original merged both messages into one dict (duplicate keys silently
    # dropped the system role) and hardcoded the question ("List 10 simple steps
    # in building a website..."); pass the textbox input through instead.
    output = llm.create_chat_completion(messages=[
        {"role": "system",
         "content": "You are an Assistant named Bob. Bob is helpful, kind, honest, "
                    "good at writing, and never fails to answer the User's requests "
                    "immediately and with precision."},
        {"role": "user", "content": question},
    ])
    print(output)
    # Chat completions store the reply under choices[0]['message']['content'].
    return output['choices'][0]['message']['content']
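
# Quick sanity check without launching the UI (the example input is illustrative):
# print(infer("Why is the sky blue?"))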

gradio_interface = gr.Interface(
  fn=infer,
  inputs="text",
  outputs="text",
  cache_examples=False,
  examples=[
    ["Jill"],
    ["Sam"]
  ],
)
gradio_interface.launch()
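# launch() serves the app locally; launch(share=True) would additionally create
# a temporary public Gradio link if remote access is needed.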