import gradio as gr
from llama_cpp import Llama
model_path = "mythologic-mini-7b.Q4_0.gguf" | |
model_path = "llama-2-7b-chat.Q4_0.gguf" | |
model_path = "tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf" | |
llm = Llama(model_path,
            n_ctx=1024,        # context window size in tokens
            n_gpu_layers=-1,   # offload all layers to the GPU if one is available
            use_mlock=False)   # don't lock the model in RAM, so the OS may swap it
# Example of the raw text-completion API with a dialog-style prompt (kept for reference):
# output = llm(
#     '''
# Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
# User: Hello, Bob.
# Bob: Hello. How may I help you today?
# User: Please tell me the largest city in Europe.
# Bob: Sure. The largest city in Europe is Moscow, the capital of Russia.
# User: Why is the sky blue?
# ''',
#     stop=["User:"],  # assumed: stop generating when the model starts the next User turn
# )
def infer(question):
    # Note: "role" and "content" must come in separate dicts per message;
    # putting them all in one dict silently overwrites the system message.
    output = llm.create_chat_completion(
        messages=[
            {"role": "system",
             "content": "You are an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision."},
            {"role": "user", "content": question},
        ],
    )
    print(output)
    # Chat completions return the generated text under choices[0]['message']['content'].
    return output['choices'][0]['message']['content']
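# Quick sanity check before wiring up the UI (hypothetical example question):
# print(infer("Why is the sky blue?"))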
gradio_interface = gr.Interface(
    fn=infer,
    inputs="text",
    outputs="text",
    cache_examples=False,
    examples=[
        ["Why is the sky blue?"],
        ["List 10 simple steps in building a website. Do not elaborate each step."],
    ],
)

gradio_interface.launch()
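# To expose a temporary public link when running outside Hugging Face Spaces:
# gradio_interface.launch(share=True)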