# llama3_on_sbc/app.py
import gradio as gr
import os
import requests
import json
sbc_host_url = os.environ['URL']
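# The server endpoint comes from the environment; a hypothetical example value
# would be "http://<sbc-ip>:8080/completion" (the llama.cpp server's completion route).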
# Previous non-streaming implementation, kept for reference:
# def get_completion(prompt: str, messages: str = '', n_predict=128):
#     system = "### System: You are a helpful assistant that helps to brainstorm ideas.\n"
#     prompt_templated = f'{system} {messages}\n ### HUMAN:\n{prompt} \n ### ASSISTANT:'
#     headers = {
#         "Content-Type": "application/json"
#     }
#     data = {
#         "prompt": prompt_templated,
#         "n_predict": n_predict,
#         "stop": ["### HUMAN:", "### ASSISTANT:", "HUMAN"],
#         "stream": False
#     }
#     try:
#         response = requests.post(sbc_host_url, headers=headers, data=json.dumps(data))
#         if response.status_code == 200:
#             return response.json()['content']
#         else:
#             response.raise_for_status()
#     except requests.exceptions.RequestException:
#         raise gr.Error("Apologies for the inconvenience! Our model is currently self-hosted and unavailable at the moment.")
#
# def chatty(prompt, messages):
#     past_messages = ''
#     if len(messages) > 0:
#         for message in messages:
#             past_messages += f'\n### HUMAN: {message[0]}'
#             past_messages += f'\n### ASSISTANT: {message[1]}'
#     messages = get_completion(prompt, past_messages)
#     return messages.split('### ASSISTANT:')[-1]
# Streaming variant: yields partial completions so the UI updates as tokens arrive.
def chatty(prompt, messages, n_predict=128):
    # Rebuild the chat history in the model's prompt template.
    past_messages = ''
    if len(messages) > 0:
        for message in messages:
            past_messages += f'\n### HUMAN: {message[0]}'
            past_messages += f'\n### ASSISTANT: {message[1]}'
    system = "### System: You help to brainstorm ideas.\n"
    # Interpolate the rebuilt history (past_messages), not the raw messages list.
    prompt_templated = f'{system} {past_messages}\n ### HUMAN:\n{prompt} \n ### ASSISTANT:'
    headers = {
        "Content-Type": "application/json"
    }
    # Request payload for the completion endpoint; the "stop" strings keep the
    # model from generating both sides of the dialogue.
    data = {
        "prompt": prompt_templated,
        "n_predict": n_predict,
        "stop": ["### HUMAN:", "### ASSISTANT:", "HUMAN"],
        "stream": True
    }
result = ""
try:
response = requests.post(sbc_host_url, headers=headers, data=json.dumps(data), stream=True)
if response.status_code == 200:
for line in response.iter_lines():
if line:
try:
result += json.loads(line.decode('utf-8').replace('data: ', ''))['content']
except:
# LMStudio response has empty token
pass
yield result
else:
response.raise_for_status()
except requests.exceptions.RequestException as e:
raise gr.Warning("Apologies for the inconvenience! Our model is currently self-hosted and unavailable at the moment.")
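# Hypothetical local smoke test (assumes the llama.cpp server is reachable at URL):
#   for partial in chatty("Suggest three weekend project ideas", []):
#       print(partial)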
with gr.Blocks() as demo:
    gr.Image("sbc.jpg")
    gr.ChatInterface(
        fn=chatty,
        title="DevQuasar/llama3_8b_chat_brainstorm-GGUF on Orange Pi 5 Plus with llama.cpp",
        description="Brainstorm facilitates idea exploration through interaction with a Large Language Model (LLM). Rather than providing direct answers, the model engages in a dialogue with users, offering probing questions aimed at fostering deeper contemplation and consideration of various facets of their ideas."
    )
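# Because chatty is a generator, gr.ChatInterface streams each yielded partial
# response to the chat window.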
if __name__ == "__main__":
    demo.launch()