# llama3_on_sbc/app.py
import gradio as gr
import os
import requests
import json
sbc_host_url = os.environ['URL']
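# The server endpoint comes from the environment; a hypothetical example value
# would be "http://<sbc-ip>:8080/completion" (the llama.cpp server's completion route).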
# Previous non-streaming implementation, kept for reference:
# def get_completion(prompt: str, messages: str = '', n_predict=128):
#     system = "### System: You are a helpful assistant that helps to brainstorm ideas.\n"
#     prompt_templated = f'{system} {messages}\n ### HUMAN:\n{prompt} \n ### ASSISTANT:'
#     headers = {
#         "Content-Type": "application/json"
#     }
#     data = {
#         "prompt": prompt_templated,
#         "n_predict": n_predict,
#         "stop": ["### HUMAN:", "### ASSISTANT:", "HUMAN"],
#         "stream": False
#     }
#     try:
#         response = requests.post(sbc_host_url, headers=headers, data=json.dumps(data))
#         if response.status_code == 200:
#             return response.json()['content']
#         else:
#             response.raise_for_status()
#     except requests.exceptions.RequestException:
#         raise gr.Error("Apologies for the inconvenience! Our model is currently self-hosted and unavailable at the moment.")
#
# def chatty(prompt, messages):
#     past_messages = ''
#     if len(messages) > 0:
#         for message in messages:
#             past_messages += f'\n### HUMAN: {message[0]}'
#             past_messages += f'\n### ASSISTANT: {message[1]}'
#     messages = get_completion(prompt, past_messages)
#     return messages.split('### ASSISTANT:')[-1]
# Streaming variant: yields partial completions so the UI updates as tokens arrive.
def chatty(prompt, messages, n_predict=128):
    # Rebuild the chat history in the model's prompt template.
    past_messages = ''
    if len(messages) > 0:
        for message in messages:
            past_messages += f'\n### HUMAN: {message[0]}'
            past_messages += f'\n### ASSISTANT: {message[1]}'
    system = "### System: You help to brainstorm ideas.\n"
    # Interpolate the rebuilt history (past_messages), not the raw messages list.
    prompt_templated = f'{system} {past_messages}\n ### HUMAN:\n{prompt} \n ### ASSISTANT:'
    headers = {
        "Content-Type": "application/json"
    }
    # Request payload for the completion endpoint; the "stop" strings keep the
    # model from generating both sides of the dialogue.
    data = {
        "prompt": prompt_templated,
        "n_predict": n_predict,
        "stop": ["### HUMAN:", "### ASSISTANT:", "HUMAN"],
        "stream": True
    }
result = ""
try:
response = requests.post(sbc_host_url, headers=headers, data=json.dumps(data), stream=True)
if response.status_code == 200:
for line in response.iter_lines():
if line:
try:
result += json.loads(line.decode('utf-8').replace('data: ', ''))['content']
except:
# LMStudio response has empty token
pass
yield result
else:
response.raise_for_status()
except requests.exceptions.RequestException as e:
raise gr.Warning("Apologies for the inconvenience! Our model is currently self-hosted and unavailable at the moment.")
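# Hypothetical local smoke test (assumes the llama.cpp server is reachable at URL):
#   for partial in chatty("Suggest three weekend project ideas", []):
#       print(partial)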
with gr.Blocks() as demo:
    gr.Image("sbc.jpg")
    gr.ChatInterface(
        fn=chatty,
        title="DevQuasar/llama3_8b_chat_brainstorm-GGUF on Orange Pi 5 Plus with llama.cpp",
        description="Brainstorm facilitates idea exploration through interaction with a Large Language Model (LLM). Rather than providing direct answers, the model engages in a dialogue with users, offering probing questions aimed at fostering deeper contemplation and consideration of various facets of their ideas."
    )
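# Because chatty is a generator, gr.ChatInterface streams each yielded partial
# response to the chat window.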
if __name__ == "__main__":
    demo.launch()