Spaces:
Sleeping
Sleeping
import gradio as gr | |
import os | |
import requests | |
import json | |
sbc_host_url = os.environ['URL'] | |
# def get_completion(prompt:str, messages:str = '', n_predict=128): | |
# system = "### System: You are a helpful assistant helps to brainstorm ideas.\n" | |
# prompt_templated = f'{system} {messages}\n ### HUMAN:\n{prompt} \n ### ASSISTANT:' | |
# headers = { | |
# "Content-Type": "application/json" | |
# } | |
# data = { | |
# "prompt": prompt_templated, | |
# "n_predict": n_predict, | |
# "stop": ["### HUMAN:", "### ASSISTANT:", "HUMAN"], | |
# "stream": "True" | |
# } | |
# try: | |
# response = requests.post(sbc_host_url, headers=headers, data=json.dumps(data)) | |
# if response.status_code == 200: | |
# return response.json()['content'] | |
# else: | |
# response.raise_for_status() | |
# except: | |
# raise gr.Warning("Apologies for the inconvenience! Our model is currently self-hosted and unavailable at the moment.") | |
# def chatty(prompt, messages): | |
# # print(prompt) | |
# # print(f'messages: {messages}') | |
# past_messages = '' | |
# if len(messages) > 0: | |
# for idx, message in enumerate(messages): | |
# print(f'idx: {idx}, message: {message}') | |
# past_messages += f'\n### HUMAN: {message[0]}' | |
# past_messages += f'\n### ASSISTANT: {message[1]}' | |
# # past_messages = messages[0][0] | |
# # print(f'past_messages: {past_messages}') | |
# messages = get_completion(prompt, past_messages) | |
# return messages.split('### ASSISTANT:')[-1] | |
# Streaming implementation: the reply is yielded incrementally as tokens arrive.
def _format_history(messages):
    """Render earlier chat turns as a '### HUMAN' / '### ASSISTANT' transcript.

    Each turn is indexed as ``turn[0]`` (user text) and ``turn[1]`` (assistant
    text). An empty history produces an empty string.
    """
    return ''.join(
        f'\n### HUMAN: {turn[0]}\n### ASSISTANT: {turn[1]}'
        for turn in messages
    )


def _extract_stream_content(raw_line):
    """Return the 'content' text carried by one streamed line, or '' if none.

    Lines may carry a server-sent-event ``data: `` prefix; only that leading
    prefix is removed so the same characters inside generated text survive.
    """
    try:
        text = raw_line.decode('utf-8')
        if text.startswith('data: '):
            text = text[len('data: '):]
        content = json.loads(text)['content']
    except (ValueError, KeyError, TypeError):
        # Some backends (e.g. LMStudio) emit lines without a usable token;
        # those are skipped rather than aborting the whole reply.
        return ''
    return content if isinstance(content, str) else ''


def chatty(prompt, messages, n_predict=128):
    """Stream a model reply for ``prompt`` given the previous chat turns.

    Args:
        prompt: The new user message.
        messages: Previous turns, each indexable as [user_text, assistant_text].
            ``None`` or an empty sequence means no history.
        n_predict: Value sent as ``n_predict`` in the request payload.

    Yields:
        The reply accumulated so far, once per non-empty line received from
        the backend, so the caller can display it incrementally.

    Raises:
        gr.Error: If the HTTP request fails or the backend answers with an
            error status.
    """
    system = "### System: You help to brainstorm ideas.\n"
    # Use the formatted transcript, not the raw history list, in the prompt.
    past_messages = _format_history(messages or [])
    prompt_templated = f'{system} {past_messages}\n ### HUMAN:\n{prompt} \n ### ASSISTANT:'
    headers = {
        "Content-Type": "application/json"
    }
    data = {
        "prompt": prompt_templated,
        "n_predict": n_predict,
        "stop": ["### HUMAN:", "### ASSISTANT:", "HUMAN"],
        "stream": True
    }
    result = ""
    try:
        # The context manager releases the connection even if the consumer
        # stops iterating before the stream is exhausted.
        with requests.post(sbc_host_url, headers=headers, data=json.dumps(data), stream=True) as response:
            if response.status_code != 200:
                response.raise_for_status()
                return
            for line in response.iter_lines():
                if not line:
                    continue
                result += _extract_stream_content(line)
                yield result
    except requests.exceptions.RequestException as e:
        # gr.Warning is a notification helper, not an exception class;
        # gr.Error is the raisable that surfaces a message in the UI.
        raise gr.Error("Apologies for the inconvenience! Our model is currently self-hosted and unavailable at the moment.") from e
# Header text shown above the chat box.
_APP_TITLE = "DevQuasar/llama3_8b_chat_brainstorm-GGUF on Orange Pi5 plus with llama.cpp"
_APP_DESCRIPTION = "Brainstorm facilitates idea exploration through interaction with a Language Model (LLM). Rather than providing direct answers, the model engages in a dialogue with users, offering probing questions aimed at fostering deeper contemplation and consideration of various facets of their ideas."

# Page layout: a banner image followed by the chat interface backed by chatty().
with gr.Blocks() as demo:
    gr.Image("sbc.jpg")
    gr.ChatInterface(
        fn=chatty,
        title=_APP_TITLE,
        description=_APP_DESCRIPTION,
    )

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()