"""Gradio chat front-end for a llama.cpp HTTP completion server.

The server endpoint (e.g. ``http://host:8080/completion``) is read from the
required ``URL`` environment variable at import time.
"""
import gradio as gr
import os
import requests
import json

# llama.cpp server completion endpoint -- raises KeyError early if unset.
sbc_host_url = os.environ['URL']


def get_completion(prompt: str, messages: str = '', n_predict: int = 128) -> str:
    """Request a single (non-streaming) completion from the llama.cpp server.

    Parameters
    ----------
    prompt : str
        The latest user message.
    messages : str
        Pre-formatted conversation history, prepended to the prompt.
    n_predict : int
        Maximum number of tokens the server should generate.

    Returns
    -------
    str
        The ``content`` field of the server's JSON response.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status code.
    """
    prompt_templated = f'{messages}\n ### HUMAN:\n{prompt} \n ### ASSISTANT:'
    data = {
        "prompt": prompt_templated,
        "n_predict": n_predict,
        # Stop sequences keep the model from generating both sides of the
        # conversation.
        "stop": ["### HUMAN:", "### ASSISTANT:", "HUMAN"],
        # BUG FIX: was the *string* "True". If the server honors it, it
        # streams SSE chunks and response.json() below fails. This client
        # reads one JSON body, so streaming must be off.
        "stream": False,
    }
    # json= serializes the payload and sets Content-Type in one step;
    # timeout prevents a hung server from blocking the UI forever.
    response = requests.post(sbc_host_url, json=data, timeout=300)
    response.raise_for_status()
    return response.json()['content']


def chatty(prompt, messages):
    """Gradio ChatInterface callback.

    Parameters
    ----------
    prompt : str
        The user's newest message.
    messages : list
        Chat history as (human, assistant) pairs, as supplied by gradio.

    Returns
    -------
    str
        The assistant's reply text.
    """
    print(prompt)
    print(f'messages: {messages}')
    # Re-serialize the gradio history into the ### HUMAN / ### ASSISTANT
    # prompt format the model was templated with.
    past_messages = ''
    for human_turn, assistant_turn in messages:
        past_messages += f'\n### HUMAN: {human_turn}'
        past_messages += f'\n### ASSISTANT: {assistant_turn}'
    print(f'past_messages: {past_messages}')
    completion = get_completion(prompt, past_messages)
    # Defensive: keep only the text after the last assistant marker in case
    # the model echoed the template.
    return completion.split('### ASSISTANT:')[-1]


demo = gr.ChatInterface(
    fn=chatty,
    title="Brainstorm on Orange Pi5 plus with llama.cpp",
    description=":)"
)

if __name__ == "__main__":
    demo.launch()