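"""Gradio chat front-end for a llama.cpp server running on an Orange Pi 5 Plus.

The completion endpoint is read from the URL environment variable; each chat
turn is wrapped in a ### HUMAN: / ### ASSISTANT: prompt template before being
sent to the server.
"""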
import gradio as gr
import os
import requests
import json
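
# Endpoint of the llama.cpp server, supplied at runtime via the URL environment
# variable (e.g. http://<sbc-ip>:8080/completion for llama.cpp's default
# completion route; the exact address is deployment-specific).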
sbc_host_url = os.environ['URL']


def get_completion(prompt: str, messages: str = '', n_predict: int = 128) -> str:
    # Wrap any prior turns plus the new prompt in the model's chat template.
    prompt_templated = f'{messages}\n### HUMAN:\n{prompt}\n### ASSISTANT:'
    headers = {
        "Content-Type": "application/json"
    }
    data = {
        "prompt": prompt_templated,
        "n_predict": n_predict,
        "stop": ["### HUMAN:", "### ASSISTANT:", "HUMAN"],
        # The response body is parsed below as a single JSON object, so
        # streaming must stay off (a boolean, not the string "True").
        "stream": False,
    }
    response = requests.post(sbc_host_url, headers=headers, data=json.dumps(data))
    if response.status_code == 200:
        return response.json()['content']
    response.raise_for_status()
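
# Quick sanity check (hypothetical, not executed by the app):
#   print(get_completion('Hello from the Orange Pi!'))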


def chatty(prompt, history):
    print(prompt)
    print(f'history: {history}')
    # Re-serialize the accumulated [user, assistant] pairs into the
    # HUMAN/ASSISTANT template so the model sees the whole conversation.
    past_messages = ''
    for user_msg, assistant_msg in history:
        past_messages += f'\n### HUMAN: {user_msg}'
        past_messages += f'\n### ASSISTANT: {assistant_msg}'
    print(f'past_messages: {past_messages}')
    completion = get_completion(prompt, past_messages)
    # Return only the text after the last assistant tag, in case the server
    # echoes part of the prompt back.
    return completion.split('### ASSISTANT:')[-1]
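

# Gradio chat UI: on each turn, ChatInterface calls `chatty` with the new
# message and the accumulated [user, assistant] history.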
demo = gr.ChatInterface(
    fn=chatty,
    title="Brainstorm on Orange Pi5 plus with llama.cpp",
    description=":)"
)

if __name__ == "__main__":
    demo.launch()