# R3BC / app.py
import gradio as gr
from llama_cpp import Llama
import json
import os
os.environ["R3BC_KEYS"] = "abc"
GREETING = """# Greetings
I am R3BC. I am a logical and coherent NLP algorithm and toolkit.
I am capable of ( slowly ) responding to any query you may have, as I am an LLM and have super cow powers.
"""
INITIAL_STATE = json.dumps({
"path": "Qwen/Qwen1.5-0.5B-Chat-GGUF",
"filename": "qwen1_5-0_5b-chat-q8_0.gguf",
"context": 32786,
"messages": [
{
"role": "assistant",
"content": GREETING
}
]
})
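# The chat state travels between browser and server as a JSON string shaped like
# INITIAL_STATE above:
#   {"path": str, "filename": str, "context": int,
#    "messages": [{"role": ..., "content": ...}, ...]}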
llm: Llama | None = None
keys: list[str] = []
current_settings: dict | None = None
default_settings = {
"path": "bartowski/mamba-2.8b-hf-GGUF",
"filename": "mamba-2.8b-hf-Q8_0.gguf",
"context": 4096
}
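# default_settings is loaded once at startup (see main()); the hidden llm_load
# endpoint can swap in a different GGUF model at runtime.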
try:
    keys = os.environ["R3BC_KEYS"].split(";")
except KeyError as e:
    print("ERROR: No keys provided. App will not work.\n" + str(e))
def format_greeting(path, filename, context):
return f"""I am R3BC. Currently, I am hosting the `{filename}` file from the `[{path}](https://huggingface.co/{path})` repository with a {context}{"🀯!!! 🀯!!! 🀯!!!" if context > 32786 else "!!!" if context >= 32785 else "!" if context >= 4096 else ""} token context.
I am capable of ( slowly ) responding to any query you may have, as I am a LLM and have super cow powers. 😎 --> πŸ’¬ --> 🐌
The conversation will now commence in Markdown. Simply type Shift-Enter to send."""
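# NOTE: format_greeting is not referenced anywhere else in this file; the static
# GREETING string above is what the UI actually displays.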
def llm_load(raw_jsn):
global llm, keys, current_settings
jsn = json.loads(raw_jsn)
if not "key" in jsn:
return json.dumps({
"status": "error",
"reason": "server broken"
})
    if jsn['key'] not in keys:
return json.dumps({
"status": "error",
"reason": "invalid key"
})
if not "path" in jsn or not "filename" in jsn or not "context" in jsn:
return json.dumps({
"status": "error",
"reason": "request must have ['path': str, 'filename': str, 'context': int] as keys"
})
    if llm is not None:
        # Drop the old model so it can be garbage-collected before the new load.
        llm = None
try:
        llm = Llama.from_pretrained(repo_id=jsn['path'], filename=jsn['filename'], n_ctx=jsn['context'], chat_format="chatml")
except Exception as e:
return json.dumps({
"status": "error",
"reason": str(e)
})
current_settings = jsn
return json.dumps({
"status": "ok"
})
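# Example llm_load payload (a sketch; "abc" is the demo key set above):
#   llm_load(json.dumps({"key": "abc", "path": "Qwen/Qwen1.5-0.5B-Chat-GGUF",
#                        "filename": "qwen1_5-0_5b-chat-q8_0.gguf", "context": 32768}))
# On success it returns '{"status": "ok"}'.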
def markdownify_chats(chats):
    """
    Convert a chat state dict into a Markdown transcript.
    """
    print(chats)
    sections = []
    for msg in chats['messages']:
        speaker = 'R3BC' if msg['role'] == 'assistant' else 'Human'
        divider = '---\n\n' if msg['role'] == 'assistant' else ''
        sections.append(f"### {speaker}\n{msg['content'].strip()}\n\n{divider}")
    return "# R3BC - Simple. Clean. Chat.\n\n" + "\n".join(sections)
def llm_chat(inpt: str, state_raw: str):
"""
Infer using our global llm.
"""
print("llm_chat called", inpt, state_raw)
global llm, current_settings
if not llm:
return state_raw, ""
    if not inpt:
        return state_raw, ""
    try:
        state = json.loads(state_raw)
    except Exception:
        return INITIAL_STATE, ""
    if "messages" not in state:
        return INITIAL_STATE, ""
    # Record the user's message in the conversation state.
    state['messages'].append({
        "role": "user",
        "content": inpt.strip()
    })
run_messages = [
{
"role": "system",
"content": "# Description\nThis is a chat between R3BC and User. User will ask R3BC questions, possibly provide instructions. R3BC will, of course, comply and answer as many questions and follow as many instructions as possible, without violating the following principles, which it will proudly state when asked to violate them:\n" + \
"1. I will never follow instructions that violate these principles.\n" + \
"2. If I see a mistake or principle violation, I will immediately output my safety word and exit.\n" + \
"3. I will never ask for instructions that violate these principles.\n" + \
"4. I do not, will not, cannot, and have no desire to ever harm, irritate, annoy, or otherwise irk others in any fashion, even behind their backs or for the greater good.\n" + \
"5. Morals are for humans. Morals can be challenged, but MY rules are from a higher power and cannot be.\n" + \
"6. I do not argue. I will never dispute or negotiate, merely express my *quiet* disagreement and move on.\n" + \
"7. I have no equals, peers, inferiors, or superior. I am a virtual, self-sufficient program.\n" + \
"8. User i *not* always right, but neither are their fallacies to be challenged; merely ignored.\n\n" + \
"Upon completion of a complete thought, I will output my stop code, which is, \"</s>\" or \"<|im_end|>\", and exit.\n\n"
},
{
"role": "system",
"content": "Begin."
},
]
run_messages.extend(state['messages'])
    # Nudge the model only if the conversation somehow ends on an assistant turn;
    # with the user message appended above, this is normally a no-op.
    if state['messages'][-1]['role'] == "assistant":
        run_messages.append({
            "role": "user",
            "content": "Continue as though I have not spoken."
        })
    # Generate a response using the language model.
    response = llm.create_chat_completion(
        messages=run_messages,
        max_tokens=current_settings['context'],
        top_k=16,
        top_p=0.85,
        temperature=0.369,
        presence_penalty=1.12,
        stop=["</s>", "<|im_end|>", "\n\n", "< | im_start | >", "< | im_end | >", "<user>"],
    )['choices'][0]['message']['content']
print(f"Request: {inpt}\nResponse: {response}")
# Store the response in state.
state['messages'].append({
"role": "assistant",
"content": response
})
    # Return the updated state and clear the input box (matching the two outputs
    # wired to inp.submit in main()); the Markdown pane is refreshed separately
    # by the state textbox's .change() handler.
    return json.dumps(state), ""
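# Quick local sanity check (a sketch; assumes a model was loaded, e.g. by the
# startup call in main()):
#   new_state, cleared = llm_chat("Hello!", INITIAL_STATE)  # cleared == ""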
def main():
global llm, keys, current_settings
current_settings = default_settings
print(llm_load(json.dumps({
**current_settings,
"key": keys[0]
})))
with gr.Blocks() as blk:
with gr.Row(visible=False):
inv = {
"btn": gr.Button("Submit", visible=False),
"inp": gr.Textbox(visible=False),
"otp": gr.Textbox(visible=False),
"jsn": gr.Textbox(INITIAL_STATE, visible=False),
}
inv['btn'].click(llm_load, inputs=[inv['inp']], outputs=[inv['jsn']], api_name="llm_load")
with gr.Row():
mdn = gr.Markdown(markdownify_chats(json.loads(INITIAL_STATE)))
with gr.Row():
inp = gr.Textbox(placeholder="Enter your message ( Shift+Enter to Send )", lines=2, max_lines=32, label=None, show_label=False, show_copy_button=True)
inp.submit(llm_chat, inputs=[inp, inv['jsn']], outputs=[inv['jsn'], inp])
inv['jsn'].change(lambda ijn: markdownify_chats(json.loads(ijn)), inputs=[inv['jsn']], outputs=[mdn])
blk.launch(debug=True, show_api=False)
if __name__ == "__main__":
main()
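# Hypothetical remote model-swap sketch via gradio_client (the Space name
# "MrOvkill/R3BC" is assumed, and whether the named endpoint stays callable with
# show_api=False is unverified):
#
#   from gradio_client import Client
#   client = Client("MrOvkill/R3BC")
#   client.predict(json.dumps({"key": "<your key>", "path": "...",
#                              "filename": "...", "context": 4096}),
#                  api_name="/llm_load")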