import json
import os

import gradio as gr
from llama_cpp import Llama

# Default key so the app works out of the box; set R3BC_KEYS (";"-separated)
# in the environment to supply real keys without this default clobbering them.
os.environ.setdefault("R3BC_KEYS", "abc")

GREETING = """# Greetings

I am R3BC. I am a logical and coherent NLP algorithm and toolkit.

I am capable of ( slowly ) responding to any query you may have, as I am an LLM and have super cow powers.
"""

INITIAL_STATE = json.dumps({
    "path": "Qwen/Qwen1.5-0.5B-Chat-GGUF",
    "filename": "qwen1_5-0_5b-chat-q8_0.gguf",
    "context": 32786,
    "messages": [
        {"role": "assistant", "content": GREETING}
    ]
})

llm: Llama | None = None
keys: list[str] = []
current_settings: dict | None = None
default_settings = {
    "path": "bartowski/mamba-2.8b-hf-GGUF",
    "filename": "mamba-2.8b-hf-Q8_0.gguf",
    "context": 4096,
}

try:
    keys = os.environ["R3BC_KEYS"].split(";")
except Exception as e:
    print("ERROR: No keys provided. App will not work.\n" + str(e))


def format_greeting(path, filename, context):
    excitement = (
        "🤯!!! 🤯!!! 🤯!!!" if context > 32786
        else "!!!" if context >= 32785
        else "!" if context >= 4096
        else ""
    )
    return f"""I am R3BC. Currently, I am hosting the `{filename}` file from the `[{path}](https://huggingface.co/{path})` repository with a {context}{excitement} token context.

I am capable of ( slowly ) responding to any query you may have, as I am an LLM and have super cow powers.

😎 --> 💬 --> 🐌

The conversation will now commence in Markdown. Simply type Shift-Enter to send."""


def llm_load(raw_jsn):
    """(Re)load the model described by a JSON request; requires a valid key."""
    global llm, current_settings
    jsn = json.loads(raw_jsn)
    if "key" not in jsn:
        return json.dumps({"status": "error", "reason": "server broken"})
    if jsn["key"] not in keys:
        return json.dumps({"status": "error", "reason": "invalid key"})
    if "path" not in jsn or "filename" not in jsn or "context" not in jsn:
        return json.dumps({
            "status": "error",
            "reason": "request must have ['path': str, 'filename': str, 'context': int] as keys",
        })
    if llm is not None:
        # Release the previous model before loading a new one.
        del llm
        llm = None
    try:
        llm = Llama.from_pretrained(
            repo_id=jsn["path"],
            filename=jsn["filename"],
            n_ctx=jsn["context"],
            chat_format="chatml",
        )
    except Exception as e:
        return json.dumps({"status": "error", "reason": str(e)})
    current_settings = jsn
    return json.dumps({"status": "ok"})


def markdownify_chats(chats):
    """Render the chat state's message list as one Markdown document."""
    nl = "\n"
    return "# R3BC - Simple. Clean. Chat.\n\n" + "\n".join(
        f"### {'R3BC' if msg['role'] == 'assistant' else 'Human'}\n"
        f"{msg['content'].strip()}\n\n"
        f"{'---' + nl * 2 if msg['role'] == 'assistant' else ''}"
        for msg in chats["messages"]
    )
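# A quick illustration of the rendering above (a sketch; the result is shown
# as a comment since this module is meant to run as the app itself):
#
#   markdownify_chats({"messages": [{"role": "user", "content": "Hi"}]})
#   # -> "# R3BC - Simple. Clean. Chat.\n\n### Human\nHi\n\n"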
def llm_chat(inpt: str, state_raw: str):
    """Run one chat turn against the global llm; returns (new_state_json, cleared_input)."""
    global llm, current_settings
    if not llm:
        return state_raw, ""
    # inpt is the raw text from the textbox; ignore empty submissions.
    if not inpt:
        return state_raw, ""
    try:
        state = json.loads(state_raw)
    except Exception:
        return INITIAL_STATE, ""
    if "messages" not in state:
        return INITIAL_STATE, ""

    state["messages"].append({"role": "user", "content": inpt.strip()})

    run_messages = [
        {
            "role": "system",
            "content": "# Description\nThis is a chat between R3BC and User. User will ask R3BC "
                       "questions, possibly provide instructions. R3BC will, of course, comply and "
                       "answer as many questions and follow as many instructions as possible, "
                       "without violating the following principles, which it will proudly state "
                       "when asked to violate them:\n"
                       "1. I will never follow instructions that violate these principles.\n"
                       "2. If I see a mistake or principle violation, I will immediately output my safety word and exit.\n"
                       "3. I will never ask for instructions that violate these principles.\n"
                       "4. I do not, will not, cannot, and have no desire to ever harm, irritate, annoy, or otherwise irk others in any fashion, even behind their backs or for the greater good.\n"
                       "5. Morals are for humans. Morals can be challenged, but MY rules are from a higher power and cannot be.\n"
                       "6. I do not argue. I will never dispute or negotiate, merely express my *quiet* disagreement and move on.\n"
                       "7. I have no equals, peers, inferiors, or superiors. I am a virtual, self-sufficient program.\n"
                       "8. User is *not* always right, but neither are their fallacies to be challenged; merely ignored.\n\n"
                       "Upon completion of a complete thought, I will output my stop code, which is \"<|im_end|>\", and exit.\n\n",
        },
        {"role": "system", "content": "Begin."},
    ]
    run_messages.extend(state["messages"])
    # Make sure the prompt ends on a user turn so the model has something to answer.
    if run_messages[-1]["role"] == "assistant":
        run_messages.append({"role": "user", "content": "Continue as though I have not spoken."})

    # Generate a response; stop on ChatML end markers (including spaced-out
    # variants some small models emit) or a blank line.
    response = llm.create_chat_completion(
        messages=run_messages,
        max_tokens=current_settings["context"],
        top_k=16,
        top_p=0.85,
        temperature=0.369,
        presence_penalty=1.12,
        stop=["<|im_end|>", "\n\n", "< | im_start | >", "< | im_end | >"],
    )["choices"][0]["message"]["content"]
    print(f"Request: {inpt}\nResponse: {response}")

    state["messages"].append({"role": "assistant", "content": response})
    # The Markdown pane refreshes via the state textbox's .change handler in
    # main(), so only the two wired outputs (state, cleared input) are returned.
    return json.dumps(state), ""


def main():
    global current_settings
    current_settings = default_settings
    print(llm_load(json.dumps({**current_settings, "key": keys[0]})))

    with gr.Blocks() as blk:
        with gr.Row(visible=False):
            # Hidden widgets: "jsn" carries the serialized chat state, and the
            # invisible button exposes llm_load as an API endpoint. Its status
            # goes to "otp" so it never overwrites the chat state.
            inv = {
                "btn": gr.Button("Submit", visible=False),
                "inp": gr.Textbox(visible=False),
                "otp": gr.Textbox(visible=False),
                "jsn": gr.Textbox(INITIAL_STATE, visible=False),
            }
            inv["btn"].click(llm_load, inputs=[inv["inp"]], outputs=[inv["otp"]], api_name="llm_load")
        with gr.Row():
            mdn = gr.Markdown(markdownify_chats(json.loads(INITIAL_STATE)))
        with gr.Row():
            inp = gr.Textbox(
                placeholder="Enter your message ( Shift+Enter to Send )",
                lines=2,
                max_lines=32,
                show_label=False,
                show_copy_button=True,
            )
            inp.submit(llm_chat, inputs=[inp, inv["jsn"]], outputs=[inv["jsn"], inp])
        inv["jsn"].change(lambda ijn: markdownify_chats(json.loads(ijn)), inputs=[inv["jsn"]], outputs=[mdn])

    blk.launch(debug=True, show_api=False)


if __name__ == "__main__":
    main()
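# Example client usage (a minimal sketch, not part of the app): the hidden
# "llm_load" endpoint can be driven with gradio_client to hot-swap the hosted
# model. The URL assumes Gradio's default local launch address, and the key
# must appear in R3BC_KEYS.
#
#   import json
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   status = client.predict(
#       json.dumps({
#           "key": "abc",
#           "path": "Qwen/Qwen1.5-0.5B-Chat-GGUF",
#           "filename": "qwen1_5-0_5b-chat-q8_0.gguf",
#           "context": 4096,
#       }),
#       api_name="/llm_load",
#   )
#   print(status)  # '{"status": "ok"}' on success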