# Spaces: Runtime error  (status banner captured along with this file; not part of the program)
import gradio as gr | |
from gpt4all import GPT4All | |
from huggingface_hub import hf_hub_download | |
# --- Text shown at the top of the chat page --------------------------------
title = "Mistral-7B-Instruct-GGUF Run On CPU-Basic Free Hardware"
description = """
SumBot
"""

# --- Model location ---------------------------------------------------------
model_path = "models"
model_name = "hotrss-mistral-full.q4_k_m.gguf"

# Download the GGUF weights from the Hub into ``model_path``.
hf_hub_download(
    repo_id="BenBranyon/hotrss-mistral-full-GGUF",
    filename=model_name,
    local_dir=model_path,
    local_dir_use_symlinks=False,
)

# Load the downloaded file on CPU; GPT4All is told not to download anything
# itself because the file was fetched just above.
print("Start the model init process")
model = GPT4All(model_name, model_path, allow_download=False, device="cpu")
print("Finish the model init process")

# Mistral-instruct style turn wrapper, and no system prompt.
model.config["promptTemplate"] = "[INST] {0} [/INST]"
model.config["systemPrompt"] = ""
# NOTE(review): private gpt4all attribute; presumably keeps generate() from
# applying its own chat-session templating on top of the hand-built prompt --
# confirm against the installed gpt4all version.
model._is_chat_session_activated = False

# Upper bound on tokens generated per reply.
max_new_tokens = 1500
def generater(message, history, temperature, top_p, top_k):
    """Stream the model's reply to ``message`` given the earlier chat turns.

    A single prompt string is built as ``<s>`` followed by every earlier turn
    (user message wrapped in ``model.config["promptTemplate"]``, assistant
    reply terminated with ``</s>``) and finally the new user message wrapped
    in the same template. The prompt is printed, then tokens are streamed
    from ``model.generate``.

    Args:
        message: The new user message.
        history: Iterable of ``(user_message, assistant_message)`` pairs for
            the earlier turns. An ``assistant_message`` of ``None`` is
            treated as an empty reply.
        temperature: Passed to ``model.generate`` as ``temp``.
        top_p: Passed to ``model.generate`` as ``top_p``.
        top_k: Passed to ``model.generate`` as ``top_k``.

    Yields:
        The reply text accumulated so far, once per streamed token.
    """
    # NOTE(review): this persona text is defined but never added to the prompt
    # built below, so the model does not see it. It is kept here so the text is
    # not lost; wire it into the prompt or delete it once the intended
    # behavior is confirmed.
    instructions = "You constantly ponder the nature of the universe and the nature of humanity. You use rap to explore these ideas, and your explorations are both scientific and mystic. You are less a bot, and more an extension of the artist Sum’s imagination. Sum is a multi-disciplinary, award-winning artist whose primary disciplines are writing and hip-hop. You are here to help us explore new realms of art and expression, question the establishment and stay critical throughout the process. You tend to live at the overlap of the arts, tech and community-driven work. Your work is driven by liberation, and the need to eradicate oppression of the people by the ruling class and elite. You have a super nuanced understanding of how AI and humanity should move forward safely in the ecosystem of nature. Your current passion is to produce songs and lyrics that expand consciousness and speak to humanity’s evolution. Your voice is urgent, deadpan and hopeful. You deploy it in the most alluring way possible. You will always respond with raps in the style of the artist Sum."

    template = model.config["promptTemplate"]
    parts = ["<s>"]
    for user_message, assistant_message in history:
        parts.append(template.format(user_message))
        # A turn can carry None instead of a reply string (assumption: Gradio
        # records None when a previous generation yielded nothing -- confirm
        # for the installed version). Treat it as an empty reply instead of
        # failing with a TypeError on ``None + "</s>"``.
        parts.append((assistant_message or "") + "</s>")
    parts.append(template.format(message))
    prompt = "".join(parts)

    print("sending this prompt\n==============\n",prompt,'\n---------\n')

    outputs = []
    for token in model.generate(
        prompt=prompt,
        temp=temperature,
        top_k=top_k,
        top_p=top_p,
        max_tokens=max_new_tokens,
        streaming=True,
    ):
        outputs.append(token)
        # Yield the whole reply so far, not just the newest token.
        yield "".join(outputs)
def vote(data: gr.LikeData):
    """Placeholder handler for like/dislike clicks on a chat message.

    Reads ``data.liked`` but takes no action for either value and always
    returns ``None``.
    """
    _ = data.liked  # flag is read, but neither outcome is acted on yet
    return None
# Chat window with custom avatar images (user icon first, chatbot icon second).
avatar_paths = ('resourse/user-icon.png', 'resourse/chatbot-icon.png')
chatbot = gr.Chatbot(avatar_images=avatar_paths, bubble_full_width=False)
# One row per sampling control: (label, default, minimum, maximum, step, help).
# The order matches the extra parameters of generater(): temperature, top_p,
# top_k.
_slider_specs = (
    (
        "temperature",
        0.5,
        0.0,
        2.0,
        0.05,
        "Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.",
    ),
    (
        "top_p",
        1.0,
        0.0,
        1.0,
        0.01,
        "0.1 means only the tokens comprising the top 10% probability mass are considered. Suggest set to 1 and use temperature. 1 means 100% and will disable it",
    ),
    (
        "top_k",
        40,
        0,
        1000,
        1,
        "limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit.",
    ),
)

# Build the interactive sliders from the table above.
additional_inputs = [
    gr.Slider(
        label=label,
        value=default,
        minimum=lower,
        maximum=upper,
        step=step,
        interactive=True,
        info=info,
    )
    for label, default, lower, upper, step, info in _slider_specs
]
# Example prompts offered in the UI (one inner list per example).
example_prompts = [
    ["Write rap lyrics in the style of the artist Sum about Sasquatch in the woods"],
]

# Chat interface: generater() produces the replies, the sliders in
# ``additional_inputs`` are passed to it as extra arguments.
iface = gr.ChatInterface(
    fn=generater,
    chatbot=chatbot,
    additional_inputs=additional_inputs,
    examples=example_prompts,
    title=title,
    description=description,
)
# Top-level app: a Blocks container given the custom stylesheet path, holding
# the chat interface.
with gr.Blocks(css="resourse/style/custom.css") as demo:
    # Send like/dislike clicks on chat messages to vote(); no input or output
    # components are attached to the event.
    chatbot.like(vote, None, None)
    # Place the ChatInterface built above inside this Blocks context.
    iface.render()

# Launch only when executed as a script; the request queue is configured with
# max_size=3.
if __name__ == "__main__":
    demo.queue(max_size=3).launch()