# Hugging Face Space app (Space status at capture time: "Runtime error").
import gradio as gr
from llama_cpp import Llama

# Load the quantized Marco-o1 model from the Hugging Face Hub.
# `from_pretrained` downloads the GGUF file on first run and caches it.
llm = Llama.from_pretrained(
    repo_id="bartowski/Marco-o1-GGUF",
    filename="Marco-o1-Q4_K_M.gguf",
)

# Access the tokenizer from the Llama model.
# FIX: `Llama` exposes no `get_tokenizer()` method (the original line raised
# AttributeError at import time, matching the Space's "Runtime error" status).
# The correct accessor is `Llama.tokenizer()`, which returns a LlamaTokenizer
# providing the `encode()` method used below.
tokenizer = llm.tokenizer()
def respond( | |
message, | |
history: list[tuple[str, str]], | |
system_message, | |
max_tokens, | |
temperature, | |
top_p, | |
): | |
# Initialize an empty list to hold tokenized messages | |
tokenized_messages = [] | |
# Tokenize the system message | |
tokenized_messages.append(tokenizer.encode(system_message)) | |
# Tokenize the history messages | |
for val in history: | |
if val[0]: | |
tokenized_messages.append(tokenizer.encode(val[0])) # User message | |
if val[1]: | |
tokenized_messages.append(tokenizer.encode(val[1])) # Assistant message | |
# Tokenize the current user message | |
tokenized_messages.append(tokenizer.encode(message)) | |
response = "" | |
# Use llm.create_completion with tokenized message | |