Pico V1

Pico v1 is a work-in-progress model. Based on Phi 3.5 Mini, it has been fine-tuned for automatic chain-of-thought (CoT) reasoning and self-reflection.

When producing an output, Pico creates three sections: a reasoning section, a self-reflection section, and an output section.

Pico v1 struggles with tasks that are not question-related (small talk, roleplay, etc.).

Here is an example of how you can use it:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Jinja-style chat template string, assembled from its fragments.
# Each message renders as "<|role|>\ncontent<|end|>\n"; an assistant header
# is appended when `add_generation_prompt` is truthy.
# NOTE(review): nothing in the visible example reads this constant --
# confirm whether it is meant to be assigned to `tokenizer.chat_template`.
phi3_template = "".join(
    [
        # Beginning-of-sequence marker.
        "{{ bos_token }}",
        # One rendered turn per message.
        "{% for message in messages %}",
        "{{ '<|' + message['role'] + '|>\\n' + message['content'] + '<|end|>\\n' }}",
        "{% endfor %}",
        # Optional header that cues the assistant's next turn.
        "{% if add_generation_prompt %}",
        "{{ '<|assistant|>\\n' }}",
        "{% endif %}",
    ]
)

# Marker that closes every turn in the template above.
phi3_template_eos_token = "<|end|>"

def build_prompt(messages, bos_token="<|start|>", add_generation_prompt=True):
    """Render a conversation as one prompt string in the Phi 3.5 chat layout.

    Every message is written as a ``<|role|>`` header line followed by the
    message content and an immediate ``<|end|>`` marker plus newline. No
    newline is inserted between the content and ``<|end|>``, so the output
    agrees with the ``phi3_template`` string defined in this file.

    Args:
        messages: Iterable of mappings, each with ``'role'`` and
            ``'content'`` keys.
        bos_token: String placed at the very start of the prompt.
        add_generation_prompt: When true, finish with an ``<|assistant|>``
            header line so the model continues as the assistant.

    Returns:
        The assembled prompt string.
    """
    pieces = [bos_token]
    pieces.extend(
        f"<|{message['role']}|>\n{message['content']}<|end|>\n"
        for message in messages
    )
    if add_generation_prompt:
        pieces.append("<|assistant|>\n")
    # Join once instead of growing the string with repeated `+=`.
    return "".join(pieces)

def chat_with_model():
    """Run an interactive terminal chat loop against the Pico v1 model.

    Loads the tokenizer and model for ``LucidityAI/Pico-v1-3b``, then
    repeatedly reads a line from stdin, builds a prompt from the whole
    conversation so far, generates a reply, prints it, and records it in the
    conversation history. Typing ``exit`` (any letter case) ends the loop.

    Raises:
        RuntimeError: If CUDA is not available.
    """
    # Load the model and tokenizer
    model_name = "LucidityAI/Pico-v1-3b"
    print("Loading model and tokenizer...")

    # Enforce GPU usage
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available. Please ensure your GPU and CUDA environment are configured correctly.")

    device = torch.device("cuda")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
    # The inputs are moved to `device` below, so the model must live there too;
    # otherwise generation fails on a device mismatch.
    model = model.to(device)
    print("Model and tokenizer loaded successfully.")

    # Chat loop
    print("Start chatting with the model! Type 'exit' to quit.")
    conversation = []
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Goodbye!")
            break

        # Append user's message to the conversation
        conversation.append({"role": "user", "content": user_input})

        # Build the input prompt using the PHI 3.5 template
        prompt = build_prompt(conversation, bos_token=tokenizer.bos_token or "<|start|>")

        # Tokenize the input prompt. The prompt text already starts with the
        # BOS token, so ask the tokenizer not to add special tokens again.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=1024,
            add_special_tokens=False,
        ).to(device)
        prompt_token_count = inputs.input_ids.shape[-1]

        # Generate a response.
        # - max_new_tokens budgets only the reply; a total max_length equal to
        #   the truncation limit leaves no room once the history grows.
        # - do_sample=True is needed for `temperature` to have any effect.
        # - attention_mask is passed explicitly because the pad token id is
        #   set to the EOS token id.
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=1024,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.5,
            pad_token_id=tokenizer.eos_token_id,
        )

        # Decode only the newly generated tokens. Slicing the decoded text by
        # len(prompt) is unreliable because skip_special_tokens=True removes
        # the chat markers, so the decoded text no longer lines up with the
        # prompt string.
        reply_tokens = outputs[0][prompt_token_count:]
        assistant_reply = tokenizer.decode(reply_tokens, skip_special_tokens=True).strip()
        print(f"Model: {assistant_reply}")

        # Append the assistant's reply to the conversation
        conversation.append({"role": "assistant", "content": assistant_reply})

# Start the interactive chat only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    chat_with_model()
Downloads last month
66
Safetensors
Model size
3.82B params
Tensor type
F32
·
FP16
·
I8
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Model tree for LucidityAI/Pico-v1-3b

Quantized
(110)
this model

Collection including LucidityAI/Pico-v1-3b