# NOTE: scrape artifact — the original capture began with the HuggingFace
# Space status header "Spaces: Sleeping"; preserved here as a comment so
# the file remains valid Python.
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re

# Load the model and tokenizer.
# SECURITY FIX: the original hard-coded a HuggingFace access token ("hf_...")
# directly in source — a leaked credential that must be revoked.
# Qwen/Qwen2.5-0.5B is a public model, so no token is needed at all; for
# gated models, supply credentials via the HF_TOKEN environment variable
# (read automatically by huggingface_hub), never as a source literal.
model_name = "Qwen/Qwen2.5-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# System prompt prepended to every query sent to the model.
system_prompt = """You are BlackBerry, an advanced AI model with the "Little Thinking" technique. You use four "Berry" thinkers to analyze queries and provide accurate responses."""
def generate_response(prompt, max_length=100):
    """Generate a model completion for *prompt*, framed with the system prompt.

    Args:
        prompt: User text to answer.
        max_length: Maximum number of NEW tokens to generate (parameter name
            kept for backward compatibility with existing callers).

    Returns:
        The text the model produced after the final "BlackBerry:" marker.
    """
    full_prompt = f"{system_prompt}\n\nUser: {prompt}\n\nBlackBerry:"
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        # Fixes vs. the original call:
        # - `max_length` in generate() bounds prompt + completion together, so
        #   a long prompt (up to 512 tokens here) left no budget to generate
        #   anything; `max_new_tokens` bounds only the completion.
        # - `temperature` has no effect with greedy decoding (the default);
        #   `do_sample=True` is required for temperature=0.7 to apply.
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("BlackBerry:")[-1].strip()
def little_thinking(prompt):
    """Run the four "Berry" thinkers over *prompt* and join their notes."""
    return "".join(
        f"*Berry-{n}: {generate_response(f'As Berry-{n}, briefly analyze: {prompt}', max_length=50)}*\n\n"
        for n in range(1, 5)
    )
def reviewer_thinking(prompt):
    """Ask the model to review *prompt* and wrap the reply in markers."""
    review = generate_response(f'As a Reviewer, briefly check: {prompt}', max_length=50)
    return f"*Reviewer: {review}*\n\n"
def second_reviewer_thinking(prompt):
    """Ask the model for a second verification pass and wrap it in markers."""
    verification = generate_response(f'As a Second Reviewer, briefly verify: {prompt}', max_length=50)
    return f"*Second Reviewer: {verification}*\n\n"
def blackberry_response(prompt):
    """Build the full multi-stage BlackBerry reply for a user *prompt*.

    Stages: four "Berry" analyses, an initial answer, a reviewer pass over
    everything produced so far, an optional second review for hard-looking
    questions, and a final answer.
    """
    stages = [
        "BlackBerry: Analyzing with Little Thinking technique.\n\n",
        # Four "Berry" thinkers examine the query first.
        little_thinking(prompt),
        # Draft answer from the model.
        f"BlackBerry: Initial answer:\n{generate_response(prompt, max_length=100)}\n\n",
    ]
    response = "".join(stages)
    # The reviewer is shown everything accumulated so far, not just the prompt.
    response += reviewer_thinking(response)
    # Questions matching "hard" keywords get an extra verification pass.
    if re.search(r'\b(physics|science|coordinate|hard|difficult)\b', prompt, re.IGNORECASE):
        response += second_reviewer_thinking(response)
    response += f"BlackBerry: Final answer:\n{generate_response(prompt, max_length=150)}"
    return response
# Create the Gradio interface
iface = gr.Interface(
    fn=blackberry_response,
    inputs=gr.Textbox(lines=5, label="Enter your query"),
    outputs=gr.Textbox(label="BlackBerry's Response"),
    title="Blackberry-1 LLM",
    # Fix: the description previously claimed "meta-llama/Llama-3.2-1B", but
    # the model actually loaded above is Qwen/Qwen2.5-0.5B.
    description="Powered by Qwen/Qwen2.5-0.5B with 'Little Thinking' technique",
)

# Launch the app
iface.launch()