# app.py
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
# Initialize variables
model = None
tokenizer = None
device = None
# Define function to load model
def load_model():
    global model, tokenizer, device
    # Use GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    # Load the Phi-2 model
    model_id = "microsoft/phi-2"
    print("Loading Phi-2 model and tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto"  # Let accelerate handle weight placement (better for Spaces)
    )
    print("Model loaded successfully!")
# Define inference function
def generate_text(prompt, task_type, max_length=300):
    global model, tokenizer, device
    # Lazily load the model on the first request
    if model is None:
        load_model()
    # Set temperature based on task type
    temperature_map = {
        "Math Problem": 0.2,
        "Science Theory": 0.4,
        "Coding Question": 0.3,
        "Reasoning": 0.5,
        "Creative Writing": 0.8
    }
    temperature = temperature_map.get(task_type, 0.5)
    # Enhance the prompt to request step-by-step solutions
    enhanced_prompt = f"{prompt}\n\nPlease provide a detailed step-by-step solution with clear reasoning."
    # Progress update for UI
    yield "Generating solution..."
    # Tokenize input and move it to the model's device
    inputs = tokenizer(enhanced_prompt, return_tensors="pt").to(model.device)
    # Generate output
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
    # Decode only the newly generated tokens so the prompt isn't echoed back
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    # If the response doesn't seem to include steps, add formatting for clarity
    if "step" not in response.lower() and len(response) > 100:
        # Split into paragraphs and format them as numbered steps
        paragraphs = [p for p in response.split('\n') if p.strip()]
        formatted_response = ""
        for i, para in enumerate(paragraphs):
            formatted_response += f"Step {i + 1}: {para}\n\n"
        yield formatted_response
    else:
        yield response
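# Note: the generator above yields only a progress message and then the full
# result. Token-by-token streaming would be possible with transformers'
# TextIteratorStreamer (an alternative sketch, not what this app currently
# does): run generate() in a background thread and yield the accumulated
# text as each new token arrives.
#
#   from threading import Thread
#   from transformers import TextIteratorStreamer
#
#   streamer = TextIteratorStreamer(tokenizer, skip_prompt=True,
#                                   skip_special_tokens=True)
#   Thread(target=model.generate,
#          kwargs=dict(**inputs, max_new_tokens=max_length,
#                      temperature=temperature, do_sample=True,
#                      streamer=streamer)).start()
#   partial = ""
#   for token_text in streamer:
#       partial += token_text
#       yield partial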
# Create Gradio interface
with gr.Blocks(title="Phi-2 Step-by-Step Solution Generator", theme=gr.themes.Soft()) as demo:
gr.Markdown("# 🧠 Phi-2 Step-by-Step Solution Generator")
gr.Markdown("""
Enter a prompt below and get detailed step-by-step solutions using Microsoft's Phi-2 model.
Select the appropriate task type to optimize the model's response.
""")
    with gr.Row():
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Enter your question or problem here...",
                lines=5
            )
            with gr.Row():
                task_type = gr.Radio(
                    ["Math Problem", "Science Theory", "Coding Question", "Reasoning", "Creative Writing"],
                    label="Task Type (sets optimal temperature)",
                    value="Reasoning"
                )
                max_length_slider = gr.Slider(
                    minimum=100,
                    maximum=1000,
                    value=300,
                    step=50,
                    label="Maximum Output Length"
                )
            with gr.Row():
                generate_button = gr.Button(
                    "✨ Generate Step-by-Step Solution",
                    variant="primary",
                    size="lg"
                )
                clear_button = gr.Button("Clear", variant="secondary")
        with gr.Column(scale=3):
            output_text = gr.Textbox(
                label="Step-by-Step Solution",
                lines=15,
                show_copy_button=True
            )
    # Examples with different task types
    with gr.Accordion("Example Prompts", open=False):
        gr.Examples(
            examples=[
                ["Solve the quadratic equation: 2x² + 5x - 3 = 0", "Math Problem"],
                ["Explain how photosynthesis works in plants", "Science Theory"],
                ["Write a function in Python to find the Fibonacci sequence up to n terms", "Coding Question"],
                ["Why might increasing minimum wage have both positive and negative economic impacts?", "Reasoning"],
                ["Write a short story about a robot discovering emotions", "Creative Writing"]
            ],
            inputs=[prompt_input, task_type]
        )
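    # These examples only prefill the inputs. gr.Examples also accepts
    # fn/outputs with cache_examples=True to precompute results, but that
    # would force a full model load at startup (an option, not used here).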
    # Wire up the generate button
    generate_button.click(
        fn=generate_text,
        inputs=[prompt_input, task_type, max_length_slider],
        outputs=output_text
    )
    # Clear functionality: reset the prompt, task type, and previous output
    clear_button.click(
        fn=lambda: ("", "Reasoning", ""),
        inputs=[],
        outputs=[prompt_input, task_type, output_text]
    )
    # Note about load times
    gr.Markdown("""
    > **Note**: The model loads when you submit your first prompt, which may take 1-2 minutes.
    > Subsequent generations will be much faster.
    """)
# Launch the app
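# queue() below matters because generate_text is a generator: in older Gradio
# versions, streamed (yielded) updates only work with the queue enabled. On a
# shared Space the backlog could also be bounded, e.g.
# demo.queue(max_size=16).launch(), where max_size=16 is an illustrative
# assumption rather than this app's setting.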
if __name__ == "__main__":
    demo.queue().launch()