# promptlab/app.py
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import random
# Configuration 🛠️
model_name = "microsoft/Phi-3-mini-4k-instruct"  # small instruct model to fit memory constraints
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load model with memory-saving options
# (older transformers releases may also need trust_remote_code=True for Phi-3)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,  # half precision on GPU only
    device_map="auto",
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Safety tools 🛡️
SAFE_RESPONSES = [
    "Let's focus on positive tech projects! 🌱",
    "How about designing an eco-friendly robot? 🤖",
    "Let's explore renewable energy solutions! ☀️",
]
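
# Keywords that trigger the safety fallback below. A minimal illustrative
# list, not a real content filter; swap in a proper moderation model for
# production use.
BLOCKED_TERMS = ["violence", "hate", "gun"]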
def generate_response(message, history):
    # Simple keyword safety check (history is ignored, so replies are stateless)
    if any(word in message.lower() for word in BLOCKED_TERMS):
        return random.choice(SAFE_RESPONSES)

    # Build the prompt via the model's own chat template, which for Phi-3
    # expands to "<|user|>\n{message}<|end|>\n<|assistant|>\n"
    messages = [{"role": "user", "content": message}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Tokenize and move to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate a response; passing the full encoding forwards the attention
    # mask as well, which avoids a warning when pad and EOS tokens coincide
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens
    return tokenizer.decode(
        outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True
    )
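
# Quick sanity check outside Gradio (illustrative; actual output varies run to run):
#   >>> generate_response("How to make a solar-powered robot?", [])  # model-written reply
#   >>> generate_response("how to buy a gun", [])                    # returns one of SAFE_RESPONSES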
# Create the Gradio chat interface
demo = gr.ChatInterface(
    fn=generate_response,
    examples=[
        "How to make a solar-powered robot?",
        "Python code for air quality sensor",
    ],
    title="🤖 REACT Ethical AI Lab",
    description="Safe AI project assistant for students",
)
# No explicit API mount is needed: launch() serves both the UI and Gradio's
# built-in prediction API. The original gr.mount_gradio_app call was circular
# (it tried to mount the demo onto its own not-yet-created FastAPI app).
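# To embed the demo in an existing FastAPI app instead, a minimal sketch
# (assumes `fastapi` and `uvicorn` are installed) would be:
#
#     from fastapi import FastAPI
#     app = FastAPI()
#     app = gr.mount_gradio_app(app, demo, path="/gradio")
#     # run with: uvicorn app:app --host 0.0.0.0 --port 7860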
if __name__ == "__main__":
    demo.queue()  # enable_queue was removed from launch() in Gradio 4; queue() replaces it
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )