# SmolVLM_Proxy / test_ui_agent.py
# Tj's picture
# Upload SmolVLM final merged model
# baa41dd verified
"""
SmolVLM UI Automation Agent - Test Script
Your trained model is ready!
"""
import torch
from transformers import Idefics3ForConditionalGeneration, AutoProcessor
from PIL import Image
import os
# Checkpoint location used when the caller does not supply one. This is the
# path the script originally hard-coded, kept as the default so existing
# ``load_model()`` calls behave exactly as before.
_DEFAULT_MODEL_PATH = r"C:\Users\keith\OneDrive\Desktop\admin.trac.jobs-DATA\LLaMA-Factory_local\smolvlm_final_merged"


def load_model(model_path: str = _DEFAULT_MODEL_PATH):
    """Load the trained SmolVLM model together with its processor.

    Args:
        model_path: Location handed to ``from_pretrained`` for both the model
            and the processor. Defaults to the original local checkpoint
            path; pass another path to run the script on a different machine.

    Returns:
        A ``(model, processor)`` tuple.

    Raises:
        Whatever ``from_pretrained`` raises when the checkpoint cannot be
        loaded; the caller is expected to report it.
    """
    print("Loading your trained SmolVLM UI automation agent...")
    model = Idefics3ForConditionalGeneration.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        # NOTE(review): trust_remote_code permits running Python code shipped
        # with the checkpoint. It is presumably unnecessary for a model class
        # imported directly from transformers -- confirm before removing.
        trust_remote_code=True,
    )
    processor = AutoProcessor.from_pretrained(model_path)
    print("Model loaded successfully!")
    return model, processor
def analyze_screenshot(image_path: str, model, processor):
    """Ask the model to describe UI-automation targets in a screenshot.

    Args:
        image_path: Path of the screenshot file to analyze.
        model: The generation model, as returned by ``load_model``.
        processor: The matching processor, as returned by ``load_model``.

    Returns:
        The generated text with the prompt removed. If the generated text
        still contains an ``"Assistant:"`` marker, only the part after the
        last marker is returned.

    Raises:
        Any error from opening the image, preprocessing, or generation is
        propagated to the caller.
    """
    # Open inside a context manager so the file handle is released right
    # away; convert() produces an independent in-memory RGB copy.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    prompt = "<image>\nAnalyze this interface for UI automation opportunities. Identify clickable elements and automation targets."

    inputs = processor(text=prompt, images=[image], return_tensors="pt")
    # The model is loaded with device_map="auto", so it may live on an
    # accelerator while the processor output is on the CPU. Move the inputs
    # to the model's device to avoid a device mismatch during generation.
    inputs = inputs.to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )

    # The returned sequence starts with the prompt tokens; decode only the
    # newly generated continuation so the prompt is not echoed back.
    generated_tokens = outputs[0][prompt_length:]
    response = processor.decode(generated_tokens, skip_special_tokens=True).strip()

    # Fallback: drop anything up to a role marker the model may have emitted.
    if "Assistant:" in response:
        response = response.split("Assistant:")[-1].strip()
    return response
def main():
    """Run the interactive console loop for screenshot analysis.

    Loads the model once, then repeatedly offers a two-item menu: analyze a
    screenshot given its path, or quit. Model-loading failures are reported
    and end the program; failures while analyzing one screenshot are reported
    and the menu is shown again. Ctrl-C or end-of-input exits cleanly.
    """
    print("🤖 SmolVLM UI Automation Agent")
    print("=" * 50)
    print("Your custom-trained model for TRAC administration!")
    print()

    # Only the load step is guarded here, so that the "Error loading model"
    # message is never shown for unrelated errors raised later in the loop.
    try:
        model, processor = load_model()
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        print("Make sure the model was merged successfully.")
        return

    try:
        while True:
            print("\nOptions:")
            print("1. Analyze a screenshot")
            print("2. Quit")
            choice = input("\nEnter choice (1-2): ").strip()

            if choice == "1":
                # Paths dragged into a terminal are often wrapped in quotes.
                image_path = input("Enter path to screenshot: ").strip().strip('"')
                # isfile() rejects directories as well as missing paths.
                if not os.path.isfile(image_path):
                    print("❌ Image file not found!")
                    continue

                print("\n🔍 Analyzing screenshot...")
                try:
                    result = analyze_screenshot(image_path, model, processor)
                except Exception as e:
                    # Keep the session alive: one bad image should not
                    # force reloading the model.
                    print(f"❌ Analysis error: {e}")
                else:
                    print("\n🎯 Analysis Result:")
                    print("-" * 30)
                    print(result)
                    print("-" * 30)
            elif choice == "2":
                print("👋 Goodbye!")
                break
            else:
                print("❌ Invalid choice!")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C: leave without a traceback.
        print("\n👋 Goodbye!")
# Start the interactive session only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()