#!/usr/bin/env python3
"""
Mobile VLA usage example
"""
from PIL import Image

def load_mobile_vla_model(model_name="minuum/mobile-vla"):
    """Load the Mobile VLA model."""
    # Implement the actual model-loading logic here.
    print(f"Loading Mobile VLA model: {model_name}")
    # The real implementation uses MobileVLATrainer:
    # from robovlms.train.mobile_vla_trainer import MobileVLATrainer
    # model = MobileVLATrainer.from_pretrained(model_name)
    return None  # placeholder
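
# Hedged sketch (an assumption, not the repo's confirmed loading path): one way
# to materialize the checkpoint locally is huggingface_hub.snapshot_download,
# after which the RoboVLMs trainer referenced above could consume the local dir.
def download_checkpoint_sketch(repo_id="minuum/mobile-vla"):
    """Illustrative helper; this function is not part of the repo."""
    from huggingface_hub import snapshot_download
    # Mirror the Hub repo into the local cache and return its path.
    local_dir = snapshot_download(repo_id=repo_id)
    # A real loader (e.g. MobileVLATrainer.from_pretrained) could point here.
    return local_dir
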
def predict_action(model, image_path, task_description):
    """Predict an action for the given image and task."""
    # Load the image.
    image = Image.open(image_path).convert("RGB")
    # Preprocessing (the real implementation uses mobile_vla_collate_fn):
    # processed = preprocess_image(image)
    # Prediction (placeholder):
    dummy_action = [0.5, 0.2, 0.1]  # [linear_x, linear_y, angular_z]
    return dummy_action
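
# Hedged sketch of the preprocessing step stubbed out above. The real pipeline
# uses mobile_vla_collate_fn; this stand-in assumes a common 224x224 resize and
# ImageNet normalization, which may not match the model's actual transforms.
def preprocess_image_sketch(image):
    """Illustrative only; the resolution and stats below are assumptions."""
    from torchvision import transforms
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # assumed input resolution
        transforms.ToTensor(),          # PIL HWC uint8 -> CHW float in [0, 1]
        transforms.Normalize(           # assumed ImageNet mean/std
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])
    return transform(image).unsqueeze(0)  # add batch dim -> (1, 3, 224, 224)
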
def main():
    """Main entry point."""
    print("🚀 Running the Mobile VLA example")
    # Load the model.
    model = load_mobile_vla_model()
    # Example prediction.
    task = "Navigate around obstacles to track the target cup"
    action = predict_action(model, "example_image.jpg", task)
    print(f"Task: {task}")
    print(f"Predicted Action: {action}")
    print(f" - Linear X (forward/backward): {action[0]:.3f}")
    print(f" - Linear Y (left/right): {action[1]:.3f}")
    print(f" - Angular Z (rotation): {action[2]:.3f}")
if __name__ == "__main__":
    main()