#!/usr/bin/env python3
"""
Mobile VLA usage example.
"""
import torch
from transformers import AutoTokenizer, AutoProcessor
from PIL import Image
import numpy as np


def load_mobile_vla_model(model_name="minuum/mobile-vla"):
    """Load the Mobile VLA model."""
    # Implement the actual model-loading logic here.
    print(f"Loading Mobile VLA model: {model_name}")
    # A real implementation would use MobileVLATrainer:
    # from robovlms.train.mobile_vla_trainer import MobileVLATrainer
    # model = MobileVLATrainer.from_pretrained(model_name)
    return None  # placeholder
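

# A minimal sketch of fetching the text/image frontends with plain
# transformers. It assumes the "minuum/mobile-vla" checkpoint ships
# tokenizer and processor configs (not verified here); the project itself
# loads everything through MobileVLATrainer as noted above.
def load_tokenizer_and_processor(model_name="minuum/mobile-vla"):
    """Hypothetical helper; both calls are standard transformers APIs."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    processor = AutoProcessor.from_pretrained(model_name)
    return tokenizer, processor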


def predict_action(model, image_path, task_description):
    """Predict an action for one RGB frame and a task description."""
    # Load the image; fall back to a blank frame so the example still runs
    # when example_image.jpg is not present.
    try:
        image = Image.open(image_path).convert("RGB")
    except FileNotFoundError:
        image = Image.new("RGB", (224, 224))
    # Preprocessing (the real implementation uses mobile_vla_collate_fn;
    # a hypothetical stand-alone version is sketched below):
    # processed = preprocess_image(image)
    # Prediction (placeholder)
    dummy_action = [0.5, 0.2, 0.1]  # [linear_x, linear_y, angular_z]
    return dummy_action
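

# A minimal sketch of the single-image preprocessing that the commented-out
# preprocess_image call above stands for. The resize target and the
# ImageNet normalization constants are assumptions; the real pipeline's
# mobile_vla_collate_fn may apply a different transform.
def preprocess_image(image, size=(224, 224)):
    """Hypothetical preprocessing: resize, normalize, convert to a CHW tensor."""
    resized = image.resize(size)
    arr = np.asarray(resized, dtype=np.float32) / 255.0  # HWC in [0, 1]
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    arr = (arr - mean) / std
    chw = np.ascontiguousarray(arr.transpose(2, 0, 1))  # HWC -> CHW
    return torch.from_numpy(chw).unsqueeze(0)  # (1, 3, H, W) batch of one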


def main():
    """Entry point for the example."""
    print("🚀 Running the Mobile VLA example")
    # Load the model
    model = load_mobile_vla_model()
    # Run an example prediction
    task = "Navigate around obstacles to track the target cup"
    action = predict_action(model, "example_image.jpg", task)
    print(f"Task: {task}")
    print(f"Predicted Action: {action}")
    print(f"  - Linear X (forward/backward): {action[0]:.3f}")
    print(f"  - Linear Y (left/right): {action[1]:.3f}")
    print(f"  - Angular Z (rotation): {action[2]:.3f}")


if __name__ == "__main__":
    main()