File size: 1,607 Bytes
e217881 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
#!/usr/bin/env python3
"""
Mobile VLA ์ฌ์ฉ ์์
"""
import torch
from transformers import AutoTokenizer, AutoProcessor
from PIL import Image
import numpy as np
def load_mobile_vla_model(model_name="minuum/mobile-vla"):
    """Load the Mobile VLA model identified by *model_name*.

    Placeholder implementation: it only announces which checkpoint would
    be loaded and returns ``None``.  A real implementation would use the
    project trainer, e.g.::

        from robovlms.train.mobile_vla_trainer import MobileVLATrainer
        model = MobileVLATrainer.from_pretrained(model_name)

    Returns:
        None (placeholder — no model is actually constructed).
    """
    print(f"Loading Mobile VLA model: {model_name}")
    return None
def predict_action(model, image_path, task_description):
    """Predict a mobile robot action for one image + task description.

    Placeholder implementation: the image is opened and normalized to RGB
    (exercising the input path), but the returned action is a fixed dummy
    triple — a real implementation would preprocess via
    ``mobile_vla_collate_fn`` and run the model.

    Args:
        model: the loaded model (ignored by this placeholder).
        image_path: path to the input image file.
        task_description: natural-language task (ignored by this placeholder).

    Returns:
        list[float]: dummy ``[linear_x, linear_y, angular_z]`` command.
    """
    # Load the frame; convert to RGB so downstream preprocessing sees
    # a consistent channel layout.
    image = Image.open(image_path).convert("RGB")

    # Dummy prediction until a real model is wired in.
    dummy_action = [0.5, 0.2, 0.1]  # [linear_x, linear_y, angular_z]
    return dummy_action
def main():
    """Demo entry point: load the placeholder model and print one prediction."""
    print("๐ Mobile VLA ์์ ์คํ")

    # Fixed demo task; the model here is only the loader's placeholder (None).
    task = "Navigate around obstacles to track the target cup"
    model = load_mobile_vla_model()
    action = predict_action(model, "example_image.jpg", task)

    # Report the task and the resulting [linear_x, linear_y, angular_z] command.
    print(f"Task: {task}")
    print(f"Predicted Action: {action}")
    print(f"   - Linear X (forward/backward): {action[0]:.3f}")
    print(f"   - Linear Y (left/right): {action[1]:.3f}")
    print(f"   - Angular Z (rotation): {action[2]:.3f}")


if __name__ == "__main__":
    main()
|