spatialvla-4b-mix-224-pt / test_huggingface.py
delinqu's picture
Upload folder using huggingface_hub
367577f verified
import os
import argparse
from pathlib import Path
import shutil
import os
import argparse
from pathlib import Path
import shutil
import torch
from PIL import Image
from transformers import AutoModel, AutoProcessor
parser = argparse.ArgumentParser("Huggingface AutoModel Tesing")
parser.add_argument("--model_name_or_path", default="", help="pretrained model name or path.")
parser.add_argument("--num_images", type=int, default=1, help="num_images for testing.")
args = parser.parse_args()
if __name__ == "__main__":
model_name_or_path = Path(args.model_name_or_path)
processor = AutoProcessor.from_pretrained(args.model_name_or_path, trust_remote_code=True)
print(processor.statistics)
model = AutoModel.from_pretrained(args.model_name_or_path, trust_remote_code=True, torch_dtype=torch.bfloat16).eval().cuda()
image = Image.open("example.png").convert("RGB")
images = [image] * args.num_images
prompt = "What action should the robot take to pick the cpu?"
inputs = processor(images=images, text=prompt, unnorm_key="bridge_orig/1.0.0", return_tensors="pt")
print(inputs)
generation_outputs = model.predict_action(inputs)
print(generation_outputs, processor.batch_decode(generation_outputs))
actions = processor.decode_actions(generation_outputs, unnorm_key="bridge_orig/1.0.0")
print(actions)