Instructions to use studyOverflow/egrpo with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use studyOverflow/egrpo with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("studyOverflow/egrpo", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
```
- Notebooks
- Google Colab
- Kaggle
| from hpsv2.src.open_clip import create_model_and_transforms, get_tokenizer | |
| import torch | |
| from torchvision import transforms | |
| from PIL import Image | |
| import os | |
| from tqdm import tqdm | |
def initialize_model():
    """Build the ViT-H-14 model and its validation preprocessing transform.

    Returns:
        A ``(model_dict, device)`` tuple. ``model_dict`` stores the model
        under ``'model'`` and the validation transform under
        ``'preprocess_val'``. ``device`` is CUDA when it is available and
        CPU otherwise.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    clip_weights = '/mnt/dolphinfs/ssd_pool/docker/user/hadoop-videogen-hl/hadoop-camera3d/zhangshengjun/checkpoints/G2RPO/ckpt/CLIP-ViT-H-14-laion2B-s32B-b79K/pytorch_model.bin'

    # Keyword options forwarded unchanged to the model factory.
    factory_options = dict(
        precision='amp',
        device=device,
        jit=False,
        force_quick_gelu=False,
        force_custom_text=False,
        force_patch_dropout=False,
        force_image_size=None,
        pretrained_image=False,
        image_mean=None,
        image_std=None,
        light_augmentation=True,
        aug_cfg={},
        output_dict=True,
        with_score_predictor=False,
        with_region_predictor=False,
    )

    # The training transform is returned by the factory but not used here.
    clip_model, _train_transform, val_transform = create_model_and_transforms(
        'ViT-H-14',
        clip_weights,
        **factory_options,
    )

    bundle = {'model': clip_model, 'preprocess_val': val_transform}
    return bundle, device
def load_images_from_folder(folder):
    """Load every ``.png`` file found directly inside *folder* as RGB.

    Args:
        folder: Directory to scan; subdirectories are not descended into.

    Returns:
        ``(images, filenames)``: two parallel lists, in the order returned
        by ``os.listdir``. ``images[i]`` is the RGB-converted image read
        from ``filenames[i]``.
    """
    png_names = [entry for entry in os.listdir(folder) if entry.endswith(".png")]
    rgb_images = [
        Image.open(os.path.join(folder, entry)).convert("RGB")
        for entry in png_names
    ]
    return rgb_images, png_names
def main():
    """Score every PNG in the image folder and print the average HPS score.

    Builds the model via ``initialize_model``, loads the HPS v2.1 checkpoint
    into it, then scores each image against a prompt taken from its file
    name (without the extension). The score for one image is the product of
    its image features with the text features of that prompt. The mean over
    all images is printed; ``0`` is reported when no image was found.
    """
    model_dict, device = initialize_model()
    model = model_dict['model']
    preprocess_val = model_dict['preprocess_val']

    cp = "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-videogen-hl/hadoop-camera3d/zhangshengjun/checkpoints/G2RPO/ckpt/hps/HPS_v2.1_compressed.pt"
    # NOTE(review): torch.load unpickles the file; only point this at a
    # trusted checkpoint.
    checkpoint = torch.load(cp, map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    tokenizer = get_tokenizer('ViT-H-14')
    reward_model = model.to(device)
    reward_model.eval()

    img_folder = "IMAGE_SAVE_FOLDER"
    images, filenames = load_images_from_folder(img_folder)

    eval_rewards = []
    with torch.no_grad():
        # The progress total follows the actual number of loaded images; a
        # fixed value of 400 mis-reported progress for any other folder size.
        for image_pil, filename in tqdm(zip(images, filenames), total=len(images)):
            image = preprocess_val(image_pil).unsqueeze(0).to(device=device, non_blocking=True)
            prompt = os.path.splitext(filename)[0]  # strip the file extension
            text = tokenizer([prompt]).to(device=device, non_blocking=True)
            outputs = reward_model(image, text)
            image_features, text_features = outputs["image_features"], outputs["text_features"]
            logits_per_image = image_features @ text_features.T
            # Single image/prompt pair, so the diagonal holds exactly one
            # value; convert it to a plain Python number.
            hps_score = torch.diagonal(logits_per_image).item()
            eval_rewards.append(hps_score)

    avg_reward = sum(eval_rewards) / len(eval_rewards) if eval_rewards else 0
    print(f"Average HPS score: {avg_reward:.4f}")


if __name__ == "__main__":
    main()