| # import numpy as np | |
| import requests | |
| import torch | |
| from PIL import Image | |
| from torch.nn.functional import cosine_similarity | |
| from transformers import AutoImageProcessor, AutoModel | |
| from transformers import ViTImageProcessor, ViTModel | |
| from transformers import pipeline | |
| # import transformers | |
| # | |
| # print(transformers.__version__) | |
| # | |
| # img_urls = ["https://img0.baidu.com/it/u=3704428154,2884159591&fm=253&fmt=auto&app=138&f=JPEG?w=889&h=500", | |
| # "https://img0.baidu.com/it/u=3704428154,2884159591&fm=253&fmt=auto&app=138&f=JPEG?w=889&h=500"] | |
| # | |
| # image_real = Image.open(requests.get(img_urls[0], stream=True).raw).convert("RGB") | |
| # image_gen = Image.open(requests.get(img_urls[1], stream=True).raw).convert("RGB") | |
| # | |
| # # DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu') | |
| # Device shared by the helpers below: it is passed to the feature-extraction pipeline and is the target of the `.to(DEVICE)` calls on processor outputs. Pinned to CPU. | |
| DEVICE = torch.device('cpu') | |
| # pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-384", device=DEVICE, pool=True) | |
| # | |
| # # 1提取图片特征向量 | |
| # outputs = pipe([image_real, image_gen]) | |
| # | |
| # # get the length of a single output | |
| # print(len(outputs[0][0])) | |
| # # show outputs | |
| # print(outputs) | |
| # | |
| # # 768 | |
| # # [[[-0.03909236937761307, 0.43381670117378235, -0.06913255900144577, | |
| # | |
| # # 2计算图片相似度 | |
| # similarity_score = cosine_similarity(torch.Tensor(outputs[0]), | |
| # torch.Tensor(outputs[1]), dim=1) | |
| # | |
| # print(similarity_score) | |
| # tensor([0.6043]) | |
| # pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-224", device=DEVICE) | |
| # output = pipe(image_real) | |
| # | |
| # # 其中第一个维度是批量大小,最后两个维度是嵌入形状。 | |
| # print(np.array(outputs).shape) | |
| # # (1, 197, 768) | |
| # 第二种方式推理图片相似度 | |
| # processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224") | |
| # model = AutoModel.from_pretrained("google/vit-base-patch16-224").to(DEVICE) | |
| # processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k") | |
| # model = AutoModel.from_pretrained("google/vit-base-patch16-224-in21k").to(DEVICE) | |
| # processor = AutoImageProcessor.from_pretrained("chanhua/autotrain-izefx-v3qh0") | |
| # model = AutoModel.from_pretrained("chanhua/autotrain-izefx-v3qh0").to(DEVICE) | |
| # processor = ViTImageProcessor.from_pretrained('google/vit-large-patch16-224-in21k') | |
| # model = ViTModel.from_pretrained('google/vit-large-patch16-224-in21k') | |
| # processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k') | |
| # model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k') | |
| # tensor([0.6061], device='cuda:0', grad_fn=<SumBackward1>) | |
| # pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-384", device=DEVICE, pool=True) | |
| # pipe = pipeline(task="image-feature-extraction", model_name="chanhua/autotrain-izefx-v3qh0", device=DEVICE, pool=True) | |
| # pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-224", device=DEVICE, pool=True, revision="29e7a1e183") | |
| # 推理 | |
def infer4(url1, url2):
    """Return the cosine similarity of two image files via a feature pipeline.

    Both images are opened with ``PIL.Image.open``, converted to RGB and fed
    to a Hugging Face ``image-feature-extraction`` pipeline with pooling
    enabled; the two pooled outputs are then compared with
    ``cosine_similarity``.

    Args:
        url1: First image, in any form ``PIL.Image.open`` accepts. Despite
            the parameter name, nothing is downloaded here.
        url2: Second image, same form as ``url1``.

    Returns:
        The similarity as a Python float, or ``0.0`` if any step raises
        (the error is printed, not propagated).
    """
    try:
        # The checkpoint is selected with ``model=``. ``model_name`` is not a
        # ``pipeline`` parameter, so passing it did not select this checkpoint.
        pipe = pipeline(
            task="image-feature-extraction",
            model="google/vit-base-patch16-224-in21k",
            device=DEVICE,
            pool=True,
        )
        print("进入推理")
        print("打开图片1")
        # ``convert`` returns an independent, fully loaded image, so the
        # underlying file handle can be released immediately.
        with Image.open(url1) as source:
            image_real = source.convert('RGB')
        print("打开图片2")
        with Image.open(url2) as source:
            image_gen = source.convert('RGB')
        print("利用模型获取图片特征向量")
        outputs = pipe([image_real, image_gen])
        print(f"得到图片特征向量计算相似度: {outputs}")
        similarity_score = cosine_similarity(
            torch.Tensor(outputs[0]), torch.Tensor(outputs[1]), dim=1
        )
        print(f"得到图片相似度: {similarity_score}")
        # ``item()`` turns the single-element tensor into a plain float.
        return similarity_score.cpu().item()
    except Exception as e:
        # Deliberate best-effort: callers receive 0.0 instead of an exception.
        print(f"发生了一个错误: {e}")
        return 0.0
    finally:
        # Runs whether or not an exception occurred.
        print("这是finally块")
| # 推理 | |
def infer2(url):
    """Embed one image file with ViT-large and return the pooled output.

    Args:
        url: The image, in any form ``PIL.Image.open`` accepts. Despite the
            parameter name, the image is opened locally, not fetched.

    Returns:
        ``outputs.pooler_output`` of the model for the RGB-converted image,
        computed without autograd tracking.
    """
    checkpoint = 'google/vit-large-patch16-224-in21k'
    processor = AutoImageProcessor.from_pretrained(checkpoint)
    # Keep the weights on the same device the inputs are moved to below.
    model = ViTModel.from_pretrained(checkpoint).to(DEVICE)

    # ``convert`` yields a loaded copy, so the file handle can be closed here.
    with Image.open(url) as source:
        image = source.convert('RGB')

    inputs = processor(images=image, return_tensors="pt").to(DEVICE)
    # Pure inference: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.pooler_output
| # 计算相似度 | |
def infer1(image1, image2):
    """Return the cosine similarity of two images embedded by ``infer2``.

    Args:
        image1: First image, forwarded unchanged to ``infer2``.
        image2: Second image, forwarded unchanged to ``infer2``.

    Returns:
        The similarity as a Python float, or ``0.0`` when anything raises
        (the error is printed, not propagated).
    """
    try:
        first_embedding, second_embedding = infer2(image1), infer2(image2)
        score = cosine_similarity(first_embedding, second_embedding, dim=1)
        print(score)
        # Move to the CPU before extracting the scalar value.
        return score.cpu().item()
    except Exception as error:
        print(f"发生了一个错误: {error}")
        return 0.0
    finally:
        # Runs whether or not an exception occurred.
        print("这是finally块")
| # Cosine similarity between two already-opened images | |
def similarity_cpu(image1, image2):
    """Return the cosine similarity of two images embedded by ``xl_infer``.

    Args:
        image1: First image, forwarded unchanged to ``xl_infer``.
        image2: Second image, forwarded unchanged to ``xl_infer``.

    Returns:
        The similarity as a Python float on success. On any exception the
        error is printed and a string made of ``'异常'`` followed by the
        exception text is returned instead.
    """
    try:
        embeddings = [xl_infer(picture) for picture in (image1, image2)]
        score = cosine_similarity(embeddings[0], embeddings[1], dim=1)
        print(score)
        # Move to the CPU before extracting the scalar value.
        return score.cpu().item()
    except Exception as error:
        print(f"发生了一个错误: {error}")
        return '异常' + str(error)
    finally:
        # Runs whether or not an exception occurred.
        print("这是finally块")
| # 推理 | |
def xl_infer(url):
    """Embed an already-opened image with ViT-large and return the pooled output.

    Args:
        url: An image object exposing ``convert('RGB')``. Despite the
            parameter name this is not a URL or path; nothing is opened or
            downloaded here.

    Returns:
        ``outputs.pooler_output`` of the model for the RGB-converted image,
        computed without autograd tracking.
    """
    image = url.convert('RGB')
    checkpoint = 'google/vit-large-patch16-224-in21k'
    processor = AutoImageProcessor.from_pretrained(checkpoint)
    # Keep the weights on the same device the inputs are moved to below.
    model = AutoModel.from_pretrained(checkpoint).to(DEVICE)
    inputs = processor(images=image, return_tensors="pt").to(DEVICE)
    # Pure inference: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.pooler_output