Transformers
resnet50
Inference Endpoints
Kajuto committed on
Commit
b94a16e
1 Parent(s): 49020fe

Upload main.py

Files changed (1)
  1. main.py +98 -0
main.py ADDED
@@ -0,0 +1,98 @@
import os

import torch
from PIL import Image
from torchvision import transforms
from transformers import BlipProcessor, BlipForConditionalGeneration
from diffusers import (
    StableDiffusionPipeline,
    StableDiffusionImageVariationPipeline,
)

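# Pipeline overview: (1) produce a variation of the input image,
# (2) caption the input with BLIP, (3) feed the caption into a
# text-to-image model to render a redesigned version of the room.
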
def generate_image_caption(image_path):
    # Run everything on CPU; CUDA_LAUNCH_BLOCKING is only relevant
    # when debugging on a GPU.
    device = torch.device("cpu")
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

    # Image-variation diffusion pipeline
    sd_pipe = StableDiffusionImageVariationPipeline.from_pretrained(
        "lambdalabs/sd-image-variations-diffusers", revision="v2.0"
    )
    sd_pipe = sd_pipe.to(device)

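    # Preprocessing for the image-variation model: its CLIP-style image
    # encoder expects 224x224 inputs; Normalize with mean/std of 0.5 maps
    # pixel values from [0, 1] to [-1, 1].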
    img_transforms = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Resize(
                (224, 224),
                interpolation=transforms.InterpolationMode.BICUBIC,
                antialias=False,
            ),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ]
    )

    # Image-to-image: generate a variation of the input image.
    # Convert to RGB so the 3-channel Normalize above cannot fail on
    # grayscale or RGBA inputs.
    with Image.open(image_path) as img:
        img_tensor = img_transforms(img.convert("RGB")).to(device).unsqueeze(0)
    out = sd_pipe(img_tensor, guidance_scale=3)
    out["images"][0].save("img1.jpg")

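    # BLIP captioning: with a text prefix the model completes the prompt
    # ("conditional"); without one it describes the image from scratch
    # ("unconditional"). Only the unconditional caption is used below.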
    raw_image = Image.open(image_path).convert("RGB")

    processor = BlipProcessor.from_pretrained(
        "Salesforce/blip-image-captioning-large"
    )
    model = BlipForConditionalGeneration.from_pretrained(
        "Salesforce/blip-image-captioning-large"
    ).to(device)

    # Conditional image captioning (completes the given prefix)
    text = "a photography of"
    inputs = processor(raw_image, text, return_tensors="pt").to(device)
    out = model.generate(**inputs)
    conditional_caption = processor.decode(out[0], skip_special_tokens=True)

    # Unconditional image captioning
    inputs = processor(raw_image, return_tensors="pt").to(device)
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    # Stable diffusion pipeline
    model_id = "prompthero/openjourney"
    pipe = StableDiffusionPipeline.from_pretrained(
        model_id, torch_dtype=torch.float32
    )
    pipe = pipe.to(device)

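    # Build the redesign prompt from the BLIP caption plus fixed design
    # parameters (room type, intervention level, mode, style).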
    Room = "Living Room"
    AI_Intervention = "High"
    Mode = "Redesign"
    Design = "Modern"
    prompt = (
        f"Give me a realistic and complete image of {caption} "
        f"which room type: {Room}, AI Intervention: {AI_Intervention}, "
        f"Mode: {Mode} and Design style: {Design}"
    )
    image = pipe(prompt).images[0]
    image.save("result3.jpg")

# Use a raw string so the Windows path's backslashes are not treated as escapes.
generate_image_caption(r"C:\Master\First.jpg")