starriver030515 committed on
Commit
f59bac2
1 Parent(s): c80eac7

Upload 2 files

Browse files
Files changed (2) hide show
  1. gen_img.py +24 -0
  2. gen_sd3_1.py +50 -0
gen_img.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Render a single 1024x1024 image with Stable Diffusion 3 Medium and save it as ``image.jpg``."""
from diffusers import StableDiffusion3Pipeline
import torch

# Load the half-precision weights and place the whole pipeline on the GPU in one go.
pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",
    torch_dtype=torch.float16,
).to("cuda")

# Prompts tried earlier, kept for reference (swap one in for `prompt` below to reuse it):
# prompt = "The bus in the image is white and red.The back of the bus features an advertisement.The bus is driving down the street, which is crowded with people and other vehicles."
# prompt = "The cat is positioned on top of the back of the couch in the living room.The cat is coming out from some curtains onto the couch and is sitting or standing on top of it.The color of the curtains near the cat is red.There are lots of pillows on the couch besides the cat.The couch appears to be in a living room setting."
# prompt = "The dog in the image is brown.The dog has a red collar.The dog is sitting behind the window. As the dog stares out the window, it appears to feel longingly, perhaps suggesting a desire to be outside or interacting with someone or something that has caught its attention.The dog has shaggy, fuzzy, and furry brown fur, which makes it look cute and cuddly."
# prompt = "The image shows a large passenger jet belonging to China Airlines.The airplane is on the ground, as it is seen taxiing down the runway next to a body of water.The body of water is located right next to the runway where the airplane is taxiing.The landscape visible around the airplane consists of a runway and a large body of water or lake located next to it."
# prompt = "The girl in the image is eating a dessert, which appears to be a graham cracker treat or a cookie sandwich.The girl has blonde hair, and she is wearing a pink shirt.The dessert is on a green plate.The girl is looking up at the camera while taking a bite of her dessert.The girl is eating her dessert at the table."
# prompt = "The people in the image are cross-country skiing in the woods, as they are skiing on a trail rather than a steep slope.There are two people in the image, both on skis in the snow.They are skiing in a wooded environment, following a trail through the trees while surrounded by snow.Yes, the two male skiers are carrying backpacks while they ski through the woods. The backpacks might contain essentials for their skiing adventure, such as food, water, extra clothing, or safety equipment."
# prompt = "the original ticket to the fall creek fair is attached to a plastic sleeve"
prompt = "the 2020 volkswagen golf gt and the 2020 toyota fiesta."

# The same text is passed as both `prompt` and `prompt_3`; the negative prompt is left empty.
generation_settings = dict(
    prompt=prompt,
    prompt_3=prompt,
    negative_prompt="",
    num_inference_steps=60,
    height=1024,
    width=1024,
    guidance_scale=10.0,
    max_sequence_length=512,
)

# The pipeline output exposes a list of images; only the first one is kept.
output = pipe(**generation_settings)
image = output.images[0]

# Written relative to the current working directory.
image.save("image.jpg")
gen_sd3_1.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Batch-generate Stable Diffusion 3 Medium images for every entry of a JSON file.

Usage:
    python gen_sd3_1.py --json_filename data.json --cuda 0 [--image_dir OUT_DIR]

The JSON file must hold a list of entries. For each entry the script reads the
prompt from ``entry["conversations"][1]["value"]`` and writes the generated
image to ``<image_dir>/<entry["image"]>``, creating parent directories as
needed. An existing file at that path is overwritten.
"""
from diffusers import StableDiffusion3Pipeline
import torch
from PIL import Image
import os
import json
import argparse

parser = argparse.ArgumentParser(description="Diffusion Pipeline with Arguments")

parser.add_argument(
    "--json_filename",
    type=str,
    required=True,
    help="Path to the JSON file containing text data",
)
parser.add_argument(
    "--cuda", type=int, required=True, help="CUDA device to use for processing"
)
# Optional so that existing invocations keep writing to the original location.
parser.add_argument(
    "--image_dir",
    type=str,
    default="/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images",
    help="Root directory under which generated images are saved",
)

args = parser.parse_args()
json_filename = args.json_filename
cuda_device = f"cuda:{args.cuda}"
print(json_filename, cuda_device)

image_dir = args.image_dir
# Read as UTF-8 explicitly so parsing does not depend on the machine's locale.
with open(json_filename, "r", encoding="utf-8") as f:
    json_data = json.load(f)

pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",
    torch_dtype=torch.float16,
)
# Honor the requested GPU; a bare 'cuda' would put every worker on the default device.
pipe.to(cuda_device)

for text in json_data:
    # The second conversation turn carries the text used as the generation prompt.
    caption = text["conversations"][1]["value"]
    image = pipe(
        prompt=caption,
        prompt_3=caption,
        negative_prompt="",
        num_inference_steps=100,
        height=1024,
        width=1024,
        guidance_scale=10.0,
        max_sequence_length=512,
    ).images[0]

    image_path = os.path.join(image_dir, text["image"])
    # Create the real parent directory of the target file: this covers nested
    # sub-directories and bare filenames, and exist_ok avoids a race when
    # several workers create the same directory at once.
    os.makedirs(os.path.dirname(image_path), exist_ok=True)
    image.save(image_path)

print("所有图像已成功生成并保存。")