p1atdev committed
Commit: b857620
Parent: 84524fd

feat: image generation feature

Files changed (4):
  1. app.py +37 -56
  2. diffusion.py +58 -3
  3. utils.py +15 -0
  4. v2.py +0 -5
app.py CHANGED
@@ -4,9 +4,13 @@ from PIL import Image
 import gradio as gr
 
 from v2 import V2UI
-from diffusion import ImageGenerator
+from diffusion import ImageGenerator, image_generation_config_ui
 from output import UpsamplingOutput
-from utils import QUALITY_TAGS, NEGATIVE_PROMPT, IMAGE_SIZE_OPTIONS, PEOPLE_TAGS
+from utils import (
+    PEOPLE_TAGS,
+    gradio_copy_text,
+    COPY_ACTION_JS,
+)
 
 
 NORMALIZE_RATING_TAG = {
@@ -53,11 +57,7 @@ def elapsed_time_format(elapsed_time: float) -> str:
 def parse_upsampling_output(
     upsampler: Callable[..., UpsamplingOutput],
 ):
-    def _parse_upsampling_output(*args) -> tuple[
-        str,
-        str,
-        dict,
-    ]:
+    def _parse_upsampling_output(*args) -> tuple[str, str, dict, dict]:
         output = upsampler(*args)
 
         print(output)
@@ -68,54 +68,14 @@ def parse_upsampling_output(
             gr.update(
                 interactive=True,
             ),
+            gr.update(
+                interactive=True,
+            ),
         )
 
     return _parse_upsampling_output
 
 
-def image_generation_config_ui():
-    with gr.Accordion(label="Image generation config", open=False) as accordion:
-        image_size = gr.Radio(
-            label="Image size",
-            choices=list(IMAGE_SIZE_OPTIONS.keys()),
-            value=list(IMAGE_SIZE_OPTIONS.keys())[3],  # tall
-        )
-
-        quality_tags = gr.Textbox(
-            label="Quality tags",
-            placeholder=QUALITY_TAGS["default"],
-            value=QUALITY_TAGS["default"],
-        )
-        negative_prompt = gr.Textbox(
-            label="Negative prompt",
-            placeholder=NEGATIVE_PROMPT["default"],
-            value=NEGATIVE_PROMPT["default"],
-        )
-
-        num_inference_steps = gr.Slider(
-            label="Num inference steps",
-            minimum=20,
-            maximum=30,
-            step=1,
-            value=25,
-        )
-        guidance_scale = gr.Slider(
-            label="Guidance scale",
-            minimum=0.0,
-            maximum=10.0,
-            step=0.5,
-            value=7.0,
-        )
-
-    return accordion, [
-        image_size,
-        quality_tags,
-        negative_prompt,
-        num_inference_steps,
-        guidance_scale,
-    ]
-
-
 def description_ui():
     gr.Markdown(
         """
@@ -129,7 +89,7 @@ def main():
     v2 = V2UI()
 
     print("Loading diffusion model...")
-    # image_generator = ImageGenerator()
+    image_generator = ImageGenerator()
     print("Loaded.")
 
     with gr.Blocks() as ui:
@@ -140,12 +100,18 @@ def main():
         v2.ui()
 
         with gr.Column():
-            output_text = gr.TextArea(label="Output tags", interactive=False)
+            with gr.Group():
+                output_text = gr.TextArea(label="Output tags", interactive=False)
+                copy_btn = gr.Button(
+                    value="Copy to clipboard",
+                    interactive=False,
+                )
 
             elapsed_time_md = gr.Markdown(label="Elapsed time", value="")
 
             generate_image_btn = gr.Button(
                 value="Generate image with this prompt!",
+                interactive=False,
             )
 
             accordion, image_generation_config_components = (
@@ -153,11 +119,11 @@ def main():
             )
 
             output_image = gr.Gallery(
-                label="Output image",
+                label="Generated image",
+                show_label=True,
                 columns=1,
                 preview=True,
-                show_label=False,
-                visible=False,
+                visible=True,
             )
 
             gr.Examples(
@@ -216,6 +182,15 @@ def main():
                 "long",
                 "lax",
             ],
+            [
+                "honkai: star rail",
+                "firefly (honkai: star rail)",
+                "1girl, solo",
+                "sfw",
+                "tall",
+                "medium",
+                "lax",
+            ],
             [
                 "honkai: star rail",
                 "silver wolf (honkai: star rail)",
@@ -245,7 +220,13 @@ def main():
             inputs=[
                 *v2.get_inputs(),
             ],
-            outputs=[output_text, elapsed_time_md, generate_image_btn],
+            outputs=[output_text, elapsed_time_md, copy_btn, generate_image_btn],
+        )
+        copy_btn.click(gradio_copy_text, inputs=[output_text], js=COPY_ACTION_JS)
+        generate_image_btn.click(
+            image_generator.generate,
+            inputs=[output_text, *image_generation_config_components],
+            outputs=[output_image],
         )
 
     ui.launch()
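
Note: the hunks above only show fragments of the `_parse_upsampling_output` closure. A minimal sketch of how the full inner function likely reads after this commit — the `UpsamplingOutput` field names (`upsampled_tags`, `elapsed_time`) are assumptions, since the diff never shows them; the 4-tuple lines up with `outputs=[output_text, elapsed_time_md, copy_btn, generate_image_btn]`:

def parse_upsampling_output(
    upsampler: Callable[..., UpsamplingOutput],
):
    def _parse_upsampling_output(*args) -> tuple[str, str, dict, dict]:
        output = upsampler(*args)

        print(output)

        return (
            output.upsampled_tags,  # assumed field: tag text for output_text
            elapsed_time_format(output.elapsed_time),  # assumed field: text for elapsed_time_md
            gr.update(interactive=True),  # enables copy_btn
            gr.update(interactive=True),  # enables generate_image_btn
        )

    return _parse_upsampling_output
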
diffusion.py CHANGED
@@ -19,12 +19,61 @@ except ImportError:
     return lambda x: x
 
 
-from utils import NEGATIVE_PROMPT
+import gradio as gr
+from utils import NEGATIVE_PROMPT, IMAGE_SIZE_OPTIONS, QUALITY_TAGS, IMAGE_SIZES
 
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
+def image_generation_config_ui():
+    with gr.Accordion(label="Image generation config", open=False) as accordion:
+        image_size = gr.Radio(
+            label="Image size",
+            choices=list(IMAGE_SIZE_OPTIONS.keys()),
+            value=list(IMAGE_SIZE_OPTIONS.keys())[3],
+            interactive=True,
+        )
+
+        quality_tags = gr.Textbox(
+            label="Quality tags",
+            placeholder=QUALITY_TAGS["default"],
+            value=QUALITY_TAGS["default"],
+            interactive=True,
+        )
+        negative_prompt = gr.Textbox(
+            label="Negative prompt",
+            placeholder=NEGATIVE_PROMPT["default"],
+            value=NEGATIVE_PROMPT["default"],
+            interactive=True,
+        )
+
+        num_inference_steps = gr.Slider(
+            label="Num inference steps",
+            minimum=20,
+            maximum=30,
+            step=1,
+            value=25,
+            interactive=True,
+        )
+        guidance_scale = gr.Slider(
+            label="Guidance scale",
+            minimum=0.0,
+            maximum=10.0,
+            step=0.5,
+            value=7.0,
+            interactive=True,
+        )
+
+    return accordion, [
+        image_size,
+        quality_tags,
+        negative_prompt,
+        num_inference_steps,
+        guidance_scale,
+    ]
+
+
 class ImageGenerator:
     pipe: StableDiffusionXLPipeline
 
@@ -56,12 +105,18 @@ class ImageGenerator:
     def generate(
         self,
         prompt: str,
+        image_size: str = "768x1344",
+        quality_tags: str = QUALITY_TAGS["default"],  # Light v3.1
         negative_prompt: str = NEGATIVE_PROMPT["default"],  # Light v3.1
-        height: int = 1152,
-        width: int = 896,
+        # height: int = 1152,
+        # width: int = 896,
         num_inference_steps: int = 25,
         guidance_scale: float = 7.0,
     ) -> Image.Image:
+        width, height = IMAGE_SIZES[image_size]
+
+        prompt = ", ".join([prompt, quality_tags])
+
         print("prompt", prompt)
        print("negative_prompt", negative_prompt)
         print("height", height)
utils.py CHANGED
@@ -1,3 +1,4 @@
+import gradio as gr
 from dartrs.v2 import AspectRatioTag, LengthTag, RatingTag, IdentityTag
 
 # from https://huggingface.co/spaces/cagliostrolab/animagine-xl-3.1/blob/main/config.py
@@ -59,3 +60,17 @@ PEOPLE_TAGS = [
     *[f"6+{x}s" for x in ["girl", "boy", "other"]],
     "no humans",
 ]
+
+
+# ref: https://qiita.com/tregu148/items/fccccbbc47d966dd2fc2
+def gradio_copy_text(_text: None):
+    gr.Info("Copied!")
+
+
+COPY_ACTION_JS = """\
+(inputs, _outputs) => {
+    // inputs is the string value of the input_text
+    if (inputs.trim() !== "") {
+        navigator.clipboard.writeText(inputs);
+    }
+}"""
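
Note: the copy feature splits work between browser and server — `COPY_ACTION_JS` performs the actual clipboard write client-side, while the Python callback only raises the "Copied!" toast. A minimal standalone demo of the same wiring app.py uses (assuming a Gradio version whose `Button.click` accepts `js=`, as this commit does):

import gradio as gr

from utils import COPY_ACTION_JS, gradio_copy_text

with gr.Blocks() as demo:
    text = gr.TextArea(value="1girl, solo")
    btn = gr.Button("Copy to clipboard")
    # The js snippet runs in the browser first, then gradio_copy_text shows the toast.
    btn.click(gradio_copy_text, inputs=[text], js=COPY_ACTION_JS)

demo.launch()
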
v2.py CHANGED
@@ -30,11 +30,6 @@ from utils import ASPECT_RATIO_OPTIONS, RATING_OPTIONS, LENGTH_OPTIONS, IDENTITY
 HF_TOKEN = os.getenv("HF_TOKEN", None)
 
 ALL_MODELS = {
-    "dart-v2-mixtral-160m-sft-6": {
-        "repo": "p1atdev/dart-v2-mixtral-160m-sft-6",
-        "type": "sft",
-        "class": MixtralModel,
-    },
     "dart-v2-mixtral-160m-sft-8": {
         "repo": "p1atdev/dart-v2-mixtral-160m-sft-8",
         "type": "sft",