Text-to-Image · Diffusers · Safetensors · StableDiffusionXLPipeline · playground · Inference Endpoints
ehsanakh committed
Commit d81270f
0 Parent(s):

Duplicate from playgroundai/playground-v2.5-1024px-aesthetic

Co-authored-by: Ehsan Akhgari <ehsanakh@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
LICENSE.md ADDED
@@ -0,0 +1,75 @@
+ # Playground v2.5 Community License
+
+ **Release Date:** February 27, 2024
+
+ “Agreement” means the terms and conditions for use, reproduction, distribution and modification of the Playground Materials set forth herein.
+
+ “Documentation” means the specifications, manuals and documentation accompanying Playground v2.5 distributed by Playground at [https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic) or other authorized channel.
+
+ “Licensee” or “you” means you, or your employer or any other person or entity (if you are entering into this Agreement on such person or entity’s behalf), of the age required under applicable laws, rules or regulations to provide legal consent and that has legal authority to bind your employer or such other person or entity if you are entering in this Agreement on their behalf.
+
+ “Playground v2.5” means the diffusion-based text-to-image generative models and software and algorithms, including checkpoints, trained model weights, and other elements of the foregoing distributed by Playground at [https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic) or other authorized channel.
+
+ “Playground Materials” means, collectively, Playground v2.5 and related Documentation (and any portion thereof) made available under this Agreement.
+
+ “Playground” or “we” means Mighty Computing, Inc. dba Playground AI<sup>TM</sup>.
+
+ By using or distributing any portion or element of the Playground Materials, you agree to be bound by this Agreement.
+
+ ## 1. License Rights and Redistribution.
+
+ ### a. Grant of Rights.
+ You are granted a non-exclusive, worldwide, non-transferable and royalty-free limited license under Playground’s intellectual property or other rights owned by Playground embodied in the Playground Materials to use, reproduce, distribute, copy, create derivative works of, and make modifications to the Playground Materials. Subject to the restrictions herein, this permissive license is available for free for research and commercial use (by an entity or individual).
+
+ ### b. Redistribution and Use.
+ i. If you distribute or make the Playground Materials, or any derivative works thereof, available to any third party, you shall provide a copy of this Agreement to such third party.
+
+ ii. If you receive Playground Materials, or any derivative works thereof, from an authorized Licensee as part of an integrated end user product, then Section 2 of this Agreement will not apply to you.
+
+ iii. You must retain in all copies of the Playground Materials that you distribute the following attribution notice within a “Notice” text file distributed as a part of such copies: “Playground v2.5 is licensed under the Playground v2.5 Community License.”
+
+ iv. Your use of the Playground Materials must comply with applicable laws and regulations (including trade compliance laws and regulations) and adhere to the Use Restrictions set forth in Attachment A. You shall require all of your users who use Playground v2.5 or any derivative works thereof, to comply with the terms of this section and the restrictions in Attachment A.
+
+ v. You will not use the Playground Materials or any output or results of the Playground Materials to improve any other text-to-image generative model (excluding Playground v2.5 or derivative works thereof). Fine-tuning Playground v2.5 is expressly permitted as a derivative work.
+
+ ## 2. Additional Commercial Terms.
+ If, at any time, (a) image generation or image editing is a core business or product of Licensee’s and (b) the total monthly unique users (MUU) of the products or services made available by or for Licensee, or Licensee’s affiliates, for such products or services is greater than 1 million MUUs in the preceding calendar month, then immediately thereafter you must request a license from Playground. Playground may grant this license to you in its sole discretion and you are not authorized to exercise any of the rights under this Agreement unless or until Playground otherwise expressly grants you such rights as a Licensee.
+
+ ## 3. Disclaimer of Warranty.
+ UNLESS REQUIRED BY APPLICABLE LAW, THE PLAYGROUND MATERIALS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING THE PLAYGROUND MATERIALS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR USE OF THE PLAYGROUND MATERIALS AND ANY OUTPUT AND RESULTS.
+
+ ## 4. Limitation of Liability.
+ IN NO EVENT WILL PLAYGROUND OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN IF PLAYGROUND OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING.
+
+ ## 5. Intellectual Property.
+
+ a. No trademark licenses are granted under this Agreement, and in connection with the Playground Materials, neither Playground nor Licensee may use any name or mark owned by or associated with the other or any of its affiliates, except as required for reasonable and customary use in describing and redistributing the Playground Materials.
+
+ b. Subject to Playground’s ownership of Playground Materials and derivatives made by or for Playground, with respect to any derivative works and modifications of the Playground Materials that are made by you, as between you and Playground, you are and will be the owner of such derivative works and modifications.
+
+ c. If you institute litigation or other proceedings against Playground or any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Playground Materials or Playground v2.5 outputs or results, or any portion of any of the foregoing, constitutes infringement of intellectual property or other rights owned or licensable by you, then any licenses granted to you under this Agreement shall terminate as of the date such litigation or claim is filed or instituted. You will indemnify and hold harmless Playground from and against any claim by any third party arising out of or related to your use or distribution of the Playground Materials.
+
+ ## 6. Term and Termination.
+ The term of this Agreement will commence upon your acceptance of this Agreement or access to the Playground Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein. Playground may terminate this Agreement if you are in breach of any term or condition of this Agreement. Upon termination of this Agreement, you shall delete and cease use of the Playground Materials. Sections 3, 4 and 7 shall survive the termination of this Agreement.
+
+ ## 7. Governing Law and Jurisdiction.
+ This Agreement will be governed and construed under the laws of the State of California without regard to choice of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement. The courts of California shall have exclusive jurisdiction of any dispute arising out of this Agreement.
+
+
+ **Attachment A - Use Restrictions**
+
+ You agree not to use Playground v2.5 or any derivative works thereof:
+
+ <ol type="a">
+ <li>In any way that violates any applicable national, federal, state, local or international law or regulation;</li>
+ <li>For the purpose of exploiting, harming or attempting to exploit or harm minors in any way;</li>
+ <li>To generate or disseminate verifiably false information and/or content with the purpose of harming others;</li>
+ <li>To generate or disseminate personal identifiable information that can be used to harm an individual;</li>
+ <li>To defame, disparage or otherwise harass others;</li>
+ <li>For fully automated decision making that adversely impacts an individual’s legal rights or otherwise creates or modifies a binding, enforceable obligation;</li>
+ <li>For any use intended to or which has the effect of discriminating against or harming individuals or groups based on online or offline social behavior or known or predicted personal or personality characteristics;</li>
+ <li>To exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm;</li>
+ <li>For any use intended to or which has the effect of discriminating against individuals or groups based on legally protected characteristics or categories;</li>
+ <li>To provide medical advice and medical results interpretation;</li>
+ <li>To generate or disseminate information for the purpose to be used for administration of justice, law enforcement, immigration or asylum processes, such as predicting an individual will commit fraud/crime commitment (e.g. by text profiling, drawing causal relationships between assertions made in documents, indiscriminate and arbitrarily-targeted use).</li>
+ </ol>
README.md ADDED
@@ -0,0 +1,116 @@
+ ---
+ license: other
+ license_name: playground-v2dot5-community
+ license_link: https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic/blob/main/LICENSE.md
+ tags:
+ - text-to-image
+ - playground
+ inference:
+   parameters:
+     guidance_scale: 3.0
+ ---
+ # Playground v2.5 – 1024px Aesthetic Model
+
+ This repository contains a model that generates highly aesthetic images of resolution 1024x1024, as well as portrait and landscape aspect ratios. You can use the model with Hugging Face 🧨 Diffusers.
+
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/636c0c4eaae2da3c76b8a9a3/HYUUGfU6SOCHsvyeISQ5Y.png)
+
+ **Playground v2.5** is a diffusion-based text-to-image generative model, and a successor to [Playground v2](https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic).
+
+ Playground v2.5 is the state-of-the-art open-source model in aesthetic quality. Our user studies demonstrate that our model outperforms SDXL, Playground v2, PixArt-α, DALL-E 3, and Midjourney 5.2.
+
+ For details on the development and training of our model, please refer to our [blog post](https://blog.playgroundai.com/playground-v2-5/) and [technical report](https://marketing-cdn.playground.com/research/pgv2.5_compressed.pdf).
+
+ ### Model Description
+ - **Developed by:** [Playground](https://playground.com)
+ - **Model type:** Diffusion-based text-to-image generative model
+ - **License:** [Playground v2.5 Community License](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic/blob/main/LICENSE.md)
+ - **Summary:** This model generates images based on text prompts. It is a Latent Diffusion Model that uses two fixed, pre-trained text encoders (OpenCLIP-ViT/G and CLIP-ViT/L). It follows the same architecture as [Stable Diffusion XL](https://huggingface.co/docs/diffusers/en/using-diffusers/sdxl).
+
+ ### Using the model with 🧨 Diffusers
+
+ Install diffusers >= 0.27.0 and the relevant dependencies.
+
+ ```
+ pip install diffusers>=0.27.0
+ pip install transformers accelerate safetensors
+ ```
+
+ **Notes:**
+ - The pipeline uses the `EDMDPMSolverMultistepScheduler` scheduler by default, for crisper fine details. It's an [EDM formulation](https://arxiv.org/abs/2206.00364) of the DPM++ 2M Karras scheduler. `guidance_scale=3.0` is a good default for this scheduler.
+ - The pipeline also supports the `EDMEulerScheduler` scheduler. It's an [EDM formulation](https://arxiv.org/abs/2206.00364) of the Euler scheduler. `guidance_scale=5.0` is a good default for this scheduler.
+
+ Then, run the following snippet:
+
+ ```python
+ from diffusers import DiffusionPipeline
+ import torch
+
+ pipe = DiffusionPipeline.from_pretrained(
+     "playgroundai/playground-v2.5-1024px-aesthetic",
+     torch_dtype=torch.float16,
+     variant="fp16",
+ ).to("cuda")
+
+ # Optional: Use DPM++ 2M Karras scheduler for crisper fine details
+ # from diffusers import EDMDPMSolverMultistepScheduler
+ # pipe.scheduler = EDMDPMSolverMultistepScheduler()
+
+ prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
+ image = pipe(prompt=prompt, num_inference_steps=50, guidance_scale=3).images[0]
+ ```
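
The notes above also mention `EDMEulerScheduler` with `guidance_scale=5.0`. A minimal sketch of switching to it, assuming the `pipe` and `prompt` from the snippet above; the cross-class `from_config` swap is the usual Diffusers scheduler idiom rather than anything this card prescribes:

```python
# Sketch: use the EDM Euler scheduler instead of the default EDM DPM++ 2M Karras one
from diffusers import EDMEulerScheduler

# Build the Euler scheduler from the current scheduler's config; shared keys carry over,
# keys specific to the DPM-Solver scheduler are ignored
pipe.scheduler = EDMEulerScheduler.from_config(pipe.scheduler.config)

# guidance_scale=5.0 is the suggested default for this scheduler
image = pipe(prompt=prompt, num_inference_steps=50, guidance_scale=5.0).images[0]
```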
+
+ ### Using the model with Automatic1111/ComfyUI
+
+ Support coming soon. We will update this model card with instructions when ready.
+
+ ### User Studies
+
+ This model card only provides a brief summary of our user study results. For extensive details on how we perform user studies, please check out our [technical report](https://marketing-cdn.playground.com/research/pgv2.5_compressed.pdf).
+
+ We conducted studies to measure overall aesthetic quality, as well as the specific areas we aimed to improve with Playground v2.5, namely multi aspect ratios and human preference alignment.
+
+ #### Comparison to State-of-the-Art
+
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/63855d851769b7c4b10e1f76/V7LFNzgoQJnL__ndU0CnE.png)
+
+ In aesthetic quality, Playground v2.5 dramatically outperforms the current state-of-the-art open-source models SDXL and PixArt-α, as well as Playground v2. Because the performance differential between Playground v2.5 and SDXL was so large, we also tested our aesthetic quality against world-class closed-source models like DALL-E 3 and Midjourney 5.2, and found that Playground v2.5 outperforms them as well.
+
+ #### Multi Aspect Ratios
+
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/636c0c4eaae2da3c76b8a9a3/xMB0r-CmR3N6dABFlcV71.png)
+
+ Similarly, for multi aspect ratios, we outperform SDXL by a large margin.
+
+ #### Human Preference Alignment on People-related Images
+
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/636c0c4eaae2da3c76b8a9a3/7c-8Stw52OsNtUjse8Slv.png)
+
+ Next, we benchmark Playground v2.5 specifically on people-related images to test human preference alignment. We compared Playground v2.5 against two commonly used baseline models: SDXL and RealStock v2, a community fine-tune of SDXL that was trained on a realistic people dataset.
+
+ Playground v2.5 outperforms both baselines by a large margin.
+
+ ### MJHQ-30K Benchmark
+
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/636c0c4eaae2da3c76b8a9a3/7tyYDPGUtokh-k18XDSte.png)
+
+ | Model | Overall FID |
+ | --- | --- |
+ | SDXL-1-0-refiner | 9.55 |
+ | [playground-v2-1024px-aesthetic](https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic) | 7.07 |
+ | [playground-v2.5-1024px-aesthetic](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic) | **4.48** |
+
+ Lastly, we report metrics using our MJHQ-30K benchmark, which we [open-sourced](https://huggingface.co/datasets/playgroundai/MJHQ-30K) with the v2 release. We report both the overall FID and per-category FID; all FID metrics are computed at resolution 1024x1024. Our results show that Playground v2.5 outperforms both Playground v2 and SDXL in overall FID and all category FIDs, especially in the people and fashion categories. This is in line with the results of the user study, which indicates a correlation between human preferences and the FID score on the MJHQ-30K benchmark.
+
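
As a rough illustration of the protocol described above (overall FID at 1024x1024 between the benchmark's reference images and images generated from its prompts), here is a minimal sketch using `torchmetrics`; the folder names are placeholders, not the actual layout of the MJHQ-30K dataset repo, and a real run would stream the 30K images in batches:

```python
import numpy as np
import torch
from pathlib import Path
from PIL import Image
from torchmetrics.image.fid import FrechetInceptionDistance  # requires torchmetrics[image]

def load_images(folder: str, size: int = 1024) -> torch.Tensor:
    """Stack a folder of images into a uint8 tensor of shape (N, 3, H, W)."""
    imgs = [
        torch.from_numpy(np.array(Image.open(p).convert("RGB").resize((size, size)))).permute(2, 0, 1)
        for p in sorted(Path(folder).glob("*.png"))
    ]
    return torch.stack(imgs)

# FrechetInceptionDistance expects uint8 images by default (normalize=False)
fid = FrechetInceptionDistance(feature=2048)
fid.update(load_images("mjhq30k_reference/"), real=True)   # benchmark reference images (placeholder path)
fid.update(load_images("generated_samples/"), real=False)  # images generated from the benchmark prompts
print(float(fid.compute()))
```
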
+ ### How to cite us
+
+ ```
+ @misc{li2024playground,
+   title={Playground v2.5: Three Insights towards Enhancing Aesthetic Quality in Text-to-Image Generation},
+   author={Daiqing Li and Aleks Kamko and Ehsan Akhgari and Ali Sabet and Linmiao Xu and Suhail Doshi},
+   year={2024},
+   eprint={2402.17245},
+   archivePrefix={arXiv},
+   primaryClass={cs.CV}
+ }
+ ```
model_index.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_class_name": "StableDiffusionXLPipeline",
+   "_diffusers_version": "0.27.0.dev0",
+   "feature_extractor": [
+     null,
+     null
+   ],
+   "force_zeros_for_empty_prompt": true,
+   "image_encoder": [
+     null,
+     null
+   ],
+   "scheduler": [
+     "diffusers",
+     "EDMDPMSolverMultistepScheduler"
+   ],
+   "text_encoder": [
+     "transformers",
+     "CLIPTextModel"
+   ],
+   "text_encoder_2": [
+     "transformers",
+     "CLIPTextModelWithProjection"
+   ],
+   "tokenizer": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "tokenizer_2": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "unet": [
+     "diffusers",
+     "UNet2DConditionModel"
+   ],
+   "vae": [
+     "diffusers",
+     "AutoencoderKL"
+   ]
+ }
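
Each pair in model_index.json names the library and class Diffusers instantiates for that component. A small sketch of how those entries surface on a loaded pipeline (reusing the `pipe` from the README snippet above):

```python
# The component classes below come straight from model_index.json;
# printing them confirms the mapping on the loaded pipeline.
print(type(pipe).__name__)                  # StableDiffusionXLPipeline
print(type(pipe.scheduler).__name__)        # EDMDPMSolverMultistepScheduler
print(type(pipe.text_encoder).__name__)     # CLIPTextModel
print(type(pipe.text_encoder_2).__name__)   # CLIPTextModelWithProjection
print(type(pipe.unet).__name__)             # UNet2DConditionModel
print(type(pipe.vae).__name__)              # AutoencoderKL
```
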
playground-v2.5-1024px-aesthetic.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bcaa7dd6780974f000b17b5a6c63e6f867a75c51ffa85c67d6b196882c69b992
+ size 6938040576
playground-v2.5-1024px-aesthetic.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:956dca99114aaa5c3eb526381309d37ee96737e78ed64c8ae613409f47c3f65a
+ size 13875718056
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "_class_name": "EDMDPMSolverMultistepScheduler",
+   "_diffusers_version": "0.27.0.dev0",
+   "algorithm_type": "dpmsolver++",
+   "dynamic_thresholding_ratio": 0.995,
+   "euler_at_final": false,
+   "final_sigmas_type": "zero",
+   "lower_order_final": true,
+   "num_train_timesteps": 1000,
+   "prediction_type": "epsilon",
+   "rho": 7.0,
+   "sample_max_value": 1.0,
+   "sigma_data": 0.5,
+   "sigma_max": 80.0,
+   "sigma_min": 0.002,
+   "solver_order": 2,
+   "solver_type": "midpoint",
+   "thresholding": false
+ }
text_encoder/config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "architectures": [
+     "CLIPTextModel"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "dropout": 0.0,
+   "eos_token_id": 2,
+   "hidden_act": "quick_gelu",
+   "hidden_size": 768,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 77,
+   "model_type": "clip_text_model",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "projection_dim": 768,
+   "torch_dtype": "float32",
+   "transformers_version": "4.35.2",
+   "vocab_size": 49408
+ }
text_encoder/model.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
+ size 246144152
text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:778d02eb9e707c3fbaae0b67b79ea0d1399b52e624fb634f2f19375ae7c047c3
+ size 492265168
text_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b2a888f98b610f4666b5323f4012475cc752183ce3bbec3ccf25cf32cec03d7
+ size 492306586
text_encoder/pytorch_model.fp16.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1aaa9196f44f8283e6549b748927d0d24b91710c1a216be590e08458bb5d615c
+ size 246185562
text_encoder_2/config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "architectures": [
+     "CLIPTextModelWithProjection"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "dropout": 0.0,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_size": 1280,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 5120,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 77,
+   "model_type": "clip_text_model",
+   "num_attention_heads": 20,
+   "num_hidden_layers": 32,
+   "pad_token_id": 1,
+   "projection_dim": 1280,
+   "torch_dtype": "float32",
+   "transformers_version": "4.35.2",
+   "vocab_size": 49408
+ }
text_encoder_2/model.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ec310df2af79c318e24d20511b601a591ca8cd4f1fce1d8dff822a356bcdb1f4
+ size 1389382176
text_encoder_2/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fa5b2e6f4c2efc2d82e4b8312faec1a5540eabfc6415126c9a05c8436a530ef4
+ size 2778702264
text_encoder_2/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:01854181cb926f2b305bae76ba3bbacf9f8f6eff785aeafb8b22a3e8fbe4b9b0
+ size 2778810142
text_encoder_2/pytorch_model.fp16.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c61bac6a0e10e9c430b1faeb8338347758f7b5ca98dfbd7abff85c3e2f4305ea
+ size 1389490462
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "49406": {
+       "content": "<|startoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "49407": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|startoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "do_lower_case": true,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "model_max_length": 77,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "CLIPTokenizer",
+   "unk_token": "<|endoftext|>"
+ }
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_2/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_2/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "!",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer_2/tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "!",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "49406": {
+       "content": "<|startoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "49407": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|startoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "do_lower_case": true,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "model_max_length": 77,
+   "pad_token": "!",
+   "tokenizer_class": "CLIPTokenizer",
+   "unk_token": "<|endoftext|>"
+ }
tokenizer_2/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
unet/config.json ADDED
@@ -0,0 +1,72 @@
+ {
+   "_class_name": "UNet2DConditionModel",
+   "_diffusers_version": "0.27.0.dev0",
+   "act_fn": "silu",
+   "addition_embed_type": "text_time",
+   "addition_embed_type_num_heads": 64,
+   "addition_time_embed_dim": 256,
+   "attention_head_dim": [
+     5,
+     10,
+     20
+   ],
+   "attention_type": "default",
+   "block_out_channels": [
+     320,
+     640,
+     1280
+   ],
+   "center_input_sample": false,
+   "class_embed_type": null,
+   "class_embeddings_concat": false,
+   "conv_in_kernel": 3,
+   "conv_out_kernel": 3,
+   "cross_attention_dim": 2048,
+   "cross_attention_norm": null,
+   "down_block_types": [
+     "DownBlock2D",
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D"
+   ],
+   "downsample_padding": 1,
+   "dropout": 0.0,
+   "dual_cross_attention": false,
+   "encoder_hid_dim": null,
+   "encoder_hid_dim_type": null,
+   "flip_sin_to_cos": true,
+   "freq_shift": 0,
+   "in_channels": 4,
+   "layers_per_block": 2,
+   "mid_block_only_cross_attention": null,
+   "mid_block_scale_factor": 1,
+   "mid_block_type": "UNetMidBlock2DCrossAttn",
+   "norm_eps": 1e-05,
+   "norm_num_groups": 32,
+   "num_attention_heads": null,
+   "num_class_embeds": null,
+   "only_cross_attention": false,
+   "out_channels": 4,
+   "projection_class_embeddings_input_dim": 2816,
+   "resnet_out_scale_factor": 1.0,
+   "resnet_skip_time_act": false,
+   "resnet_time_scale_shift": "default",
+   "reverse_transformer_layers_per_block": null,
+   "sample_size": 128,
+   "time_cond_proj_dim": null,
+   "time_embedding_act_fn": null,
+   "time_embedding_dim": null,
+   "time_embedding_type": "positional",
+   "timestep_post_act": null,
+   "transformer_layers_per_block": [
+     1,
+     2,
+     10
+   ],
+   "up_block_types": [
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D",
+     "UpBlock2D"
+   ],
+   "upcast_attention": false,
+   "use_linear_projection": true
+ }
unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5544408cfc7601b9a121ed1e8e7f21142fe9d6badd55538b9ab828ad871e057c
+ size 10270604314
unet/diffusion_pytorch_model.fp16.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:26dc7594ac2d3b72f656d846dadec4a08395273f4c1a52a7966b979ab29063ca
+ size 5135669022
unet/diffusion_pytorch_model.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:933778ce76c1fc0ca918b37e1488411b8a99bbd3279c12f527a3ac995a340864
+ size 5135149760
unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:11b6d7bce65674659cc6b7ea960658436edfd80e566cb240ebd4bfbc3e2076c8
+ size 10270077736
vae/config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "_class_name": "AutoencoderKL",
+   "_diffusers_version": "0.27.0.dev0",
+   "act_fn": "silu",
+   "block_out_channels": [
+     128,
+     256,
+     512,
+     512
+   ],
+   "down_block_types": [
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D"
+   ],
+   "force_upcast": true,
+   "in_channels": 3,
+   "latent_channels": 4,
+   "layers_per_block": 2,
+   "norm_num_groups": 32,
+   "out_channels": 3,
+   "sample_size": 1024,
+   "up_block_types": [
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D"
+   ],
+   "latents_mean": [
+     -1.6574,
+     1.886,
+     -1.383,
+     2.5155
+   ],
+   "latents_std": [
+     8.4927,
+     5.9022,
+     6.5498,
+     5.2299
+   ],
+   "scaling_factor": 0.5
+ }
vae/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9512825399e39027a15fd0c7360dd0fb762d6faf87558d94fcf94e041b53e9f9
+ size 334712578
vae/diffusion_pytorch_model.fp16.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:814c655ef8cd535c57ae1bef01ecb06526839fede56b8dc6501c02525917fd20
+ size 167404866
vae/diffusion_pytorch_model.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bcb60880a46b63dea58e9bc591abe15f8350bde47b405f9c38f4be70c6161e68
+ size 167335342
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e8aef7b00195ec3fa8caaa3434e7516eff7d658e1d30eafc9ad6b0e66e9e827e
+ size 334643268